1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright 2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: AMD 24 * 25 */ 26 27 #define UNIT_TEST 0 28 #if !UNIT_TEST 29 #include "dc.h" 30 #endif 31 #include "../display_mode_lib.h" 32 #include "display_mode_vba_314.h" 33 #include "../dml_inline_defs.h" 34 35 /* 36 * NOTE: 37 * This file is gcc-parsable HW gospel, coming straight from HW engineers. 38 * 39 * It doesn't adhere to Linux kernel style and sometimes will do things in odd 40 * ways. Unless there is something clearly wrong with it the code should 41 * remain as-is as it provides us with a guarantee from HW that it is correct. 42 */ 43 44 #define BPP_INVALID 0 45 #define BPP_BLENDED_PIPE 0xffffffff 46 #define DCN314_MAX_DSC_IMAGE_WIDTH 5184 47 #define DCN314_MAX_FMT_420_BUFFER_WIDTH 4096 48 49 // For DML-C changes that hasn't been propagated to VBA yet 50 //#define __DML_VBA_ALLOW_DELTA__ 51 52 // Move these to ip parameters/constant 53 54 // At which vstartup the DML start to try if the mode can be supported 55 #define __DML_VBA_MIN_VSTARTUP__ 9 56 57 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET) 58 #define __DML_ARB_TO_RET_DELAY__ (7 + 95) 59 60 // fudge factor for min dcfclk calclation 61 #define __DML_MIN_DCFCLK_FACTOR__ 1.15 62 63 typedef struct { 64 double DPPCLK; 65 double DISPCLK; 66 double PixelClock; 67 double DCFCLKDeepSleep; 68 unsigned int DPPPerPlane; 69 bool ScalerEnabled; 70 double VRatio; 71 double VRatioChroma; 72 enum scan_direction_class SourceScan; 73 unsigned int BlockWidth256BytesY; 74 unsigned int BlockHeight256BytesY; 75 unsigned int BlockWidth256BytesC; 76 unsigned int BlockHeight256BytesC; 77 unsigned int InterlaceEnable; 78 unsigned int NumberOfCursors; 79 unsigned int VBlank; 80 unsigned int HTotal; 81 unsigned int DCCEnable; 82 bool ODMCombineIsEnabled; 83 enum source_format_class SourcePixelFormat; 84 int BytePerPixelY; 85 int BytePerPixelC; 86 bool ProgressiveToInterlaceUnitInOPP; 87 } Pipe; 88 89 #define BPP_INVALID 0 90 #define BPP_BLENDED_PIPE 0xffffffff 91 92 static bool CalculateBytePerPixelAnd256BBlockSizes( 93 enum source_format_class SourcePixelFormat, 94 enum dm_swizzle_mode SurfaceTiling, 95 unsigned int *BytePerPixelY, 96 unsigned int *BytePerPixelC, 97 double *BytePerPixelDETY, 98 double *BytePerPixelDETC, 99 unsigned int *BlockHeight256BytesY, 100 unsigned int *BlockHeight256BytesC, 101 unsigned int *BlockWidth256BytesY, 102 unsigned int *BlockWidth256BytesC); 103 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); 104 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib); 105 static unsigned int dscceComputeDelay( 106 unsigned int bpc, 107 double BPP, 108 unsigned int sliceWidth, 109 unsigned int numSlices, 110 enum output_format_class pixelFormat, 111 enum output_encoder_class Output); 112 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output); 113 static bool CalculatePrefetchSchedule( 114 struct display_mode_lib *mode_lib, 115 double HostVMInefficiencyFactor, 116 Pipe *myPipe, 117 unsigned int DSCDelay, 118 double DPPCLKDelaySubtotalPlusCNVCFormater, 119 double DPPCLKDelaySCL, 120 double DPPCLKDelaySCLLBOnly, 121 double DPPCLKDelayCNVCCursor, 122 double DISPCLKDelaySubtotal, 123 unsigned int DPP_RECOUT_WIDTH, 124 enum output_format_class OutputFormat, 125 unsigned int MaxInterDCNTileRepeaters, 126 unsigned int VStartup, 127 unsigned int MaxVStartup, 128 unsigned int GPUVMPageTableLevels, 129 bool GPUVMEnable, 130 bool HostVMEnable, 131 unsigned int HostVMMaxNonCachedPageTableLevels, 132 double HostVMMinPageSize, 133 bool DynamicMetadataEnable, 134 bool DynamicMetadataVMEnabled, 135 int DynamicMetadataLinesBeforeActiveRequired, 136 unsigned int DynamicMetadataTransmittedBytes, 137 double UrgentLatency, 138 double UrgentExtraLatency, 139 double TCalc, 140 unsigned int PDEAndMetaPTEBytesFrame, 141 unsigned int MetaRowByte, 142 unsigned int PixelPTEBytesPerRow, 143 double PrefetchSourceLinesY, 144 unsigned int SwathWidthY, 145 double VInitPreFillY, 146 unsigned int MaxNumSwathY, 147 double PrefetchSourceLinesC, 148 unsigned int SwathWidthC, 149 double VInitPreFillC, 150 unsigned int MaxNumSwathC, 151 int swath_width_luma_ub, 152 int swath_width_chroma_ub, 153 unsigned int SwathHeightY, 154 unsigned int SwathHeightC, 155 double TWait, 156 double *DSTXAfterScaler, 157 double *DSTYAfterScaler, 158 double *DestinationLinesForPrefetch, 159 double *PrefetchBandwidth, 160 double *DestinationLinesToRequestVMInVBlank, 161 double *DestinationLinesToRequestRowInVBlank, 162 double *VRatioPrefetchY, 163 double *VRatioPrefetchC, 164 double *RequiredPrefetchPixDataBWLuma, 165 double *RequiredPrefetchPixDataBWChroma, 166 bool *NotEnoughTimeForDynamicMetadata, 167 double *Tno_bw, 168 double *prefetch_vmrow_bw, 169 double *Tdmdl_vm, 170 double *Tdmdl, 171 double *TSetup, 172 int *VUpdateOffsetPix, 173 double *VUpdateWidthPix, 174 double *VReadyOffsetPix); 175 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); 176 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); 177 static void CalculateDCCConfiguration( 178 bool DCCEnabled, 179 bool DCCProgrammingAssumesScanDirectionUnknown, 180 enum source_format_class SourcePixelFormat, 181 unsigned int SurfaceWidthLuma, 182 unsigned int SurfaceWidthChroma, 183 unsigned int SurfaceHeightLuma, 184 unsigned int SurfaceHeightChroma, 185 double DETBufferSize, 186 unsigned int RequestHeight256ByteLuma, 187 unsigned int RequestHeight256ByteChroma, 188 enum dm_swizzle_mode TilingFormat, 189 unsigned int BytePerPixelY, 190 unsigned int BytePerPixelC, 191 double BytePerPixelDETY, 192 double BytePerPixelDETC, 193 enum scan_direction_class ScanOrientation, 194 unsigned int *MaxUncompressedBlockLuma, 195 unsigned int *MaxUncompressedBlockChroma, 196 unsigned int *MaxCompressedBlockLuma, 197 unsigned int *MaxCompressedBlockChroma, 198 unsigned int *IndependentBlockLuma, 199 unsigned int *IndependentBlockChroma); 200 static double CalculatePrefetchSourceLines( 201 struct display_mode_lib *mode_lib, 202 double VRatio, 203 double vtaps, 204 bool Interlace, 205 bool ProgressiveToInterlaceUnitInOPP, 206 unsigned int SwathHeight, 207 unsigned int ViewportYStart, 208 double *VInitPreFill, 209 unsigned int *MaxNumSwath); 210 static unsigned int CalculateVMAndRowBytes( 211 struct display_mode_lib *mode_lib, 212 bool DCCEnable, 213 unsigned int BlockHeight256Bytes, 214 unsigned int BlockWidth256Bytes, 215 enum source_format_class SourcePixelFormat, 216 unsigned int SurfaceTiling, 217 unsigned int BytePerPixel, 218 enum scan_direction_class ScanDirection, 219 unsigned int SwathWidth, 220 unsigned int ViewportHeight, 221 bool GPUVMEnable, 222 bool HostVMEnable, 223 unsigned int HostVMMaxNonCachedPageTableLevels, 224 unsigned int GPUVMMinPageSize, 225 unsigned int HostVMMinPageSize, 226 unsigned int PTEBufferSizeInRequests, 227 unsigned int Pitch, 228 unsigned int DCCMetaPitch, 229 unsigned int *MacroTileWidth, 230 unsigned int *MetaRowByte, 231 unsigned int *PixelPTEBytesPerRow, 232 bool *PTEBufferSizeNotExceeded, 233 int *dpte_row_width_ub, 234 unsigned int *dpte_row_height, 235 unsigned int *MetaRequestWidth, 236 unsigned int *MetaRequestHeight, 237 unsigned int *meta_row_width, 238 unsigned int *meta_row_height, 239 int *vm_group_bytes, 240 unsigned int *dpte_group_bytes, 241 unsigned int *PixelPTEReqWidth, 242 unsigned int *PixelPTEReqHeight, 243 unsigned int *PTERequestSize, 244 int *DPDE0BytesFrame, 245 int *MetaPTEBytesFrame); 246 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime); 247 static void CalculateRowBandwidth( 248 bool GPUVMEnable, 249 enum source_format_class SourcePixelFormat, 250 double VRatio, 251 double VRatioChroma, 252 bool DCCEnable, 253 double LineTime, 254 unsigned int MetaRowByteLuma, 255 unsigned int MetaRowByteChroma, 256 unsigned int meta_row_height_luma, 257 unsigned int meta_row_height_chroma, 258 unsigned int PixelPTEBytesPerRowLuma, 259 unsigned int PixelPTEBytesPerRowChroma, 260 unsigned int dpte_row_height_luma, 261 unsigned int dpte_row_height_chroma, 262 double *meta_row_bw, 263 double *dpte_row_bw); 264 265 static void CalculateFlipSchedule( 266 struct display_mode_lib *mode_lib, 267 unsigned int k, 268 double HostVMInefficiencyFactor, 269 double UrgentExtraLatency, 270 double UrgentLatency, 271 double PDEAndMetaPTEBytesPerFrame, 272 double MetaRowBytes, 273 double DPTEBytesPerRow); 274 static double CalculateWriteBackDelay( 275 enum source_format_class WritebackPixelFormat, 276 double WritebackHRatio, 277 double WritebackVRatio, 278 unsigned int WritebackVTaps, 279 int WritebackDestinationWidth, 280 int WritebackDestinationHeight, 281 int WritebackSourceHeight, 282 unsigned int HTotal); 283 284 static void CalculateVupdateAndDynamicMetadataParameters( 285 int MaxInterDCNTileRepeaters, 286 double DPPCLK, 287 double DISPCLK, 288 double DCFClkDeepSleep, 289 double PixelClock, 290 int HTotal, 291 int VBlank, 292 int DynamicMetadataTransmittedBytes, 293 int DynamicMetadataLinesBeforeActiveRequired, 294 int InterlaceEnable, 295 bool ProgressiveToInterlaceUnitInOPP, 296 double *TSetup, 297 double *Tdmbf, 298 double *Tdmec, 299 double *Tdmsks, 300 int *VUpdateOffsetPix, 301 double *VUpdateWidthPix, 302 double *VReadyOffsetPix); 303 304 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 305 struct display_mode_lib *mode_lib, 306 unsigned int PrefetchMode, 307 double DCFCLK, 308 double ReturnBW, 309 double UrgentLatency, 310 double ExtraLatency, 311 double SOCCLK, 312 double DCFCLKDeepSleep, 313 unsigned int DETBufferSizeY[], 314 unsigned int DETBufferSizeC[], 315 unsigned int SwathHeightY[], 316 unsigned int SwathHeightC[], 317 double SwathWidthY[], 318 double SwathWidthC[], 319 unsigned int DPPPerPlane[], 320 double BytePerPixelDETY[], 321 double BytePerPixelDETC[], 322 bool UnboundedRequestEnabled, 323 unsigned int CompressedBufferSizeInkByte, 324 enum clock_change_support *DRAMClockChangeSupport, 325 double *StutterExitWatermark, 326 double *StutterEnterPlusExitWatermark, 327 double *Z8StutterExitWatermark, 328 double *Z8StutterEnterPlusExitWatermark); 329 330 static void CalculateDCFCLKDeepSleep( 331 struct display_mode_lib *mode_lib, 332 unsigned int NumberOfActivePlanes, 333 int BytePerPixelY[], 334 int BytePerPixelC[], 335 double VRatio[], 336 double VRatioChroma[], 337 double SwathWidthY[], 338 double SwathWidthC[], 339 unsigned int DPPPerPlane[], 340 double HRatio[], 341 double HRatioChroma[], 342 double PixelClock[], 343 double PSCL_THROUGHPUT[], 344 double PSCL_THROUGHPUT_CHROMA[], 345 double DPPCLK[], 346 double ReadBandwidthLuma[], 347 double ReadBandwidthChroma[], 348 int ReturnBusWidth, 349 double *DCFCLKDeepSleep); 350 351 static void CalculateUrgentBurstFactor( 352 int swath_width_luma_ub, 353 int swath_width_chroma_ub, 354 unsigned int SwathHeightY, 355 unsigned int SwathHeightC, 356 double LineTime, 357 double UrgentLatency, 358 double CursorBufferSize, 359 unsigned int CursorWidth, 360 unsigned int CursorBPP, 361 double VRatio, 362 double VRatioC, 363 double BytePerPixelInDETY, 364 double BytePerPixelInDETC, 365 double DETBufferSizeY, 366 double DETBufferSizeC, 367 double *UrgentBurstFactorCursor, 368 double *UrgentBurstFactorLuma, 369 double *UrgentBurstFactorChroma, 370 bool *NotEnoughUrgentLatencyHiding); 371 372 static void UseMinimumDCFCLK( 373 struct display_mode_lib *mode_lib, 374 int MaxPrefetchMode, 375 int ReorderingBytes); 376 377 static void CalculatePixelDeliveryTimes( 378 unsigned int NumberOfActivePlanes, 379 double VRatio[], 380 double VRatioChroma[], 381 double VRatioPrefetchY[], 382 double VRatioPrefetchC[], 383 unsigned int swath_width_luma_ub[], 384 unsigned int swath_width_chroma_ub[], 385 unsigned int DPPPerPlane[], 386 double HRatio[], 387 double HRatioChroma[], 388 double PixelClock[], 389 double PSCL_THROUGHPUT[], 390 double PSCL_THROUGHPUT_CHROMA[], 391 double DPPCLK[], 392 int BytePerPixelC[], 393 enum scan_direction_class SourceScan[], 394 unsigned int NumberOfCursors[], 395 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 396 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 397 unsigned int BlockWidth256BytesY[], 398 unsigned int BlockHeight256BytesY[], 399 unsigned int BlockWidth256BytesC[], 400 unsigned int BlockHeight256BytesC[], 401 double DisplayPipeLineDeliveryTimeLuma[], 402 double DisplayPipeLineDeliveryTimeChroma[], 403 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 404 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 405 double DisplayPipeRequestDeliveryTimeLuma[], 406 double DisplayPipeRequestDeliveryTimeChroma[], 407 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 408 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 409 double CursorRequestDeliveryTime[], 410 double CursorRequestDeliveryTimePrefetch[]); 411 412 static void CalculateMetaAndPTETimes( 413 int NumberOfActivePlanes, 414 bool GPUVMEnable, 415 int MetaChunkSize, 416 int MinMetaChunkSizeBytes, 417 int HTotal[], 418 double VRatio[], 419 double VRatioChroma[], 420 double DestinationLinesToRequestRowInVBlank[], 421 double DestinationLinesToRequestRowInImmediateFlip[], 422 bool DCCEnable[], 423 double PixelClock[], 424 int BytePerPixelY[], 425 int BytePerPixelC[], 426 enum scan_direction_class SourceScan[], 427 int dpte_row_height[], 428 int dpte_row_height_chroma[], 429 int meta_row_width[], 430 int meta_row_width_chroma[], 431 int meta_row_height[], 432 int meta_row_height_chroma[], 433 int meta_req_width[], 434 int meta_req_width_chroma[], 435 int meta_req_height[], 436 int meta_req_height_chroma[], 437 int dpte_group_bytes[], 438 int PTERequestSizeY[], 439 int PTERequestSizeC[], 440 int PixelPTEReqWidthY[], 441 int PixelPTEReqHeightY[], 442 int PixelPTEReqWidthC[], 443 int PixelPTEReqHeightC[], 444 int dpte_row_width_luma_ub[], 445 int dpte_row_width_chroma_ub[], 446 double DST_Y_PER_PTE_ROW_NOM_L[], 447 double DST_Y_PER_PTE_ROW_NOM_C[], 448 double DST_Y_PER_META_ROW_NOM_L[], 449 double DST_Y_PER_META_ROW_NOM_C[], 450 double TimePerMetaChunkNominal[], 451 double TimePerChromaMetaChunkNominal[], 452 double TimePerMetaChunkVBlank[], 453 double TimePerChromaMetaChunkVBlank[], 454 double TimePerMetaChunkFlip[], 455 double TimePerChromaMetaChunkFlip[], 456 double time_per_pte_group_nom_luma[], 457 double time_per_pte_group_vblank_luma[], 458 double time_per_pte_group_flip_luma[], 459 double time_per_pte_group_nom_chroma[], 460 double time_per_pte_group_vblank_chroma[], 461 double time_per_pte_group_flip_chroma[]); 462 463 static void CalculateVMGroupAndRequestTimes( 464 unsigned int NumberOfActivePlanes, 465 bool GPUVMEnable, 466 unsigned int GPUVMMaxPageTableLevels, 467 unsigned int HTotal[], 468 int BytePerPixelC[], 469 double DestinationLinesToRequestVMInVBlank[], 470 double DestinationLinesToRequestVMInImmediateFlip[], 471 bool DCCEnable[], 472 double PixelClock[], 473 int dpte_row_width_luma_ub[], 474 int dpte_row_width_chroma_ub[], 475 int vm_group_bytes[], 476 unsigned int dpde0_bytes_per_frame_ub_l[], 477 unsigned int dpde0_bytes_per_frame_ub_c[], 478 int meta_pte_bytes_per_frame_ub_l[], 479 int meta_pte_bytes_per_frame_ub_c[], 480 double TimePerVMGroupVBlank[], 481 double TimePerVMGroupFlip[], 482 double TimePerVMRequestVBlank[], 483 double TimePerVMRequestFlip[]); 484 485 static void CalculateStutterEfficiency( 486 struct display_mode_lib *mode_lib, 487 int CompressedBufferSizeInkByte, 488 bool UnboundedRequestEnabled, 489 int ConfigReturnBufferSizeInKByte, 490 int MetaFIFOSizeInKEntries, 491 int ZeroSizeBufferEntries, 492 int NumberOfActivePlanes, 493 int ROBBufferSizeInKByte, 494 double TotalDataReadBandwidth, 495 double DCFCLK, 496 double ReturnBW, 497 double COMPBUF_RESERVED_SPACE_64B, 498 double COMPBUF_RESERVED_SPACE_ZS, 499 double SRExitTime, 500 double SRExitZ8Time, 501 bool SynchronizedVBlank, 502 double Z8StutterEnterPlusExitWatermark, 503 double StutterEnterPlusExitWatermark, 504 bool ProgressiveToInterlaceUnitInOPP, 505 bool Interlace[], 506 double MinTTUVBlank[], 507 int DPPPerPlane[], 508 unsigned int DETBufferSizeY[], 509 int BytePerPixelY[], 510 double BytePerPixelDETY[], 511 double SwathWidthY[], 512 int SwathHeightY[], 513 int SwathHeightC[], 514 double NetDCCRateLuma[], 515 double NetDCCRateChroma[], 516 double DCCFractionOfZeroSizeRequestsLuma[], 517 double DCCFractionOfZeroSizeRequestsChroma[], 518 int HTotal[], 519 int VTotal[], 520 double PixelClock[], 521 double VRatio[], 522 enum scan_direction_class SourceScan[], 523 int BlockHeight256BytesY[], 524 int BlockWidth256BytesY[], 525 int BlockHeight256BytesC[], 526 int BlockWidth256BytesC[], 527 int DCCYMaxUncompressedBlock[], 528 int DCCCMaxUncompressedBlock[], 529 int VActive[], 530 bool DCCEnable[], 531 bool WritebackEnable[], 532 double ReadBandwidthPlaneLuma[], 533 double ReadBandwidthPlaneChroma[], 534 double meta_row_bw[], 535 double dpte_row_bw[], 536 double *StutterEfficiencyNotIncludingVBlank, 537 double *StutterEfficiency, 538 int *NumberOfStutterBurstsPerFrame, 539 double *Z8StutterEfficiencyNotIncludingVBlank, 540 double *Z8StutterEfficiency, 541 int *Z8NumberOfStutterBurstsPerFrame, 542 double *StutterPeriod); 543 544 static void CalculateSwathAndDETConfiguration( 545 bool ForceSingleDPP, 546 int NumberOfActivePlanes, 547 unsigned int DETBufferSizeInKByte, 548 double MaximumSwathWidthLuma[], 549 double MaximumSwathWidthChroma[], 550 enum scan_direction_class SourceScan[], 551 enum source_format_class SourcePixelFormat[], 552 enum dm_swizzle_mode SurfaceTiling[], 553 int ViewportWidth[], 554 int ViewportHeight[], 555 int SurfaceWidthY[], 556 int SurfaceWidthC[], 557 int SurfaceHeightY[], 558 int SurfaceHeightC[], 559 int Read256BytesBlockHeightY[], 560 int Read256BytesBlockHeightC[], 561 int Read256BytesBlockWidthY[], 562 int Read256BytesBlockWidthC[], 563 enum odm_combine_mode ODMCombineEnabled[], 564 int BlendingAndTiming[], 565 int BytePerPixY[], 566 int BytePerPixC[], 567 double BytePerPixDETY[], 568 double BytePerPixDETC[], 569 int HActive[], 570 double HRatio[], 571 double HRatioChroma[], 572 int DPPPerPlane[], 573 int swath_width_luma_ub[], 574 int swath_width_chroma_ub[], 575 double SwathWidth[], 576 double SwathWidthChroma[], 577 int SwathHeightY[], 578 int SwathHeightC[], 579 unsigned int DETBufferSizeY[], 580 unsigned int DETBufferSizeC[], 581 bool ViewportSizeSupportPerPlane[], 582 bool *ViewportSizeSupport); 583 static void CalculateSwathWidth( 584 bool ForceSingleDPP, 585 int NumberOfActivePlanes, 586 enum source_format_class SourcePixelFormat[], 587 enum scan_direction_class SourceScan[], 588 int ViewportWidth[], 589 int ViewportHeight[], 590 int SurfaceWidthY[], 591 int SurfaceWidthC[], 592 int SurfaceHeightY[], 593 int SurfaceHeightC[], 594 enum odm_combine_mode ODMCombineEnabled[], 595 int BytePerPixY[], 596 int BytePerPixC[], 597 int Read256BytesBlockHeightY[], 598 int Read256BytesBlockHeightC[], 599 int Read256BytesBlockWidthY[], 600 int Read256BytesBlockWidthC[], 601 int BlendingAndTiming[], 602 int HActive[], 603 double HRatio[], 604 int DPPPerPlane[], 605 double SwathWidthSingleDPPY[], 606 double SwathWidthSingleDPPC[], 607 double SwathWidthY[], 608 double SwathWidthC[], 609 int MaximumSwathHeightY[], 610 int MaximumSwathHeightC[], 611 int swath_width_luma_ub[], 612 int swath_width_chroma_ub[]); 613 614 static double CalculateExtraLatency( 615 int RoundTripPingLatencyCycles, 616 int ReorderingBytes, 617 double DCFCLK, 618 int TotalNumberOfActiveDPP, 619 int PixelChunkSizeInKByte, 620 int TotalNumberOfDCCActiveDPP, 621 int MetaChunkSize, 622 double ReturnBW, 623 bool GPUVMEnable, 624 bool HostVMEnable, 625 int NumberOfActivePlanes, 626 int NumberOfDPP[], 627 int dpte_group_bytes[], 628 double HostVMInefficiencyFactor, 629 double HostVMMinPageSize, 630 int HostVMMaxNonCachedPageTableLevels); 631 632 static double CalculateExtraLatencyBytes( 633 int ReorderingBytes, 634 int TotalNumberOfActiveDPP, 635 int PixelChunkSizeInKByte, 636 int TotalNumberOfDCCActiveDPP, 637 int MetaChunkSize, 638 bool GPUVMEnable, 639 bool HostVMEnable, 640 int NumberOfActivePlanes, 641 int NumberOfDPP[], 642 int dpte_group_bytes[], 643 double HostVMInefficiencyFactor, 644 double HostVMMinPageSize, 645 int HostVMMaxNonCachedPageTableLevels); 646 647 static double CalculateUrgentLatency( 648 double UrgentLatencyPixelDataOnly, 649 double UrgentLatencyPixelMixedWithVMData, 650 double UrgentLatencyVMDataOnly, 651 bool DoUrgentLatencyAdjustment, 652 double UrgentLatencyAdjustmentFabricClockComponent, 653 double UrgentLatencyAdjustmentFabricClockReference, 654 double FabricClockSingle); 655 656 static void CalculateUnboundedRequestAndCompressedBufferSize( 657 unsigned int DETBufferSizeInKByte, 658 int ConfigReturnBufferSizeInKByte, 659 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 660 int TotalActiveDPP, 661 bool NoChromaPlanes, 662 int MaxNumDPP, 663 int CompressedBufferSegmentSizeInkByteFinal, 664 enum output_encoder_class *Output, 665 bool *UnboundedRequestEnabled, 666 int *CompressedBufferSizeInkByte); 667 668 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output); 669 static unsigned int CalculateMaxVStartup( 670 unsigned int VTotal, 671 unsigned int VActive, 672 unsigned int VBlankNom, 673 unsigned int HTotal, 674 double PixelClock, 675 bool ProgressiveTointerlaceUnitinOPP, 676 bool Interlace, 677 unsigned int VBlankNomDefaultUS, 678 double WritebackDelayTime); 679 680 void dml314_recalculate(struct display_mode_lib *mode_lib) 681 { 682 ModeSupportAndSystemConfiguration(mode_lib); 683 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); 684 DisplayPipeConfiguration(mode_lib); 685 #ifdef __DML_VBA_DEBUG__ 686 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__); 687 #endif 688 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); 689 } 690 691 static unsigned int dscceComputeDelay( 692 unsigned int bpc, 693 double BPP, 694 unsigned int sliceWidth, 695 unsigned int numSlices, 696 enum output_format_class pixelFormat, 697 enum output_encoder_class Output) 698 { 699 // valid bpc = source bits per component in the set of {8, 10, 12} 700 // valid bpp = increments of 1/16 of a bit 701 // min = 6/7/8 in N420/N422/444, respectively 702 // max = such that compression is 1:1 703 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 704 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 705 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 706 707 // fixed value 708 unsigned int rcModelSize = 8192; 709 710 // N422/N420 operate at 2 pixels per clock 711 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels; 712 713 if (pixelFormat == dm_420) 714 pixelsPerClock = 2; 715 else if (pixelFormat == dm_444) 716 pixelsPerClock = 1; 717 else if (pixelFormat == dm_n422) 718 pixelsPerClock = 2; 719 // #all other modes operate at 1 pixel per clock 720 else 721 pixelsPerClock = 1; 722 723 //initial transmit delay as per PPS 724 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); 725 726 //compute ssm delay 727 if (bpc == 8) 728 D = 81; 729 else if (bpc == 10) 730 D = 89; 731 else 732 D = 113; 733 734 //divide by pixel per cycle to compute slice width as seen by DSC 735 w = sliceWidth / pixelsPerClock; 736 737 //422 mode has an additional cycle of delay 738 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) 739 s = 0; 740 else 741 s = 1; 742 743 //main calculation for the dscce 744 ix = initalXmitDelay + 45; 745 wx = (w + 2) / 3; 746 P = 3 * wx - w; 747 l0 = ix / w; 748 a = ix + P * l0; 749 ax = (a + 2) / 3 + D + 6 + 1; 750 L = (ax + wx - 1) / wx; 751 if ((ix % w) == 0 && P != 0) 752 lstall = 1; 753 else 754 lstall = 0; 755 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; 756 757 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 758 pixels = Delay * 3 * pixelsPerClock; 759 return pixels; 760 } 761 762 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) 763 { 764 unsigned int Delay = 0; 765 766 if (pixelFormat == dm_420) { 767 // sfr 768 Delay = Delay + 2; 769 // dsccif 770 Delay = Delay + 0; 771 // dscc - input deserializer 772 Delay = Delay + 3; 773 // dscc gets pixels every other cycle 774 Delay = Delay + 2; 775 // dscc - input cdc fifo 776 Delay = Delay + 12; 777 // dscc gets pixels every other cycle 778 Delay = Delay + 13; 779 // dscc - cdc uncertainty 780 Delay = Delay + 2; 781 // dscc - output cdc fifo 782 Delay = Delay + 7; 783 // dscc gets pixels every other cycle 784 Delay = Delay + 3; 785 // dscc - cdc uncertainty 786 Delay = Delay + 2; 787 // dscc - output serializer 788 Delay = Delay + 1; 789 // sft 790 Delay = Delay + 1; 791 } else if (pixelFormat == dm_n422) { 792 // sfr 793 Delay = Delay + 2; 794 // dsccif 795 Delay = Delay + 1; 796 // dscc - input deserializer 797 Delay = Delay + 5; 798 // dscc - input cdc fifo 799 Delay = Delay + 25; 800 // dscc - cdc uncertainty 801 Delay = Delay + 2; 802 // dscc - output cdc fifo 803 Delay = Delay + 10; 804 // dscc - cdc uncertainty 805 Delay = Delay + 2; 806 // dscc - output serializer 807 Delay = Delay + 1; 808 // sft 809 Delay = Delay + 1; 810 } else { 811 // sfr 812 Delay = Delay + 2; 813 // dsccif 814 Delay = Delay + 0; 815 // dscc - input deserializer 816 Delay = Delay + 3; 817 // dscc - input cdc fifo 818 Delay = Delay + 12; 819 // dscc - cdc uncertainty 820 Delay = Delay + 2; 821 // dscc - output cdc fifo 822 Delay = Delay + 7; 823 // dscc - output serializer 824 Delay = Delay + 1; 825 // dscc - cdc uncertainty 826 Delay = Delay + 2; 827 // sft 828 Delay = Delay + 1; 829 } 830 831 return Delay; 832 } 833 834 static bool CalculatePrefetchSchedule( 835 struct display_mode_lib *mode_lib, 836 double HostVMInefficiencyFactor, 837 Pipe *myPipe, 838 unsigned int DSCDelay, 839 double DPPCLKDelaySubtotalPlusCNVCFormater, 840 double DPPCLKDelaySCL, 841 double DPPCLKDelaySCLLBOnly, 842 double DPPCLKDelayCNVCCursor, 843 double DISPCLKDelaySubtotal, 844 unsigned int DPP_RECOUT_WIDTH, 845 enum output_format_class OutputFormat, 846 unsigned int MaxInterDCNTileRepeaters, 847 unsigned int VStartup, 848 unsigned int MaxVStartup, 849 unsigned int GPUVMPageTableLevels, 850 bool GPUVMEnable, 851 bool HostVMEnable, 852 unsigned int HostVMMaxNonCachedPageTableLevels, 853 double HostVMMinPageSize, 854 bool DynamicMetadataEnable, 855 bool DynamicMetadataVMEnabled, 856 int DynamicMetadataLinesBeforeActiveRequired, 857 unsigned int DynamicMetadataTransmittedBytes, 858 double UrgentLatency, 859 double UrgentExtraLatency, 860 double TCalc, 861 unsigned int PDEAndMetaPTEBytesFrame, 862 unsigned int MetaRowByte, 863 unsigned int PixelPTEBytesPerRow, 864 double PrefetchSourceLinesY, 865 unsigned int SwathWidthY, 866 double VInitPreFillY, 867 unsigned int MaxNumSwathY, 868 double PrefetchSourceLinesC, 869 unsigned int SwathWidthC, 870 double VInitPreFillC, 871 unsigned int MaxNumSwathC, 872 int swath_width_luma_ub, 873 int swath_width_chroma_ub, 874 unsigned int SwathHeightY, 875 unsigned int SwathHeightC, 876 double TWait, 877 double *DSTXAfterScaler, 878 double *DSTYAfterScaler, 879 double *DestinationLinesForPrefetch, 880 double *PrefetchBandwidth, 881 double *DestinationLinesToRequestVMInVBlank, 882 double *DestinationLinesToRequestRowInVBlank, 883 double *VRatioPrefetchY, 884 double *VRatioPrefetchC, 885 double *RequiredPrefetchPixDataBWLuma, 886 double *RequiredPrefetchPixDataBWChroma, 887 bool *NotEnoughTimeForDynamicMetadata, 888 double *Tno_bw, 889 double *prefetch_vmrow_bw, 890 double *Tdmdl_vm, 891 double *Tdmdl, 892 double *TSetup, 893 int *VUpdateOffsetPix, 894 double *VUpdateWidthPix, 895 double *VReadyOffsetPix) 896 { 897 bool MyError = false; 898 unsigned int DPPCycles, DISPCLKCycles; 899 double DSTTotalPixelsAfterScaler; 900 double LineTime; 901 double dst_y_prefetch_equ; 902 #ifdef __DML_VBA_DEBUG__ 903 double Tsw_oto; 904 #endif 905 double prefetch_bw_oto; 906 double prefetch_bw_pr; 907 double Tvm_oto; 908 double Tr0_oto; 909 double Tvm_oto_lines; 910 double Tr0_oto_lines; 911 double dst_y_prefetch_oto; 912 double TimeForFetchingMetaPTE = 0; 913 double TimeForFetchingRowInVBlank = 0; 914 double LinesToRequestPrefetchPixelData = 0; 915 unsigned int HostVMDynamicLevelsTrips; 916 double trip_to_mem; 917 double Tvm_trips; 918 double Tr0_trips; 919 double Tvm_trips_rounded; 920 double Tr0_trips_rounded; 921 double Lsw_oto; 922 double Tpre_rounded; 923 double prefetch_bw_equ; 924 double Tvm_equ; 925 double Tr0_equ; 926 double Tdmbf; 927 double Tdmec; 928 double Tdmsks; 929 double prefetch_sw_bytes; 930 double bytes_pp; 931 double dep_bytes; 932 int max_vratio_pre = 4; 933 double min_Lsw; 934 double Tsw_est1 = 0; 935 double Tsw_est3 = 0; 936 double max_Tsw = 0; 937 938 if (GPUVMEnable == true && HostVMEnable == true) { 939 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 940 } else { 941 HostVMDynamicLevelsTrips = 0; 942 } 943 #ifdef __DML_VBA_DEBUG__ 944 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor); 945 #endif 946 CalculateVupdateAndDynamicMetadataParameters( 947 MaxInterDCNTileRepeaters, 948 myPipe->DPPCLK, 949 myPipe->DISPCLK, 950 myPipe->DCFCLKDeepSleep, 951 myPipe->PixelClock, 952 myPipe->HTotal, 953 myPipe->VBlank, 954 DynamicMetadataTransmittedBytes, 955 DynamicMetadataLinesBeforeActiveRequired, 956 myPipe->InterlaceEnable, 957 myPipe->ProgressiveToInterlaceUnitInOPP, 958 TSetup, 959 &Tdmbf, 960 &Tdmec, 961 &Tdmsks, 962 VUpdateOffsetPix, 963 VUpdateWidthPix, 964 VReadyOffsetPix); 965 966 LineTime = myPipe->HTotal / myPipe->PixelClock; 967 trip_to_mem = UrgentLatency; 968 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 969 970 #ifdef __DML_VBA_ALLOW_DELTA__ 971 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) { 972 #else 973 if (DynamicMetadataVMEnabled == true) { 974 #endif 975 *Tdmdl = TWait + Tvm_trips + trip_to_mem; 976 } else { 977 *Tdmdl = TWait + UrgentExtraLatency; 978 } 979 980 #ifdef __DML_VBA_ALLOW_DELTA__ 981 if (DynamicMetadataEnable == false) { 982 *Tdmdl = 0.0; 983 } 984 #endif 985 986 if (DynamicMetadataEnable == true) { 987 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 988 *NotEnoughTimeForDynamicMetadata = true; 989 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 990 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 991 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 992 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks); 993 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); 994 } else { 995 *NotEnoughTimeForDynamicMetadata = false; 996 } 997 } else { 998 *NotEnoughTimeForDynamicMetadata = false; 999 } 1000 1001 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0); 1002 1003 if (myPipe->ScalerEnabled) 1004 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; 1005 else 1006 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; 1007 1008 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; 1009 1010 DISPCLKCycles = DISPCLKDelaySubtotal; 1011 1012 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) 1013 return true; 1014 1015 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; 1016 1017 #ifdef __DML_VBA_DEBUG__ 1018 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 1019 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 1020 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK); 1021 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 1022 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK); 1023 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 1024 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); 1025 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled); 1026 #endif 1027 1028 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; 1029 1030 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) 1031 *DSTYAfterScaler = 1; 1032 else 1033 *DSTYAfterScaler = 0; 1034 1035 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 1036 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 1037 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 1038 1039 #ifdef __DML_VBA_DEBUG__ 1040 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 1041 #endif 1042 1043 MyError = false; 1044 1045 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 1046 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime; 1047 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime; 1048 1049 #ifdef __DML_VBA_ALLOW_DELTA__ 1050 if (!myPipe->DCCEnable) { 1051 Tr0_trips = 0.0; 1052 Tr0_trips_rounded = 0.0; 1053 } 1054 #endif 1055 1056 if (!GPUVMEnable) { 1057 Tvm_trips = 0.0; 1058 Tvm_trips_rounded = 0.0; 1059 } 1060 1061 if (GPUVMEnable) { 1062 if (GPUVMPageTableLevels >= 3) { 1063 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1); 1064 } else { 1065 *Tno_bw = 0; 1066 } 1067 } else if (!myPipe->DCCEnable) { 1068 *Tno_bw = LineTime; 1069 } else { 1070 *Tno_bw = LineTime / 4; 1071 } 1072 1073 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12) 1074 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 1075 else 1076 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 1077 /*rev 99*/ 1078 prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane; 1079 prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr; 1080 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; 1081 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 1082 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw); 1083 1084 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre); 1085 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4; 1086 #ifdef __DML_VBA_DEBUG__ 1087 Tsw_oto = Lsw_oto * LineTime; 1088 #endif 1089 1090 1091 #ifdef __DML_VBA_DEBUG__ 1092 dml_print("DML: HTotal: %d\n", myPipe->HTotal); 1093 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto); 1094 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY); 1095 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub); 1096 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY); 1097 dml_print("DML: Tsw_oto: %f\n", Tsw_oto); 1098 #endif 1099 1100 if (GPUVMEnable == true) 1101 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0); 1102 else 1103 Tvm_oto = LineTime / 4.0; 1104 1105 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1106 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term) 1107 LineTime - Tvm_oto, 1108 LineTime / 4); 1109 } else { 1110 Tr0_oto = (LineTime - Tvm_oto) / 2.0; 1111 } 1112 1113 #ifdef __DML_VBA_DEBUG__ 1114 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 1115 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 1116 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte); 1117 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 1118 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1119 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1120 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); 1121 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 1122 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 1123 #endif 1124 1125 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 1126 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 1127 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 1128 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); 1129 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; 1130 Tpre_rounded = dst_y_prefetch_equ * LineTime; 1131 1132 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 1133 1134 if (prefetch_sw_bytes < dep_bytes) 1135 prefetch_sw_bytes = 2 * dep_bytes; 1136 1137 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto); 1138 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines); 1139 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines); 1140 dml_print("DML: Lsw_oto: %f\n", Lsw_oto); 1141 dml_print("DML: LineTime: %f\n", LineTime); 1142 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ); 1143 1144 dml_print("DML: LineTime: %f\n", LineTime); 1145 dml_print("DML: VStartup: %d\n", VStartup); 1146 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime); 1147 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup); 1148 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc); 1149 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait); 1150 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf); 1151 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec); 1152 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks); 1153 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd\n", *Tdmdl_vm); 1154 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", *Tdmdl); 1155 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler\n", *DSTXAfterScaler); 1156 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n", *DSTYAfterScaler); 1157 1158 *PrefetchBandwidth = 0; 1159 *DestinationLinesToRequestVMInVBlank = 0; 1160 *DestinationLinesToRequestRowInVBlank = 0; 1161 *VRatioPrefetchY = 0; 1162 *VRatioPrefetchC = 0; 1163 *RequiredPrefetchPixDataBWLuma = 0; 1164 if (dst_y_prefetch_equ > 1) { 1165 double PrefetchBandwidth1; 1166 double PrefetchBandwidth2; 1167 double PrefetchBandwidth3; 1168 double PrefetchBandwidth4; 1169 1170 if (Tpre_rounded - *Tno_bw > 0) { 1171 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 1172 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); 1173 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 1174 } else { 1175 PrefetchBandwidth1 = 0; 1176 } 1177 1178 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 1179 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1180 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 1181 } 1182 1183 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 1184 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 1185 else 1186 PrefetchBandwidth2 = 0; 1187 1188 if (Tpre_rounded - Tvm_trips_rounded > 0) { 1189 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 1190 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 1191 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 1192 } else { 1193 PrefetchBandwidth3 = 0; 1194 } 1195 1196 #ifdef __DML_VBA_DEBUG__ 1197 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 1198 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 1199 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); 1200 #endif 1201 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) { 1202 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1203 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 1204 } 1205 1206 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) 1207 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 1208 else 1209 PrefetchBandwidth4 = 0; 1210 1211 { 1212 bool Case1OK; 1213 bool Case2OK; 1214 bool Case3OK; 1215 1216 if (PrefetchBandwidth1 > 0) { 1217 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded 1218 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) { 1219 Case1OK = true; 1220 } else { 1221 Case1OK = false; 1222 } 1223 } else { 1224 Case1OK = false; 1225 } 1226 1227 if (PrefetchBandwidth2 > 0) { 1228 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded 1229 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) { 1230 Case2OK = true; 1231 } else { 1232 Case2OK = false; 1233 } 1234 } else { 1235 Case2OK = false; 1236 } 1237 1238 if (PrefetchBandwidth3 > 0) { 1239 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded 1240 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) { 1241 Case3OK = true; 1242 } else { 1243 Case3OK = false; 1244 } 1245 } else { 1246 Case3OK = false; 1247 } 1248 1249 if (Case1OK) { 1250 prefetch_bw_equ = PrefetchBandwidth1; 1251 } else if (Case2OK) { 1252 prefetch_bw_equ = PrefetchBandwidth2; 1253 } else if (Case3OK) { 1254 prefetch_bw_equ = PrefetchBandwidth3; 1255 } else { 1256 prefetch_bw_equ = PrefetchBandwidth4; 1257 } 1258 1259 #ifdef __DML_VBA_DEBUG__ 1260 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 1261 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 1262 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 1263 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 1264 #endif 1265 1266 if (prefetch_bw_equ > 0) { 1267 if (GPUVMEnable == true) { 1268 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); 1269 } else { 1270 Tvm_equ = LineTime / 4; 1271 } 1272 1273 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1274 Tr0_equ = dml_max4( 1275 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ, 1276 Tr0_trips, 1277 (LineTime - Tvm_equ) / 2, 1278 LineTime / 4); 1279 } else { 1280 Tr0_equ = (LineTime - Tvm_equ) / 2; 1281 } 1282 } else { 1283 Tvm_equ = 0; 1284 Tr0_equ = 0; 1285 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__); 1286 } 1287 } 1288 1289 if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 1290 *DestinationLinesForPrefetch = dst_y_prefetch_oto; 1291 TimeForFetchingMetaPTE = Tvm_oto; 1292 TimeForFetchingRowInVBlank = Tr0_oto; 1293 *PrefetchBandwidth = prefetch_bw_oto; 1294 } else { 1295 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 1296 TimeForFetchingMetaPTE = Tvm_equ; 1297 TimeForFetchingRowInVBlank = Tr0_equ; 1298 *PrefetchBandwidth = prefetch_bw_equ; 1299 } 1300 1301 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 1302 1303 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 1304 1305 #ifdef __DML_VBA_ALLOW_DELTA__ 1306 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch 1307 // See note above dated 5/30/2018 1308 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ? 1309 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this?? 1310 #else 1311 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 1312 #endif 1313 1314 #ifdef __DML_VBA_DEBUG__ 1315 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 1316 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1317 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 1318 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1319 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1320 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1321 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 1322 #endif 1323 1324 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) { 1325 1326 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 1327 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1328 #ifdef __DML_VBA_DEBUG__ 1329 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1330 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); 1331 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY); 1332 #endif 1333 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 1334 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 1335 *VRatioPrefetchY = dml_max( 1336 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData, 1337 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0)); 1338 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1339 } else { 1340 MyError = true; 1341 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1342 *VRatioPrefetchY = 0; 1343 } 1344 #ifdef __DML_VBA_DEBUG__ 1345 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1346 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1347 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); 1348 #endif 1349 } 1350 1351 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 1352 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1353 1354 #ifdef __DML_VBA_DEBUG__ 1355 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1356 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); 1357 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC); 1358 #endif 1359 if ((SwathHeightC > 4) || VInitPreFillC > 3) { 1360 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 1361 *VRatioPrefetchC = dml_max( 1362 *VRatioPrefetchC, 1363 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0)); 1364 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1365 } else { 1366 MyError = true; 1367 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1368 *VRatioPrefetchC = 0; 1369 } 1370 #ifdef __DML_VBA_DEBUG__ 1371 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1372 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 1373 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); 1374 #endif 1375 } 1376 1377 #ifdef __DML_VBA_DEBUG__ 1378 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 1379 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 1380 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1381 #endif 1382 1383 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime; 1384 1385 #ifdef __DML_VBA_DEBUG__ 1386 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma); 1387 #endif 1388 1389 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub 1390 / LineTime; 1391 } else { 1392 MyError = true; 1393 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1394 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData); 1395 *VRatioPrefetchY = 0; 1396 *VRatioPrefetchC = 0; 1397 *RequiredPrefetchPixDataBWLuma = 0; 1398 *RequiredPrefetchPixDataBWChroma = 0; 1399 } 1400 1401 dml_print( 1402 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 1403 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 1404 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 1405 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank); 1406 dml_print( 1407 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", 1408 (double) LinesToRequestPrefetchPixelData * LineTime); 1409 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); 1410 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 1411 dml_print( 1412 "DML: Tslack(pre): %fus - time left over in schedule\n", 1413 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank 1414 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); 1415 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); 1416 1417 } else { 1418 MyError = true; 1419 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1420 } 1421 1422 { 1423 double prefetch_vm_bw; 1424 double prefetch_row_bw; 1425 1426 if (PDEAndMetaPTEBytesFrame == 0) { 1427 prefetch_vm_bw = 0; 1428 } else if (*DestinationLinesToRequestVMInVBlank > 0) { 1429 #ifdef __DML_VBA_DEBUG__ 1430 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1431 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1432 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1433 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1434 #endif 1435 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime); 1436 #ifdef __DML_VBA_DEBUG__ 1437 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 1438 #endif 1439 } else { 1440 prefetch_vm_bw = 0; 1441 MyError = true; 1442 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1443 } 1444 1445 if (MetaRowByte + PixelPTEBytesPerRow == 0) { 1446 prefetch_row_bw = 0; 1447 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 1448 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime); 1449 1450 #ifdef __DML_VBA_DEBUG__ 1451 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 1452 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1453 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1454 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 1455 #endif 1456 } else { 1457 prefetch_row_bw = 0; 1458 MyError = true; 1459 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1460 } 1461 1462 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 1463 } 1464 1465 if (MyError) { 1466 *PrefetchBandwidth = 0; 1467 TimeForFetchingMetaPTE = 0; 1468 TimeForFetchingRowInVBlank = 0; 1469 *DestinationLinesToRequestVMInVBlank = 0; 1470 *DestinationLinesToRequestRowInVBlank = 0; 1471 *DestinationLinesForPrefetch = 0; 1472 LinesToRequestPrefetchPixelData = 0; 1473 *VRatioPrefetchY = 0; 1474 *VRatioPrefetchC = 0; 1475 *RequiredPrefetchPixDataBWLuma = 0; 1476 *RequiredPrefetchPixDataBWChroma = 0; 1477 } 1478 1479 return MyError; 1480 } 1481 1482 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed) 1483 { 1484 return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1); 1485 } 1486 1487 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed) 1488 { 1489 return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4.0 / Clock, 1); 1490 } 1491 1492 static void CalculateDCCConfiguration( 1493 bool DCCEnabled, 1494 bool DCCProgrammingAssumesScanDirectionUnknown, 1495 enum source_format_class SourcePixelFormat, 1496 unsigned int SurfaceWidthLuma, 1497 unsigned int SurfaceWidthChroma, 1498 unsigned int SurfaceHeightLuma, 1499 unsigned int SurfaceHeightChroma, 1500 double DETBufferSize, 1501 unsigned int RequestHeight256ByteLuma, 1502 unsigned int RequestHeight256ByteChroma, 1503 enum dm_swizzle_mode TilingFormat, 1504 unsigned int BytePerPixelY, 1505 unsigned int BytePerPixelC, 1506 double BytePerPixelDETY, 1507 double BytePerPixelDETC, 1508 enum scan_direction_class ScanOrientation, 1509 unsigned int *MaxUncompressedBlockLuma, 1510 unsigned int *MaxUncompressedBlockChroma, 1511 unsigned int *MaxCompressedBlockLuma, 1512 unsigned int *MaxCompressedBlockChroma, 1513 unsigned int *IndependentBlockLuma, 1514 unsigned int *IndependentBlockChroma) 1515 { 1516 int yuv420; 1517 int horz_div_l; 1518 int horz_div_c; 1519 int vert_div_l; 1520 int vert_div_c; 1521 1522 int swath_buf_size; 1523 double detile_buf_vp_horz_limit; 1524 double detile_buf_vp_vert_limit; 1525 1526 int MAS_vp_horz_limit; 1527 int MAS_vp_vert_limit; 1528 int max_vp_horz_width; 1529 int max_vp_vert_height; 1530 int eff_surf_width_l; 1531 int eff_surf_width_c; 1532 int eff_surf_height_l; 1533 int eff_surf_height_c; 1534 1535 int full_swath_bytes_horz_wc_l; 1536 int full_swath_bytes_horz_wc_c; 1537 int full_swath_bytes_vert_wc_l; 1538 int full_swath_bytes_vert_wc_c; 1539 int req128_horz_wc_l; 1540 int req128_horz_wc_c; 1541 int req128_vert_wc_l; 1542 int req128_vert_wc_c; 1543 int segment_order_horz_contiguous_luma; 1544 int segment_order_horz_contiguous_chroma; 1545 int segment_order_vert_contiguous_luma; 1546 int segment_order_vert_contiguous_chroma; 1547 1548 typedef enum { 1549 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA 1550 } RequestType; 1551 RequestType RequestLuma; 1552 RequestType RequestChroma; 1553 1554 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0); 1555 horz_div_l = 1; 1556 horz_div_c = 1; 1557 vert_div_l = 1; 1558 vert_div_c = 1; 1559 1560 if (BytePerPixelY == 1) 1561 vert_div_l = 0; 1562 if (BytePerPixelC == 1) 1563 vert_div_c = 0; 1564 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x)) 1565 horz_div_l = 0; 1566 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x)) 1567 horz_div_c = 0; 1568 1569 if (BytePerPixelC == 0) { 1570 swath_buf_size = DETBufferSize / 2 - 2 * 256; 1571 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)); 1572 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)); 1573 } else { 1574 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256; 1575 detile_buf_vp_horz_limit = (double) swath_buf_size 1576 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) 1577 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); 1578 detile_buf_vp_vert_limit = (double) swath_buf_size 1579 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420)); 1580 } 1581 1582 if (SourcePixelFormat == dm_420_10) { 1583 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; 1584 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; 1585 } 1586 1587 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); 1588 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); 1589 1590 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760; 1591 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760); 1592 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); 1593 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); 1594 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); 1595 eff_surf_width_c = eff_surf_width_l / (1 + yuv420); 1596 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); 1597 eff_surf_height_c = eff_surf_height_l / (1 + yuv420); 1598 1599 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; 1600 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; 1601 if (BytePerPixelC > 0) { 1602 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; 1603 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; 1604 } else { 1605 full_swath_bytes_horz_wc_c = 0; 1606 full_swath_bytes_vert_wc_c = 0; 1607 } 1608 1609 if (SourcePixelFormat == dm_420_10) { 1610 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256); 1611 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256); 1612 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256); 1613 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256); 1614 } 1615 1616 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) { 1617 req128_horz_wc_l = 0; 1618 req128_horz_wc_c = 0; 1619 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) { 1620 req128_horz_wc_l = 0; 1621 req128_horz_wc_c = 1; 1622 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) { 1623 req128_horz_wc_l = 1; 1624 req128_horz_wc_c = 0; 1625 } else { 1626 req128_horz_wc_l = 1; 1627 req128_horz_wc_c = 1; 1628 } 1629 1630 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) { 1631 req128_vert_wc_l = 0; 1632 req128_vert_wc_c = 0; 1633 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) { 1634 req128_vert_wc_l = 0; 1635 req128_vert_wc_c = 1; 1636 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) { 1637 req128_vert_wc_l = 1; 1638 req128_vert_wc_c = 0; 1639 } else { 1640 req128_vert_wc_l = 1; 1641 req128_vert_wc_c = 1; 1642 } 1643 1644 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) { 1645 segment_order_horz_contiguous_luma = 0; 1646 } else { 1647 segment_order_horz_contiguous_luma = 1; 1648 } 1649 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x)) 1650 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) { 1651 segment_order_vert_contiguous_luma = 0; 1652 } else { 1653 segment_order_vert_contiguous_luma = 1; 1654 } 1655 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) { 1656 segment_order_horz_contiguous_chroma = 0; 1657 } else { 1658 segment_order_horz_contiguous_chroma = 1; 1659 } 1660 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x)) 1661 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) { 1662 segment_order_vert_contiguous_chroma = 0; 1663 } else { 1664 segment_order_vert_contiguous_chroma = 1; 1665 } 1666 1667 if (DCCProgrammingAssumesScanDirectionUnknown == true) { 1668 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { 1669 RequestLuma = REQ_256Bytes; 1670 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) { 1671 RequestLuma = REQ_128BytesNonContiguous; 1672 } else { 1673 RequestLuma = REQ_128BytesContiguous; 1674 } 1675 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) { 1676 RequestChroma = REQ_256Bytes; 1677 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) { 1678 RequestChroma = REQ_128BytesNonContiguous; 1679 } else { 1680 RequestChroma = REQ_128BytesContiguous; 1681 } 1682 } else if (ScanOrientation != dm_vert) { 1683 if (req128_horz_wc_l == 0) { 1684 RequestLuma = REQ_256Bytes; 1685 } else if (segment_order_horz_contiguous_luma == 0) { 1686 RequestLuma = REQ_128BytesNonContiguous; 1687 } else { 1688 RequestLuma = REQ_128BytesContiguous; 1689 } 1690 if (req128_horz_wc_c == 0) { 1691 RequestChroma = REQ_256Bytes; 1692 } else if (segment_order_horz_contiguous_chroma == 0) { 1693 RequestChroma = REQ_128BytesNonContiguous; 1694 } else { 1695 RequestChroma = REQ_128BytesContiguous; 1696 } 1697 } else { 1698 if (req128_vert_wc_l == 0) { 1699 RequestLuma = REQ_256Bytes; 1700 } else if (segment_order_vert_contiguous_luma == 0) { 1701 RequestLuma = REQ_128BytesNonContiguous; 1702 } else { 1703 RequestLuma = REQ_128BytesContiguous; 1704 } 1705 if (req128_vert_wc_c == 0) { 1706 RequestChroma = REQ_256Bytes; 1707 } else if (segment_order_vert_contiguous_chroma == 0) { 1708 RequestChroma = REQ_128BytesNonContiguous; 1709 } else { 1710 RequestChroma = REQ_128BytesContiguous; 1711 } 1712 } 1713 1714 if (RequestLuma == REQ_256Bytes) { 1715 *MaxUncompressedBlockLuma = 256; 1716 *MaxCompressedBlockLuma = 256; 1717 *IndependentBlockLuma = 0; 1718 } else if (RequestLuma == REQ_128BytesContiguous) { 1719 *MaxUncompressedBlockLuma = 256; 1720 *MaxCompressedBlockLuma = 128; 1721 *IndependentBlockLuma = 128; 1722 } else { 1723 *MaxUncompressedBlockLuma = 256; 1724 *MaxCompressedBlockLuma = 64; 1725 *IndependentBlockLuma = 64; 1726 } 1727 1728 if (RequestChroma == REQ_256Bytes) { 1729 *MaxUncompressedBlockChroma = 256; 1730 *MaxCompressedBlockChroma = 256; 1731 *IndependentBlockChroma = 0; 1732 } else if (RequestChroma == REQ_128BytesContiguous) { 1733 *MaxUncompressedBlockChroma = 256; 1734 *MaxCompressedBlockChroma = 128; 1735 *IndependentBlockChroma = 128; 1736 } else { 1737 *MaxUncompressedBlockChroma = 256; 1738 *MaxCompressedBlockChroma = 64; 1739 *IndependentBlockChroma = 64; 1740 } 1741 1742 if (DCCEnabled != true || BytePerPixelC == 0) { 1743 *MaxUncompressedBlockChroma = 0; 1744 *MaxCompressedBlockChroma = 0; 1745 *IndependentBlockChroma = 0; 1746 } 1747 1748 if (DCCEnabled != true) { 1749 *MaxUncompressedBlockLuma = 0; 1750 *MaxCompressedBlockLuma = 0; 1751 *IndependentBlockLuma = 0; 1752 } 1753 } 1754 1755 static double CalculatePrefetchSourceLines( 1756 struct display_mode_lib *mode_lib, 1757 double VRatio, 1758 double vtaps, 1759 bool Interlace, 1760 bool ProgressiveToInterlaceUnitInOPP, 1761 unsigned int SwathHeight, 1762 unsigned int ViewportYStart, 1763 double *VInitPreFill, 1764 unsigned int *MaxNumSwath) 1765 { 1766 struct vba_vars_st *v = &mode_lib->vba; 1767 unsigned int MaxPartialSwath; 1768 1769 if (ProgressiveToInterlaceUnitInOPP) 1770 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); 1771 else 1772 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); 1773 1774 if (!v->IgnoreViewportPositioning) { 1775 1776 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; 1777 1778 if (*VInitPreFill > 1.0) 1779 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; 1780 else 1781 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight; 1782 MaxPartialSwath = dml_max(1U, MaxPartialSwath); 1783 1784 } else { 1785 1786 if (ViewportYStart != 0) 1787 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"); 1788 1789 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); 1790 1791 if (*VInitPreFill > 1.0) 1792 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; 1793 else 1794 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight; 1795 } 1796 1797 #ifdef __DML_VBA_DEBUG__ 1798 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); 1799 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps); 1800 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill); 1801 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP); 1802 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning); 1803 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); 1804 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); 1805 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath); 1806 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath); 1807 #endif 1808 return *MaxNumSwath * SwathHeight + MaxPartialSwath; 1809 } 1810 1811 static unsigned int CalculateVMAndRowBytes( 1812 struct display_mode_lib *mode_lib, 1813 bool DCCEnable, 1814 unsigned int BlockHeight256Bytes, 1815 unsigned int BlockWidth256Bytes, 1816 enum source_format_class SourcePixelFormat, 1817 unsigned int SurfaceTiling, 1818 unsigned int BytePerPixel, 1819 enum scan_direction_class ScanDirection, 1820 unsigned int SwathWidth, 1821 unsigned int ViewportHeight, 1822 bool GPUVMEnable, 1823 bool HostVMEnable, 1824 unsigned int HostVMMaxNonCachedPageTableLevels, 1825 unsigned int GPUVMMinPageSize, 1826 unsigned int HostVMMinPageSize, 1827 unsigned int PTEBufferSizeInRequests, 1828 unsigned int Pitch, 1829 unsigned int DCCMetaPitch, 1830 unsigned int *MacroTileWidth, 1831 unsigned int *MetaRowByte, 1832 unsigned int *PixelPTEBytesPerRow, 1833 bool *PTEBufferSizeNotExceeded, 1834 int *dpte_row_width_ub, 1835 unsigned int *dpte_row_height, 1836 unsigned int *MetaRequestWidth, 1837 unsigned int *MetaRequestHeight, 1838 unsigned int *meta_row_width, 1839 unsigned int *meta_row_height, 1840 int *vm_group_bytes, 1841 unsigned int *dpte_group_bytes, 1842 unsigned int *PixelPTEReqWidth, 1843 unsigned int *PixelPTEReqHeight, 1844 unsigned int *PTERequestSize, 1845 int *DPDE0BytesFrame, 1846 int *MetaPTEBytesFrame) 1847 { 1848 struct vba_vars_st *v = &mode_lib->vba; 1849 unsigned int MPDEBytesFrame; 1850 unsigned int DCCMetaSurfaceBytes; 1851 unsigned int MacroTileSizeBytes; 1852 unsigned int MacroTileHeight; 1853 unsigned int ExtraDPDEBytesFrame; 1854 unsigned int PDEAndMetaPTEBytesFrame; 1855 unsigned int PixelPTEReqHeightPTEs = 0; 1856 unsigned int HostVMDynamicLevels = 0; 1857 double FractionOfPTEReturnDrop; 1858 1859 if (GPUVMEnable == true && HostVMEnable == true) { 1860 if (HostVMMinPageSize < 2048) { 1861 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 1862 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { 1863 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 1864 } else { 1865 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 1866 } 1867 } 1868 1869 *MetaRequestHeight = 8 * BlockHeight256Bytes; 1870 *MetaRequestWidth = 8 * BlockWidth256Bytes; 1871 if (ScanDirection != dm_vert) { 1872 *meta_row_height = *MetaRequestHeight; 1873 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; 1874 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; 1875 } else { 1876 *meta_row_height = *MetaRequestWidth; 1877 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; 1878 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; 1879 } 1880 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256; 1881 if (GPUVMEnable == true) { 1882 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64; 1883 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1); 1884 } else { 1885 *MetaPTEBytesFrame = 0; 1886 MPDEBytesFrame = 0; 1887 } 1888 1889 if (DCCEnable != true) { 1890 *MetaPTEBytesFrame = 0; 1891 MPDEBytesFrame = 0; 1892 *MetaRowByte = 0; 1893 } 1894 1895 if (SurfaceTiling == dm_sw_linear) { 1896 MacroTileSizeBytes = 256; 1897 MacroTileHeight = BlockHeight256Bytes; 1898 } else { 1899 MacroTileSizeBytes = 65536; 1900 MacroTileHeight = 16 * BlockHeight256Bytes; 1901 } 1902 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; 1903 1904 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) { 1905 if (ScanDirection != dm_vert) { 1906 *DPDE0BytesFrame = 64 1907 * (dml_ceil( 1908 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 1909 / (8 * 2097152), 1910 1) + 1); 1911 } else { 1912 *DPDE0BytesFrame = 64 1913 * (dml_ceil( 1914 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 1915 / (8 * 2097152), 1916 1) + 1); 1917 } 1918 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2); 1919 } else { 1920 *DPDE0BytesFrame = 0; 1921 ExtraDPDEBytesFrame = 0; 1922 } 1923 1924 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; 1925 1926 #ifdef __DML_VBA_DEBUG__ 1927 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); 1928 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); 1929 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); 1930 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); 1931 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1932 #endif 1933 1934 if (HostVMEnable == true) { 1935 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); 1936 } 1937 #ifdef __DML_VBA_DEBUG__ 1938 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1939 #endif 1940 1941 if (SurfaceTiling == dm_sw_linear) { 1942 PixelPTEReqHeightPTEs = 1; 1943 *PixelPTEReqHeight = 1; 1944 *PixelPTEReqWidth = 32768.0 / BytePerPixel; 1945 *PTERequestSize = 64; 1946 FractionOfPTEReturnDrop = 0; 1947 } else if (MacroTileSizeBytes == 4096) { 1948 PixelPTEReqHeightPTEs = 1; 1949 *PixelPTEReqHeight = MacroTileHeight; 1950 *PixelPTEReqWidth = 8 * *MacroTileWidth; 1951 *PTERequestSize = 64; 1952 if (ScanDirection != dm_vert) 1953 FractionOfPTEReturnDrop = 0; 1954 else 1955 FractionOfPTEReturnDrop = 7 / 8; 1956 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) { 1957 PixelPTEReqHeightPTEs = 16; 1958 *PixelPTEReqHeight = 16 * BlockHeight256Bytes; 1959 *PixelPTEReqWidth = 16 * BlockWidth256Bytes; 1960 *PTERequestSize = 128; 1961 FractionOfPTEReturnDrop = 0; 1962 } else { 1963 PixelPTEReqHeightPTEs = 1; 1964 *PixelPTEReqHeight = MacroTileHeight; 1965 *PixelPTEReqWidth = 8 * *MacroTileWidth; 1966 *PTERequestSize = 64; 1967 FractionOfPTEReturnDrop = 0; 1968 } 1969 1970 if (SurfaceTiling == dm_sw_linear) { 1971 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); 1972 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 1973 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 1974 } else if (ScanDirection != dm_vert) { 1975 *dpte_row_height = *PixelPTEReqHeight; 1976 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 1977 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 1978 } else { 1979 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth); 1980 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight; 1981 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; 1982 } 1983 1984 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) { 1985 *PTEBufferSizeNotExceeded = true; 1986 } else { 1987 *PTEBufferSizeNotExceeded = false; 1988 } 1989 1990 if (GPUVMEnable != true) { 1991 *PixelPTEBytesPerRow = 0; 1992 *PTEBufferSizeNotExceeded = true; 1993 } 1994 1995 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame); 1996 1997 if (HostVMEnable == true) { 1998 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); 1999 } 2000 2001 if (HostVMEnable == true) { 2002 *vm_group_bytes = 512; 2003 *dpte_group_bytes = 512; 2004 } else if (GPUVMEnable == true) { 2005 *vm_group_bytes = 2048; 2006 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) { 2007 *dpte_group_bytes = 512; 2008 } else { 2009 *dpte_group_bytes = 2048; 2010 } 2011 } else { 2012 *vm_group_bytes = 0; 2013 *dpte_group_bytes = 0; 2014 } 2015 return PDEAndMetaPTEBytesFrame; 2016 } 2017 2018 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib) 2019 { 2020 struct vba_vars_st *v = &mode_lib->vba; 2021 unsigned int j, k; 2022 double HostVMInefficiencyFactor = 1.0; 2023 bool NoChromaPlanes = true; 2024 int ReorderBytes; 2025 double VMDataOnlyReturnBW; 2026 double MaxTotalRDBandwidth = 0; 2027 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb]; 2028 2029 v->WritebackDISPCLK = 0.0; 2030 v->DISPCLKWithRamping = 0; 2031 v->DISPCLKWithoutRamping = 0; 2032 v->GlobalDPPCLK = 0.0; 2033 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */ 2034 { 2035 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 2036 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb], 2037 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn); 2038 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth; 2039 2040 if (v->HostVMEnable != true) { 2041 v->ReturnBW = dml_min( 2042 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2043 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 2044 } else { 2045 v->ReturnBW = dml_min( 2046 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2047 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 2048 } 2049 } 2050 /* End DAL custom code */ 2051 2052 // DISPCLK and DPPCLK Calculation 2053 // 2054 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2055 if (v->WritebackEnable[k]) { 2056 v->WritebackDISPCLK = dml_max( 2057 v->WritebackDISPCLK, 2058 dml314_CalculateWriteBackDISPCLK( 2059 v->WritebackPixelFormat[k], 2060 v->PixelClock[k], 2061 v->WritebackHRatio[k], 2062 v->WritebackVRatio[k], 2063 v->WritebackHTaps[k], 2064 v->WritebackVTaps[k], 2065 v->WritebackSourceWidth[k], 2066 v->WritebackDestinationWidth[k], 2067 v->HTotal[k], 2068 v->WritebackLineBufferSize)); 2069 } 2070 } 2071 2072 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2073 if (v->HRatio[k] > 1) { 2074 v->PSCL_THROUGHPUT_LUMA[k] = dml_min( 2075 v->MaxDCHUBToPSCLThroughput, 2076 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1)); 2077 } else { 2078 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2079 } 2080 2081 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k] 2082 * dml_max( 2083 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 2084 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0)); 2085 2086 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) { 2087 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k]; 2088 } 2089 2090 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12 2091 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) { 2092 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0; 2093 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma; 2094 } else { 2095 if (v->HRatioChroma[k] > 1) { 2096 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min( 2097 v->MaxDCHUBToPSCLThroughput, 2098 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); 2099 } else { 2100 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2101 } 2102 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k] 2103 * dml_max3( 2104 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), 2105 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 2106 1.0); 2107 2108 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) { 2109 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k]; 2110 } 2111 2112 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma); 2113 } 2114 } 2115 2116 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2117 if (v->BlendingAndTiming[k] != k) 2118 continue; 2119 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) { 2120 v->DISPCLKWithRamping = dml_max( 2121 v->DISPCLKWithRamping, 2122 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2123 * (1 + v->DISPCLKRampingMargin / 100)); 2124 v->DISPCLKWithoutRamping = dml_max( 2125 v->DISPCLKWithoutRamping, 2126 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2127 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2128 v->DISPCLKWithRamping = dml_max( 2129 v->DISPCLKWithRamping, 2130 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2131 * (1 + v->DISPCLKRampingMargin / 100)); 2132 v->DISPCLKWithoutRamping = dml_max( 2133 v->DISPCLKWithoutRamping, 2134 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2135 } else { 2136 v->DISPCLKWithRamping = dml_max( 2137 v->DISPCLKWithRamping, 2138 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100)); 2139 v->DISPCLKWithoutRamping = dml_max( 2140 v->DISPCLKWithoutRamping, 2141 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2142 } 2143 } 2144 2145 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK); 2146 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK); 2147 2148 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0); 2149 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed); 2150 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed); 2151 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( 2152 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz, 2153 v->DISPCLKDPPCLKVCOSpeed); 2154 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2155 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity; 2156 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2157 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity; 2158 } else { 2159 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity; 2160 } 2161 v->DISPCLK = v->DISPCLK_calculated; 2162 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated); 2163 2164 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2165 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2166 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]); 2167 } 2168 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed); 2169 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2170 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1); 2171 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]); 2172 } 2173 2174 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2175 v->DPPCLK[k] = v->DPPCLK_calculated[k]; 2176 } 2177 2178 // Urgent and B P-State/DRAM Clock Change Watermark 2179 DTRACE(" dcfclk_mhz = %f", v->DCFCLK); 2180 DTRACE(" return_bus_bw = %f", v->ReturnBW); 2181 2182 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2183 CalculateBytePerPixelAnd256BBlockSizes( 2184 v->SourcePixelFormat[k], 2185 v->SurfaceTiling[k], 2186 &v->BytePerPixelY[k], 2187 &v->BytePerPixelC[k], 2188 &v->BytePerPixelDETY[k], 2189 &v->BytePerPixelDETC[k], 2190 &v->BlockHeight256BytesY[k], 2191 &v->BlockHeight256BytesC[k], 2192 &v->BlockWidth256BytesY[k], 2193 &v->BlockWidth256BytesC[k]); 2194 } 2195 2196 CalculateSwathWidth( 2197 false, 2198 v->NumberOfActivePlanes, 2199 v->SourcePixelFormat, 2200 v->SourceScan, 2201 v->ViewportWidth, 2202 v->ViewportHeight, 2203 v->SurfaceWidthY, 2204 v->SurfaceWidthC, 2205 v->SurfaceHeightY, 2206 v->SurfaceHeightC, 2207 v->ODMCombineEnabled, 2208 v->BytePerPixelY, 2209 v->BytePerPixelC, 2210 v->BlockHeight256BytesY, 2211 v->BlockHeight256BytesC, 2212 v->BlockWidth256BytesY, 2213 v->BlockWidth256BytesC, 2214 v->BlendingAndTiming, 2215 v->HActive, 2216 v->HRatio, 2217 v->DPPPerPlane, 2218 v->SwathWidthSingleDPPY, 2219 v->SwathWidthSingleDPPC, 2220 v->SwathWidthY, 2221 v->SwathWidthC, 2222 v->dummyinteger3, 2223 v->dummyinteger4, 2224 v->swath_width_luma_ub, 2225 v->swath_width_chroma_ub); 2226 2227 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2228 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) 2229 * v->VRatio[k]; 2230 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) 2231 * v->VRatioChroma[k]; 2232 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]); 2233 } 2234 2235 // DCFCLK Deep Sleep 2236 CalculateDCFCLKDeepSleep( 2237 mode_lib, 2238 v->NumberOfActivePlanes, 2239 v->BytePerPixelY, 2240 v->BytePerPixelC, 2241 v->VRatio, 2242 v->VRatioChroma, 2243 v->SwathWidthY, 2244 v->SwathWidthC, 2245 v->DPPPerPlane, 2246 v->HRatio, 2247 v->HRatioChroma, 2248 v->PixelClock, 2249 v->PSCL_THROUGHPUT_LUMA, 2250 v->PSCL_THROUGHPUT_CHROMA, 2251 v->DPPCLK, 2252 v->ReadBandwidthPlaneLuma, 2253 v->ReadBandwidthPlaneChroma, 2254 v->ReturnBusWidth, 2255 &v->DCFCLKDeepSleep); 2256 2257 // DSCCLK 2258 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2259 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) { 2260 v->DSCCLK_calculated[k] = 0.0; 2261 } else { 2262 if (v->OutputFormat[k] == dm_420) 2263 v->DSCFormatFactor = 2; 2264 else if (v->OutputFormat[k] == dm_444) 2265 v->DSCFormatFactor = 1; 2266 else if (v->OutputFormat[k] == dm_n422) 2267 v->DSCFormatFactor = 2; 2268 else 2269 v->DSCFormatFactor = 1; 2270 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) 2271 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor 2272 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2273 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) 2274 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor 2275 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2276 else 2277 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor 2278 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2279 } 2280 } 2281 2282 // DSC Delay 2283 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2284 double BPP = v->OutputBpp[k]; 2285 2286 if (v->DSCEnabled[k] && BPP != 0) { 2287 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) { 2288 v->DSCDelay[k] = dscceComputeDelay( 2289 v->DSCInputBitPerComponent[k], 2290 BPP, 2291 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2292 v->NumberOfDSCSlices[k], 2293 v->OutputFormat[k], 2294 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 2295 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2296 v->DSCDelay[k] = 2 2297 * (dscceComputeDelay( 2298 v->DSCInputBitPerComponent[k], 2299 BPP, 2300 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2301 v->NumberOfDSCSlices[k] / 2.0, 2302 v->OutputFormat[k], 2303 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2304 } else { 2305 v->DSCDelay[k] = 4 2306 * (dscceComputeDelay( 2307 v->DSCInputBitPerComponent[k], 2308 BPP, 2309 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2310 v->NumberOfDSCSlices[k] / 4.0, 2311 v->OutputFormat[k], 2312 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2313 } 2314 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 2315 } else { 2316 v->DSCDelay[k] = 0; 2317 } 2318 } 2319 2320 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2321 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes 2322 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j]) 2323 v->DSCDelay[k] = v->DSCDelay[j]; 2324 2325 // Prefetch 2326 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2327 unsigned int PDEAndMetaPTEBytesFrameY; 2328 unsigned int PixelPTEBytesPerRowY; 2329 unsigned int MetaRowByteY; 2330 unsigned int MetaRowByteC; 2331 unsigned int PDEAndMetaPTEBytesFrameC; 2332 unsigned int PixelPTEBytesPerRowC; 2333 bool PTEBufferSizeNotExceededY; 2334 bool PTEBufferSizeNotExceededC; 2335 2336 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2337 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 2338 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) { 2339 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2; 2340 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; 2341 } else { 2342 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 2343 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 2344 } 2345 2346 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes( 2347 mode_lib, 2348 v->DCCEnable[k], 2349 v->BlockHeight256BytesC[k], 2350 v->BlockWidth256BytesC[k], 2351 v->SourcePixelFormat[k], 2352 v->SurfaceTiling[k], 2353 v->BytePerPixelC[k], 2354 v->SourceScan[k], 2355 v->SwathWidthC[k], 2356 v->ViewportHeightChroma[k], 2357 v->GPUVMEnable, 2358 v->HostVMEnable, 2359 v->HostVMMaxNonCachedPageTableLevels, 2360 v->GPUVMMinPageSize, 2361 v->HostVMMinPageSize, 2362 v->PTEBufferSizeInRequestsForChroma, 2363 v->PitchC[k], 2364 v->DCCMetaPitchC[k], 2365 &v->MacroTileWidthC[k], 2366 &MetaRowByteC, 2367 &PixelPTEBytesPerRowC, 2368 &PTEBufferSizeNotExceededC, 2369 &v->dpte_row_width_chroma_ub[k], 2370 &v->dpte_row_height_chroma[k], 2371 &v->meta_req_width_chroma[k], 2372 &v->meta_req_height_chroma[k], 2373 &v->meta_row_width_chroma[k], 2374 &v->meta_row_height_chroma[k], 2375 &v->dummyinteger1, 2376 &v->dummyinteger2, 2377 &v->PixelPTEReqWidthC[k], 2378 &v->PixelPTEReqHeightC[k], 2379 &v->PTERequestSizeC[k], 2380 &v->dpde0_bytes_per_frame_ub_c[k], 2381 &v->meta_pte_bytes_per_frame_ub_c[k]); 2382 2383 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( 2384 mode_lib, 2385 v->VRatioChroma[k], 2386 v->VTAPsChroma[k], 2387 v->Interlace[k], 2388 v->ProgressiveToInterlaceUnitInOPP, 2389 v->SwathHeightC[k], 2390 v->ViewportYStartC[k], 2391 &v->VInitPreFillC[k], 2392 &v->MaxNumSwathC[k]); 2393 } else { 2394 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 2395 v->PTEBufferSizeInRequestsForChroma = 0; 2396 PixelPTEBytesPerRowC = 0; 2397 PDEAndMetaPTEBytesFrameC = 0; 2398 MetaRowByteC = 0; 2399 v->MaxNumSwathC[k] = 0; 2400 v->PrefetchSourceLinesC[k] = 0; 2401 } 2402 2403 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( 2404 mode_lib, 2405 v->DCCEnable[k], 2406 v->BlockHeight256BytesY[k], 2407 v->BlockWidth256BytesY[k], 2408 v->SourcePixelFormat[k], 2409 v->SurfaceTiling[k], 2410 v->BytePerPixelY[k], 2411 v->SourceScan[k], 2412 v->SwathWidthY[k], 2413 v->ViewportHeight[k], 2414 v->GPUVMEnable, 2415 v->HostVMEnable, 2416 v->HostVMMaxNonCachedPageTableLevels, 2417 v->GPUVMMinPageSize, 2418 v->HostVMMinPageSize, 2419 v->PTEBufferSizeInRequestsForLuma, 2420 v->PitchY[k], 2421 v->DCCMetaPitchY[k], 2422 &v->MacroTileWidthY[k], 2423 &MetaRowByteY, 2424 &PixelPTEBytesPerRowY, 2425 &PTEBufferSizeNotExceededY, 2426 &v->dpte_row_width_luma_ub[k], 2427 &v->dpte_row_height[k], 2428 &v->meta_req_width[k], 2429 &v->meta_req_height[k], 2430 &v->meta_row_width[k], 2431 &v->meta_row_height[k], 2432 &v->vm_group_bytes[k], 2433 &v->dpte_group_bytes[k], 2434 &v->PixelPTEReqWidthY[k], 2435 &v->PixelPTEReqHeightY[k], 2436 &v->PTERequestSizeY[k], 2437 &v->dpde0_bytes_per_frame_ub_l[k], 2438 &v->meta_pte_bytes_per_frame_ub_l[k]); 2439 2440 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( 2441 mode_lib, 2442 v->VRatio[k], 2443 v->vtaps[k], 2444 v->Interlace[k], 2445 v->ProgressiveToInterlaceUnitInOPP, 2446 v->SwathHeightY[k], 2447 v->ViewportYStartY[k], 2448 &v->VInitPreFillY[k], 2449 &v->MaxNumSwathY[k]); 2450 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; 2451 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; 2452 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC; 2453 2454 CalculateRowBandwidth( 2455 v->GPUVMEnable, 2456 v->SourcePixelFormat[k], 2457 v->VRatio[k], 2458 v->VRatioChroma[k], 2459 v->DCCEnable[k], 2460 v->HTotal[k] / v->PixelClock[k], 2461 MetaRowByteY, 2462 MetaRowByteC, 2463 v->meta_row_height[k], 2464 v->meta_row_height_chroma[k], 2465 PixelPTEBytesPerRowY, 2466 PixelPTEBytesPerRowC, 2467 v->dpte_row_height[k], 2468 v->dpte_row_height_chroma[k], 2469 &v->meta_row_bw[k], 2470 &v->dpte_row_bw[k]); 2471 } 2472 2473 v->TotalDCCActiveDPP = 0; 2474 v->TotalActiveDPP = 0; 2475 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2476 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k]; 2477 if (v->DCCEnable[k]) 2478 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k]; 2479 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2480 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 2481 NoChromaPlanes = false; 2482 } 2483 2484 ReorderBytes = v->NumberOfChannels 2485 * dml_max3( 2486 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 2487 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 2488 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 2489 2490 VMDataOnlyReturnBW = dml_min( 2491 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn) 2492 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2493 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth 2494 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 2495 2496 #ifdef __DML_VBA_DEBUG__ 2497 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth); 2498 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK); 2499 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock); 2500 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn); 2501 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency); 2502 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed); 2503 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels); 2504 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth); 2505 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly); 2506 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); 2507 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW); 2508 #endif 2509 2510 if (v->GPUVMEnable && v->HostVMEnable) 2511 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW; 2512 2513 v->UrgentExtraLatency = CalculateExtraLatency( 2514 v->RoundTripPingLatencyCycles, 2515 ReorderBytes, 2516 v->DCFCLK, 2517 v->TotalActiveDPP, 2518 v->PixelChunkSizeInKByte, 2519 v->TotalDCCActiveDPP, 2520 v->MetaChunkSize, 2521 v->ReturnBW, 2522 v->GPUVMEnable, 2523 v->HostVMEnable, 2524 v->NumberOfActivePlanes, 2525 v->DPPPerPlane, 2526 v->dpte_group_bytes, 2527 HostVMInefficiencyFactor, 2528 v->HostVMMinPageSize, 2529 v->HostVMMaxNonCachedPageTableLevels); 2530 2531 v->TCalc = 24.0 / v->DCFCLKDeepSleep; 2532 2533 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2534 if (v->BlendingAndTiming[k] == k) { 2535 if (v->WritebackEnable[k] == true) { 2536 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency 2537 + CalculateWriteBackDelay( 2538 v->WritebackPixelFormat[k], 2539 v->WritebackHRatio[k], 2540 v->WritebackVRatio[k], 2541 v->WritebackVTaps[k], 2542 v->WritebackDestinationWidth[k], 2543 v->WritebackDestinationHeight[k], 2544 v->WritebackSourceHeight[k], 2545 v->HTotal[k]) / v->DISPCLK; 2546 } else 2547 v->WritebackDelay[v->VoltageLevel][k] = 0; 2548 for (j = 0; j < v->NumberOfActivePlanes; ++j) { 2549 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) { 2550 v->WritebackDelay[v->VoltageLevel][k] = dml_max( 2551 v->WritebackDelay[v->VoltageLevel][k], 2552 v->WritebackLatency 2553 + CalculateWriteBackDelay( 2554 v->WritebackPixelFormat[j], 2555 v->WritebackHRatio[j], 2556 v->WritebackVRatio[j], 2557 v->WritebackVTaps[j], 2558 v->WritebackDestinationWidth[j], 2559 v->WritebackDestinationHeight[j], 2560 v->WritebackSourceHeight[j], 2561 v->HTotal[k]) / v->DISPCLK); 2562 } 2563 } 2564 } 2565 } 2566 2567 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2568 for (j = 0; j < v->NumberOfActivePlanes; ++j) 2569 if (v->BlendingAndTiming[k] == j) 2570 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j]; 2571 2572 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2573 v->MaxVStartupLines[k] = 2574 CalculateMaxVStartup( 2575 v->VTotal[k], 2576 v->VActive[k], 2577 v->VBlankNom[k], 2578 v->HTotal[k], 2579 v->PixelClock[k], 2580 v->ProgressiveToInterlaceUnitInOPP, 2581 v->Interlace[k], 2582 v->ip.VBlankNomDefaultUS, 2583 v->WritebackDelay[v->VoltageLevel][k]); 2584 2585 #ifdef __DML_VBA_DEBUG__ 2586 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 2587 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel); 2588 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]); 2589 #endif 2590 } 2591 2592 v->MaximumMaxVStartupLines = 0; 2593 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2594 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]); 2595 2596 // VBA_DELTA 2597 // We don't really care to iterate between the various prefetch modes 2598 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode); 2599 2600 v->UrgentLatency = CalculateUrgentLatency( 2601 v->UrgentLatencyPixelDataOnly, 2602 v->UrgentLatencyPixelMixedWithVMData, 2603 v->UrgentLatencyVMDataOnly, 2604 v->DoUrgentLatencyAdjustment, 2605 v->UrgentLatencyAdjustmentFabricClockComponent, 2606 v->UrgentLatencyAdjustmentFabricClockReference, 2607 v->FabricClock); 2608 2609 v->FractionOfUrgentBandwidth = 0.0; 2610 v->FractionOfUrgentBandwidthImmediateFlip = 0.0; 2611 2612 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__; 2613 2614 do { 2615 double MaxTotalRDBandwidthNoUrgentBurst = 0.0; 2616 bool DestinationLineTimesForPrefetchLessThan2 = false; 2617 bool VRatioPrefetchMoreThan4 = false; 2618 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime); 2619 2620 MaxTotalRDBandwidth = 0; 2621 2622 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines); 2623 2624 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2625 Pipe myPipe; 2626 2627 myPipe.DPPCLK = v->DPPCLK[k]; 2628 myPipe.DISPCLK = v->DISPCLK; 2629 myPipe.PixelClock = v->PixelClock[k]; 2630 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep; 2631 myPipe.DPPPerPlane = v->DPPPerPlane[k]; 2632 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 2633 myPipe.VRatio = v->VRatio[k]; 2634 myPipe.VRatioChroma = v->VRatioChroma[k]; 2635 myPipe.SourceScan = v->SourceScan[k]; 2636 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; 2637 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; 2638 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; 2639 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; 2640 myPipe.InterlaceEnable = v->Interlace[k]; 2641 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 2642 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 2643 myPipe.HTotal = v->HTotal[k]; 2644 myPipe.DCCEnable = v->DCCEnable[k]; 2645 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1 2646 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1; 2647 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 2648 myPipe.BytePerPixelY = v->BytePerPixelY[k]; 2649 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 2650 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 2651 v->ErrorResult[k] = CalculatePrefetchSchedule( 2652 mode_lib, 2653 HostVMInefficiencyFactor, 2654 &myPipe, 2655 v->DSCDelay[k], 2656 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 2657 v->DPPCLKDelaySCL, 2658 v->DPPCLKDelaySCLLBOnly, 2659 v->DPPCLKDelayCNVCCursor, 2660 v->DISPCLKDelaySubtotal, 2661 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), 2662 v->OutputFormat[k], 2663 v->MaxInterDCNTileRepeaters, 2664 dml_min(v->VStartupLines, v->MaxVStartupLines[k]), 2665 v->MaxVStartupLines[k], 2666 v->GPUVMMaxPageTableLevels, 2667 v->GPUVMEnable, 2668 v->HostVMEnable, 2669 v->HostVMMaxNonCachedPageTableLevels, 2670 v->HostVMMinPageSize, 2671 v->DynamicMetadataEnable[k], 2672 v->DynamicMetadataVMEnabled, 2673 v->DynamicMetadataLinesBeforeActiveRequired[k], 2674 v->DynamicMetadataTransmittedBytes[k], 2675 v->UrgentLatency, 2676 v->UrgentExtraLatency, 2677 v->TCalc, 2678 v->PDEAndMetaPTEBytesFrame[k], 2679 v->MetaRowByte[k], 2680 v->PixelPTEBytesPerRow[k], 2681 v->PrefetchSourceLinesY[k], 2682 v->SwathWidthY[k], 2683 v->VInitPreFillY[k], 2684 v->MaxNumSwathY[k], 2685 v->PrefetchSourceLinesC[k], 2686 v->SwathWidthC[k], 2687 v->VInitPreFillC[k], 2688 v->MaxNumSwathC[k], 2689 v->swath_width_luma_ub[k], 2690 v->swath_width_chroma_ub[k], 2691 v->SwathHeightY[k], 2692 v->SwathHeightC[k], 2693 TWait, 2694 &v->DSTXAfterScaler[k], 2695 &v->DSTYAfterScaler[k], 2696 &v->DestinationLinesForPrefetch[k], 2697 &v->PrefetchBandwidth[k], 2698 &v->DestinationLinesToRequestVMInVBlank[k], 2699 &v->DestinationLinesToRequestRowInVBlank[k], 2700 &v->VRatioPrefetchY[k], 2701 &v->VRatioPrefetchC[k], 2702 &v->RequiredPrefetchPixDataBWLuma[k], 2703 &v->RequiredPrefetchPixDataBWChroma[k], 2704 &v->NotEnoughTimeForDynamicMetadata[k], 2705 &v->Tno_bw[k], 2706 &v->prefetch_vmrow_bw[k], 2707 &v->Tdmdl_vm[k], 2708 &v->Tdmdl[k], 2709 &v->TSetup[k], 2710 &v->VUpdateOffsetPix[k], 2711 &v->VUpdateWidthPix[k], 2712 &v->VReadyOffsetPix[k]); 2713 2714 #ifdef __DML_VBA_DEBUG__ 2715 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]); 2716 #endif 2717 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]); 2718 } 2719 2720 v->NoEnoughUrgentLatencyHiding = false; 2721 v->NoEnoughUrgentLatencyHidingPre = false; 2722 2723 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2724 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2725 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 2726 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2727 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k]; 2728 2729 CalculateUrgentBurstFactor( 2730 v->swath_width_luma_ub[k], 2731 v->swath_width_chroma_ub[k], 2732 v->SwathHeightY[k], 2733 v->SwathHeightC[k], 2734 v->HTotal[k] / v->PixelClock[k], 2735 v->UrgentLatency, 2736 v->CursorBufferSize, 2737 v->CursorWidth[k][0], 2738 v->CursorBPP[k][0], 2739 v->VRatio[k], 2740 v->VRatioChroma[k], 2741 v->BytePerPixelDETY[k], 2742 v->BytePerPixelDETC[k], 2743 v->DETBufferSizeY[k], 2744 v->DETBufferSizeC[k], 2745 &v->UrgBurstFactorCursor[k], 2746 &v->UrgBurstFactorLuma[k], 2747 &v->UrgBurstFactorChroma[k], 2748 &v->NoUrgentLatencyHiding[k]); 2749 2750 CalculateUrgentBurstFactor( 2751 v->swath_width_luma_ub[k], 2752 v->swath_width_chroma_ub[k], 2753 v->SwathHeightY[k], 2754 v->SwathHeightC[k], 2755 v->HTotal[k] / v->PixelClock[k], 2756 v->UrgentLatency, 2757 v->CursorBufferSize, 2758 v->CursorWidth[k][0], 2759 v->CursorBPP[k][0], 2760 v->VRatioPrefetchY[k], 2761 v->VRatioPrefetchC[k], 2762 v->BytePerPixelDETY[k], 2763 v->BytePerPixelDETC[k], 2764 v->DETBufferSizeY[k], 2765 v->DETBufferSizeC[k], 2766 &v->UrgBurstFactorCursorPre[k], 2767 &v->UrgBurstFactorLumaPre[k], 2768 &v->UrgBurstFactorChromaPre[k], 2769 &v->NoUrgentLatencyHidingPre[k]); 2770 2771 MaxTotalRDBandwidth = MaxTotalRDBandwidth 2772 + dml_max3( 2773 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2774 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2775 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2776 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k] 2777 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2778 v->DPPPerPlane[k] 2779 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2780 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2781 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2782 2783 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst 2784 + dml_max3( 2785 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2786 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k] 2787 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2788 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) 2789 + v->cursor_bw_pre[k]); 2790 2791 #ifdef __DML_VBA_DEBUG__ 2792 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]); 2793 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]); 2794 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]); 2795 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]); 2796 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]); 2797 2798 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]); 2799 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]); 2800 2801 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]); 2802 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]); 2803 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]); 2804 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]); 2805 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]); 2806 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]); 2807 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]); 2808 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]); 2809 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]); 2810 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst); 2811 #endif 2812 2813 if (v->DestinationLinesForPrefetch[k] < 2) 2814 DestinationLineTimesForPrefetchLessThan2 = true; 2815 2816 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4) 2817 VRatioPrefetchMoreThan4 = true; 2818 2819 if (v->NoUrgentLatencyHiding[k] == true) 2820 v->NoEnoughUrgentLatencyHiding = true; 2821 2822 if (v->NoUrgentLatencyHidingPre[k] == true) 2823 v->NoEnoughUrgentLatencyHidingPre = true; 2824 } 2825 2826 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW; 2827 2828 #ifdef __DML_VBA_DEBUG__ 2829 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, MaxTotalRDBandwidthNoUrgentBurst); 2830 dml_print("DML::%s: ReturnBW=%f\n", __func__, v->ReturnBW); 2831 dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n", __func__, v->FractionOfUrgentBandwidth); 2832 #endif 2833 2834 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0 2835 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2) 2836 v->PrefetchModeSupported = true; 2837 else { 2838 v->PrefetchModeSupported = false; 2839 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__); 2840 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW); 2841 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not"); 2842 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not"); 2843 } 2844 2845 // PREVIOUS_ERROR 2846 // This error result check was done after the PrefetchModeSupported. So we will 2847 // still try to calculate flip schedule even prefetch mode not supported 2848 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2849 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) { 2850 v->PrefetchModeSupported = false; 2851 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__); 2852 } 2853 } 2854 2855 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) { 2856 v->BandwidthAvailableForImmediateFlip = v->ReturnBW; 2857 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2858 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 2859 - dml_max( 2860 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2861 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2862 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 2863 v->DPPPerPlane[k] 2864 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2865 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2866 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2867 } 2868 2869 v->TotImmediateFlipBytes = 0; 2870 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2871 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 2872 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]); 2873 } 2874 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2875 CalculateFlipSchedule( 2876 mode_lib, 2877 k, 2878 HostVMInefficiencyFactor, 2879 v->UrgentExtraLatency, 2880 v->UrgentLatency, 2881 v->PDEAndMetaPTEBytesFrame[k], 2882 v->MetaRowByte[k], 2883 v->PixelPTEBytesPerRow[k]); 2884 } 2885 2886 v->total_dcn_read_bw_with_flip = 0.0; 2887 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0; 2888 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2889 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip 2890 + dml_max3( 2891 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2892 v->DPPPerPlane[k] * v->final_flip_bw[k] 2893 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k] 2894 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k] 2895 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 2896 v->DPPPerPlane[k] 2897 * (v->final_flip_bw[k] 2898 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2899 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2900 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2901 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst 2902 + dml_max3( 2903 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2904 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] 2905 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k], 2906 v->DPPPerPlane[k] 2907 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] 2908 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]); 2909 } 2910 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW; 2911 2912 v->ImmediateFlipSupported = true; 2913 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) { 2914 #ifdef __DML_VBA_DEBUG__ 2915 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip); 2916 #endif 2917 v->ImmediateFlipSupported = false; 2918 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth; 2919 } 2920 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2921 if (v->ImmediateFlipSupportedForPipe[k] == false) { 2922 #ifdef __DML_VBA_DEBUG__ 2923 dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k); 2924 #endif 2925 v->ImmediateFlipSupported = false; 2926 } 2927 } 2928 } else { 2929 v->ImmediateFlipSupported = false; 2930 } 2931 2932 v->PrefetchAndImmediateFlipSupported = 2933 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable 2934 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || 2935 v->ImmediateFlipSupported)) ? true : false; 2936 #ifdef __DML_VBA_DEBUG__ 2937 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported); 2938 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required); 2939 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported); 2940 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport); 2941 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable); 2942 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported); 2943 #endif 2944 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines); 2945 2946 v->VStartupLines = v->VStartupLines + 1; 2947 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines); 2948 ASSERT(v->PrefetchAndImmediateFlipSupported); 2949 2950 // Unbounded Request Enabled 2951 CalculateUnboundedRequestAndCompressedBufferSize( 2952 v->DETBufferSizeInKByte[0], 2953 v->ConfigReturnBufferSizeInKByte, 2954 v->UseUnboundedRequesting, 2955 v->TotalActiveDPP, 2956 NoChromaPlanes, 2957 v->MaxNumDPP, 2958 v->CompressedBufferSegmentSizeInkByte, 2959 v->Output, 2960 &v->UnboundedRequestEnabled, 2961 &v->CompressedBufferSizeInkByte); 2962 2963 //Watermarks and NB P-State/DRAM Clock Change Support 2964 { 2965 enum clock_change_support DRAMClockChangeSupport; // dummy 2966 2967 CalculateWatermarksAndDRAMSpeedChangeSupport( 2968 mode_lib, 2969 PrefetchMode, 2970 v->DCFCLK, 2971 v->ReturnBW, 2972 v->UrgentLatency, 2973 v->UrgentExtraLatency, 2974 v->SOCCLK, 2975 v->DCFCLKDeepSleep, 2976 v->DETBufferSizeY, 2977 v->DETBufferSizeC, 2978 v->SwathHeightY, 2979 v->SwathHeightC, 2980 v->SwathWidthY, 2981 v->SwathWidthC, 2982 v->DPPPerPlane, 2983 v->BytePerPixelDETY, 2984 v->BytePerPixelDETC, 2985 v->UnboundedRequestEnabled, 2986 v->CompressedBufferSizeInkByte, 2987 &DRAMClockChangeSupport, 2988 &v->StutterExitWatermark, 2989 &v->StutterEnterPlusExitWatermark, 2990 &v->Z8StutterExitWatermark, 2991 &v->Z8StutterEnterPlusExitWatermark); 2992 2993 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2994 if (v->WritebackEnable[k] == true) { 2995 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max( 2996 0, 2997 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark); 2998 } else { 2999 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0; 3000 } 3001 } 3002 } 3003 3004 //Display Pipeline Delivery Time in Prefetch, Groups 3005 CalculatePixelDeliveryTimes( 3006 v->NumberOfActivePlanes, 3007 v->VRatio, 3008 v->VRatioChroma, 3009 v->VRatioPrefetchY, 3010 v->VRatioPrefetchC, 3011 v->swath_width_luma_ub, 3012 v->swath_width_chroma_ub, 3013 v->DPPPerPlane, 3014 v->HRatio, 3015 v->HRatioChroma, 3016 v->PixelClock, 3017 v->PSCL_THROUGHPUT_LUMA, 3018 v->PSCL_THROUGHPUT_CHROMA, 3019 v->DPPCLK, 3020 v->BytePerPixelC, 3021 v->SourceScan, 3022 v->NumberOfCursors, 3023 v->CursorWidth, 3024 v->CursorBPP, 3025 v->BlockWidth256BytesY, 3026 v->BlockHeight256BytesY, 3027 v->BlockWidth256BytesC, 3028 v->BlockHeight256BytesC, 3029 v->DisplayPipeLineDeliveryTimeLuma, 3030 v->DisplayPipeLineDeliveryTimeChroma, 3031 v->DisplayPipeLineDeliveryTimeLumaPrefetch, 3032 v->DisplayPipeLineDeliveryTimeChromaPrefetch, 3033 v->DisplayPipeRequestDeliveryTimeLuma, 3034 v->DisplayPipeRequestDeliveryTimeChroma, 3035 v->DisplayPipeRequestDeliveryTimeLumaPrefetch, 3036 v->DisplayPipeRequestDeliveryTimeChromaPrefetch, 3037 v->CursorRequestDeliveryTime, 3038 v->CursorRequestDeliveryTimePrefetch); 3039 3040 CalculateMetaAndPTETimes( 3041 v->NumberOfActivePlanes, 3042 v->GPUVMEnable, 3043 v->MetaChunkSize, 3044 v->MinMetaChunkSizeBytes, 3045 v->HTotal, 3046 v->VRatio, 3047 v->VRatioChroma, 3048 v->DestinationLinesToRequestRowInVBlank, 3049 v->DestinationLinesToRequestRowInImmediateFlip, 3050 v->DCCEnable, 3051 v->PixelClock, 3052 v->BytePerPixelY, 3053 v->BytePerPixelC, 3054 v->SourceScan, 3055 v->dpte_row_height, 3056 v->dpte_row_height_chroma, 3057 v->meta_row_width, 3058 v->meta_row_width_chroma, 3059 v->meta_row_height, 3060 v->meta_row_height_chroma, 3061 v->meta_req_width, 3062 v->meta_req_width_chroma, 3063 v->meta_req_height, 3064 v->meta_req_height_chroma, 3065 v->dpte_group_bytes, 3066 v->PTERequestSizeY, 3067 v->PTERequestSizeC, 3068 v->PixelPTEReqWidthY, 3069 v->PixelPTEReqHeightY, 3070 v->PixelPTEReqWidthC, 3071 v->PixelPTEReqHeightC, 3072 v->dpte_row_width_luma_ub, 3073 v->dpte_row_width_chroma_ub, 3074 v->DST_Y_PER_PTE_ROW_NOM_L, 3075 v->DST_Y_PER_PTE_ROW_NOM_C, 3076 v->DST_Y_PER_META_ROW_NOM_L, 3077 v->DST_Y_PER_META_ROW_NOM_C, 3078 v->TimePerMetaChunkNominal, 3079 v->TimePerChromaMetaChunkNominal, 3080 v->TimePerMetaChunkVBlank, 3081 v->TimePerChromaMetaChunkVBlank, 3082 v->TimePerMetaChunkFlip, 3083 v->TimePerChromaMetaChunkFlip, 3084 v->time_per_pte_group_nom_luma, 3085 v->time_per_pte_group_vblank_luma, 3086 v->time_per_pte_group_flip_luma, 3087 v->time_per_pte_group_nom_chroma, 3088 v->time_per_pte_group_vblank_chroma, 3089 v->time_per_pte_group_flip_chroma); 3090 3091 CalculateVMGroupAndRequestTimes( 3092 v->NumberOfActivePlanes, 3093 v->GPUVMEnable, 3094 v->GPUVMMaxPageTableLevels, 3095 v->HTotal, 3096 v->BytePerPixelC, 3097 v->DestinationLinesToRequestVMInVBlank, 3098 v->DestinationLinesToRequestVMInImmediateFlip, 3099 v->DCCEnable, 3100 v->PixelClock, 3101 v->dpte_row_width_luma_ub, 3102 v->dpte_row_width_chroma_ub, 3103 v->vm_group_bytes, 3104 v->dpde0_bytes_per_frame_ub_l, 3105 v->dpde0_bytes_per_frame_ub_c, 3106 v->meta_pte_bytes_per_frame_ub_l, 3107 v->meta_pte_bytes_per_frame_ub_c, 3108 v->TimePerVMGroupVBlank, 3109 v->TimePerVMGroupFlip, 3110 v->TimePerVMRequestVBlank, 3111 v->TimePerVMRequestFlip); 3112 3113 // Min TTUVBlank 3114 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3115 if (PrefetchMode == 0) { 3116 v->AllowDRAMClockChangeDuringVBlank[k] = true; 3117 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3118 v->MinTTUVBlank[k] = dml_max( 3119 v->DRAMClockChangeWatermark, 3120 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark)); 3121 } else if (PrefetchMode == 1) { 3122 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3123 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3124 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark); 3125 } else { 3126 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3127 v->AllowDRAMSelfRefreshDuringVBlank[k] = false; 3128 v->MinTTUVBlank[k] = v->UrgentWatermark; 3129 } 3130 if (!v->DynamicMetadataEnable[k]) 3131 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k]; 3132 } 3133 3134 // DCC Configuration 3135 v->ActiveDPPs = 0; 3136 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3137 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown, 3138 v->SourcePixelFormat[k], 3139 v->SurfaceWidthY[k], 3140 v->SurfaceWidthC[k], 3141 v->SurfaceHeightY[k], 3142 v->SurfaceHeightC[k], 3143 v->DETBufferSizeInKByte[0] * 1024, 3144 v->BlockHeight256BytesY[k], 3145 v->BlockHeight256BytesC[k], 3146 v->SurfaceTiling[k], 3147 v->BytePerPixelY[k], 3148 v->BytePerPixelC[k], 3149 v->BytePerPixelDETY[k], 3150 v->BytePerPixelDETC[k], 3151 v->SourceScan[k], 3152 &v->DCCYMaxUncompressedBlock[k], 3153 &v->DCCCMaxUncompressedBlock[k], 3154 &v->DCCYMaxCompressedBlock[k], 3155 &v->DCCCMaxCompressedBlock[k], 3156 &v->DCCYIndependentBlock[k], 3157 &v->DCCCIndependentBlock[k]); 3158 } 3159 3160 // VStartup Adjustment 3161 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3162 bool isInterlaceTiming; 3163 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k]; 3164 #ifdef __DML_VBA_DEBUG__ 3165 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]); 3166 #endif 3167 3168 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin; 3169 3170 #ifdef __DML_VBA_DEBUG__ 3171 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin); 3172 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 3173 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3174 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]); 3175 #endif 3176 3177 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin; 3178 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) { 3179 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin; 3180 } 3181 3182 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP); 3183 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]); 3184 if (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) { 3185 v->MIN_DST_Y_NEXT_START[k] = dml_floor((v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]) / 2.0, 1.0); 3186 } else { 3187 v->MIN_DST_Y_NEXT_START[k] = v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]; 3188 } 3189 v->MIN_DST_Y_NEXT_START[k] += dml_floor(4.0 * v->TSetup[k] / ((double)v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0; 3190 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k]) 3191 <= (isInterlaceTiming ? 3192 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) : 3193 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) { 3194 v->VREADY_AT_OR_AFTER_VSYNC[k] = true; 3195 } else { 3196 v->VREADY_AT_OR_AFTER_VSYNC[k] = false; 3197 } 3198 #ifdef __DML_VBA_DEBUG__ 3199 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]); 3200 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]); 3201 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]); 3202 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]); 3203 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]); 3204 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]); 3205 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]); 3206 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]); 3207 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3208 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]); 3209 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]); 3210 #endif 3211 } 3212 3213 { 3214 //Maximum Bandwidth Used 3215 double TotalWRBandwidth = 0; 3216 double MaxPerPlaneVActiveWRBandwidth = 0; 3217 double WRBandwidth = 0; 3218 3219 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3220 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) { 3221 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3222 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4; 3223 } else if (v->WritebackEnable[k] == true) { 3224 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3225 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8; 3226 } 3227 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth; 3228 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth); 3229 } 3230 3231 v->TotalDataReadBandwidth = 0; 3232 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3233 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]; 3234 } 3235 } 3236 // Stutter Efficiency 3237 CalculateStutterEfficiency( 3238 mode_lib, 3239 v->CompressedBufferSizeInkByte, 3240 v->UnboundedRequestEnabled, 3241 v->ConfigReturnBufferSizeInKByte, 3242 v->MetaFIFOSizeInKEntries, 3243 v->ZeroSizeBufferEntries, 3244 v->NumberOfActivePlanes, 3245 v->ROBBufferSizeInKByte, 3246 v->TotalDataReadBandwidth, 3247 v->DCFCLK, 3248 v->ReturnBW, 3249 v->COMPBUF_RESERVED_SPACE_64B, 3250 v->COMPBUF_RESERVED_SPACE_ZS, 3251 v->SRExitTime, 3252 v->SRExitZ8Time, 3253 v->SynchronizedVBlank, 3254 v->StutterEnterPlusExitWatermark, 3255 v->Z8StutterEnterPlusExitWatermark, 3256 v->ProgressiveToInterlaceUnitInOPP, 3257 v->Interlace, 3258 v->MinTTUVBlank, 3259 v->DPPPerPlane, 3260 v->DETBufferSizeY, 3261 v->BytePerPixelY, 3262 v->BytePerPixelDETY, 3263 v->SwathWidthY, 3264 v->SwathHeightY, 3265 v->SwathHeightC, 3266 v->DCCRateLuma, 3267 v->DCCRateChroma, 3268 v->DCCFractionOfZeroSizeRequestsLuma, 3269 v->DCCFractionOfZeroSizeRequestsChroma, 3270 v->HTotal, 3271 v->VTotal, 3272 v->PixelClock, 3273 v->VRatio, 3274 v->SourceScan, 3275 v->BlockHeight256BytesY, 3276 v->BlockWidth256BytesY, 3277 v->BlockHeight256BytesC, 3278 v->BlockWidth256BytesC, 3279 v->DCCYMaxUncompressedBlock, 3280 v->DCCCMaxUncompressedBlock, 3281 v->VActive, 3282 v->DCCEnable, 3283 v->WritebackEnable, 3284 v->ReadBandwidthPlaneLuma, 3285 v->ReadBandwidthPlaneChroma, 3286 v->meta_row_bw, 3287 v->dpte_row_bw, 3288 &v->StutterEfficiencyNotIncludingVBlank, 3289 &v->StutterEfficiency, 3290 &v->NumberOfStutterBurstsPerFrame, 3291 &v->Z8StutterEfficiencyNotIncludingVBlank, 3292 &v->Z8StutterEfficiency, 3293 &v->Z8NumberOfStutterBurstsPerFrame, 3294 &v->StutterPeriod); 3295 } 3296 3297 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) 3298 { 3299 struct vba_vars_st *v = &mode_lib->vba; 3300 // Display Pipe Configuration 3301 double BytePerPixDETY[DC__NUM_DPP__MAX]; 3302 double BytePerPixDETC[DC__NUM_DPP__MAX]; 3303 int BytePerPixY[DC__NUM_DPP__MAX]; 3304 int BytePerPixC[DC__NUM_DPP__MAX]; 3305 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX]; 3306 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX]; 3307 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX]; 3308 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX]; 3309 double dummy1[DC__NUM_DPP__MAX]; 3310 double dummy2[DC__NUM_DPP__MAX]; 3311 double dummy3[DC__NUM_DPP__MAX]; 3312 double dummy4[DC__NUM_DPP__MAX]; 3313 int dummy5[DC__NUM_DPP__MAX]; 3314 int dummy6[DC__NUM_DPP__MAX]; 3315 bool dummy7[DC__NUM_DPP__MAX]; 3316 bool dummysinglestring; 3317 3318 unsigned int k; 3319 3320 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3321 3322 CalculateBytePerPixelAnd256BBlockSizes( 3323 v->SourcePixelFormat[k], 3324 v->SurfaceTiling[k], 3325 &BytePerPixY[k], 3326 &BytePerPixC[k], 3327 &BytePerPixDETY[k], 3328 &BytePerPixDETC[k], 3329 &Read256BytesBlockHeightY[k], 3330 &Read256BytesBlockHeightC[k], 3331 &Read256BytesBlockWidthY[k], 3332 &Read256BytesBlockWidthC[k]); 3333 } 3334 3335 CalculateSwathAndDETConfiguration( 3336 false, 3337 v->NumberOfActivePlanes, 3338 v->DETBufferSizeInKByte[0], 3339 dummy1, 3340 dummy2, 3341 v->SourceScan, 3342 v->SourcePixelFormat, 3343 v->SurfaceTiling, 3344 v->ViewportWidth, 3345 v->ViewportHeight, 3346 v->SurfaceWidthY, 3347 v->SurfaceWidthC, 3348 v->SurfaceHeightY, 3349 v->SurfaceHeightC, 3350 Read256BytesBlockHeightY, 3351 Read256BytesBlockHeightC, 3352 Read256BytesBlockWidthY, 3353 Read256BytesBlockWidthC, 3354 v->ODMCombineEnabled, 3355 v->BlendingAndTiming, 3356 BytePerPixY, 3357 BytePerPixC, 3358 BytePerPixDETY, 3359 BytePerPixDETC, 3360 v->HActive, 3361 v->HRatio, 3362 v->HRatioChroma, 3363 v->DPPPerPlane, 3364 dummy5, 3365 dummy6, 3366 dummy3, 3367 dummy4, 3368 v->SwathHeightY, 3369 v->SwathHeightC, 3370 v->DETBufferSizeY, 3371 v->DETBufferSizeC, 3372 dummy7, 3373 &dummysinglestring); 3374 } 3375 3376 static bool CalculateBytePerPixelAnd256BBlockSizes( 3377 enum source_format_class SourcePixelFormat, 3378 enum dm_swizzle_mode SurfaceTiling, 3379 unsigned int *BytePerPixelY, 3380 unsigned int *BytePerPixelC, 3381 double *BytePerPixelDETY, 3382 double *BytePerPixelDETC, 3383 unsigned int *BlockHeight256BytesY, 3384 unsigned int *BlockHeight256BytesC, 3385 unsigned int *BlockWidth256BytesY, 3386 unsigned int *BlockWidth256BytesC) 3387 { 3388 if (SourcePixelFormat == dm_444_64) { 3389 *BytePerPixelDETY = 8; 3390 *BytePerPixelDETC = 0; 3391 *BytePerPixelY = 8; 3392 *BytePerPixelC = 0; 3393 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) { 3394 *BytePerPixelDETY = 4; 3395 *BytePerPixelDETC = 0; 3396 *BytePerPixelY = 4; 3397 *BytePerPixelC = 0; 3398 } else if (SourcePixelFormat == dm_444_16) { 3399 *BytePerPixelDETY = 2; 3400 *BytePerPixelDETC = 0; 3401 *BytePerPixelY = 2; 3402 *BytePerPixelC = 0; 3403 } else if (SourcePixelFormat == dm_444_8) { 3404 *BytePerPixelDETY = 1; 3405 *BytePerPixelDETC = 0; 3406 *BytePerPixelY = 1; 3407 *BytePerPixelC = 0; 3408 } else if (SourcePixelFormat == dm_rgbe_alpha) { 3409 *BytePerPixelDETY = 4; 3410 *BytePerPixelDETC = 1; 3411 *BytePerPixelY = 4; 3412 *BytePerPixelC = 1; 3413 } else if (SourcePixelFormat == dm_420_8) { 3414 *BytePerPixelDETY = 1; 3415 *BytePerPixelDETC = 2; 3416 *BytePerPixelY = 1; 3417 *BytePerPixelC = 2; 3418 } else if (SourcePixelFormat == dm_420_12) { 3419 *BytePerPixelDETY = 2; 3420 *BytePerPixelDETC = 4; 3421 *BytePerPixelY = 2; 3422 *BytePerPixelC = 4; 3423 } else { 3424 *BytePerPixelDETY = 4.0 / 3; 3425 *BytePerPixelDETC = 8.0 / 3; 3426 *BytePerPixelY = 2; 3427 *BytePerPixelC = 4; 3428 } 3429 3430 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16 3431 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) { 3432 if (SurfaceTiling == dm_sw_linear) { 3433 *BlockHeight256BytesY = 1; 3434 } else if (SourcePixelFormat == dm_444_64) { 3435 *BlockHeight256BytesY = 4; 3436 } else if (SourcePixelFormat == dm_444_8) { 3437 *BlockHeight256BytesY = 16; 3438 } else { 3439 *BlockHeight256BytesY = 8; 3440 } 3441 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 3442 *BlockHeight256BytesC = 0; 3443 *BlockWidth256BytesC = 0; 3444 } else { 3445 if (SurfaceTiling == dm_sw_linear) { 3446 *BlockHeight256BytesY = 1; 3447 *BlockHeight256BytesC = 1; 3448 } else if (SourcePixelFormat == dm_rgbe_alpha) { 3449 *BlockHeight256BytesY = 8; 3450 *BlockHeight256BytesC = 16; 3451 } else if (SourcePixelFormat == dm_420_8) { 3452 *BlockHeight256BytesY = 16; 3453 *BlockHeight256BytesC = 8; 3454 } else { 3455 *BlockHeight256BytesY = 8; 3456 *BlockHeight256BytesC = 8; 3457 } 3458 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 3459 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; 3460 } 3461 return true; 3462 } 3463 3464 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime) 3465 { 3466 if (PrefetchMode == 0) { 3467 return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency)); 3468 } else if (PrefetchMode == 1) { 3469 return dml_max(SREnterPlusExitTime, UrgentLatency); 3470 } else { 3471 return UrgentLatency; 3472 } 3473 } 3474 3475 double dml314_CalculateWriteBackDISPCLK( 3476 enum source_format_class WritebackPixelFormat, 3477 double PixelClock, 3478 double WritebackHRatio, 3479 double WritebackVRatio, 3480 unsigned int WritebackHTaps, 3481 unsigned int WritebackVTaps, 3482 long WritebackSourceWidth, 3483 long WritebackDestinationWidth, 3484 unsigned int HTotal, 3485 unsigned int WritebackLineBufferSize) 3486 { 3487 double DISPCLK_H, DISPCLK_V, DISPCLK_HB; 3488 3489 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; 3490 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; 3491 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; 3492 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB); 3493 } 3494 3495 static double CalculateWriteBackDelay( 3496 enum source_format_class WritebackPixelFormat, 3497 double WritebackHRatio, 3498 double WritebackVRatio, 3499 unsigned int WritebackVTaps, 3500 int WritebackDestinationWidth, 3501 int WritebackDestinationHeight, 3502 int WritebackSourceHeight, 3503 unsigned int HTotal) 3504 { 3505 double CalculateWriteBackDelay; 3506 double Line_length; 3507 double Output_lines_last_notclamped; 3508 double WritebackVInit; 3509 3510 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; 3511 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps); 3512 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1); 3513 if (Output_lines_last_notclamped < 0) { 3514 CalculateWriteBackDelay = 0; 3515 } else { 3516 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; 3517 } 3518 return CalculateWriteBackDelay; 3519 } 3520 3521 static void CalculateVupdateAndDynamicMetadataParameters( 3522 int MaxInterDCNTileRepeaters, 3523 double DPPCLK, 3524 double DISPCLK, 3525 double DCFClkDeepSleep, 3526 double PixelClock, 3527 int HTotal, 3528 int VBlank, 3529 int DynamicMetadataTransmittedBytes, 3530 int DynamicMetadataLinesBeforeActiveRequired, 3531 int InterlaceEnable, 3532 bool ProgressiveToInterlaceUnitInOPP, 3533 double *TSetup, 3534 double *Tdmbf, 3535 double *Tdmec, 3536 double *Tdmsks, 3537 int *VUpdateOffsetPix, 3538 double *VUpdateWidthPix, 3539 double *VReadyOffsetPix) 3540 { 3541 double TotalRepeaterDelayTime; 3542 3543 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK); 3544 *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0); 3545 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0); 3546 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1); 3547 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; 3548 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK; 3549 *Tdmec = HTotal / PixelClock; 3550 if (DynamicMetadataLinesBeforeActiveRequired == 0) { 3551 *Tdmsks = VBlank * HTotal / PixelClock / 2.0; 3552 } else { 3553 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; 3554 } 3555 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) { 3556 *Tdmsks = *Tdmsks / 2; 3557 } 3558 #ifdef __DML_VBA_DEBUG__ 3559 dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix); 3560 dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix); 3561 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix); 3562 #endif 3563 } 3564 3565 static void CalculateRowBandwidth( 3566 bool GPUVMEnable, 3567 enum source_format_class SourcePixelFormat, 3568 double VRatio, 3569 double VRatioChroma, 3570 bool DCCEnable, 3571 double LineTime, 3572 unsigned int MetaRowByteLuma, 3573 unsigned int MetaRowByteChroma, 3574 unsigned int meta_row_height_luma, 3575 unsigned int meta_row_height_chroma, 3576 unsigned int PixelPTEBytesPerRowLuma, 3577 unsigned int PixelPTEBytesPerRowChroma, 3578 unsigned int dpte_row_height_luma, 3579 unsigned int dpte_row_height_chroma, 3580 double *meta_row_bw, 3581 double *dpte_row_bw) 3582 { 3583 if (DCCEnable != true) { 3584 *meta_row_bw = 0; 3585 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3586 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime); 3587 } else { 3588 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 3589 } 3590 3591 if (GPUVMEnable != true) { 3592 *dpte_row_bw = 0; 3593 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3594 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) 3595 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); 3596 } else { 3597 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 3598 } 3599 } 3600 3601 static void CalculateFlipSchedule( 3602 struct display_mode_lib *mode_lib, 3603 unsigned int k, 3604 double HostVMInefficiencyFactor, 3605 double UrgentExtraLatency, 3606 double UrgentLatency, 3607 double PDEAndMetaPTEBytesPerFrame, 3608 double MetaRowBytes, 3609 double DPTEBytesPerRow) 3610 { 3611 struct vba_vars_st *v = &mode_lib->vba; 3612 double min_row_time = 0.0; 3613 unsigned int HostVMDynamicLevelsTrips; 3614 double TimeForFetchingMetaPTEImmediateFlip; 3615 double TimeForFetchingRowInVBlankImmediateFlip; 3616 double ImmediateFlipBW; 3617 double LineTime = v->HTotal[k] / v->PixelClock[k]; 3618 3619 if (v->GPUVMEnable == true && v->HostVMEnable == true) { 3620 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; 3621 } else { 3622 HostVMDynamicLevelsTrips = 0; 3623 } 3624 3625 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) { 3626 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes; 3627 } 3628 3629 if (v->GPUVMEnable == true) { 3630 TimeForFetchingMetaPTEImmediateFlip = dml_max3( 3631 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, 3632 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), 3633 LineTime / 4.0); 3634 } else { 3635 TimeForFetchingMetaPTEImmediateFlip = 0; 3636 } 3637 3638 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; 3639 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { 3640 TimeForFetchingRowInVBlankImmediateFlip = dml_max3( 3641 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, 3642 UrgentLatency * (HostVMDynamicLevelsTrips + 1), 3643 LineTime / 4); 3644 } else { 3645 TimeForFetchingRowInVBlankImmediateFlip = 0; 3646 } 3647 3648 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; 3649 3650 if (v->GPUVMEnable == true) { 3651 v->final_flip_bw[k] = dml_max( 3652 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime), 3653 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime)); 3654 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { 3655 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime); 3656 } else { 3657 v->final_flip_bw[k] = 0; 3658 } 3659 3660 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 3661 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { 3662 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); 3663 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { 3664 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); 3665 } else { 3666 min_row_time = dml_min4( 3667 v->dpte_row_height[k] * LineTime / v->VRatio[k], 3668 v->meta_row_height[k] * LineTime / v->VRatio[k], 3669 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k], 3670 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); 3671 } 3672 } else { 3673 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { 3674 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k]; 3675 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { 3676 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k]; 3677 } else { 3678 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]); 3679 } 3680 } 3681 3682 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16 3683 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { 3684 v->ImmediateFlipSupportedForPipe[k] = false; 3685 } else { 3686 v->ImmediateFlipSupportedForPipe[k] = true; 3687 } 3688 3689 #ifdef __DML_VBA_DEBUG__ 3690 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]); 3691 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]); 3692 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); 3693 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip); 3694 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); 3695 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]); 3696 #endif 3697 3698 } 3699 3700 static double TruncToValidBPP( 3701 double LinkBitRate, 3702 int Lanes, 3703 int HTotal, 3704 int HActive, 3705 double PixelClock, 3706 double DesiredBPP, 3707 bool DSCEnable, 3708 enum output_encoder_class Output, 3709 enum output_format_class Format, 3710 unsigned int DSCInputBitPerComponent, 3711 int DSCSlices, 3712 int AudioRate, 3713 int AudioLayout, 3714 enum odm_combine_mode ODMCombine) 3715 { 3716 double MaxLinkBPP; 3717 int MinDSCBPP; 3718 double MaxDSCBPP; 3719 int NonDSCBPP0; 3720 int NonDSCBPP1; 3721 int NonDSCBPP2; 3722 3723 if (Format == dm_420) { 3724 NonDSCBPP0 = 12; 3725 NonDSCBPP1 = 15; 3726 NonDSCBPP2 = 18; 3727 MinDSCBPP = 6; 3728 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; 3729 } else if (Format == dm_444) { 3730 NonDSCBPP0 = 24; 3731 NonDSCBPP1 = 30; 3732 NonDSCBPP2 = 36; 3733 MinDSCBPP = 8; 3734 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; 3735 } else { 3736 3737 NonDSCBPP0 = 16; 3738 NonDSCBPP1 = 20; 3739 NonDSCBPP2 = 24; 3740 3741 if (Format == dm_n422) { 3742 MinDSCBPP = 7; 3743 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; 3744 } else { 3745 MinDSCBPP = 8; 3746 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; 3747 } 3748 } 3749 3750 if (DSCEnable && Output == dm_dp) { 3751 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); 3752 } else { 3753 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; 3754 } 3755 3756 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) { 3757 MaxLinkBPP = 16; 3758 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) { 3759 MaxLinkBPP = 32; 3760 } 3761 3762 if (DesiredBPP == 0) { 3763 if (DSCEnable) { 3764 if (MaxLinkBPP < MinDSCBPP) { 3765 return BPP_INVALID; 3766 } else if (MaxLinkBPP >= MaxDSCBPP) { 3767 return MaxDSCBPP; 3768 } else { 3769 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; 3770 } 3771 } else { 3772 if (MaxLinkBPP >= NonDSCBPP2) { 3773 return NonDSCBPP2; 3774 } else if (MaxLinkBPP >= NonDSCBPP1) { 3775 return NonDSCBPP1; 3776 } else if (MaxLinkBPP >= NonDSCBPP0) { 3777 return 16.0; 3778 } else { 3779 return BPP_INVALID; 3780 } 3781 } 3782 } else { 3783 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0)) 3784 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { 3785 return BPP_INVALID; 3786 } else { 3787 return DesiredBPP; 3788 } 3789 } 3790 return BPP_INVALID; 3791 } 3792 3793 static noinline void CalculatePrefetchSchedulePerPlane( 3794 struct display_mode_lib *mode_lib, 3795 double HostVMInefficiencyFactor, 3796 int i, 3797 unsigned int j, 3798 unsigned int k) 3799 { 3800 struct vba_vars_st *v = &mode_lib->vba; 3801 Pipe myPipe; 3802 3803 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k]; 3804 myPipe.DISPCLK = v->RequiredDISPCLK[i][j]; 3805 myPipe.PixelClock = v->PixelClock[k]; 3806 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j]; 3807 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k]; 3808 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 3809 myPipe.VRatio = mode_lib->vba.VRatio[k]; 3810 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k]; 3811 3812 myPipe.SourceScan = v->SourceScan[k]; 3813 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k]; 3814 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k]; 3815 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k]; 3816 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k]; 3817 myPipe.InterlaceEnable = v->Interlace[k]; 3818 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 3819 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 3820 myPipe.HTotal = v->HTotal[k]; 3821 myPipe.DCCEnable = v->DCCEnable[k]; 3822 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 3823 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1; 3824 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 3825 myPipe.BytePerPixelY = v->BytePerPixelY[k]; 3826 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 3827 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 3828 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( 3829 mode_lib, 3830 HostVMInefficiencyFactor, 3831 &myPipe, 3832 v->DSCDelayPerState[i][k], 3833 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 3834 v->DPPCLKDelaySCL, 3835 v->DPPCLKDelaySCLLBOnly, 3836 v->DPPCLKDelayCNVCCursor, 3837 v->DISPCLKDelaySubtotal, 3838 v->SwathWidthYThisState[k] / v->HRatio[k], 3839 v->OutputFormat[k], 3840 v->MaxInterDCNTileRepeaters, 3841 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), 3842 v->MaximumVStartup[i][j][k], 3843 v->GPUVMMaxPageTableLevels, 3844 v->GPUVMEnable, 3845 v->HostVMEnable, 3846 v->HostVMMaxNonCachedPageTableLevels, 3847 v->HostVMMinPageSize, 3848 v->DynamicMetadataEnable[k], 3849 v->DynamicMetadataVMEnabled, 3850 v->DynamicMetadataLinesBeforeActiveRequired[k], 3851 v->DynamicMetadataTransmittedBytes[k], 3852 v->UrgLatency[i], 3853 v->ExtraLatency, 3854 v->TimeCalc, 3855 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 3856 v->MetaRowBytes[i][j][k], 3857 v->DPTEBytesPerRow[i][j][k], 3858 v->PrefetchLinesY[i][j][k], 3859 v->SwathWidthYThisState[k], 3860 v->PrefillY[k], 3861 v->MaxNumSwY[k], 3862 v->PrefetchLinesC[i][j][k], 3863 v->SwathWidthCThisState[k], 3864 v->PrefillC[k], 3865 v->MaxNumSwC[k], 3866 v->swath_width_luma_ub_this_state[k], 3867 v->swath_width_chroma_ub_this_state[k], 3868 v->SwathHeightYThisState[k], 3869 v->SwathHeightCThisState[k], 3870 v->TWait, 3871 &v->DSTXAfterScaler[k], 3872 &v->DSTYAfterScaler[k], 3873 &v->LineTimesForPrefetch[k], 3874 &v->PrefetchBW[k], 3875 &v->LinesForMetaPTE[k], 3876 &v->LinesForMetaAndDPTERow[k], 3877 &v->VRatioPreY[i][j][k], 3878 &v->VRatioPreC[i][j][k], 3879 &v->RequiredPrefetchPixelDataBWLuma[i][j][k], 3880 &v->RequiredPrefetchPixelDataBWChroma[i][j][k], 3881 &v->NoTimeForDynamicMetadata[i][j][k], 3882 &v->Tno_bw[k], 3883 &v->prefetch_vmrow_bw[k], 3884 &v->dummy7[k], 3885 &v->dummy8[k], 3886 &v->dummy13[k], 3887 &v->VUpdateOffsetPix[k], 3888 &v->VUpdateWidthPix[k], 3889 &v->VReadyOffsetPix[k]); 3890 } 3891 3892 void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) 3893 { 3894 struct vba_vars_st *v = &mode_lib->vba; 3895 3896 int i, j; 3897 unsigned int k, m; 3898 int ReorderingBytes; 3899 int MinPrefetchMode = 0, MaxPrefetchMode = 2; 3900 bool NoChroma = true; 3901 bool EnoughWritebackUnits = true; 3902 bool P2IWith420 = false; 3903 bool DSCOnlyIfNecessaryWithBPP = false; 3904 bool DSC422NativeNotSupported = false; 3905 double MaxTotalVActiveRDBandwidth; 3906 bool ViewportExceedsSurface = false; 3907 bool FMTBufferExceeded = false; 3908 3909 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ 3910 3911 CalculateMinAndMaxPrefetchMode( 3912 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, 3913 &MinPrefetchMode, &MaxPrefetchMode); 3914 3915 /*Scale Ratio, taps Support Check*/ 3916 3917 v->ScaleRatioAndTapsSupport = true; 3918 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3919 if (v->ScalerEnabled[k] == false 3920 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 3921 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 3922 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 3923 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0 3924 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) { 3925 v->ScaleRatioAndTapsSupport = false; 3926 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0 3927 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio 3928 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k] 3929 || v->VRatio[k] > v->vtaps[k] 3930 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 3931 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 3932 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 3933 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1 3934 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1) 3935 || v->HRatioChroma[k] > v->MaxHSCLRatio 3936 || v->VRatioChroma[k] > v->MaxVSCLRatio 3937 || v->HRatioChroma[k] > v->HTAPsChroma[k] 3938 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) { 3939 v->ScaleRatioAndTapsSupport = false; 3940 } 3941 } 3942 /*Source Format, Pixel Format and Scan Support Check*/ 3943 3944 v->SourceFormatPixelAndScanSupport = true; 3945 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3946 if (v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) { 3947 v->SourceFormatPixelAndScanSupport = false; 3948 } 3949 } 3950 /*Bandwidth Support Check*/ 3951 3952 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3953 CalculateBytePerPixelAnd256BBlockSizes( 3954 v->SourcePixelFormat[k], 3955 v->SurfaceTiling[k], 3956 &v->BytePerPixelY[k], 3957 &v->BytePerPixelC[k], 3958 &v->BytePerPixelInDETY[k], 3959 &v->BytePerPixelInDETC[k], 3960 &v->Read256BlockHeightY[k], 3961 &v->Read256BlockHeightC[k], 3962 &v->Read256BlockWidthY[k], 3963 &v->Read256BlockWidthC[k]); 3964 } 3965 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3966 if (v->SourceScan[k] != dm_vert) { 3967 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k]; 3968 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k]; 3969 } else { 3970 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k]; 3971 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k]; 3972 } 3973 } 3974 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3975 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) 3976 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 3977 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) 3978 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0; 3979 } 3980 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3981 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) { 3982 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3983 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0; 3984 } else if (v->WritebackEnable[k] == true) { 3985 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3986 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0; 3987 } else { 3988 v->WriteBandwidth[k] = 0.0; 3989 } 3990 } 3991 3992 /*Writeback Latency support check*/ 3993 3994 v->WritebackLatencySupport = true; 3995 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3996 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) { 3997 v->WritebackLatencySupport = false; 3998 } 3999 } 4000 4001 /*Writeback Mode Support Check*/ 4002 4003 v->TotalNumberOfActiveWriteback = 0; 4004 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4005 if (v->WritebackEnable[k] == true) { 4006 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1; 4007 } 4008 } 4009 4010 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) { 4011 EnoughWritebackUnits = false; 4012 } 4013 4014 /*Writeback Scale Ratio and Taps Support Check*/ 4015 4016 v->WritebackScaleRatioAndTapsSupport = true; 4017 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4018 if (v->WritebackEnable[k] == true) { 4019 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio 4020 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio 4021 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio 4022 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps 4023 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps 4024 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k] 4025 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) { 4026 v->WritebackScaleRatioAndTapsSupport = false; 4027 } 4028 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) { 4029 v->WritebackScaleRatioAndTapsSupport = false; 4030 } 4031 } 4032 } 4033 /*Maximum DISPCLK/DPPCLK Support check*/ 4034 4035 v->WritebackRequiredDISPCLK = 0.0; 4036 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4037 if (v->WritebackEnable[k] == true) { 4038 v->WritebackRequiredDISPCLK = dml_max( 4039 v->WritebackRequiredDISPCLK, 4040 dml314_CalculateWriteBackDISPCLK( 4041 v->WritebackPixelFormat[k], 4042 v->PixelClock[k], 4043 v->WritebackHRatio[k], 4044 v->WritebackVRatio[k], 4045 v->WritebackHTaps[k], 4046 v->WritebackVTaps[k], 4047 v->WritebackSourceWidth[k], 4048 v->WritebackDestinationWidth[k], 4049 v->HTotal[k], 4050 v->WritebackLineBufferSize)); 4051 } 4052 } 4053 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4054 if (v->HRatio[k] > 1.0) { 4055 v->PSCL_FACTOR[k] = dml_min( 4056 v->MaxDCHUBToPSCLThroughput, 4057 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0)); 4058 } else { 4059 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 4060 } 4061 if (v->BytePerPixelC[k] == 0.0) { 4062 v->PSCL_FACTOR_CHROMA[k] = 0.0; 4063 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 4064 * dml_max3( 4065 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 4066 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 4067 1.0); 4068 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 4069 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 4070 } 4071 } else { 4072 if (v->HRatioChroma[k] > 1.0) { 4073 v->PSCL_FACTOR_CHROMA[k] = dml_min( 4074 v->MaxDCHUBToPSCLThroughput, 4075 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); 4076 } else { 4077 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 4078 } 4079 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 4080 * dml_max5( 4081 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 4082 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 4083 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), 4084 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k], 4085 1.0); 4086 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0) 4087 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 4088 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 4089 } 4090 } 4091 } 4092 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4093 int MaximumSwathWidthSupportLuma; 4094 int MaximumSwathWidthSupportChroma; 4095 4096 if (v->SurfaceTiling[k] == dm_sw_linear) { 4097 MaximumSwathWidthSupportLuma = 8192.0; 4098 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) { 4099 MaximumSwathWidthSupportLuma = 2880.0; 4100 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4101 MaximumSwathWidthSupportLuma = 3840.0; 4102 } else { 4103 MaximumSwathWidthSupportLuma = 5760.0; 4104 } 4105 4106 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) { 4107 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0; 4108 } else { 4109 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma; 4110 } 4111 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k] 4112 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0)); 4113 if (v->BytePerPixelC[k] == 0.0) { 4114 v->MaximumSwathWidthInLineBufferChroma = 0; 4115 } else { 4116 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k] 4117 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0)); 4118 } 4119 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma); 4120 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma); 4121 } 4122 4123 CalculateSwathAndDETConfiguration( 4124 true, 4125 v->NumberOfActivePlanes, 4126 v->DETBufferSizeInKByte[0], 4127 v->MaximumSwathWidthLuma, 4128 v->MaximumSwathWidthChroma, 4129 v->SourceScan, 4130 v->SourcePixelFormat, 4131 v->SurfaceTiling, 4132 v->ViewportWidth, 4133 v->ViewportHeight, 4134 v->SurfaceWidthY, 4135 v->SurfaceWidthC, 4136 v->SurfaceHeightY, 4137 v->SurfaceHeightC, 4138 v->Read256BlockHeightY, 4139 v->Read256BlockHeightC, 4140 v->Read256BlockWidthY, 4141 v->Read256BlockWidthC, 4142 v->odm_combine_dummy, 4143 v->BlendingAndTiming, 4144 v->BytePerPixelY, 4145 v->BytePerPixelC, 4146 v->BytePerPixelInDETY, 4147 v->BytePerPixelInDETC, 4148 v->HActive, 4149 v->HRatio, 4150 v->HRatioChroma, 4151 v->NoOfDPPThisState, 4152 v->swath_width_luma_ub_this_state, 4153 v->swath_width_chroma_ub_this_state, 4154 v->SwathWidthYThisState, 4155 v->SwathWidthCThisState, 4156 v->SwathHeightYThisState, 4157 v->SwathHeightCThisState, 4158 v->DETBufferSizeYThisState, 4159 v->DETBufferSizeCThisState, 4160 v->SingleDPPViewportSizeSupportPerPlane, 4161 &v->ViewportSizeSupport[0][0]); 4162 4163 for (i = 0; i < v->soc.num_states; i++) { 4164 for (j = 0; j < 2; j++) { 4165 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed); 4166 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed); 4167 v->RequiredDISPCLK[i][j] = 0.0; 4168 v->DISPCLK_DPPCLK_Support[i][j] = true; 4169 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4170 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4171 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4172 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] 4173 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4174 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4175 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] 4176 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4177 } 4178 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4179 * (1 + v->DISPCLKRampingMargin / 100.0); 4180 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] 4181 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4182 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4183 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 4184 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4185 } 4186 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4187 * (1 + v->DISPCLKRampingMargin / 100.0); 4188 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] 4189 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4190 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4191 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 4192 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4193 } 4194 4195 if (v->ODMCombinePolicy == dm_odm_combine_policy_none 4196 || !(v->Output[k] == dm_dp || 4197 v->Output[k] == dm_dp2p0 || 4198 v->Output[k] == dm_edp)) { 4199 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4200 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4201 4202 if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) 4203 FMTBufferExceeded = true; 4204 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) { 4205 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4206 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4207 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1 4208 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) { 4209 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4210 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4211 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) { 4212 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4213 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4214 } else { 4215 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4216 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4217 } 4218 if (v->DSCEnabled[k] && v->HActive[k] > DCN314_MAX_DSC_IMAGE_WIDTH 4219 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4220 if (v->HActive[k] / 2 > DCN314_MAX_DSC_IMAGE_WIDTH) { 4221 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4222 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4223 } else { 4224 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4225 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4226 } 4227 } 4228 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN314_MAX_FMT_420_BUFFER_WIDTH 4229 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4230 if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) { 4231 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4232 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4233 4234 if (v->HActive[k] / 4 > DCN314_MAX_FMT_420_BUFFER_WIDTH) 4235 FMTBufferExceeded = true; 4236 } else { 4237 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4238 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4239 } 4240 } 4241 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4242 v->MPCCombine[i][j][k] = false; 4243 v->NoOfDPP[i][j][k] = 4; 4244 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4; 4245 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4246 v->MPCCombine[i][j][k] = false; 4247 v->NoOfDPP[i][j][k] = 2; 4248 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2; 4249 } else if ((v->WhenToDoMPCCombine == dm_mpc_never 4250 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4251 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) { 4252 v->MPCCombine[i][j][k] = false; 4253 v->NoOfDPP[i][j][k] = 1; 4254 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4255 } else { 4256 v->MPCCombine[i][j][k] = true; 4257 v->NoOfDPP[i][j][k] = 2; 4258 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4259 } 4260 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4261 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4262 > v->MaxDppclkRoundedDownToDFSGranularity) 4263 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4264 v->DISPCLK_DPPCLK_Support[i][j] = false; 4265 } 4266 } 4267 v->TotalNumberOfActiveDPP[i][j] = 0; 4268 v->TotalNumberOfSingleDPPPlanes[i][j] = 0; 4269 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4270 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4271 if (v->NoOfDPP[i][j][k] == 1) 4272 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1; 4273 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4274 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 4275 NoChroma = false; 4276 } 4277 4278 // UPTO 4279 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never 4280 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) { 4281 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) { 4282 double BWOfNonSplitPlaneOfMaximumBandwidth; 4283 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; 4284 4285 BWOfNonSplitPlaneOfMaximumBandwidth = 0; 4286 NumberOfNonSplitPlaneOfMaximumBandwidth = 0; 4287 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4288 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth 4289 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) { 4290 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 4291 NumberOfNonSplitPlaneOfMaximumBandwidth = k; 4292 } 4293 } 4294 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true; 4295 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; 4296 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 4297 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] 4298 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; 4299 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1; 4300 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1; 4301 } 4302 } 4303 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) { 4304 v->RequiredDISPCLK[i][j] = 0.0; 4305 v->DISPCLK_DPPCLK_Support[i][j] = true; 4306 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4307 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4308 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) { 4309 v->MPCCombine[i][j][k] = true; 4310 v->NoOfDPP[i][j][k] = 2; 4311 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4312 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4313 } else { 4314 v->MPCCombine[i][j][k] = false; 4315 v->NoOfDPP[i][j][k] = 1; 4316 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4317 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4318 } 4319 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4320 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4321 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4322 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4323 } else { 4324 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4325 } 4326 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4327 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4328 > v->MaxDppclkRoundedDownToDFSGranularity) 4329 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4330 v->DISPCLK_DPPCLK_Support[i][j] = false; 4331 } 4332 } 4333 v->TotalNumberOfActiveDPP[i][j] = 0.0; 4334 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4335 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4336 } 4337 } 4338 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK); 4339 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) { 4340 v->DISPCLK_DPPCLK_Support[i][j] = false; 4341 } 4342 } 4343 } 4344 4345 /*Total Available Pipes Support Check*/ 4346 4347 for (i = 0; i < v->soc.num_states; i++) { 4348 for (j = 0; j < 2; j++) { 4349 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) { 4350 v->TotalAvailablePipesSupport[i][j] = true; 4351 } else { 4352 v->TotalAvailablePipesSupport[i][j] = false; 4353 } 4354 } 4355 } 4356 /*Display IO and DSC Support Check*/ 4357 4358 v->NonsupportedDSCInputBPC = false; 4359 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4360 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0) 4361 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) { 4362 v->NonsupportedDSCInputBPC = true; 4363 } 4364 } 4365 4366 /*Number Of DSC Slices*/ 4367 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4368 if (v->BlendingAndTiming[k] == k) { 4369 if (v->PixelClockBackEnd[k] > 3200) { 4370 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0); 4371 } else if (v->PixelClockBackEnd[k] > 1360) { 4372 v->NumberOfDSCSlices[k] = 8; 4373 } else if (v->PixelClockBackEnd[k] > 680) { 4374 v->NumberOfDSCSlices[k] = 4; 4375 } else if (v->PixelClockBackEnd[k] > 340) { 4376 v->NumberOfDSCSlices[k] = 2; 4377 } else { 4378 v->NumberOfDSCSlices[k] = 1; 4379 } 4380 } else { 4381 v->NumberOfDSCSlices[k] = 0; 4382 } 4383 } 4384 4385 for (i = 0; i < v->soc.num_states; i++) { 4386 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4387 v->RequiresDSC[i][k] = false; 4388 v->RequiresFEC[i][k] = false; 4389 if (v->BlendingAndTiming[k] == k) { 4390 if (v->Output[k] == dm_hdmi) { 4391 v->RequiresDSC[i][k] = false; 4392 v->RequiresFEC[i][k] = false; 4393 v->OutputBppPerState[i][k] = TruncToValidBPP( 4394 dml_min(600.0, v->PHYCLKPerState[i]) * 10, 4395 3, 4396 v->HTotal[k], 4397 v->HActive[k], 4398 v->PixelClockBackEnd[k], 4399 v->ForcedOutputLinkBPP[k], 4400 false, 4401 v->Output[k], 4402 v->OutputFormat[k], 4403 v->DSCInputBitPerComponent[k], 4404 v->NumberOfDSCSlices[k], 4405 v->AudioSampleRate[k], 4406 v->AudioSampleLayout[k], 4407 v->ODMCombineEnablePerState[i][k]); 4408 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) { 4409 if (v->DSCEnable[k] == true) { 4410 v->RequiresDSC[i][k] = true; 4411 v->LinkDSCEnable = true; 4412 if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) { 4413 v->RequiresFEC[i][k] = true; 4414 } else { 4415 v->RequiresFEC[i][k] = false; 4416 } 4417 } else { 4418 v->RequiresDSC[i][k] = false; 4419 v->LinkDSCEnable = false; 4420 if (v->Output[k] == dm_dp2p0) { 4421 v->RequiresFEC[i][k] = true; 4422 } else { 4423 v->RequiresFEC[i][k] = false; 4424 } 4425 } 4426 if (v->Output[k] == dm_dp2p0) { 4427 v->Outbpp = BPP_INVALID; 4428 if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) && 4429 v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) { 4430 v->Outbpp = TruncToValidBPP( 4431 (1.0 - v->Downspreading / 100.0) * 10000, 4432 v->OutputLinkDPLanes[k], 4433 v->HTotal[k], 4434 v->HActive[k], 4435 v->PixelClockBackEnd[k], 4436 v->ForcedOutputLinkBPP[k], 4437 v->LinkDSCEnable, 4438 v->Output[k], 4439 v->OutputFormat[k], 4440 v->DSCInputBitPerComponent[k], 4441 v->NumberOfDSCSlices[k], 4442 v->AudioSampleRate[k], 4443 v->AudioSampleLayout[k], 4444 v->ODMCombineEnablePerState[i][k]); 4445 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 && 4446 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) { 4447 v->RequiresDSC[i][k] = true; 4448 v->LinkDSCEnable = true; 4449 v->Outbpp = TruncToValidBPP( 4450 (1.0 - v->Downspreading / 100.0) * 10000, 4451 v->OutputLinkDPLanes[k], 4452 v->HTotal[k], 4453 v->HActive[k], 4454 v->PixelClockBackEnd[k], 4455 v->ForcedOutputLinkBPP[k], 4456 v->LinkDSCEnable, 4457 v->Output[k], 4458 v->OutputFormat[k], 4459 v->DSCInputBitPerComponent[k], 4460 v->NumberOfDSCSlices[k], 4461 v->AudioSampleRate[k], 4462 v->AudioSampleLayout[k], 4463 v->ODMCombineEnablePerState[i][k]); 4464 } 4465 v->OutputBppPerState[i][k] = v->Outbpp; 4466 // TODO: Need some other way to handle this nonsense 4467 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10" 4468 } 4469 if (v->Outbpp == BPP_INVALID && 4470 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) && 4471 v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) { 4472 v->Outbpp = TruncToValidBPP( 4473 (1.0 - v->Downspreading / 100.0) * 13500, 4474 v->OutputLinkDPLanes[k], 4475 v->HTotal[k], 4476 v->HActive[k], 4477 v->PixelClockBackEnd[k], 4478 v->ForcedOutputLinkBPP[k], 4479 v->LinkDSCEnable, 4480 v->Output[k], 4481 v->OutputFormat[k], 4482 v->DSCInputBitPerComponent[k], 4483 v->NumberOfDSCSlices[k], 4484 v->AudioSampleRate[k], 4485 v->AudioSampleLayout[k], 4486 v->ODMCombineEnablePerState[i][k]); 4487 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 && 4488 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) { 4489 v->RequiresDSC[i][k] = true; 4490 v->LinkDSCEnable = true; 4491 v->Outbpp = TruncToValidBPP( 4492 (1.0 - v->Downspreading / 100.0) * 13500, 4493 v->OutputLinkDPLanes[k], 4494 v->HTotal[k], 4495 v->HActive[k], 4496 v->PixelClockBackEnd[k], 4497 v->ForcedOutputLinkBPP[k], 4498 v->LinkDSCEnable, 4499 v->Output[k], 4500 v->OutputFormat[k], 4501 v->DSCInputBitPerComponent[k], 4502 v->NumberOfDSCSlices[k], 4503 v->AudioSampleRate[k], 4504 v->AudioSampleLayout[k], 4505 v->ODMCombineEnablePerState[i][k]); 4506 } 4507 v->OutputBppPerState[i][k] = v->Outbpp; 4508 // TODO: Need some other way to handle this nonsense 4509 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5" 4510 } 4511 if (v->Outbpp == BPP_INVALID && 4512 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) && 4513 v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) { 4514 v->Outbpp = TruncToValidBPP( 4515 (1.0 - v->Downspreading / 100.0) * 20000, 4516 v->OutputLinkDPLanes[k], 4517 v->HTotal[k], 4518 v->HActive[k], 4519 v->PixelClockBackEnd[k], 4520 v->ForcedOutputLinkBPP[k], 4521 v->LinkDSCEnable, 4522 v->Output[k], 4523 v->OutputFormat[k], 4524 v->DSCInputBitPerComponent[k], 4525 v->NumberOfDSCSlices[k], 4526 v->AudioSampleRate[k], 4527 v->AudioSampleLayout[k], 4528 v->ODMCombineEnablePerState[i][k]); 4529 if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true && 4530 v->ForcedOutputLinkBPP[k] == 0) { 4531 v->RequiresDSC[i][k] = true; 4532 v->LinkDSCEnable = true; 4533 v->Outbpp = TruncToValidBPP( 4534 (1.0 - v->Downspreading / 100.0) * 20000, 4535 v->OutputLinkDPLanes[k], 4536 v->HTotal[k], 4537 v->HActive[k], 4538 v->PixelClockBackEnd[k], 4539 v->ForcedOutputLinkBPP[k], 4540 v->LinkDSCEnable, 4541 v->Output[k], 4542 v->OutputFormat[k], 4543 v->DSCInputBitPerComponent[k], 4544 v->NumberOfDSCSlices[k], 4545 v->AudioSampleRate[k], 4546 v->AudioSampleLayout[k], 4547 v->ODMCombineEnablePerState[i][k]); 4548 } 4549 v->OutputBppPerState[i][k] = v->Outbpp; 4550 // TODO: Need some other way to handle this nonsense 4551 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20" 4552 } 4553 } else { 4554 v->Outbpp = BPP_INVALID; 4555 if (v->PHYCLKPerState[i] >= 270.0) { 4556 v->Outbpp = TruncToValidBPP( 4557 (1.0 - v->Downspreading / 100.0) * 2700, 4558 v->OutputLinkDPLanes[k], 4559 v->HTotal[k], 4560 v->HActive[k], 4561 v->PixelClockBackEnd[k], 4562 v->ForcedOutputLinkBPP[k], 4563 v->LinkDSCEnable, 4564 v->Output[k], 4565 v->OutputFormat[k], 4566 v->DSCInputBitPerComponent[k], 4567 v->NumberOfDSCSlices[k], 4568 v->AudioSampleRate[k], 4569 v->AudioSampleLayout[k], 4570 v->ODMCombineEnablePerState[i][k]); 4571 v->OutputBppPerState[i][k] = v->Outbpp; 4572 // TODO: Need some other way to handle this nonsense 4573 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR" 4574 } 4575 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) { 4576 v->Outbpp = TruncToValidBPP( 4577 (1.0 - v->Downspreading / 100.0) * 5400, 4578 v->OutputLinkDPLanes[k], 4579 v->HTotal[k], 4580 v->HActive[k], 4581 v->PixelClockBackEnd[k], 4582 v->ForcedOutputLinkBPP[k], 4583 v->LinkDSCEnable, 4584 v->Output[k], 4585 v->OutputFormat[k], 4586 v->DSCInputBitPerComponent[k], 4587 v->NumberOfDSCSlices[k], 4588 v->AudioSampleRate[k], 4589 v->AudioSampleLayout[k], 4590 v->ODMCombineEnablePerState[i][k]); 4591 v->OutputBppPerState[i][k] = v->Outbpp; 4592 // TODO: Need some other way to handle this nonsense 4593 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2" 4594 } 4595 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) { 4596 v->Outbpp = TruncToValidBPP( 4597 (1.0 - v->Downspreading / 100.0) * 8100, 4598 v->OutputLinkDPLanes[k], 4599 v->HTotal[k], 4600 v->HActive[k], 4601 v->PixelClockBackEnd[k], 4602 v->ForcedOutputLinkBPP[k], 4603 v->LinkDSCEnable, 4604 v->Output[k], 4605 v->OutputFormat[k], 4606 v->DSCInputBitPerComponent[k], 4607 v->NumberOfDSCSlices[k], 4608 v->AudioSampleRate[k], 4609 v->AudioSampleLayout[k], 4610 v->ODMCombineEnablePerState[i][k]); 4611 v->OutputBppPerState[i][k] = v->Outbpp; 4612 // TODO: Need some other way to handle this nonsense 4613 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3" 4614 } 4615 } 4616 } 4617 } else { 4618 v->OutputBppPerState[i][k] = 0; 4619 } 4620 } 4621 } 4622 4623 for (i = 0; i < v->soc.num_states; i++) { 4624 v->LinkCapacitySupport[i] = true; 4625 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4626 if (v->BlendingAndTiming[k] == k 4627 && (v->Output[k] == dm_dp || 4628 v->Output[k] == dm_edp || 4629 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) { 4630 v->LinkCapacitySupport[i] = false; 4631 } 4632 } 4633 } 4634 4635 // UPTO 2172 4636 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4637 if (v->BlendingAndTiming[k] == k 4638 && (v->Output[k] == dm_dp || 4639 v->Output[k] == dm_edp || 4640 v->Output[k] == dm_hdmi)) { 4641 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) { 4642 P2IWith420 = true; 4643 } 4644 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422 4645 && !v->DSC422NativeSupport) { 4646 DSC422NativeNotSupported = true; 4647 } 4648 } 4649 } 4650 4651 4652 for (i = 0; i < v->soc.num_states; ++i) { 4653 v->ODMCombine4To1SupportCheckOK[i] = true; 4654 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4655 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 4656 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp 4657 || v->Output[k] == dm_hdmi)) { 4658 v->ODMCombine4To1SupportCheckOK[i] = false; 4659 } 4660 } 4661 } 4662 4663 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */ 4664 4665 for (i = 0; i < v->soc.num_states; i++) { 4666 v->NotEnoughDSCUnits[i] = false; 4667 v->TotalDSCUnitsRequired = 0.0; 4668 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4669 if (v->RequiresDSC[i][k] == true) { 4670 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4671 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0; 4672 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4673 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0; 4674 } else { 4675 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0; 4676 } 4677 } 4678 } 4679 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) { 4680 v->NotEnoughDSCUnits[i] = true; 4681 } 4682 } 4683 /*DSC Delay per state*/ 4684 4685 for (i = 0; i < v->soc.num_states; i++) { 4686 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4687 if (v->OutputBppPerState[i][k] == BPP_INVALID) { 4688 v->BPP = 0.0; 4689 } else { 4690 v->BPP = v->OutputBppPerState[i][k]; 4691 } 4692 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) { 4693 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { 4694 v->DSCDelayPerState[i][k] = dscceComputeDelay( 4695 v->DSCInputBitPerComponent[k], 4696 v->BPP, 4697 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4698 v->NumberOfDSCSlices[k], 4699 v->OutputFormat[k], 4700 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 4701 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4702 v->DSCDelayPerState[i][k] = 2.0 4703 * (dscceComputeDelay( 4704 v->DSCInputBitPerComponent[k], 4705 v->BPP, 4706 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4707 v->NumberOfDSCSlices[k] / 2, 4708 v->OutputFormat[k], 4709 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4710 } else { 4711 v->DSCDelayPerState[i][k] = 4.0 4712 * (dscceComputeDelay( 4713 v->DSCInputBitPerComponent[k], 4714 v->BPP, 4715 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4716 v->NumberOfDSCSlices[k] / 4, 4717 v->OutputFormat[k], 4718 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4719 } 4720 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 4721 } else { 4722 v->DSCDelayPerState[i][k] = 0.0; 4723 } 4724 } 4725 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4726 for (m = 0; m < v->NumberOfActivePlanes; m++) { 4727 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) { 4728 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m]; 4729 } 4730 } 4731 } 4732 } 4733 4734 //Calculate Swath, DET Configuration, DCFCLKDeepSleep 4735 // 4736 for (i = 0; i < v->soc.num_states; ++i) { 4737 for (j = 0; j <= 1; ++j) { 4738 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4739 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k]; 4740 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 4741 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k]; 4742 } 4743 4744 CalculateSwathAndDETConfiguration( 4745 false, 4746 v->NumberOfActivePlanes, 4747 v->DETBufferSizeInKByte[0], 4748 v->MaximumSwathWidthLuma, 4749 v->MaximumSwathWidthChroma, 4750 v->SourceScan, 4751 v->SourcePixelFormat, 4752 v->SurfaceTiling, 4753 v->ViewportWidth, 4754 v->ViewportHeight, 4755 v->SurfaceWidthY, 4756 v->SurfaceWidthC, 4757 v->SurfaceHeightY, 4758 v->SurfaceHeightC, 4759 v->Read256BlockHeightY, 4760 v->Read256BlockHeightC, 4761 v->Read256BlockWidthY, 4762 v->Read256BlockWidthC, 4763 v->ODMCombineEnableThisState, 4764 v->BlendingAndTiming, 4765 v->BytePerPixelY, 4766 v->BytePerPixelC, 4767 v->BytePerPixelInDETY, 4768 v->BytePerPixelInDETC, 4769 v->HActive, 4770 v->HRatio, 4771 v->HRatioChroma, 4772 v->NoOfDPPThisState, 4773 v->swath_width_luma_ub_this_state, 4774 v->swath_width_chroma_ub_this_state, 4775 v->SwathWidthYThisState, 4776 v->SwathWidthCThisState, 4777 v->SwathHeightYThisState, 4778 v->SwathHeightCThisState, 4779 v->DETBufferSizeYThisState, 4780 v->DETBufferSizeCThisState, 4781 v->dummystring, 4782 &v->ViewportSizeSupport[i][j]); 4783 4784 CalculateDCFCLKDeepSleep( 4785 mode_lib, 4786 v->NumberOfActivePlanes, 4787 v->BytePerPixelY, 4788 v->BytePerPixelC, 4789 v->VRatio, 4790 v->VRatioChroma, 4791 v->SwathWidthYThisState, 4792 v->SwathWidthCThisState, 4793 v->NoOfDPPThisState, 4794 v->HRatio, 4795 v->HRatioChroma, 4796 v->PixelClock, 4797 v->PSCL_FACTOR, 4798 v->PSCL_FACTOR_CHROMA, 4799 v->RequiredDPPCLKThisState, 4800 v->ReadBandwidthLuma, 4801 v->ReadBandwidthChroma, 4802 v->ReturnBusWidth, 4803 &v->ProjectedDCFCLKDeepSleep[i][j]); 4804 4805 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4806 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k]; 4807 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k]; 4808 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k]; 4809 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k]; 4810 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k]; 4811 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k]; 4812 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k]; 4813 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k]; 4814 } 4815 } 4816 } 4817 4818 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4819 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 4820 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 4821 } 4822 4823 for (i = 0; i < v->soc.num_states; i++) { 4824 for (j = 0; j < 2; j++) { 4825 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX]; 4826 4827 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4828 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 4829 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 4830 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 4831 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 4832 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 4833 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 4834 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 4835 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 4836 } 4837 4838 v->TotalNumberOfDCCActiveDPP[i][j] = 0; 4839 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4840 if (v->DCCEnable[k] == true) { 4841 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4842 } 4843 } 4844 4845 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4846 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4847 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4848 4849 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) 4850 && v->SourceScan[k] != dm_vert) { 4851 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) 4852 / 2; 4853 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; 4854 } else { 4855 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 4856 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 4857 } 4858 4859 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( 4860 mode_lib, 4861 v->DCCEnable[k], 4862 v->Read256BlockHeightC[k], 4863 v->Read256BlockWidthC[k], 4864 v->SourcePixelFormat[k], 4865 v->SurfaceTiling[k], 4866 v->BytePerPixelC[k], 4867 v->SourceScan[k], 4868 v->SwathWidthCThisState[k], 4869 v->ViewportHeightChroma[k], 4870 v->GPUVMEnable, 4871 v->HostVMEnable, 4872 v->HostVMMaxNonCachedPageTableLevels, 4873 v->GPUVMMinPageSize, 4874 v->HostVMMinPageSize, 4875 v->PTEBufferSizeInRequestsForChroma, 4876 v->PitchC[k], 4877 0.0, 4878 &v->MacroTileWidthC[k], 4879 &v->MetaRowBytesC, 4880 &v->DPTEBytesPerRowC, 4881 &v->PTEBufferSizeNotExceededC[i][j][k], 4882 &v->dummyinteger7, 4883 &v->dpte_row_height_chroma[k], 4884 &v->dummyinteger28, 4885 &v->dummyinteger26, 4886 &v->dummyinteger23, 4887 &v->meta_row_height_chroma[k], 4888 &v->dummyinteger8, 4889 &v->dummyinteger9, 4890 &v->dummyinteger19, 4891 &v->dummyinteger20, 4892 &v->dummyinteger17, 4893 &v->dummyinteger10, 4894 &v->dummyinteger11); 4895 4896 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines( 4897 mode_lib, 4898 v->VRatioChroma[k], 4899 v->VTAPsChroma[k], 4900 v->Interlace[k], 4901 v->ProgressiveToInterlaceUnitInOPP, 4902 v->SwathHeightCThisState[k], 4903 v->ViewportYStartC[k], 4904 &v->PrefillC[k], 4905 &v->MaxNumSwC[k]); 4906 } else { 4907 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 4908 v->PTEBufferSizeInRequestsForChroma = 0; 4909 v->PDEAndMetaPTEBytesPerFrameC = 0.0; 4910 v->MetaRowBytesC = 0.0; 4911 v->DPTEBytesPerRowC = 0.0; 4912 v->PrefetchLinesC[i][j][k] = 0.0; 4913 v->PTEBufferSizeNotExceededC[i][j][k] = true; 4914 } 4915 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( 4916 mode_lib, 4917 v->DCCEnable[k], 4918 v->Read256BlockHeightY[k], 4919 v->Read256BlockWidthY[k], 4920 v->SourcePixelFormat[k], 4921 v->SurfaceTiling[k], 4922 v->BytePerPixelY[k], 4923 v->SourceScan[k], 4924 v->SwathWidthYThisState[k], 4925 v->ViewportHeight[k], 4926 v->GPUVMEnable, 4927 v->HostVMEnable, 4928 v->HostVMMaxNonCachedPageTableLevels, 4929 v->GPUVMMinPageSize, 4930 v->HostVMMinPageSize, 4931 v->PTEBufferSizeInRequestsForLuma, 4932 v->PitchY[k], 4933 v->DCCMetaPitchY[k], 4934 &v->MacroTileWidthY[k], 4935 &v->MetaRowBytesY, 4936 &v->DPTEBytesPerRowY, 4937 &v->PTEBufferSizeNotExceededY[i][j][k], 4938 &v->dummyinteger7, 4939 &v->dpte_row_height[k], 4940 &v->dummyinteger29, 4941 &v->dummyinteger27, 4942 &v->dummyinteger24, 4943 &v->meta_row_height[k], 4944 &v->dummyinteger25, 4945 &v->dpte_group_bytes[k], 4946 &v->dummyinteger21, 4947 &v->dummyinteger22, 4948 &v->dummyinteger18, 4949 &v->dummyinteger5, 4950 &v->dummyinteger6); 4951 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines( 4952 mode_lib, 4953 v->VRatio[k], 4954 v->vtaps[k], 4955 v->Interlace[k], 4956 v->ProgressiveToInterlaceUnitInOPP, 4957 v->SwathHeightYThisState[k], 4958 v->ViewportYStartY[k], 4959 &v->PrefillY[k], 4960 &v->MaxNumSwY[k]); 4961 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC; 4962 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC; 4963 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC; 4964 4965 CalculateRowBandwidth( 4966 v->GPUVMEnable, 4967 v->SourcePixelFormat[k], 4968 v->VRatio[k], 4969 v->VRatioChroma[k], 4970 v->DCCEnable[k], 4971 v->HTotal[k] / v->PixelClock[k], 4972 v->MetaRowBytesY, 4973 v->MetaRowBytesC, 4974 v->meta_row_height[k], 4975 v->meta_row_height_chroma[k], 4976 v->DPTEBytesPerRowY, 4977 v->DPTEBytesPerRowC, 4978 v->dpte_row_height[k], 4979 v->dpte_row_height_chroma[k], 4980 &v->meta_row_bandwidth[i][j][k], 4981 &v->dpte_row_bandwidth[i][j][k]); 4982 } 4983 /* 4984 * DCCMetaBufferSizeSupport(i, j) = True 4985 * For k = 0 To NumberOfActivePlanes - 1 4986 * If MetaRowBytes(i, j, k) > 24064 Then 4987 * DCCMetaBufferSizeSupport(i, j) = False 4988 * End If 4989 * Next k 4990 */ 4991 v->DCCMetaBufferSizeSupport[i][j] = true; 4992 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4993 if (v->MetaRowBytes[i][j][k] > 24064) 4994 v->DCCMetaBufferSizeSupport[i][j] = false; 4995 } 4996 v->UrgLatency[i] = CalculateUrgentLatency( 4997 v->UrgentLatencyPixelDataOnly, 4998 v->UrgentLatencyPixelMixedWithVMData, 4999 v->UrgentLatencyVMDataOnly, 5000 v->DoUrgentLatencyAdjustment, 5001 v->UrgentLatencyAdjustmentFabricClockComponent, 5002 v->UrgentLatencyAdjustmentFabricClockReference, 5003 v->FabricClockPerState[i]); 5004 5005 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5006 CalculateUrgentBurstFactor( 5007 v->swath_width_luma_ub_this_state[k], 5008 v->swath_width_chroma_ub_this_state[k], 5009 v->SwathHeightYThisState[k], 5010 v->SwathHeightCThisState[k], 5011 v->HTotal[k] / v->PixelClock[k], 5012 v->UrgLatency[i], 5013 v->CursorBufferSize, 5014 v->CursorWidth[k][0], 5015 v->CursorBPP[k][0], 5016 v->VRatio[k], 5017 v->VRatioChroma[k], 5018 v->BytePerPixelInDETY[k], 5019 v->BytePerPixelInDETC[k], 5020 v->DETBufferSizeYThisState[k], 5021 v->DETBufferSizeCThisState[k], 5022 &v->UrgentBurstFactorCursor[k], 5023 &v->UrgentBurstFactorLuma[k], 5024 &v->UrgentBurstFactorChroma[k], 5025 &NotUrgentLatencyHiding[k]); 5026 } 5027 5028 v->NotEnoughUrgentLatencyHidingA[i][j] = false; 5029 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5030 if (NotUrgentLatencyHiding[k]) { 5031 v->NotEnoughUrgentLatencyHidingA[i][j] = true; 5032 } 5033 } 5034 5035 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5036 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] 5037 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k]; 5038 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k]; 5039 } 5040 5041 v->TotalVActivePixelBandwidth[i][j] = 0; 5042 v->TotalVActiveCursorBandwidth[i][j] = 0; 5043 v->TotalMetaRowBandwidth[i][j] = 0; 5044 v->TotalDPTERowBandwidth[i][j] = 0; 5045 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5046 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k]; 5047 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k]; 5048 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k]; 5049 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k]; 5050 } 5051 } 5052 } 5053 5054 //Calculate Return BW 5055 for (i = 0; i < v->soc.num_states; ++i) { 5056 for (j = 0; j <= 1; ++j) { 5057 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5058 if (v->BlendingAndTiming[k] == k) { 5059 if (v->WritebackEnable[k] == true) { 5060 v->WritebackDelayTime[k] = v->WritebackLatency 5061 + CalculateWriteBackDelay( 5062 v->WritebackPixelFormat[k], 5063 v->WritebackHRatio[k], 5064 v->WritebackVRatio[k], 5065 v->WritebackVTaps[k], 5066 v->WritebackDestinationWidth[k], 5067 v->WritebackDestinationHeight[k], 5068 v->WritebackSourceHeight[k], 5069 v->HTotal[k]) / v->RequiredDISPCLK[i][j]; 5070 } else { 5071 v->WritebackDelayTime[k] = 0.0; 5072 } 5073 for (m = 0; m < v->NumberOfActivePlanes; m++) { 5074 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) { 5075 v->WritebackDelayTime[k] = dml_max( 5076 v->WritebackDelayTime[k], 5077 v->WritebackLatency 5078 + CalculateWriteBackDelay( 5079 v->WritebackPixelFormat[m], 5080 v->WritebackHRatio[m], 5081 v->WritebackVRatio[m], 5082 v->WritebackVTaps[m], 5083 v->WritebackDestinationWidth[m], 5084 v->WritebackDestinationHeight[m], 5085 v->WritebackSourceHeight[m], 5086 v->HTotal[m]) / v->RequiredDISPCLK[i][j]); 5087 } 5088 } 5089 } 5090 } 5091 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5092 for (m = 0; m < v->NumberOfActivePlanes; m++) { 5093 if (v->BlendingAndTiming[k] == m) { 5094 v->WritebackDelayTime[k] = v->WritebackDelayTime[m]; 5095 } 5096 } 5097 } 5098 v->MaxMaxVStartup[i][j] = 0; 5099 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5100 v->MaximumVStartup[i][j][k] = 5101 CalculateMaxVStartup( 5102 v->VTotal[k], 5103 v->VActive[k], 5104 v->VBlankNom[k], 5105 v->HTotal[k], 5106 v->PixelClock[k], 5107 v->ProgressiveToInterlaceUnitInOPP, 5108 v->Interlace[k], 5109 v->ip.VBlankNomDefaultUS, 5110 v->WritebackDelayTime[k]); 5111 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]); 5112 } 5113 } 5114 } 5115 5116 ReorderingBytes = v->NumberOfChannels 5117 * dml_max3( 5118 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 5119 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 5120 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 5121 5122 for (i = 0; i < v->soc.num_states; ++i) { 5123 for (j = 0; j <= 1; ++j) { 5124 v->DCFCLKState[i][j] = v->DCFCLKPerState[i]; 5125 } 5126 } 5127 5128 if (v->UseMinimumRequiredDCFCLK == true) 5129 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes); 5130 5131 for (i = 0; i < v->soc.num_states; ++i) { 5132 for (j = 0; j <= 1; ++j) { 5133 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 5134 v->ReturnBusWidth * v->DCFCLKState[i][j], 5135 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn); 5136 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth; 5137 double PixelDataOnlyReturnBWPerState = dml_min( 5138 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5139 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 5140 double PixelMixedWithVMDataReturnBWPerState = dml_min( 5141 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5142 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 5143 5144 if (v->HostVMEnable != true) { 5145 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState; 5146 } else { 5147 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState; 5148 } 5149 } 5150 } 5151 5152 //Re-ordering Buffer Support Check 5153 for (i = 0; i < v->soc.num_states; ++i) { 5154 for (j = 0; j <= 1; ++j) { 5155 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j] 5156 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) { 5157 v->ROBSupport[i][j] = true; 5158 } else { 5159 v->ROBSupport[i][j] = false; 5160 } 5161 } 5162 } 5163 5164 //Vertical Active BW support check 5165 5166 MaxTotalVActiveRDBandwidth = 0; 5167 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5168 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 5169 } 5170 5171 for (i = 0; i < v->soc.num_states; ++i) { 5172 for (j = 0; j <= 1; ++j) { 5173 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min( 5174 dml_min( 5175 v->ReturnBusWidth * v->DCFCLKState[i][j], 5176 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 5177 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100, 5178 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 5179 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100); 5180 5181 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) { 5182 v->TotalVerticalActiveBandwidthSupport[i][j] = true; 5183 } else { 5184 v->TotalVerticalActiveBandwidthSupport[i][j] = false; 5185 } 5186 } 5187 } 5188 5189 v->UrgentLatency = CalculateUrgentLatency( 5190 v->UrgentLatencyPixelDataOnly, 5191 v->UrgentLatencyPixelMixedWithVMData, 5192 v->UrgentLatencyVMDataOnly, 5193 v->DoUrgentLatencyAdjustment, 5194 v->UrgentLatencyAdjustmentFabricClockComponent, 5195 v->UrgentLatencyAdjustmentFabricClockReference, 5196 v->FabricClock); 5197 //Prefetch Check 5198 for (i = 0; i < v->soc.num_states; ++i) { 5199 for (j = 0; j <= 1; ++j) { 5200 double VMDataOnlyReturnBWPerState; 5201 double HostVMInefficiencyFactor = 1; 5202 int NextPrefetchModeState = MinPrefetchMode; 5203 bool UnboundedRequestEnabledThisState = false; 5204 int CompressedBufferSizeInkByteThisState = 0; 5205 double dummy; 5206 5207 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j]; 5208 5209 v->BandwidthWithoutPrefetchSupported[i][j] = true; 5210 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] 5211 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) { 5212 v->BandwidthWithoutPrefetchSupported[i][j] = false; 5213 } 5214 5215 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5216 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 5217 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 5218 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 5219 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 5220 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 5221 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 5222 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 5223 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 5224 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 5225 } 5226 5227 VMDataOnlyReturnBWPerState = dml_min( 5228 dml_min( 5229 v->ReturnBusWidth * v->DCFCLKState[i][j], 5230 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 5231 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5232 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 5233 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 5234 if (v->GPUVMEnable && v->HostVMEnable) 5235 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState; 5236 5237 v->ExtraLatency = CalculateExtraLatency( 5238 v->RoundTripPingLatencyCycles, 5239 ReorderingBytes, 5240 v->DCFCLKState[i][j], 5241 v->TotalNumberOfActiveDPP[i][j], 5242 v->PixelChunkSizeInKByte, 5243 v->TotalNumberOfDCCActiveDPP[i][j], 5244 v->MetaChunkSize, 5245 v->ReturnBWPerState[i][j], 5246 v->GPUVMEnable, 5247 v->HostVMEnable, 5248 v->NumberOfActivePlanes, 5249 v->NoOfDPPThisState, 5250 v->dpte_group_bytes, 5251 HostVMInefficiencyFactor, 5252 v->HostVMMinPageSize, 5253 v->HostVMMaxNonCachedPageTableLevels); 5254 5255 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5256 do { 5257 v->PrefetchModePerState[i][j] = NextPrefetchModeState; 5258 v->MaxVStartup = v->NextMaxVStartup; 5259 5260 v->TWait = CalculateTWait( 5261 v->PrefetchModePerState[i][j], 5262 v->DRAMClockChangeLatency, 5263 v->UrgLatency[i], 5264 v->SREnterPlusExitTime); 5265 5266 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5267 CalculatePrefetchSchedulePerPlane(mode_lib, 5268 HostVMInefficiencyFactor, 5269 i, j, k); 5270 } 5271 5272 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5273 CalculateUrgentBurstFactor( 5274 v->swath_width_luma_ub_this_state[k], 5275 v->swath_width_chroma_ub_this_state[k], 5276 v->SwathHeightYThisState[k], 5277 v->SwathHeightCThisState[k], 5278 v->HTotal[k] / v->PixelClock[k], 5279 v->UrgLatency[i], 5280 v->CursorBufferSize, 5281 v->CursorWidth[k][0], 5282 v->CursorBPP[k][0], 5283 v->VRatioPreY[i][j][k], 5284 v->VRatioPreC[i][j][k], 5285 v->BytePerPixelInDETY[k], 5286 v->BytePerPixelInDETC[k], 5287 v->DETBufferSizeYThisState[k], 5288 v->DETBufferSizeCThisState[k], 5289 &v->UrgentBurstFactorCursorPre[k], 5290 &v->UrgentBurstFactorLumaPre[k], 5291 &v->UrgentBurstFactorChromaPre[k], 5292 &v->NotUrgentLatencyHidingPre[k]); 5293 } 5294 5295 v->MaximumReadBandwidthWithPrefetch = 0.0; 5296 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5297 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 5298 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k]; 5299 5300 v->MaximumReadBandwidthWithPrefetch = 5301 v->MaximumReadBandwidthWithPrefetch 5302 + dml_max3( 5303 v->VActivePixelBandwidth[i][j][k] 5304 + v->VActiveCursorBandwidth[i][j][k] 5305 + v->NoOfDPP[i][j][k] 5306 * (v->meta_row_bandwidth[i][j][k] 5307 + v->dpte_row_bandwidth[i][j][k]), 5308 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5309 v->NoOfDPP[i][j][k] 5310 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5311 * v->UrgentBurstFactorLumaPre[k] 5312 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5313 * v->UrgentBurstFactorChromaPre[k]) 5314 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5315 } 5316 5317 v->NotEnoughUrgentLatencyHidingPre = false; 5318 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5319 if (v->NotUrgentLatencyHidingPre[k] == true) { 5320 v->NotEnoughUrgentLatencyHidingPre = true; 5321 } 5322 } 5323 5324 v->PrefetchSupported[i][j] = true; 5325 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j] 5326 || v->NotEnoughUrgentLatencyHidingPre == 1) { 5327 v->PrefetchSupported[i][j] = false; 5328 } 5329 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5330 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0 5331 || v->NoTimeForPrefetch[i][j][k] == true) { 5332 v->PrefetchSupported[i][j] = false; 5333 } 5334 } 5335 5336 v->DynamicMetadataSupported[i][j] = true; 5337 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5338 if (v->NoTimeForDynamicMetadata[i][j][k] == true) { 5339 v->DynamicMetadataSupported[i][j] = false; 5340 } 5341 } 5342 5343 v->VRatioInPrefetchSupported[i][j] = true; 5344 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5345 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) { 5346 v->VRatioInPrefetchSupported[i][j] = false; 5347 } 5348 } 5349 v->AnyLinesForVMOrRowTooLarge = false; 5350 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5351 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) { 5352 v->AnyLinesForVMOrRowTooLarge = true; 5353 } 5354 } 5355 5356 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5357 5358 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) { 5359 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j]; 5360 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5361 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 5362 - dml_max( 5363 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k], 5364 v->NoOfDPP[i][j][k] 5365 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5366 * v->UrgentBurstFactorLumaPre[k] 5367 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5368 * v->UrgentBurstFactorChromaPre[k]) 5369 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5370 } 5371 v->TotImmediateFlipBytes = 0.0; 5372 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5373 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 5374 + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k] 5375 + v->DPTEBytesPerRow[i][j][k]); 5376 } 5377 5378 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5379 CalculateFlipSchedule( 5380 mode_lib, 5381 k, 5382 HostVMInefficiencyFactor, 5383 v->ExtraLatency, 5384 v->UrgLatency[i], 5385 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 5386 v->MetaRowBytes[i][j][k], 5387 v->DPTEBytesPerRow[i][j][k]); 5388 } 5389 v->total_dcn_read_bw_with_flip = 0.0; 5390 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5391 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip 5392 + dml_max3( 5393 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5394 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k] 5395 + v->VActiveCursorBandwidth[i][j][k], 5396 v->NoOfDPP[i][j][k] 5397 * (v->final_flip_bw[k] 5398 + v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5399 * v->UrgentBurstFactorLumaPre[k] 5400 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5401 * v->UrgentBurstFactorChromaPre[k]) 5402 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5403 } 5404 v->ImmediateFlipSupportedForState[i][j] = true; 5405 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) { 5406 v->ImmediateFlipSupportedForState[i][j] = false; 5407 } 5408 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5409 if (v->ImmediateFlipSupportedForPipe[k] == false) { 5410 v->ImmediateFlipSupportedForState[i][j] = false; 5411 } 5412 } 5413 } else { 5414 v->ImmediateFlipSupportedForState[i][j] = false; 5415 } 5416 5417 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) { 5418 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5419 NextPrefetchModeState = NextPrefetchModeState + 1; 5420 } else { 5421 v->NextMaxVStartup = v->NextMaxVStartup - 1; 5422 } 5423 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5424 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5425 && ((v->HostVMEnable == false && 5426 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5427 || v->ImmediateFlipSupportedForState[i][j] == true)) 5428 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode))); 5429 5430 CalculateUnboundedRequestAndCompressedBufferSize( 5431 v->DETBufferSizeInKByte[0], 5432 v->ConfigReturnBufferSizeInKByte, 5433 v->UseUnboundedRequesting, 5434 v->TotalNumberOfActiveDPP[i][j], 5435 NoChroma, 5436 v->MaxNumDPP, 5437 v->CompressedBufferSegmentSizeInkByte, 5438 v->Output, 5439 &UnboundedRequestEnabledThisState, 5440 &CompressedBufferSizeInkByteThisState); 5441 5442 CalculateWatermarksAndDRAMSpeedChangeSupport( 5443 mode_lib, 5444 v->PrefetchModePerState[i][j], 5445 v->DCFCLKState[i][j], 5446 v->ReturnBWPerState[i][j], 5447 v->UrgLatency[i], 5448 v->ExtraLatency, 5449 v->SOCCLKPerState[i], 5450 v->ProjectedDCFCLKDeepSleep[i][j], 5451 v->DETBufferSizeYThisState, 5452 v->DETBufferSizeCThisState, 5453 v->SwathHeightYThisState, 5454 v->SwathHeightCThisState, 5455 v->SwathWidthYThisState, 5456 v->SwathWidthCThisState, 5457 v->NoOfDPPThisState, 5458 v->BytePerPixelInDETY, 5459 v->BytePerPixelInDETC, 5460 UnboundedRequestEnabledThisState, 5461 CompressedBufferSizeInkByteThisState, 5462 &v->DRAMClockChangeSupport[i][j], 5463 &dummy, 5464 &dummy, 5465 &dummy, 5466 &dummy); 5467 } 5468 } 5469 5470 /*PTE Buffer Size Check*/ 5471 for (i = 0; i < v->soc.num_states; i++) { 5472 for (j = 0; j < 2; j++) { 5473 v->PTEBufferSizeNotExceeded[i][j] = true; 5474 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5475 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) { 5476 v->PTEBufferSizeNotExceeded[i][j] = false; 5477 } 5478 } 5479 } 5480 } 5481 5482 /*Cursor Support Check*/ 5483 v->CursorSupport = true; 5484 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5485 if (v->CursorWidth[k][0] > 0.0) { 5486 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) { 5487 v->CursorSupport = false; 5488 } 5489 } 5490 } 5491 5492 /*Valid Pitch Check*/ 5493 v->PitchSupport = true; 5494 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5495 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]); 5496 if (v->DCCEnable[k] == true) { 5497 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]); 5498 } else { 5499 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k]; 5500 } 5501 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 5502 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe 5503 && v->SourcePixelFormat[k] != dm_mono_8) { 5504 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]); 5505 if (v->DCCEnable[k] == true) { 5506 v->AlignedDCCMetaPitchC[k] = dml_ceil( 5507 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 5508 64.0 * v->Read256BlockWidthC[k]); 5509 } else { 5510 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5511 } 5512 } else { 5513 v->AlignedCPitch[k] = v->PitchC[k]; 5514 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5515 } 5516 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] 5517 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) { 5518 v->PitchSupport = false; 5519 } 5520 } 5521 5522 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5523 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) { 5524 ViewportExceedsSurface = true; 5525 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 5526 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 5527 && v->SourcePixelFormat[k] != dm_rgbe) { 5528 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] 5529 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) { 5530 ViewportExceedsSurface = true; 5531 } 5532 } 5533 } 5534 } 5535 5536 /*Mode Support, Voltage State and SOC Configuration*/ 5537 for (i = v->soc.num_states - 1; i >= 0; i--) { 5538 for (j = 0; j < 2; j++) { 5539 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true 5540 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP 5541 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false 5542 && v->DTBCLKRequiredMoreThanSupported[i] == false 5543 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true 5544 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true 5545 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true 5546 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false 5547 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true 5548 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5549 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false 5550 && ((v->HostVMEnable == false 5551 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5552 || v->ImmediateFlipSupportedForState[i][j] == true) 5553 && FMTBufferExceeded == false) { 5554 v->ModeSupport[i][j] = true; 5555 } else { 5556 v->ModeSupport[i][j] = false; 5557 } 5558 } 5559 } 5560 for (i = v->soc.num_states; i >= 0; i--) { 5561 for (j = 0; j < 2; j++) { 5562 enum dm_validation_status status = DML_VALIDATION_OK; 5563 5564 if (!v->ScaleRatioAndTapsSupport) { 5565 status = DML_FAIL_SCALE_RATIO_TAP; 5566 } else if (!v->SourceFormatPixelAndScanSupport) { 5567 status = DML_FAIL_SOURCE_PIXEL_FORMAT; 5568 } else if (!v->ViewportSizeSupport[i][j]) { 5569 status = DML_FAIL_VIEWPORT_SIZE; 5570 } else if (P2IWith420) { 5571 status = DML_FAIL_P2I_WITH_420; 5572 } else if (DSCOnlyIfNecessaryWithBPP) { 5573 status = DML_FAIL_DSC_ONLY_IF_NECESSARY_WITH_BPP; 5574 } else if (DSC422NativeNotSupported) { 5575 status = DML_FAIL_NOT_DSC422_NATIVE; 5576 } else if (!v->ODMCombine4To1SupportCheckOK[i]) { 5577 status = DML_FAIL_ODM_COMBINE4TO1; 5578 } else if (v->NotEnoughDSCUnits[i]) { 5579 status = DML_FAIL_NOT_ENOUGH_DSC; 5580 } else if (!v->ROBSupport[i][j]) { 5581 status = DML_FAIL_REORDERING_BUFFER; 5582 } else if (!v->DISPCLK_DPPCLK_Support[i][j]) { 5583 status = DML_FAIL_DISPCLK_DPPCLK; 5584 } else if (!v->TotalAvailablePipesSupport[i][j]) { 5585 status = DML_FAIL_TOTAL_AVAILABLE_PIPES; 5586 } else if (!EnoughWritebackUnits) { 5587 status = DML_FAIL_ENOUGH_WRITEBACK_UNITS; 5588 } else if (!v->WritebackLatencySupport) { 5589 status = DML_FAIL_WRITEBACK_LATENCY; 5590 } else if (!v->WritebackScaleRatioAndTapsSupport) { 5591 status = DML_FAIL_WRITEBACK_SCALE_RATIO_TAP; 5592 } else if (!v->CursorSupport) { 5593 status = DML_FAIL_CURSOR_SUPPORT; 5594 } else if (!v->PitchSupport) { 5595 status = DML_FAIL_PITCH_SUPPORT; 5596 } else if (ViewportExceedsSurface) { 5597 status = DML_FAIL_VIEWPORT_EXCEEDS_SURFACE; 5598 } else if (!v->PrefetchSupported[i][j]) { 5599 status = DML_FAIL_PREFETCH_SUPPORT; 5600 } else if (!v->DynamicMetadataSupported[i][j]) { 5601 status = DML_FAIL_DYNAMIC_METADATA; 5602 } else if (!v->TotalVerticalActiveBandwidthSupport[i][j]) { 5603 status = DML_FAIL_TOTAL_V_ACTIVE_BW; 5604 } else if (!v->VRatioInPrefetchSupported[i][j]) { 5605 status = DML_FAIL_V_RATIO_PREFETCH; 5606 } else if (!v->PTEBufferSizeNotExceeded[i][j]) { 5607 status = DML_FAIL_PTE_BUFFER_SIZE; 5608 } else if (v->NonsupportedDSCInputBPC) { 5609 status = DML_FAIL_DSC_INPUT_BPC; 5610 } else if ((v->HostVMEnable 5611 && !v->ImmediateFlipSupportedForState[i][j])) { 5612 status = DML_FAIL_HOST_VM_IMMEDIATE_FLIP; 5613 } else if (FMTBufferExceeded) { 5614 status = DML_FAIL_FMT_BUFFER_EXCEEDED; 5615 } 5616 mode_lib->vba.ValidationStatus[i] = status; 5617 } 5618 } 5619 5620 { 5621 unsigned int MaximumMPCCombine = 0; 5622 5623 for (i = v->soc.num_states; i >= 0; i--) { 5624 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) { 5625 v->VoltageLevel = i; 5626 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true; 5627 if (v->ModeSupport[i][0] == true) { 5628 MaximumMPCCombine = 0; 5629 } else { 5630 MaximumMPCCombine = 1; 5631 } 5632 } 5633 } 5634 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine]; 5635 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { 5636 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k]; 5637 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k]; 5638 } 5639 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine]; 5640 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel]; 5641 v->FabricClock = v->FabricClockPerState[v->VoltageLevel]; 5642 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel]; 5643 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine]; 5644 v->maxMpcComb = MaximumMPCCombine; 5645 } 5646 } 5647 5648 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 5649 struct display_mode_lib *mode_lib, 5650 unsigned int PrefetchMode, 5651 double DCFCLK, 5652 double ReturnBW, 5653 double UrgentLatency, 5654 double ExtraLatency, 5655 double SOCCLK, 5656 double DCFCLKDeepSleep, 5657 unsigned int DETBufferSizeY[], 5658 unsigned int DETBufferSizeC[], 5659 unsigned int SwathHeightY[], 5660 unsigned int SwathHeightC[], 5661 double SwathWidthY[], 5662 double SwathWidthC[], 5663 unsigned int DPPPerPlane[], 5664 double BytePerPixelDETY[], 5665 double BytePerPixelDETC[], 5666 bool UnboundedRequestEnabled, 5667 unsigned int CompressedBufferSizeInkByte, 5668 enum clock_change_support *DRAMClockChangeSupport, 5669 double *StutterExitWatermark, 5670 double *StutterEnterPlusExitWatermark, 5671 double *Z8StutterExitWatermark, 5672 double *Z8StutterEnterPlusExitWatermark) 5673 { 5674 struct vba_vars_st *v = &mode_lib->vba; 5675 double EffectiveLBLatencyHidingY; 5676 double EffectiveLBLatencyHidingC; 5677 double LinesInDETY[DC__NUM_DPP__MAX]; 5678 double LinesInDETC; 5679 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 5680 unsigned int LinesInDETCRoundedDownToSwath; 5681 double FullDETBufferingTimeY; 5682 double FullDETBufferingTimeC; 5683 double ActiveDRAMClockChangeLatencyMarginY; 5684 double ActiveDRAMClockChangeLatencyMarginC; 5685 double WritebackDRAMClockChangeLatencyMargin; 5686 double PlaneWithMinActiveDRAMClockChangeMargin; 5687 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank; 5688 double WritebackDRAMClockChangeLatencyHiding; 5689 double TotalPixelBW = 0.0; 5690 int k, j; 5691 5692 v->UrgentWatermark = UrgentLatency + ExtraLatency; 5693 5694 #ifdef __DML_VBA_DEBUG__ 5695 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); 5696 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency); 5697 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark); 5698 #endif 5699 5700 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark; 5701 5702 #ifdef __DML_VBA_DEBUG__ 5703 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency); 5704 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark); 5705 #endif 5706 5707 v->TotalActiveWriteback = 0; 5708 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5709 if (v->WritebackEnable[k] == true) { 5710 v->TotalActiveWriteback = v->TotalActiveWriteback + 1; 5711 } 5712 } 5713 5714 if (v->TotalActiveWriteback <= 1) { 5715 v->WritebackUrgentWatermark = v->WritebackLatency; 5716 } else { 5717 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5718 } 5719 5720 if (v->TotalActiveWriteback <= 1) { 5721 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency; 5722 } else { 5723 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5724 } 5725 5726 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5727 TotalPixelBW = TotalPixelBW 5728 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) 5729 / (v->HTotal[k] / v->PixelClock[k]); 5730 } 5731 5732 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5733 double EffectiveDETBufferSizeY = DETBufferSizeY[k]; 5734 5735 v->LBLatencyHidingSourceLinesY = dml_min( 5736 (double) v->MaxLineBufferLines, 5737 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); 5738 5739 v->LBLatencyHidingSourceLinesC = dml_min( 5740 (double) v->MaxLineBufferLines, 5741 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); 5742 5743 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); 5744 5745 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); 5746 5747 if (UnboundedRequestEnabled) { 5748 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY 5749 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; 5750 } 5751 5752 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 5753 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); 5754 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; 5755 if (BytePerPixelDETC[k] > 0) { 5756 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 5757 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); 5758 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k]; 5759 } else { 5760 LinesInDETC = 0; 5761 FullDETBufferingTimeC = 999999; 5762 } 5763 5764 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY 5765 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; 5766 5767 if (v->NumberOfActivePlanes > 1) { 5768 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY 5769 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k]; 5770 } 5771 5772 if (BytePerPixelDETC[k] > 0) { 5773 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC 5774 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; 5775 5776 if (v->NumberOfActivePlanes > 1) { 5777 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC 5778 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k]; 5779 } 5780 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); 5781 } else { 5782 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; 5783 } 5784 5785 if (v->WritebackEnable[k] == true) { 5786 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024 5787 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); 5788 if (v->WritebackPixelFormat[k] == dm_444_64) { 5789 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2; 5790 } 5791 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark; 5792 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin); 5793 } 5794 } 5795 5796 v->MinActiveDRAMClockChangeMargin = 999999; 5797 PlaneWithMinActiveDRAMClockChangeMargin = 0; 5798 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5799 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) { 5800 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k]; 5801 if (v->BlendingAndTiming[k] == k) { 5802 PlaneWithMinActiveDRAMClockChangeMargin = k; 5803 } else { 5804 for (j = 0; j < v->NumberOfActivePlanes; ++j) { 5805 if (v->BlendingAndTiming[k] == j) { 5806 PlaneWithMinActiveDRAMClockChangeMargin = j; 5807 } 5808 } 5809 } 5810 } 5811 } 5812 5813 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ; 5814 5815 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; 5816 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5817 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) 5818 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { 5819 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k]; 5820 } 5821 } 5822 5823 v->TotalNumberOfActiveOTG = 0; 5824 5825 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5826 if (v->BlendingAndTiming[k] == k) { 5827 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1; 5828 } 5829 } 5830 5831 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) { 5832 *DRAMClockChangeSupport = dm_dram_clock_change_vactive; 5833 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 5834 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) { 5835 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 5836 } else { 5837 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 5838 } 5839 5840 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; 5841 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep); 5842 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5843 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5844 5845 #ifdef __DML_VBA_DEBUG__ 5846 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark); 5847 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark); 5848 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark); 5849 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark); 5850 #endif 5851 } 5852 5853 static void CalculateDCFCLKDeepSleep( 5854 struct display_mode_lib *mode_lib, 5855 unsigned int NumberOfActivePlanes, 5856 int BytePerPixelY[], 5857 int BytePerPixelC[], 5858 double VRatio[], 5859 double VRatioChroma[], 5860 double SwathWidthY[], 5861 double SwathWidthC[], 5862 unsigned int DPPPerPlane[], 5863 double HRatio[], 5864 double HRatioChroma[], 5865 double PixelClock[], 5866 double PSCL_THROUGHPUT[], 5867 double PSCL_THROUGHPUT_CHROMA[], 5868 double DPPCLK[], 5869 double ReadBandwidthLuma[], 5870 double ReadBandwidthChroma[], 5871 int ReturnBusWidth, 5872 double *DCFCLKDeepSleep) 5873 { 5874 struct vba_vars_st *v = &mode_lib->vba; 5875 double DisplayPipeLineDeliveryTimeLuma; 5876 double DisplayPipeLineDeliveryTimeChroma; 5877 double ReadBandwidth = 0.0; 5878 int k; 5879 5880 for (k = 0; k < NumberOfActivePlanes; ++k) { 5881 5882 if (VRatio[k] <= 1) { 5883 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 5884 } else { 5885 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 5886 } 5887 if (BytePerPixelC[k] == 0) { 5888 DisplayPipeLineDeliveryTimeChroma = 0; 5889 } else { 5890 if (VRatioChroma[k] <= 1) { 5891 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 5892 } else { 5893 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 5894 } 5895 } 5896 5897 if (BytePerPixelC[k] > 0) { 5898 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 5899 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); 5900 } else { 5901 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; 5902 } 5903 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16); 5904 5905 } 5906 5907 for (k = 0; k < NumberOfActivePlanes; ++k) { 5908 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 5909 } 5910 5911 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth); 5912 5913 for (k = 0; k < NumberOfActivePlanes; ++k) { 5914 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]); 5915 } 5916 } 5917 5918 static void CalculateUrgentBurstFactor( 5919 int swath_width_luma_ub, 5920 int swath_width_chroma_ub, 5921 unsigned int SwathHeightY, 5922 unsigned int SwathHeightC, 5923 double LineTime, 5924 double UrgentLatency, 5925 double CursorBufferSize, 5926 unsigned int CursorWidth, 5927 unsigned int CursorBPP, 5928 double VRatio, 5929 double VRatioC, 5930 double BytePerPixelInDETY, 5931 double BytePerPixelInDETC, 5932 double DETBufferSizeY, 5933 double DETBufferSizeC, 5934 double *UrgentBurstFactorCursor, 5935 double *UrgentBurstFactorLuma, 5936 double *UrgentBurstFactorChroma, 5937 bool *NotEnoughUrgentLatencyHiding) 5938 { 5939 double LinesInDETLuma; 5940 double LinesInDETChroma; 5941 unsigned int LinesInCursorBuffer; 5942 double CursorBufferSizeInTime; 5943 double DETBufferSizeInTimeLuma; 5944 double DETBufferSizeInTimeChroma; 5945 5946 *NotEnoughUrgentLatencyHiding = 0; 5947 5948 if (CursorWidth > 0) { 5949 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0); 5950 if (VRatio > 0) { 5951 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; 5952 if (CursorBufferSizeInTime - UrgentLatency <= 0) { 5953 *NotEnoughUrgentLatencyHiding = 1; 5954 *UrgentBurstFactorCursor = 0; 5955 } else { 5956 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency); 5957 } 5958 } else { 5959 *UrgentBurstFactorCursor = 1; 5960 } 5961 } 5962 5963 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub; 5964 if (VRatio > 0) { 5965 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; 5966 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { 5967 *NotEnoughUrgentLatencyHiding = 1; 5968 *UrgentBurstFactorLuma = 0; 5969 } else { 5970 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); 5971 } 5972 } else { 5973 *UrgentBurstFactorLuma = 1; 5974 } 5975 5976 if (BytePerPixelInDETC > 0) { 5977 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub; 5978 if (VRatio > 0) { 5979 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; 5980 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { 5981 *NotEnoughUrgentLatencyHiding = 1; 5982 *UrgentBurstFactorChroma = 0; 5983 } else { 5984 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); 5985 } 5986 } else { 5987 *UrgentBurstFactorChroma = 1; 5988 } 5989 } 5990 } 5991 5992 static void CalculatePixelDeliveryTimes( 5993 unsigned int NumberOfActivePlanes, 5994 double VRatio[], 5995 double VRatioChroma[], 5996 double VRatioPrefetchY[], 5997 double VRatioPrefetchC[], 5998 unsigned int swath_width_luma_ub[], 5999 unsigned int swath_width_chroma_ub[], 6000 unsigned int DPPPerPlane[], 6001 double HRatio[], 6002 double HRatioChroma[], 6003 double PixelClock[], 6004 double PSCL_THROUGHPUT[], 6005 double PSCL_THROUGHPUT_CHROMA[], 6006 double DPPCLK[], 6007 int BytePerPixelC[], 6008 enum scan_direction_class SourceScan[], 6009 unsigned int NumberOfCursors[], 6010 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 6011 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 6012 unsigned int BlockWidth256BytesY[], 6013 unsigned int BlockHeight256BytesY[], 6014 unsigned int BlockWidth256BytesC[], 6015 unsigned int BlockHeight256BytesC[], 6016 double DisplayPipeLineDeliveryTimeLuma[], 6017 double DisplayPipeLineDeliveryTimeChroma[], 6018 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 6019 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 6020 double DisplayPipeRequestDeliveryTimeLuma[], 6021 double DisplayPipeRequestDeliveryTimeChroma[], 6022 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 6023 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 6024 double CursorRequestDeliveryTime[], 6025 double CursorRequestDeliveryTimePrefetch[]) 6026 { 6027 double req_per_swath_ub; 6028 int k; 6029 6030 for (k = 0; k < NumberOfActivePlanes; ++k) { 6031 if (VRatio[k] <= 1) { 6032 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 6033 } else { 6034 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 6035 } 6036 6037 if (BytePerPixelC[k] == 0) { 6038 DisplayPipeLineDeliveryTimeChroma[k] = 0; 6039 } else { 6040 if (VRatioChroma[k] <= 1) { 6041 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 6042 } else { 6043 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 6044 } 6045 } 6046 6047 if (VRatioPrefetchY[k] <= 1) { 6048 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 6049 } else { 6050 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 6051 } 6052 6053 if (BytePerPixelC[k] == 0) { 6054 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; 6055 } else { 6056 if (VRatioPrefetchC[k] <= 1) { 6057 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 6058 } else { 6059 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 6060 } 6061 } 6062 } 6063 6064 for (k = 0; k < NumberOfActivePlanes; ++k) { 6065 if (SourceScan[k] != dm_vert) { 6066 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; 6067 } else { 6068 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; 6069 } 6070 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; 6071 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; 6072 if (BytePerPixelC[k] == 0) { 6073 DisplayPipeRequestDeliveryTimeChroma[k] = 0; 6074 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; 6075 } else { 6076 if (SourceScan[k] != dm_vert) { 6077 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; 6078 } else { 6079 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; 6080 } 6081 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; 6082 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; 6083 } 6084 #ifdef __DML_VBA_DEBUG__ 6085 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); 6086 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); 6087 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); 6088 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); 6089 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); 6090 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); 6091 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); 6092 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); 6093 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); 6094 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); 6095 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); 6096 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); 6097 #endif 6098 } 6099 6100 for (k = 0; k < NumberOfActivePlanes; ++k) { 6101 int cursor_req_per_width; 6102 6103 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1); 6104 if (NumberOfCursors[k] > 0) { 6105 if (VRatio[k] <= 1) { 6106 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; 6107 } else { 6108 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; 6109 } 6110 if (VRatioPrefetchY[k] <= 1) { 6111 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; 6112 } else { 6113 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; 6114 } 6115 } else { 6116 CursorRequestDeliveryTime[k] = 0; 6117 CursorRequestDeliveryTimePrefetch[k] = 0; 6118 } 6119 #ifdef __DML_VBA_DEBUG__ 6120 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]); 6121 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]); 6122 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]); 6123 #endif 6124 } 6125 } 6126 6127 static void CalculateMetaAndPTETimes( 6128 int NumberOfActivePlanes, 6129 bool GPUVMEnable, 6130 int MetaChunkSize, 6131 int MinMetaChunkSizeBytes, 6132 int HTotal[], 6133 double VRatio[], 6134 double VRatioChroma[], 6135 double DestinationLinesToRequestRowInVBlank[], 6136 double DestinationLinesToRequestRowInImmediateFlip[], 6137 bool DCCEnable[], 6138 double PixelClock[], 6139 int BytePerPixelY[], 6140 int BytePerPixelC[], 6141 enum scan_direction_class SourceScan[], 6142 int dpte_row_height[], 6143 int dpte_row_height_chroma[], 6144 int meta_row_width[], 6145 int meta_row_width_chroma[], 6146 int meta_row_height[], 6147 int meta_row_height_chroma[], 6148 int meta_req_width[], 6149 int meta_req_width_chroma[], 6150 int meta_req_height[], 6151 int meta_req_height_chroma[], 6152 int dpte_group_bytes[], 6153 int PTERequestSizeY[], 6154 int PTERequestSizeC[], 6155 int PixelPTEReqWidthY[], 6156 int PixelPTEReqHeightY[], 6157 int PixelPTEReqWidthC[], 6158 int PixelPTEReqHeightC[], 6159 int dpte_row_width_luma_ub[], 6160 int dpte_row_width_chroma_ub[], 6161 double DST_Y_PER_PTE_ROW_NOM_L[], 6162 double DST_Y_PER_PTE_ROW_NOM_C[], 6163 double DST_Y_PER_META_ROW_NOM_L[], 6164 double DST_Y_PER_META_ROW_NOM_C[], 6165 double TimePerMetaChunkNominal[], 6166 double TimePerChromaMetaChunkNominal[], 6167 double TimePerMetaChunkVBlank[], 6168 double TimePerChromaMetaChunkVBlank[], 6169 double TimePerMetaChunkFlip[], 6170 double TimePerChromaMetaChunkFlip[], 6171 double time_per_pte_group_nom_luma[], 6172 double time_per_pte_group_vblank_luma[], 6173 double time_per_pte_group_flip_luma[], 6174 double time_per_pte_group_nom_chroma[], 6175 double time_per_pte_group_vblank_chroma[], 6176 double time_per_pte_group_flip_chroma[]) 6177 { 6178 unsigned int meta_chunk_width; 6179 unsigned int min_meta_chunk_width; 6180 unsigned int meta_chunk_per_row_int; 6181 unsigned int meta_row_remainder; 6182 unsigned int meta_chunk_threshold; 6183 unsigned int meta_chunks_per_row_ub; 6184 unsigned int meta_chunk_width_chroma; 6185 unsigned int min_meta_chunk_width_chroma; 6186 unsigned int meta_chunk_per_row_int_chroma; 6187 unsigned int meta_row_remainder_chroma; 6188 unsigned int meta_chunk_threshold_chroma; 6189 unsigned int meta_chunks_per_row_ub_chroma; 6190 unsigned int dpte_group_width_luma; 6191 unsigned int dpte_groups_per_row_luma_ub; 6192 unsigned int dpte_group_width_chroma; 6193 unsigned int dpte_groups_per_row_chroma_ub; 6194 int k; 6195 6196 for (k = 0; k < NumberOfActivePlanes; ++k) { 6197 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; 6198 if (BytePerPixelC[k] == 0) { 6199 DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 6200 } else { 6201 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; 6202 } 6203 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; 6204 if (BytePerPixelC[k] == 0) { 6205 DST_Y_PER_META_ROW_NOM_C[k] = 0; 6206 } else { 6207 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; 6208 } 6209 } 6210 6211 for (k = 0; k < NumberOfActivePlanes; ++k) { 6212 if (DCCEnable[k] == true) { 6213 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; 6214 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; 6215 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; 6216 meta_row_remainder = meta_row_width[k] % meta_chunk_width; 6217 if (SourceScan[k] != dm_vert) { 6218 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; 6219 } else { 6220 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; 6221 } 6222 if (meta_row_remainder <= meta_chunk_threshold) { 6223 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 6224 } else { 6225 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 6226 } 6227 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6228 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6229 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6230 if (BytePerPixelC[k] == 0) { 6231 TimePerChromaMetaChunkNominal[k] = 0; 6232 TimePerChromaMetaChunkVBlank[k] = 0; 6233 TimePerChromaMetaChunkFlip[k] = 0; 6234 } else { 6235 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; 6236 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; 6237 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma; 6238 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; 6239 if (SourceScan[k] != dm_vert) { 6240 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k]; 6241 } else { 6242 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k]; 6243 } 6244 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) { 6245 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; 6246 } else { 6247 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; 6248 } 6249 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6250 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6251 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6252 } 6253 } else { 6254 TimePerMetaChunkNominal[k] = 0; 6255 TimePerMetaChunkVBlank[k] = 0; 6256 TimePerMetaChunkFlip[k] = 0; 6257 TimePerChromaMetaChunkNominal[k] = 0; 6258 TimePerChromaMetaChunkVBlank[k] = 0; 6259 TimePerChromaMetaChunkFlip[k] = 0; 6260 } 6261 } 6262 6263 for (k = 0; k < NumberOfActivePlanes; ++k) { 6264 if (GPUVMEnable == true) { 6265 if (SourceScan[k] != dm_vert) { 6266 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k]; 6267 } else { 6268 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k]; 6269 } 6270 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1); 6271 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6272 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6273 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6274 if (BytePerPixelC[k] == 0) { 6275 time_per_pte_group_nom_chroma[k] = 0; 6276 time_per_pte_group_vblank_chroma[k] = 0; 6277 time_per_pte_group_flip_chroma[k] = 0; 6278 } else { 6279 if (SourceScan[k] != dm_vert) { 6280 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k]; 6281 } else { 6282 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k]; 6283 } 6284 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1); 6285 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6286 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6287 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6288 } 6289 } else { 6290 time_per_pte_group_nom_luma[k] = 0; 6291 time_per_pte_group_vblank_luma[k] = 0; 6292 time_per_pte_group_flip_luma[k] = 0; 6293 time_per_pte_group_nom_chroma[k] = 0; 6294 time_per_pte_group_vblank_chroma[k] = 0; 6295 time_per_pte_group_flip_chroma[k] = 0; 6296 } 6297 } 6298 } 6299 6300 static void CalculateVMGroupAndRequestTimes( 6301 unsigned int NumberOfActivePlanes, 6302 bool GPUVMEnable, 6303 unsigned int GPUVMMaxPageTableLevels, 6304 unsigned int HTotal[], 6305 int BytePerPixelC[], 6306 double DestinationLinesToRequestVMInVBlank[], 6307 double DestinationLinesToRequestVMInImmediateFlip[], 6308 bool DCCEnable[], 6309 double PixelClock[], 6310 int dpte_row_width_luma_ub[], 6311 int dpte_row_width_chroma_ub[], 6312 int vm_group_bytes[], 6313 unsigned int dpde0_bytes_per_frame_ub_l[], 6314 unsigned int dpde0_bytes_per_frame_ub_c[], 6315 int meta_pte_bytes_per_frame_ub_l[], 6316 int meta_pte_bytes_per_frame_ub_c[], 6317 double TimePerVMGroupVBlank[], 6318 double TimePerVMGroupFlip[], 6319 double TimePerVMRequestVBlank[], 6320 double TimePerVMRequestFlip[]) 6321 { 6322 int num_group_per_lower_vm_stage; 6323 int num_req_per_lower_vm_stage; 6324 int k; 6325 6326 for (k = 0; k < NumberOfActivePlanes; ++k) { 6327 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { 6328 if (DCCEnable[k] == false) { 6329 if (BytePerPixelC[k] > 0) { 6330 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6331 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6332 } else { 6333 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6334 } 6335 } else { 6336 if (GPUVMMaxPageTableLevels == 1) { 6337 if (BytePerPixelC[k] > 0) { 6338 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6339 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6340 } else { 6341 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6342 } 6343 } else { 6344 if (BytePerPixelC[k] > 0) { 6345 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6346 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) 6347 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6348 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6349 } else { 6350 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6351 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6352 } 6353 } 6354 } 6355 6356 if (DCCEnable[k] == false) { 6357 if (BytePerPixelC[k] > 0) { 6358 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64; 6359 } else { 6360 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64; 6361 } 6362 } else { 6363 if (GPUVMMaxPageTableLevels == 1) { 6364 if (BytePerPixelC[k] > 0) { 6365 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; 6366 } else { 6367 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64; 6368 } 6369 } else { 6370 if (BytePerPixelC[k] > 0) { 6371 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 6372 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; 6373 } else { 6374 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64; 6375 } 6376 } 6377 } 6378 6379 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 6380 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 6381 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 6382 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 6383 6384 if (GPUVMMaxPageTableLevels > 2) { 6385 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; 6386 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; 6387 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; 6388 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; 6389 } 6390 6391 } else { 6392 TimePerVMGroupVBlank[k] = 0; 6393 TimePerVMGroupFlip[k] = 0; 6394 TimePerVMRequestVBlank[k] = 0; 6395 TimePerVMRequestFlip[k] = 0; 6396 } 6397 } 6398 } 6399 6400 static void CalculateStutterEfficiency( 6401 struct display_mode_lib *mode_lib, 6402 int CompressedBufferSizeInkByte, 6403 bool UnboundedRequestEnabled, 6404 int ConfigReturnBufferSizeInKByte, 6405 int MetaFIFOSizeInKEntries, 6406 int ZeroSizeBufferEntries, 6407 int NumberOfActivePlanes, 6408 int ROBBufferSizeInKByte, 6409 double TotalDataReadBandwidth, 6410 double DCFCLK, 6411 double ReturnBW, 6412 double COMPBUF_RESERVED_SPACE_64B, 6413 double COMPBUF_RESERVED_SPACE_ZS, 6414 double SRExitTime, 6415 double SRExitZ8Time, 6416 bool SynchronizedVBlank, 6417 double Z8StutterEnterPlusExitWatermark, 6418 double StutterEnterPlusExitWatermark, 6419 bool ProgressiveToInterlaceUnitInOPP, 6420 bool Interlace[], 6421 double MinTTUVBlank[], 6422 int DPPPerPlane[], 6423 unsigned int DETBufferSizeY[], 6424 int BytePerPixelY[], 6425 double BytePerPixelDETY[], 6426 double SwathWidthY[], 6427 int SwathHeightY[], 6428 int SwathHeightC[], 6429 double NetDCCRateLuma[], 6430 double NetDCCRateChroma[], 6431 double DCCFractionOfZeroSizeRequestsLuma[], 6432 double DCCFractionOfZeroSizeRequestsChroma[], 6433 int HTotal[], 6434 int VTotal[], 6435 double PixelClock[], 6436 double VRatio[], 6437 enum scan_direction_class SourceScan[], 6438 int BlockHeight256BytesY[], 6439 int BlockWidth256BytesY[], 6440 int BlockHeight256BytesC[], 6441 int BlockWidth256BytesC[], 6442 int DCCYMaxUncompressedBlock[], 6443 int DCCCMaxUncompressedBlock[], 6444 int VActive[], 6445 bool DCCEnable[], 6446 bool WritebackEnable[], 6447 double ReadBandwidthPlaneLuma[], 6448 double ReadBandwidthPlaneChroma[], 6449 double meta_row_bw[], 6450 double dpte_row_bw[], 6451 double *StutterEfficiencyNotIncludingVBlank, 6452 double *StutterEfficiency, 6453 int *NumberOfStutterBurstsPerFrame, 6454 double *Z8StutterEfficiencyNotIncludingVBlank, 6455 double *Z8StutterEfficiency, 6456 int *Z8NumberOfStutterBurstsPerFrame, 6457 double *StutterPeriod) 6458 { 6459 struct vba_vars_st *v = &mode_lib->vba; 6460 6461 double DETBufferingTimeY; 6462 double SwathWidthYCriticalPlane = 0; 6463 double VActiveTimeCriticalPlane = 0; 6464 double FrameTimeCriticalPlane = 0; 6465 int BytePerPixelYCriticalPlane = 0; 6466 double LinesToFinishSwathTransferStutterCriticalPlane = 0; 6467 double MinTTUVBlankCriticalPlane = 0; 6468 double TotalCompressedReadBandwidth; 6469 double TotalRowReadBandwidth; 6470 double AverageDCCCompressionRate; 6471 double EffectiveCompressedBufferSize; 6472 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; 6473 double StutterBurstTime; 6474 int TotalActiveWriteback; 6475 double LinesInDETY; 6476 double LinesInDETYRoundedDownToSwath; 6477 double MaximumEffectiveCompressionLuma; 6478 double MaximumEffectiveCompressionChroma; 6479 double TotalZeroSizeRequestReadBandwidth; 6480 double TotalZeroSizeCompressedReadBandwidth; 6481 double AverageDCCZeroSizeFraction; 6482 double AverageZeroSizeCompressionRate; 6483 int TotalNumberOfActiveOTG = 0; 6484 double LastStutterPeriod = 0.0; 6485 double LastZ8StutterPeriod = 0.0; 6486 int k; 6487 6488 TotalZeroSizeRequestReadBandwidth = 0; 6489 TotalZeroSizeCompressedReadBandwidth = 0; 6490 TotalRowReadBandwidth = 0; 6491 TotalCompressedReadBandwidth = 0; 6492 6493 for (k = 0; k < NumberOfActivePlanes; ++k) { 6494 if (DCCEnable[k] == true) { 6495 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k]) 6496 || DCCYMaxUncompressedBlock[k] < 256) { 6497 MaximumEffectiveCompressionLuma = 2; 6498 } else { 6499 MaximumEffectiveCompressionLuma = 4; 6500 } 6501 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma); 6502 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; 6503 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6504 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma; 6505 if (ReadBandwidthPlaneChroma[k] > 0) { 6506 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k]) 6507 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) { 6508 MaximumEffectiveCompressionChroma = 2; 6509 } else { 6510 MaximumEffectiveCompressionChroma = 4; 6511 } 6512 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 6513 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma); 6514 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k]; 6515 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6516 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma; 6517 } 6518 } else { 6519 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k]; 6520 } 6521 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]); 6522 } 6523 6524 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; 6525 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; 6526 6527 #ifdef __DML_VBA_DEBUG__ 6528 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); 6529 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); 6530 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth); 6531 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); 6532 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); 6533 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6534 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); 6535 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); 6536 #endif 6537 6538 if (AverageDCCZeroSizeFraction == 1) { 6539 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6540 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate; 6541 } else if (AverageDCCZeroSizeFraction > 0) { 6542 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6543 EffectiveCompressedBufferSize = dml_min( 6544 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6545 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) 6546 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate, 6547 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6548 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6549 dml_print( 6550 "DML::%s: min 2 = %f\n", 6551 __func__, 6552 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)); 6553 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate); 6554 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6555 } else { 6556 EffectiveCompressedBufferSize = dml_min( 6557 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6558 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate; 6559 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6560 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); 6561 } 6562 6563 #ifdef __DML_VBA_DEBUG__ 6564 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); 6565 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); 6566 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6567 #endif 6568 6569 *StutterPeriod = 0; 6570 for (k = 0; k < NumberOfActivePlanes; ++k) { 6571 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth) 6572 / BytePerPixelDETY[k] / SwathWidthY[k]; 6573 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); 6574 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k]; 6575 #ifdef __DML_VBA_DEBUG__ 6576 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]); 6577 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 6578 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]); 6579 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]); 6580 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); 6581 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY); 6582 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath); 6583 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]); 6584 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6585 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]); 6586 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); 6587 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6588 #endif 6589 6590 if (k == 0 || DETBufferingTimeY < *StutterPeriod) { 6591 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; 6592 6593 *StutterPeriod = DETBufferingTimeY; 6594 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k]; 6595 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k]; 6596 BytePerPixelYCriticalPlane = BytePerPixelY[k]; 6597 SwathWidthYCriticalPlane = SwathWidthY[k]; 6598 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath); 6599 MinTTUVBlankCriticalPlane = MinTTUVBlank[k]; 6600 6601 #ifdef __DML_VBA_DEBUG__ 6602 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6603 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane); 6604 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane); 6605 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6606 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane); 6607 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane); 6608 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane); 6609 #endif 6610 } 6611 } 6612 6613 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize); 6614 #ifdef __DML_VBA_DEBUG__ 6615 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); 6616 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6617 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth); 6618 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize); 6619 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6620 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); 6621 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 6622 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); 6623 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); 6624 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 6625 #endif 6626 6627 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW 6628 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) 6629 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; 6630 #ifdef __DML_VBA_DEBUG__ 6631 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW); 6632 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth)); 6633 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); 6634 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); 6635 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6636 #endif 6637 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6638 6639 dml_print( 6640 "DML::%s: Time to finish residue swath=%f\n", 6641 __func__, 6642 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6643 6644 TotalActiveWriteback = 0; 6645 for (k = 0; k < NumberOfActivePlanes; ++k) { 6646 if (WritebackEnable[k]) { 6647 TotalActiveWriteback = TotalActiveWriteback + 1; 6648 } 6649 } 6650 6651 if (TotalActiveWriteback == 0) { 6652 #ifdef __DML_VBA_DEBUG__ 6653 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 6654 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); 6655 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); 6656 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6657 #endif 6658 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; 6659 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; 6660 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6661 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6662 } else { 6663 *StutterEfficiencyNotIncludingVBlank = 0.; 6664 *Z8StutterEfficiencyNotIncludingVBlank = 0.; 6665 *NumberOfStutterBurstsPerFrame = 0; 6666 *Z8NumberOfStutterBurstsPerFrame = 0; 6667 } 6668 #ifdef __DML_VBA_DEBUG__ 6669 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6670 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6671 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank); 6672 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); 6673 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6674 #endif 6675 6676 for (k = 0; k < NumberOfActivePlanes; ++k) { 6677 if (v->BlendingAndTiming[k] == k) { 6678 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 6679 } 6680 } 6681 6682 if (*StutterEfficiencyNotIncludingVBlank > 0) { 6683 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6684 6685 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) { 6686 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane 6687 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6688 } else { 6689 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank; 6690 } 6691 } else { 6692 *StutterEfficiency = 0; 6693 } 6694 6695 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { 6696 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6697 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) { 6698 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane 6699 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6700 } else { 6701 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; 6702 } 6703 } else { 6704 *Z8StutterEfficiency = 0.; 6705 } 6706 6707 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); 6708 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); 6709 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6710 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6711 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); 6712 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); 6713 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6714 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6715 } 6716 6717 static void CalculateSwathAndDETConfiguration( 6718 bool ForceSingleDPP, 6719 int NumberOfActivePlanes, 6720 unsigned int DETBufferSizeInKByte, 6721 double MaximumSwathWidthLuma[], 6722 double MaximumSwathWidthChroma[], 6723 enum scan_direction_class SourceScan[], 6724 enum source_format_class SourcePixelFormat[], 6725 enum dm_swizzle_mode SurfaceTiling[], 6726 int ViewportWidth[], 6727 int ViewportHeight[], 6728 int SurfaceWidthY[], 6729 int SurfaceWidthC[], 6730 int SurfaceHeightY[], 6731 int SurfaceHeightC[], 6732 int Read256BytesBlockHeightY[], 6733 int Read256BytesBlockHeightC[], 6734 int Read256BytesBlockWidthY[], 6735 int Read256BytesBlockWidthC[], 6736 enum odm_combine_mode ODMCombineEnabled[], 6737 int BlendingAndTiming[], 6738 int BytePerPixY[], 6739 int BytePerPixC[], 6740 double BytePerPixDETY[], 6741 double BytePerPixDETC[], 6742 int HActive[], 6743 double HRatio[], 6744 double HRatioChroma[], 6745 int DPPPerPlane[], 6746 int swath_width_luma_ub[], 6747 int swath_width_chroma_ub[], 6748 double SwathWidth[], 6749 double SwathWidthChroma[], 6750 int SwathHeightY[], 6751 int SwathHeightC[], 6752 unsigned int DETBufferSizeY[], 6753 unsigned int DETBufferSizeC[], 6754 bool ViewportSizeSupportPerPlane[], 6755 bool *ViewportSizeSupport) 6756 { 6757 int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 6758 int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 6759 int MinimumSwathHeightY; 6760 int MinimumSwathHeightC; 6761 int RoundedUpMaxSwathSizeBytesY; 6762 int RoundedUpMaxSwathSizeBytesC; 6763 int RoundedUpMinSwathSizeBytesY; 6764 int RoundedUpMinSwathSizeBytesC; 6765 int RoundedUpSwathSizeBytesY; 6766 int RoundedUpSwathSizeBytesC; 6767 double SwathWidthSingleDPP[DC__NUM_DPP__MAX]; 6768 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX]; 6769 int k; 6770 6771 CalculateSwathWidth( 6772 ForceSingleDPP, 6773 NumberOfActivePlanes, 6774 SourcePixelFormat, 6775 SourceScan, 6776 ViewportWidth, 6777 ViewportHeight, 6778 SurfaceWidthY, 6779 SurfaceWidthC, 6780 SurfaceHeightY, 6781 SurfaceHeightC, 6782 ODMCombineEnabled, 6783 BytePerPixY, 6784 BytePerPixC, 6785 Read256BytesBlockHeightY, 6786 Read256BytesBlockHeightC, 6787 Read256BytesBlockWidthY, 6788 Read256BytesBlockWidthC, 6789 BlendingAndTiming, 6790 HActive, 6791 HRatio, 6792 DPPPerPlane, 6793 SwathWidthSingleDPP, 6794 SwathWidthSingleDPPChroma, 6795 SwathWidth, 6796 SwathWidthChroma, 6797 MaximumSwathHeightY, 6798 MaximumSwathHeightC, 6799 swath_width_luma_ub, 6800 swath_width_chroma_ub); 6801 6802 *ViewportSizeSupport = true; 6803 for (k = 0; k < NumberOfActivePlanes; ++k) { 6804 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16 6805 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) { 6806 if (SurfaceTiling[k] == dm_sw_linear 6807 || (SourcePixelFormat[k] == dm_444_64 6808 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x) 6809 && SourceScan[k] != dm_vert)) { 6810 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6811 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) { 6812 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6813 } else { 6814 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6815 } 6816 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6817 } else { 6818 if (SurfaceTiling[k] == dm_sw_linear) { 6819 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6820 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6821 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) { 6822 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6823 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6824 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) { 6825 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6826 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6827 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) { 6828 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6829 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6830 } else { 6831 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6832 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6833 } 6834 } 6835 6836 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; 6837 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY; 6838 if (SourcePixelFormat[k] == dm_420_10) { 6839 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256); 6840 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256); 6841 } 6842 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; 6843 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC; 6844 if (SourcePixelFormat[k] == dm_420_10) { 6845 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256); 6846 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256); 6847 } 6848 6849 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6850 SwathHeightY[k] = MaximumSwathHeightY[k]; 6851 SwathHeightC[k] = MaximumSwathHeightC[k]; 6852 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; 6853 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; 6854 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC 6855 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6856 SwathHeightY[k] = MinimumSwathHeightY; 6857 SwathHeightC[k] = MaximumSwathHeightC[k]; 6858 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; 6859 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; 6860 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC 6861 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6862 SwathHeightY[k] = MaximumSwathHeightY[k]; 6863 SwathHeightC[k] = MinimumSwathHeightC; 6864 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; 6865 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; 6866 } else { 6867 SwathHeightY[k] = MinimumSwathHeightY; 6868 SwathHeightC[k] = MinimumSwathHeightC; 6869 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; 6870 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; 6871 } 6872 { 6873 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); 6874 6875 if (SwathHeightC[k] == 0) { 6876 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024; 6877 DETBufferSizeC[k] = 0; 6878 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { 6879 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2; 6880 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2; 6881 } else { 6882 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024); 6883 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3; 6884 } 6885 6886 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k] 6887 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { 6888 *ViewportSizeSupport = false; 6889 ViewportSizeSupportPerPlane[k] = false; 6890 } else { 6891 ViewportSizeSupportPerPlane[k] = true; 6892 } 6893 } 6894 } 6895 } 6896 6897 static void CalculateSwathWidth( 6898 bool ForceSingleDPP, 6899 int NumberOfActivePlanes, 6900 enum source_format_class SourcePixelFormat[], 6901 enum scan_direction_class SourceScan[], 6902 int ViewportWidth[], 6903 int ViewportHeight[], 6904 int SurfaceWidthY[], 6905 int SurfaceWidthC[], 6906 int SurfaceHeightY[], 6907 int SurfaceHeightC[], 6908 enum odm_combine_mode ODMCombineEnabled[], 6909 int BytePerPixY[], 6910 int BytePerPixC[], 6911 int Read256BytesBlockHeightY[], 6912 int Read256BytesBlockHeightC[], 6913 int Read256BytesBlockWidthY[], 6914 int Read256BytesBlockWidthC[], 6915 int BlendingAndTiming[], 6916 int HActive[], 6917 double HRatio[], 6918 int DPPPerPlane[], 6919 double SwathWidthSingleDPPY[], 6920 double SwathWidthSingleDPPC[], 6921 double SwathWidthY[], 6922 double SwathWidthC[], 6923 int MaximumSwathHeightY[], 6924 int MaximumSwathHeightC[], 6925 int swath_width_luma_ub[], 6926 int swath_width_chroma_ub[]) 6927 { 6928 enum odm_combine_mode MainPlaneODMCombine; 6929 int j, k; 6930 6931 #ifdef __DML_VBA_DEBUG__ 6932 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes); 6933 #endif 6934 6935 for (k = 0; k < NumberOfActivePlanes; ++k) { 6936 if (SourceScan[k] != dm_vert) { 6937 SwathWidthSingleDPPY[k] = ViewportWidth[k]; 6938 } else { 6939 SwathWidthSingleDPPY[k] = ViewportHeight[k]; 6940 } 6941 6942 #ifdef __DML_VBA_DEBUG__ 6943 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); 6944 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); 6945 #endif 6946 6947 MainPlaneODMCombine = ODMCombineEnabled[k]; 6948 for (j = 0; j < NumberOfActivePlanes; ++j) { 6949 if (BlendingAndTiming[k] == j) { 6950 MainPlaneODMCombine = ODMCombineEnabled[j]; 6951 } 6952 } 6953 6954 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) 6955 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k])); 6956 else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) 6957 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k])); 6958 else if (DPPPerPlane[k] == 2) 6959 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2; 6960 else 6961 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 6962 6963 #ifdef __DML_VBA_DEBUG__ 6964 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]); 6965 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]); 6966 #endif 6967 6968 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) { 6969 SwathWidthC[k] = SwathWidthY[k] / 2; 6970 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2; 6971 } else { 6972 SwathWidthC[k] = SwathWidthY[k]; 6973 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k]; 6974 } 6975 6976 if (ForceSingleDPP == true) { 6977 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 6978 SwathWidthC[k] = SwathWidthSingleDPPC[k]; 6979 } 6980 { 6981 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); 6982 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); 6983 6984 #ifdef __DML_VBA_DEBUG__ 6985 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); 6986 #endif 6987 6988 if (SourceScan[k] != dm_vert) { 6989 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; 6990 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; 6991 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]); 6992 if (BytePerPixC[k] > 0) { 6993 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); 6994 6995 swath_width_chroma_ub[k] = dml_min( 6996 surface_width_ub_c, 6997 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]); 6998 } else { 6999 swath_width_chroma_ub[k] = 0; 7000 } 7001 } else { 7002 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; 7003 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; 7004 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); 7005 if (BytePerPixC[k] > 0) { 7006 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); 7007 7008 swath_width_chroma_ub[k] = dml_min( 7009 surface_height_ub_c, 7010 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]); 7011 } else { 7012 swath_width_chroma_ub[k] = 0; 7013 } 7014 } 7015 } 7016 } 7017 } 7018 7019 static double CalculateExtraLatency( 7020 int RoundTripPingLatencyCycles, 7021 int ReorderingBytes, 7022 double DCFCLK, 7023 int TotalNumberOfActiveDPP, 7024 int PixelChunkSizeInKByte, 7025 int TotalNumberOfDCCActiveDPP, 7026 int MetaChunkSize, 7027 double ReturnBW, 7028 bool GPUVMEnable, 7029 bool HostVMEnable, 7030 int NumberOfActivePlanes, 7031 int NumberOfDPP[], 7032 int dpte_group_bytes[], 7033 double HostVMInefficiencyFactor, 7034 double HostVMMinPageSize, 7035 int HostVMMaxNonCachedPageTableLevels) 7036 { 7037 double ExtraLatencyBytes; 7038 double ExtraLatency; 7039 7040 ExtraLatencyBytes = CalculateExtraLatencyBytes( 7041 ReorderingBytes, 7042 TotalNumberOfActiveDPP, 7043 PixelChunkSizeInKByte, 7044 TotalNumberOfDCCActiveDPP, 7045 MetaChunkSize, 7046 GPUVMEnable, 7047 HostVMEnable, 7048 NumberOfActivePlanes, 7049 NumberOfDPP, 7050 dpte_group_bytes, 7051 HostVMInefficiencyFactor, 7052 HostVMMinPageSize, 7053 HostVMMaxNonCachedPageTableLevels); 7054 7055 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; 7056 7057 #ifdef __DML_VBA_DEBUG__ 7058 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); 7059 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); 7060 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes); 7061 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); 7062 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); 7063 #endif 7064 7065 return ExtraLatency; 7066 } 7067 7068 static double CalculateExtraLatencyBytes( 7069 int ReorderingBytes, 7070 int TotalNumberOfActiveDPP, 7071 int PixelChunkSizeInKByte, 7072 int TotalNumberOfDCCActiveDPP, 7073 int MetaChunkSize, 7074 bool GPUVMEnable, 7075 bool HostVMEnable, 7076 int NumberOfActivePlanes, 7077 int NumberOfDPP[], 7078 int dpte_group_bytes[], 7079 double HostVMInefficiencyFactor, 7080 double HostVMMinPageSize, 7081 int HostVMMaxNonCachedPageTableLevels) 7082 { 7083 double ret; 7084 int HostVMDynamicLevels = 0, k; 7085 7086 if (GPUVMEnable == true && HostVMEnable == true) { 7087 if (HostVMMinPageSize < 2048) 7088 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 7089 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) 7090 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 7091 else 7092 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 7093 } else { 7094 HostVMDynamicLevels = 0; 7095 } 7096 7097 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; 7098 7099 if (GPUVMEnable == true) { 7100 for (k = 0; k < NumberOfActivePlanes; ++k) 7101 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; 7102 } 7103 return ret; 7104 } 7105 7106 static double CalculateUrgentLatency( 7107 double UrgentLatencyPixelDataOnly, 7108 double UrgentLatencyPixelMixedWithVMData, 7109 double UrgentLatencyVMDataOnly, 7110 bool DoUrgentLatencyAdjustment, 7111 double UrgentLatencyAdjustmentFabricClockComponent, 7112 double UrgentLatencyAdjustmentFabricClockReference, 7113 double FabricClock) 7114 { 7115 double ret; 7116 7117 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); 7118 if (DoUrgentLatencyAdjustment == true) 7119 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); 7120 return ret; 7121 } 7122 7123 static noinline_for_stack void UseMinimumDCFCLK( 7124 struct display_mode_lib *mode_lib, 7125 int MaxPrefetchMode, 7126 int ReorderingBytes) 7127 { 7128 struct vba_vars_st *v = &mode_lib->vba; 7129 int dummy1, i, j, k; 7130 double NormalEfficiency, dummy2, dummy3; 7131 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; 7132 7133 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0; 7134 for (i = 0; i < v->soc.num_states; ++i) { 7135 for (j = 0; j <= 1; ++j) { 7136 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; 7137 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; 7138 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX]; 7139 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; 7140 double MinimumTWait; 7141 double NonDPTEBandwidth; 7142 double DPTEBandwidth; 7143 double DCFCLKRequiredForAverageBandwidth; 7144 double ExtraLatencyBytes; 7145 double ExtraLatencyCycles; 7146 double DCFCLKRequiredForPeakBandwidth; 7147 int NoOfDPPState[DC__NUM_DPP__MAX]; 7148 double MinimumTvmPlus2Tr0; 7149 7150 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; 7151 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 7152 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 7153 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]); 7154 } 7155 7156 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) 7157 NoOfDPPState[k] = v->NoOfDPP[i][j][k]; 7158 7159 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime); 7160 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]; 7161 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ? 7162 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j]; 7163 DCFCLKRequiredForAverageBandwidth = dml_max3( 7164 v->ProjectedDCFCLKDeepSleep[i][j], 7165 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth 7166 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100), 7167 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth); 7168 7169 ExtraLatencyBytes = CalculateExtraLatencyBytes( 7170 ReorderingBytes, 7171 v->TotalNumberOfActiveDPP[i][j], 7172 v->PixelChunkSizeInKByte, 7173 v->TotalNumberOfDCCActiveDPP[i][j], 7174 v->MetaChunkSize, 7175 v->GPUVMEnable, 7176 v->HostVMEnable, 7177 v->NumberOfActivePlanes, 7178 NoOfDPPState, 7179 v->dpte_group_bytes, 7180 1, 7181 v->HostVMMinPageSize, 7182 v->HostVMMaxNonCachedPageTableLevels); 7183 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth; 7184 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 7185 double DCFCLKCyclesRequiredInPrefetch; 7186 double ExpectedPrefetchBWAcceleration; 7187 double PrefetchTime; 7188 7189 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k] 7190 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth; 7191 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] 7192 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0) 7193 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth 7194 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k]; 7195 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k]; 7196 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k]) 7197 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]); 7198 DynamicMetadataVMExtraLatency[k] = 7199 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ? 7200 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; 7201 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait 7202 - v->UrgLatency[i] 7203 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2) 7204 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) 7205 - DynamicMetadataVMExtraLatency[k]; 7206 7207 if (PrefetchTime > 0) { 7208 double ExpectedVRatioPrefetch; 7209 7210 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] 7211 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch); 7212 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k] 7213 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration; 7214 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) { 7215 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k] 7216 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth; 7217 } 7218 } else { 7219 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; 7220 } 7221 if (v->DynamicMetadataEnable[k] == true) { 7222 double TSetupPipe; 7223 double TdmbfPipe; 7224 double TdmsksPipe; 7225 double TdmecPipe; 7226 double AllowedTimeForUrgentExtraLatency; 7227 7228 CalculateVupdateAndDynamicMetadataParameters( 7229 v->MaxInterDCNTileRepeaters, 7230 v->RequiredDPPCLK[i][j][k], 7231 v->RequiredDISPCLK[i][j], 7232 v->ProjectedDCFCLKDeepSleep[i][j], 7233 v->PixelClock[k], 7234 v->HTotal[k], 7235 v->VTotal[k] - v->VActive[k], 7236 v->DynamicMetadataTransmittedBytes[k], 7237 v->DynamicMetadataLinesBeforeActiveRequired[k], 7238 v->Interlace[k], 7239 v->ProgressiveToInterlaceUnitInOPP, 7240 &TSetupPipe, 7241 &TdmbfPipe, 7242 &TdmecPipe, 7243 &TdmsksPipe, 7244 &dummy1, 7245 &dummy2, 7246 &dummy3); 7247 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe 7248 - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; 7249 if (AllowedTimeForUrgentExtraLatency > 0) { 7250 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max( 7251 DCFCLKRequiredForPeakBandwidthPerPlane[k], 7252 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); 7253 } else { 7254 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; 7255 } 7256 } 7257 } 7258 DCFCLKRequiredForPeakBandwidth = 0; 7259 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) 7260 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k]; 7261 7262 MinimumTvmPlus2Tr0 = v->UrgLatency[i] 7263 * (v->GPUVMEnable == true ? 7264 (v->HostVMEnable == true ? 7265 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) : 7266 0); 7267 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 7268 double MaximumTvmPlus2Tr0PlusTsw; 7269 7270 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; 7271 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { 7272 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i]; 7273 } else { 7274 DCFCLKRequiredForPeakBandwidth = dml_max3( 7275 DCFCLKRequiredForPeakBandwidth, 7276 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4), 7277 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); 7278 } 7279 } 7280 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); 7281 } 7282 } 7283 } 7284 7285 static void CalculateUnboundedRequestAndCompressedBufferSize( 7286 unsigned int DETBufferSizeInKByte, 7287 int ConfigReturnBufferSizeInKByte, 7288 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 7289 int TotalActiveDPP, 7290 bool NoChromaPlanes, 7291 int MaxNumDPP, 7292 int CompressedBufferSegmentSizeInkByteFinal, 7293 enum output_encoder_class *Output, 7294 bool *UnboundedRequestEnabled, 7295 int *CompressedBufferSizeInkByte) 7296 { 7297 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); 7298 7299 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]); 7300 *CompressedBufferSizeInkByte = ( 7301 *UnboundedRequestEnabled == true ? 7302 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte : 7303 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte); 7304 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; 7305 7306 #ifdef __DML_VBA_DEBUG__ 7307 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); 7308 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte); 7309 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 7310 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); 7311 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte); 7312 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); 7313 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 7314 #endif 7315 } 7316 7317 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output) 7318 { 7319 bool ret_val = false; 7320 7321 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma); 7322 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) 7323 ret_val = false; 7324 return ret_val; 7325 } 7326 7327 static unsigned int CalculateMaxVStartup( 7328 unsigned int VTotal, 7329 unsigned int VActive, 7330 unsigned int VBlankNom, 7331 unsigned int HTotal, 7332 double PixelClock, 7333 bool ProgressiveTointerlaceUnitinOPP, 7334 bool Interlace, 7335 unsigned int VBlankNomDefaultUS, 7336 double WritebackDelayTime) 7337 { 7338 unsigned int MaxVStartup = 0; 7339 unsigned int vblank_size = 0; 7340 double line_time_us = HTotal / PixelClock; 7341 unsigned int vblank_actual = VTotal - VActive; 7342 unsigned int vblank_nom_default_in_line = dml_floor(VBlankNomDefaultUS / line_time_us, 1.0); 7343 unsigned int vblank_nom_input = VBlankNom; //dml_min(VBlankNom, vblank_nom_default_in_line); 7344 unsigned int vblank_avail = vblank_nom_input == 0 ? vblank_nom_default_in_line : vblank_nom_input; 7345 7346 vblank_size = (unsigned int) dml_min(vblank_actual, vblank_avail); 7347 if (Interlace && !ProgressiveTointerlaceUnitinOPP) 7348 MaxVStartup = dml_floor(vblank_size / 2.0, 1.0); 7349 else 7350 MaxVStartup = vblank_size - dml_max(1.0, dml_ceil(WritebackDelayTime / line_time_us, 1.0)); 7351 if (MaxVStartup > 1023) 7352 MaxVStartup = 1023; 7353 return MaxVStartup; 7354 } 7355