1 /* 2 * Copyright 2017 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 26 #include "dc.h" 27 #include "../display_mode_lib.h" 28 #include "../dcn30/display_mode_vba_30.h" 29 #include "display_mode_vba_31.h" 30 #include "../dml_inline_defs.h" 31 32 /* 33 * NOTE: 34 * This file is gcc-parsable HW gospel, coming straight from HW engineers. 35 * 36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd 37 * ways. Unless there is something clearly wrong with it the code should 38 * remain as-is as it provides us with a guarantee from HW that it is correct. 
39 */ 40 41 #define BPP_INVALID 0 42 #define BPP_BLENDED_PIPE 0xffffffff 43 #define DCN31_MAX_DSC_IMAGE_WIDTH 5184 44 #define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096 45 #define DCN3_15_MIN_COMPBUF_SIZE_KB 128 46 #define DCN3_15_MAX_DET_SIZE 384 47 48 // For DML-C changes that hasn't been propagated to VBA yet 49 //#define __DML_VBA_ALLOW_DELTA__ 50 51 // Move these to ip paramaters/constant 52 53 // At which vstartup the DML start to try if the mode can be supported 54 #define __DML_VBA_MIN_VSTARTUP__ 9 55 56 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET) 57 #define __DML_ARB_TO_RET_DELAY__ (7 + 95) 58 59 // fudge factor for min dcfclk calclation 60 #define __DML_MIN_DCFCLK_FACTOR__ 1.15 61 62 typedef struct { 63 double DPPCLK; 64 double DISPCLK; 65 double PixelClock; 66 double DCFCLKDeepSleep; 67 unsigned int DPPPerPlane; 68 bool ScalerEnabled; 69 double VRatio; 70 double VRatioChroma; 71 enum scan_direction_class SourceScan; 72 unsigned int BlockWidth256BytesY; 73 unsigned int BlockHeight256BytesY; 74 unsigned int BlockWidth256BytesC; 75 unsigned int BlockHeight256BytesC; 76 unsigned int InterlaceEnable; 77 unsigned int NumberOfCursors; 78 unsigned int VBlank; 79 unsigned int HTotal; 80 unsigned int DCCEnable; 81 bool ODMCombineIsEnabled; 82 enum source_format_class SourcePixelFormat; 83 int BytePerPixelY; 84 int BytePerPixelC; 85 bool ProgressiveToInterlaceUnitInOPP; 86 } Pipe; 87 88 #define BPP_INVALID 0 89 #define BPP_BLENDED_PIPE 0xffffffff 90 91 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); 92 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib); 93 static unsigned int dscceComputeDelay( 94 unsigned int bpc, 95 double BPP, 96 unsigned int sliceWidth, 97 unsigned int numSlices, 98 enum output_format_class pixelFormat, 99 enum output_encoder_class Output); 100 static unsigned int dscComputeDelay(enum output_format_class 
pixelFormat, enum output_encoder_class Output); 101 static bool CalculatePrefetchSchedule( 102 struct display_mode_lib *mode_lib, 103 double HostVMInefficiencyFactor, 104 Pipe *myPipe, 105 unsigned int DSCDelay, 106 double DPPCLKDelaySubtotalPlusCNVCFormater, 107 double DPPCLKDelaySCL, 108 double DPPCLKDelaySCLLBOnly, 109 double DPPCLKDelayCNVCCursor, 110 double DISPCLKDelaySubtotal, 111 unsigned int DPP_RECOUT_WIDTH, 112 enum output_format_class OutputFormat, 113 unsigned int MaxInterDCNTileRepeaters, 114 unsigned int VStartup, 115 unsigned int MaxVStartup, 116 unsigned int GPUVMPageTableLevels, 117 bool GPUVMEnable, 118 bool HostVMEnable, 119 unsigned int HostVMMaxNonCachedPageTableLevels, 120 double HostVMMinPageSize, 121 bool DynamicMetadataEnable, 122 bool DynamicMetadataVMEnabled, 123 int DynamicMetadataLinesBeforeActiveRequired, 124 unsigned int DynamicMetadataTransmittedBytes, 125 double UrgentLatency, 126 double UrgentExtraLatency, 127 double TCalc, 128 unsigned int PDEAndMetaPTEBytesFrame, 129 unsigned int MetaRowByte, 130 unsigned int PixelPTEBytesPerRow, 131 double PrefetchSourceLinesY, 132 unsigned int SwathWidthY, 133 double VInitPreFillY, 134 unsigned int MaxNumSwathY, 135 double PrefetchSourceLinesC, 136 unsigned int SwathWidthC, 137 double VInitPreFillC, 138 unsigned int MaxNumSwathC, 139 int swath_width_luma_ub, 140 int swath_width_chroma_ub, 141 unsigned int SwathHeightY, 142 unsigned int SwathHeightC, 143 double TWait, 144 double *DSTXAfterScaler, 145 double *DSTYAfterScaler, 146 double *DestinationLinesForPrefetch, 147 double *PrefetchBandwidth, 148 double *DestinationLinesToRequestVMInVBlank, 149 double *DestinationLinesToRequestRowInVBlank, 150 double *VRatioPrefetchY, 151 double *VRatioPrefetchC, 152 double *RequiredPrefetchPixDataBWLuma, 153 double *RequiredPrefetchPixDataBWChroma, 154 bool *NotEnoughTimeForDynamicMetadata, 155 double *Tno_bw, 156 double *prefetch_vmrow_bw, 157 double *Tdmdl_vm, 158 double *Tdmdl, 159 double *TSetup, 160 
int *VUpdateOffsetPix, 161 double *VUpdateWidthPix, 162 double *VReadyOffsetPix); 163 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); 164 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); 165 static void CalculateDCCConfiguration( 166 bool DCCEnabled, 167 bool DCCProgrammingAssumesScanDirectionUnknown, 168 enum source_format_class SourcePixelFormat, 169 unsigned int SurfaceWidthLuma, 170 unsigned int SurfaceWidthChroma, 171 unsigned int SurfaceHeightLuma, 172 unsigned int SurfaceHeightChroma, 173 double DETBufferSize, 174 unsigned int RequestHeight256ByteLuma, 175 unsigned int RequestHeight256ByteChroma, 176 enum dm_swizzle_mode TilingFormat, 177 unsigned int BytePerPixelY, 178 unsigned int BytePerPixelC, 179 double BytePerPixelDETY, 180 double BytePerPixelDETC, 181 enum scan_direction_class ScanOrientation, 182 unsigned int *MaxUncompressedBlockLuma, 183 unsigned int *MaxUncompressedBlockChroma, 184 unsigned int *MaxCompressedBlockLuma, 185 unsigned int *MaxCompressedBlockChroma, 186 unsigned int *IndependentBlockLuma, 187 unsigned int *IndependentBlockChroma); 188 static double CalculatePrefetchSourceLines( 189 struct display_mode_lib *mode_lib, 190 double VRatio, 191 double vtaps, 192 bool Interlace, 193 bool ProgressiveToInterlaceUnitInOPP, 194 unsigned int SwathHeight, 195 unsigned int ViewportYStart, 196 double *VInitPreFill, 197 unsigned int *MaxNumSwath); 198 static unsigned int CalculateVMAndRowBytes( 199 struct display_mode_lib *mode_lib, 200 bool DCCEnable, 201 unsigned int BlockHeight256Bytes, 202 unsigned int BlockWidth256Bytes, 203 enum source_format_class SourcePixelFormat, 204 unsigned int SurfaceTiling, 205 unsigned int BytePerPixel, 206 enum scan_direction_class ScanDirection, 207 unsigned int SwathWidth, 208 unsigned int ViewportHeight, 209 bool GPUVMEnable, 210 bool HostVMEnable, 211 unsigned int HostVMMaxNonCachedPageTableLevels, 212 unsigned int GPUVMMinPageSize, 213 unsigned int HostVMMinPageSize, 
214 unsigned int PTEBufferSizeInRequests, 215 unsigned int Pitch, 216 unsigned int DCCMetaPitch, 217 unsigned int *MacroTileWidth, 218 unsigned int *MetaRowByte, 219 unsigned int *PixelPTEBytesPerRow, 220 bool *PTEBufferSizeNotExceeded, 221 int *dpte_row_width_ub, 222 unsigned int *dpte_row_height, 223 unsigned int *MetaRequestWidth, 224 unsigned int *MetaRequestHeight, 225 unsigned int *meta_row_width, 226 unsigned int *meta_row_height, 227 int *vm_group_bytes, 228 unsigned int *dpte_group_bytes, 229 unsigned int *PixelPTEReqWidth, 230 unsigned int *PixelPTEReqHeight, 231 unsigned int *PTERequestSize, 232 int *DPDE0BytesFrame, 233 int *MetaPTEBytesFrame); 234 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime); 235 static void CalculateRowBandwidth( 236 bool GPUVMEnable, 237 enum source_format_class SourcePixelFormat, 238 double VRatio, 239 double VRatioChroma, 240 bool DCCEnable, 241 double LineTime, 242 unsigned int MetaRowByteLuma, 243 unsigned int MetaRowByteChroma, 244 unsigned int meta_row_height_luma, 245 unsigned int meta_row_height_chroma, 246 unsigned int PixelPTEBytesPerRowLuma, 247 unsigned int PixelPTEBytesPerRowChroma, 248 unsigned int dpte_row_height_luma, 249 unsigned int dpte_row_height_chroma, 250 double *meta_row_bw, 251 double *dpte_row_bw); 252 253 static void CalculateFlipSchedule( 254 struct display_mode_lib *mode_lib, 255 unsigned int k, 256 double HostVMInefficiencyFactor, 257 double UrgentExtraLatency, 258 double UrgentLatency, 259 double PDEAndMetaPTEBytesPerFrame, 260 double MetaRowBytes, 261 double DPTEBytesPerRow); 262 static double CalculateWriteBackDelay( 263 enum source_format_class WritebackPixelFormat, 264 double WritebackHRatio, 265 double WritebackVRatio, 266 unsigned int WritebackVTaps, 267 int WritebackDestinationWidth, 268 int WritebackDestinationHeight, 269 int WritebackSourceHeight, 270 unsigned int HTotal); 271 272 static void 
CalculateVupdateAndDynamicMetadataParameters( 273 int MaxInterDCNTileRepeaters, 274 double DPPCLK, 275 double DISPCLK, 276 double DCFClkDeepSleep, 277 double PixelClock, 278 int HTotal, 279 int VBlank, 280 int DynamicMetadataTransmittedBytes, 281 int DynamicMetadataLinesBeforeActiveRequired, 282 int InterlaceEnable, 283 bool ProgressiveToInterlaceUnitInOPP, 284 double *TSetup, 285 double *Tdmbf, 286 double *Tdmec, 287 double *Tdmsks, 288 int *VUpdateOffsetPix, 289 double *VUpdateWidthPix, 290 double *VReadyOffsetPix); 291 292 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 293 struct display_mode_lib *mode_lib, 294 unsigned int PrefetchMode, 295 double DCFCLK, 296 double ReturnBW, 297 double UrgentLatency, 298 double ExtraLatency, 299 double SOCCLK, 300 double DCFCLKDeepSleep, 301 unsigned int DETBufferSizeY[], 302 unsigned int DETBufferSizeC[], 303 unsigned int SwathHeightY[], 304 unsigned int SwathHeightC[], 305 double SwathWidthY[], 306 double SwathWidthC[], 307 unsigned int DPPPerPlane[], 308 double BytePerPixelDETY[], 309 double BytePerPixelDETC[], 310 bool UnboundedRequestEnabled, 311 int unsigned CompressedBufferSizeInkByte, 312 enum clock_change_support *DRAMClockChangeSupport, 313 double *StutterExitWatermark, 314 double *StutterEnterPlusExitWatermark, 315 double *Z8StutterExitWatermark, 316 double *Z8StutterEnterPlusExitWatermark); 317 318 static void CalculateDCFCLKDeepSleep( 319 struct display_mode_lib *mode_lib, 320 unsigned int NumberOfActivePlanes, 321 int BytePerPixelY[], 322 int BytePerPixelC[], 323 double VRatio[], 324 double VRatioChroma[], 325 double SwathWidthY[], 326 double SwathWidthC[], 327 unsigned int DPPPerPlane[], 328 double HRatio[], 329 double HRatioChroma[], 330 double PixelClock[], 331 double PSCL_THROUGHPUT[], 332 double PSCL_THROUGHPUT_CHROMA[], 333 double DPPCLK[], 334 double ReadBandwidthLuma[], 335 double ReadBandwidthChroma[], 336 int ReturnBusWidth, 337 double *DCFCLKDeepSleep); 338 339 static void 
CalculateUrgentBurstFactor( 340 int swath_width_luma_ub, 341 int swath_width_chroma_ub, 342 unsigned int SwathHeightY, 343 unsigned int SwathHeightC, 344 double LineTime, 345 double UrgentLatency, 346 double CursorBufferSize, 347 unsigned int CursorWidth, 348 unsigned int CursorBPP, 349 double VRatio, 350 double VRatioC, 351 double BytePerPixelInDETY, 352 double BytePerPixelInDETC, 353 double DETBufferSizeY, 354 double DETBufferSizeC, 355 double *UrgentBurstFactorCursor, 356 double *UrgentBurstFactorLuma, 357 double *UrgentBurstFactorChroma, 358 bool *NotEnoughUrgentLatencyHiding); 359 360 static void UseMinimumDCFCLK( 361 struct display_mode_lib *mode_lib, 362 int MaxPrefetchMode, 363 int ReorderingBytes); 364 365 static void CalculatePixelDeliveryTimes( 366 unsigned int NumberOfActivePlanes, 367 double VRatio[], 368 double VRatioChroma[], 369 double VRatioPrefetchY[], 370 double VRatioPrefetchC[], 371 unsigned int swath_width_luma_ub[], 372 unsigned int swath_width_chroma_ub[], 373 unsigned int DPPPerPlane[], 374 double HRatio[], 375 double HRatioChroma[], 376 double PixelClock[], 377 double PSCL_THROUGHPUT[], 378 double PSCL_THROUGHPUT_CHROMA[], 379 double DPPCLK[], 380 int BytePerPixelC[], 381 enum scan_direction_class SourceScan[], 382 unsigned int NumberOfCursors[], 383 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 384 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 385 unsigned int BlockWidth256BytesY[], 386 unsigned int BlockHeight256BytesY[], 387 unsigned int BlockWidth256BytesC[], 388 unsigned int BlockHeight256BytesC[], 389 double DisplayPipeLineDeliveryTimeLuma[], 390 double DisplayPipeLineDeliveryTimeChroma[], 391 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 392 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 393 double DisplayPipeRequestDeliveryTimeLuma[], 394 double DisplayPipeRequestDeliveryTimeChroma[], 395 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 396 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 397 double 
CursorRequestDeliveryTime[], 398 double CursorRequestDeliveryTimePrefetch[]); 399 400 static void CalculateMetaAndPTETimes( 401 int NumberOfActivePlanes, 402 bool GPUVMEnable, 403 int MetaChunkSize, 404 int MinMetaChunkSizeBytes, 405 int HTotal[], 406 double VRatio[], 407 double VRatioChroma[], 408 double DestinationLinesToRequestRowInVBlank[], 409 double DestinationLinesToRequestRowInImmediateFlip[], 410 bool DCCEnable[], 411 double PixelClock[], 412 int BytePerPixelY[], 413 int BytePerPixelC[], 414 enum scan_direction_class SourceScan[], 415 int dpte_row_height[], 416 int dpte_row_height_chroma[], 417 int meta_row_width[], 418 int meta_row_width_chroma[], 419 int meta_row_height[], 420 int meta_row_height_chroma[], 421 int meta_req_width[], 422 int meta_req_width_chroma[], 423 int meta_req_height[], 424 int meta_req_height_chroma[], 425 int dpte_group_bytes[], 426 int PTERequestSizeY[], 427 int PTERequestSizeC[], 428 int PixelPTEReqWidthY[], 429 int PixelPTEReqHeightY[], 430 int PixelPTEReqWidthC[], 431 int PixelPTEReqHeightC[], 432 int dpte_row_width_luma_ub[], 433 int dpte_row_width_chroma_ub[], 434 double DST_Y_PER_PTE_ROW_NOM_L[], 435 double DST_Y_PER_PTE_ROW_NOM_C[], 436 double DST_Y_PER_META_ROW_NOM_L[], 437 double DST_Y_PER_META_ROW_NOM_C[], 438 double TimePerMetaChunkNominal[], 439 double TimePerChromaMetaChunkNominal[], 440 double TimePerMetaChunkVBlank[], 441 double TimePerChromaMetaChunkVBlank[], 442 double TimePerMetaChunkFlip[], 443 double TimePerChromaMetaChunkFlip[], 444 double time_per_pte_group_nom_luma[], 445 double time_per_pte_group_vblank_luma[], 446 double time_per_pte_group_flip_luma[], 447 double time_per_pte_group_nom_chroma[], 448 double time_per_pte_group_vblank_chroma[], 449 double time_per_pte_group_flip_chroma[]); 450 451 static void CalculateVMGroupAndRequestTimes( 452 unsigned int NumberOfActivePlanes, 453 bool GPUVMEnable, 454 unsigned int GPUVMMaxPageTableLevels, 455 unsigned int HTotal[], 456 int BytePerPixelC[], 457 double 
DestinationLinesToRequestVMInVBlank[], 458 double DestinationLinesToRequestVMInImmediateFlip[], 459 bool DCCEnable[], 460 double PixelClock[], 461 int dpte_row_width_luma_ub[], 462 int dpte_row_width_chroma_ub[], 463 int vm_group_bytes[], 464 unsigned int dpde0_bytes_per_frame_ub_l[], 465 unsigned int dpde0_bytes_per_frame_ub_c[], 466 int meta_pte_bytes_per_frame_ub_l[], 467 int meta_pte_bytes_per_frame_ub_c[], 468 double TimePerVMGroupVBlank[], 469 double TimePerVMGroupFlip[], 470 double TimePerVMRequestVBlank[], 471 double TimePerVMRequestFlip[]); 472 473 static void CalculateStutterEfficiency( 474 struct display_mode_lib *mode_lib, 475 int CompressedBufferSizeInkByte, 476 bool UnboundedRequestEnabled, 477 int ConfigReturnBufferSizeInKByte, 478 int MetaFIFOSizeInKEntries, 479 int ZeroSizeBufferEntries, 480 int NumberOfActivePlanes, 481 int ROBBufferSizeInKByte, 482 double TotalDataReadBandwidth, 483 double DCFCLK, 484 double ReturnBW, 485 double COMPBUF_RESERVED_SPACE_64B, 486 double COMPBUF_RESERVED_SPACE_ZS, 487 double SRExitTime, 488 double SRExitZ8Time, 489 bool SynchronizedVBlank, 490 double Z8StutterEnterPlusExitWatermark, 491 double StutterEnterPlusExitWatermark, 492 bool ProgressiveToInterlaceUnitInOPP, 493 bool Interlace[], 494 double MinTTUVBlank[], 495 int DPPPerPlane[], 496 unsigned int DETBufferSizeY[], 497 int BytePerPixelY[], 498 double BytePerPixelDETY[], 499 double SwathWidthY[], 500 int SwathHeightY[], 501 int SwathHeightC[], 502 double NetDCCRateLuma[], 503 double NetDCCRateChroma[], 504 double DCCFractionOfZeroSizeRequestsLuma[], 505 double DCCFractionOfZeroSizeRequestsChroma[], 506 int HTotal[], 507 int VTotal[], 508 double PixelClock[], 509 double VRatio[], 510 enum scan_direction_class SourceScan[], 511 int BlockHeight256BytesY[], 512 int BlockWidth256BytesY[], 513 int BlockHeight256BytesC[], 514 int BlockWidth256BytesC[], 515 int DCCYMaxUncompressedBlock[], 516 int DCCCMaxUncompressedBlock[], 517 int VActive[], 518 bool DCCEnable[], 519 
bool WritebackEnable[], 520 double ReadBandwidthPlaneLuma[], 521 double ReadBandwidthPlaneChroma[], 522 double meta_row_bw[], 523 double dpte_row_bw[], 524 double *StutterEfficiencyNotIncludingVBlank, 525 double *StutterEfficiency, 526 int *NumberOfStutterBurstsPerFrame, 527 double *Z8StutterEfficiencyNotIncludingVBlank, 528 double *Z8StutterEfficiency, 529 int *Z8NumberOfStutterBurstsPerFrame, 530 double *StutterPeriod); 531 532 static void CalculateSwathAndDETConfiguration( 533 bool ForceSingleDPP, 534 int NumberOfActivePlanes, 535 bool DETSharedByAllDPP, 536 unsigned int DETBufferSizeInKByte[], 537 double MaximumSwathWidthLuma[], 538 double MaximumSwathWidthChroma[], 539 enum scan_direction_class SourceScan[], 540 enum source_format_class SourcePixelFormat[], 541 enum dm_swizzle_mode SurfaceTiling[], 542 int ViewportWidth[], 543 int ViewportHeight[], 544 int SurfaceWidthY[], 545 int SurfaceWidthC[], 546 int SurfaceHeightY[], 547 int SurfaceHeightC[], 548 int Read256BytesBlockHeightY[], 549 int Read256BytesBlockHeightC[], 550 int Read256BytesBlockWidthY[], 551 int Read256BytesBlockWidthC[], 552 enum odm_combine_mode ODMCombineEnabled[], 553 int BlendingAndTiming[], 554 int BytePerPixY[], 555 int BytePerPixC[], 556 double BytePerPixDETY[], 557 double BytePerPixDETC[], 558 int HActive[], 559 double HRatio[], 560 double HRatioChroma[], 561 int DPPPerPlane[], 562 int swath_width_luma_ub[], 563 int swath_width_chroma_ub[], 564 double SwathWidth[], 565 double SwathWidthChroma[], 566 int SwathHeightY[], 567 int SwathHeightC[], 568 unsigned int DETBufferSizeY[], 569 unsigned int DETBufferSizeC[], 570 bool ViewportSizeSupportPerPlane[], 571 bool *ViewportSizeSupport); 572 static void CalculateSwathWidth( 573 bool ForceSingleDPP, 574 int NumberOfActivePlanes, 575 enum source_format_class SourcePixelFormat[], 576 enum scan_direction_class SourceScan[], 577 int ViewportWidth[], 578 int ViewportHeight[], 579 int SurfaceWidthY[], 580 int SurfaceWidthC[], 581 int 
SurfaceHeightY[], 582 int SurfaceHeightC[], 583 enum odm_combine_mode ODMCombineEnabled[], 584 int BytePerPixY[], 585 int BytePerPixC[], 586 int Read256BytesBlockHeightY[], 587 int Read256BytesBlockHeightC[], 588 int Read256BytesBlockWidthY[], 589 int Read256BytesBlockWidthC[], 590 int BlendingAndTiming[], 591 int HActive[], 592 double HRatio[], 593 int DPPPerPlane[], 594 double SwathWidthSingleDPPY[], 595 double SwathWidthSingleDPPC[], 596 double SwathWidthY[], 597 double SwathWidthC[], 598 int MaximumSwathHeightY[], 599 int MaximumSwathHeightC[], 600 int swath_width_luma_ub[], 601 int swath_width_chroma_ub[]); 602 603 static double CalculateExtraLatency( 604 int RoundTripPingLatencyCycles, 605 int ReorderingBytes, 606 double DCFCLK, 607 int TotalNumberOfActiveDPP, 608 int PixelChunkSizeInKByte, 609 int TotalNumberOfDCCActiveDPP, 610 int MetaChunkSize, 611 double ReturnBW, 612 bool GPUVMEnable, 613 bool HostVMEnable, 614 int NumberOfActivePlanes, 615 int NumberOfDPP[], 616 int dpte_group_bytes[], 617 double HostVMInefficiencyFactor, 618 double HostVMMinPageSize, 619 int HostVMMaxNonCachedPageTableLevels); 620 621 static double CalculateExtraLatencyBytes( 622 int ReorderingBytes, 623 int TotalNumberOfActiveDPP, 624 int PixelChunkSizeInKByte, 625 int TotalNumberOfDCCActiveDPP, 626 int MetaChunkSize, 627 bool GPUVMEnable, 628 bool HostVMEnable, 629 int NumberOfActivePlanes, 630 int NumberOfDPP[], 631 int dpte_group_bytes[], 632 double HostVMInefficiencyFactor, 633 double HostVMMinPageSize, 634 int HostVMMaxNonCachedPageTableLevels); 635 636 static double CalculateUrgentLatency( 637 double UrgentLatencyPixelDataOnly, 638 double UrgentLatencyPixelMixedWithVMData, 639 double UrgentLatencyVMDataOnly, 640 bool DoUrgentLatencyAdjustment, 641 double UrgentLatencyAdjustmentFabricClockComponent, 642 double UrgentLatencyAdjustmentFabricClockReference, 643 double FabricClockSingle); 644 645 static void CalculateUnboundedRequestAndCompressedBufferSize( 646 unsigned int 
DETBufferSizeInKByte, 647 int ConfigReturnBufferSizeInKByte, 648 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 649 int TotalActiveDPP, 650 bool NoChromaPlanes, 651 int MaxNumDPP, 652 int CompressedBufferSegmentSizeInkByteFinal, 653 enum output_encoder_class *Output, 654 bool *UnboundedRequestEnabled, 655 int *CompressedBufferSizeInkByte); 656 657 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output); 658 659 void dml31_recalculate(struct display_mode_lib *mode_lib) 660 { 661 ModeSupportAndSystemConfiguration(mode_lib); 662 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); 663 DisplayPipeConfiguration(mode_lib); 664 #ifdef __DML_VBA_DEBUG__ 665 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__); 666 #endif 667 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); 668 } 669 670 static unsigned int dscceComputeDelay( 671 unsigned int bpc, 672 double BPP, 673 unsigned int sliceWidth, 674 unsigned int numSlices, 675 enum output_format_class pixelFormat, 676 enum output_encoder_class Output) 677 { 678 // valid bpc = source bits per component in the set of {8, 10, 12} 679 // valid bpp = increments of 1/16 of a bit 680 // min = 6/7/8 in N420/N422/444, respectively 681 // max = such that compression is 1:1 682 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 683 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 684 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 685 686 // fixed value 687 unsigned int rcModelSize = 8192; 688 689 // N422/N420 operate at 2 pixels per clock 690 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, 
P, l0, a, ax, L, Delay, pixels; 691 692 if (pixelFormat == dm_420) 693 pixelsPerClock = 2; 694 else if (pixelFormat == dm_444) 695 pixelsPerClock = 1; 696 else if (pixelFormat == dm_n422) 697 pixelsPerClock = 2; 698 // #all other modes operate at 1 pixel per clock 699 else 700 pixelsPerClock = 1; 701 702 //initial transmit delay as per PPS 703 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); 704 705 //compute ssm delay 706 if (bpc == 8) 707 D = 81; 708 else if (bpc == 10) 709 D = 89; 710 else 711 D = 113; 712 713 //divide by pixel per cycle to compute slice width as seen by DSC 714 w = sliceWidth / pixelsPerClock; 715 716 //422 mode has an additional cycle of delay 717 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) 718 s = 0; 719 else 720 s = 1; 721 722 //main calculation for the dscce 723 ix = initalXmitDelay + 45; 724 wx = (w + 2) / 3; 725 P = 3 * wx - w; 726 l0 = ix / w; 727 a = ix + P * l0; 728 ax = (a + 2) / 3 + D + 6 + 1; 729 L = (ax + wx - 1) / wx; 730 if ((ix % w) == 0 && P != 0) 731 lstall = 1; 732 else 733 lstall = 0; 734 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; 735 736 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 737 pixels = Delay * 3 * pixelsPerClock; 738 return pixels; 739 } 740 741 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) 742 { 743 unsigned int Delay = 0; 744 745 if (pixelFormat == dm_420) { 746 // sfr 747 Delay = Delay + 2; 748 // dsccif 749 Delay = Delay + 0; 750 // dscc - input deserializer 751 Delay = Delay + 3; 752 // dscc gets pixels every other cycle 753 Delay = Delay + 2; 754 // dscc - input cdc fifo 755 Delay = Delay + 12; 756 // dscc gets pixels every other cycle 757 Delay = Delay + 13; 758 // dscc - cdc uncertainty 759 Delay = Delay + 2; 760 // dscc - output cdc fifo 761 Delay = Delay + 7; 762 // dscc gets pixels every other cycle 763 Delay = Delay + 3; 764 // dscc 
- cdc uncertainty 765 Delay = Delay + 2; 766 // dscc - output serializer 767 Delay = Delay + 1; 768 // sft 769 Delay = Delay + 1; 770 } else if (pixelFormat == dm_n422) { 771 // sfr 772 Delay = Delay + 2; 773 // dsccif 774 Delay = Delay + 1; 775 // dscc - input deserializer 776 Delay = Delay + 5; 777 // dscc - input cdc fifo 778 Delay = Delay + 25; 779 // dscc - cdc uncertainty 780 Delay = Delay + 2; 781 // dscc - output cdc fifo 782 Delay = Delay + 10; 783 // dscc - cdc uncertainty 784 Delay = Delay + 2; 785 // dscc - output serializer 786 Delay = Delay + 1; 787 // sft 788 Delay = Delay + 1; 789 } else { 790 // sfr 791 Delay = Delay + 2; 792 // dsccif 793 Delay = Delay + 0; 794 // dscc - input deserializer 795 Delay = Delay + 3; 796 // dscc - input cdc fifo 797 Delay = Delay + 12; 798 // dscc - cdc uncertainty 799 Delay = Delay + 2; 800 // dscc - output cdc fifo 801 Delay = Delay + 7; 802 // dscc - output serializer 803 Delay = Delay + 1; 804 // dscc - cdc uncertainty 805 Delay = Delay + 2; 806 // sft 807 Delay = Delay + 1; 808 } 809 810 return Delay; 811 } 812 813 static bool CalculatePrefetchSchedule( 814 struct display_mode_lib *mode_lib, 815 double HostVMInefficiencyFactor, 816 Pipe *myPipe, 817 unsigned int DSCDelay, 818 double DPPCLKDelaySubtotalPlusCNVCFormater, 819 double DPPCLKDelaySCL, 820 double DPPCLKDelaySCLLBOnly, 821 double DPPCLKDelayCNVCCursor, 822 double DISPCLKDelaySubtotal, 823 unsigned int DPP_RECOUT_WIDTH, 824 enum output_format_class OutputFormat, 825 unsigned int MaxInterDCNTileRepeaters, 826 unsigned int VStartup, 827 unsigned int MaxVStartup, 828 unsigned int GPUVMPageTableLevels, 829 bool GPUVMEnable, 830 bool HostVMEnable, 831 unsigned int HostVMMaxNonCachedPageTableLevels, 832 double HostVMMinPageSize, 833 bool DynamicMetadataEnable, 834 bool DynamicMetadataVMEnabled, 835 int DynamicMetadataLinesBeforeActiveRequired, 836 unsigned int DynamicMetadataTransmittedBytes, 837 double UrgentLatency, 838 double UrgentExtraLatency, 839 double 
TCalc, 840 unsigned int PDEAndMetaPTEBytesFrame, 841 unsigned int MetaRowByte, 842 unsigned int PixelPTEBytesPerRow, 843 double PrefetchSourceLinesY, 844 unsigned int SwathWidthY, 845 double VInitPreFillY, 846 unsigned int MaxNumSwathY, 847 double PrefetchSourceLinesC, 848 unsigned int SwathWidthC, 849 double VInitPreFillC, 850 unsigned int MaxNumSwathC, 851 int swath_width_luma_ub, 852 int swath_width_chroma_ub, 853 unsigned int SwathHeightY, 854 unsigned int SwathHeightC, 855 double TWait, 856 double *DSTXAfterScaler, 857 double *DSTYAfterScaler, 858 double *DestinationLinesForPrefetch, 859 double *PrefetchBandwidth, 860 double *DestinationLinesToRequestVMInVBlank, 861 double *DestinationLinesToRequestRowInVBlank, 862 double *VRatioPrefetchY, 863 double *VRatioPrefetchC, 864 double *RequiredPrefetchPixDataBWLuma, 865 double *RequiredPrefetchPixDataBWChroma, 866 bool *NotEnoughTimeForDynamicMetadata, 867 double *Tno_bw, 868 double *prefetch_vmrow_bw, 869 double *Tdmdl_vm, 870 double *Tdmdl, 871 double *TSetup, 872 int *VUpdateOffsetPix, 873 double *VUpdateWidthPix, 874 double *VReadyOffsetPix) 875 { 876 bool MyError = false; 877 unsigned int DPPCycles, DISPCLKCycles; 878 double DSTTotalPixelsAfterScaler; 879 double LineTime; 880 double dst_y_prefetch_equ; 881 #ifdef __DML_VBA_DEBUG__ 882 double Tsw_oto; 883 #endif 884 double prefetch_bw_oto; 885 double prefetch_bw_pr; 886 double Tvm_oto; 887 double Tr0_oto; 888 double Tvm_oto_lines; 889 double Tr0_oto_lines; 890 double dst_y_prefetch_oto; 891 double TimeForFetchingMetaPTE = 0; 892 double TimeForFetchingRowInVBlank = 0; 893 double LinesToRequestPrefetchPixelData = 0; 894 unsigned int HostVMDynamicLevelsTrips; 895 double trip_to_mem; 896 double Tvm_trips; 897 double Tr0_trips; 898 double Tvm_trips_rounded; 899 double Tr0_trips_rounded; 900 double Lsw_oto; 901 double Tpre_rounded; 902 double prefetch_bw_equ; 903 double Tvm_equ; 904 double Tr0_equ; 905 double Tdmbf; 906 double Tdmec; 907 double Tdmsks; 908 double 
prefetch_sw_bytes; 909 double bytes_pp; 910 double dep_bytes; 911 int max_vratio_pre = 4; 912 double min_Lsw; 913 double Tsw_est1 = 0; 914 double Tsw_est3 = 0; 915 double max_Tsw = 0; 916 917 if (GPUVMEnable == true && HostVMEnable == true) { 918 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 919 } else { 920 HostVMDynamicLevelsTrips = 0; 921 } 922 #ifdef __DML_VBA_DEBUG__ 923 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor); 924 #endif 925 CalculateVupdateAndDynamicMetadataParameters( 926 MaxInterDCNTileRepeaters, 927 myPipe->DPPCLK, 928 myPipe->DISPCLK, 929 myPipe->DCFCLKDeepSleep, 930 myPipe->PixelClock, 931 myPipe->HTotal, 932 myPipe->VBlank, 933 DynamicMetadataTransmittedBytes, 934 DynamicMetadataLinesBeforeActiveRequired, 935 myPipe->InterlaceEnable, 936 myPipe->ProgressiveToInterlaceUnitInOPP, 937 TSetup, 938 &Tdmbf, 939 &Tdmec, 940 &Tdmsks, 941 VUpdateOffsetPix, 942 VUpdateWidthPix, 943 VReadyOffsetPix); 944 945 LineTime = myPipe->HTotal / myPipe->PixelClock; 946 trip_to_mem = UrgentLatency; 947 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 948 949 #ifdef __DML_VBA_ALLOW_DELTA__ 950 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) { 951 #else 952 if (DynamicMetadataVMEnabled == true) { 953 #endif 954 *Tdmdl = TWait + Tvm_trips + trip_to_mem; 955 } else { 956 *Tdmdl = TWait + UrgentExtraLatency; 957 } 958 959 #ifdef __DML_VBA_ALLOW_DELTA__ 960 if (DynamicMetadataEnable == false) { 961 *Tdmdl = 0.0; 962 } 963 #endif 964 965 if (DynamicMetadataEnable == true) { 966 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 967 *NotEnoughTimeForDynamicMetadata = true; 968 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 969 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 970 
dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 971 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks); 972 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl); 973 } else { 974 *NotEnoughTimeForDynamicMetadata = false; 975 } 976 } else { 977 *NotEnoughTimeForDynamicMetadata = false; 978 } 979 980 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0); 981 982 if (myPipe->ScalerEnabled) 983 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; 984 else 985 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; 986 987 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; 988 989 DISPCLKCycles = DISPCLKDelaySubtotal; 990 991 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) 992 return true; 993 994 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; 995 996 #ifdef __DML_VBA_DEBUG__ 997 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 998 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 999 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK); 1000 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 1001 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK); 1002 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 1003 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); 1004 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled); 1005 #endif 1006 1007 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 
18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; 1008 1009 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) 1010 *DSTYAfterScaler = 1; 1011 else 1012 *DSTYAfterScaler = 0; 1013 1014 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 1015 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 1016 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 1017 1018 #ifdef __DML_VBA_DEBUG__ 1019 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 1020 #endif 1021 1022 MyError = false; 1023 1024 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 1025 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime; 1026 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime; 1027 1028 #ifdef __DML_VBA_ALLOW_DELTA__ 1029 if (!myPipe->DCCEnable) { 1030 Tr0_trips = 0.0; 1031 Tr0_trips_rounded = 0.0; 1032 } 1033 #endif 1034 1035 if (!GPUVMEnable) { 1036 Tvm_trips = 0.0; 1037 Tvm_trips_rounded = 0.0; 1038 } 1039 1040 if (GPUVMEnable) { 1041 if (GPUVMPageTableLevels >= 3) { 1042 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1); 1043 } else { 1044 *Tno_bw = 0; 1045 } 1046 } else if (!myPipe->DCCEnable) { 1047 *Tno_bw = LineTime; 1048 } else { 1049 *Tno_bw = LineTime / 4; 1050 } 1051 1052 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12) 1053 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 1054 else 1055 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 1056 /*rev 99*/ 1057 prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane; 1058 prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr; 1059 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; 1060 prefetch_sw_bytes = PrefetchSourceLinesY * 
swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 1061 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw); 1062 1063 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre); 1064 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4; 1065 #ifdef __DML_VBA_DEBUG__ 1066 Tsw_oto = Lsw_oto * LineTime; 1067 #endif 1068 1069 1070 #ifdef __DML_VBA_DEBUG__ 1071 dml_print("DML: HTotal: %d\n", myPipe->HTotal); 1072 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto); 1073 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY); 1074 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub); 1075 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY); 1076 dml_print("DML: Tsw_oto: %f\n", Tsw_oto); 1077 #endif 1078 1079 if (GPUVMEnable == true) 1080 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0); 1081 else 1082 Tvm_oto = LineTime / 4.0; 1083 1084 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1085 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term) 1086 LineTime - Tvm_oto, 1087 LineTime / 4); 1088 } else { 1089 Tr0_oto = (LineTime - Tvm_oto) / 2.0; 1090 } 1091 1092 #ifdef __DML_VBA_DEBUG__ 1093 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 1094 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 1095 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte); 1096 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 1097 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1098 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1099 dml_print("DML::%s: prefetch_bw_oto = %f\n", 
__func__, prefetch_bw_oto); 1100 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 1101 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 1102 #endif 1103 1104 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 1105 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 1106 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 1107 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); 1108 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; 1109 Tpre_rounded = dst_y_prefetch_equ * LineTime; 1110 1111 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 1112 1113 if (prefetch_sw_bytes < dep_bytes) 1114 prefetch_sw_bytes = 2 * dep_bytes; 1115 1116 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto); 1117 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines); 1118 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines); 1119 dml_print("DML: Lsw_oto: %f\n", Lsw_oto); 1120 dml_print("DML: LineTime: %f\n", LineTime); 1121 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ); 1122 1123 dml_print("DML: LineTime: %f\n", LineTime); 1124 dml_print("DML: VStartup: %d\n", VStartup); 1125 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime); 1126 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup); 1127 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc); 1128 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait); 1129 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf); 1130 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec); 1131 dml_print("DML: Tdmsks: 
%fus - time before active dmd must complete transmission at dio\n", Tdmsks); 1132 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm); 1133 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl); 1134 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler); 1135 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler); 1136 1137 *PrefetchBandwidth = 0; 1138 *DestinationLinesToRequestVMInVBlank = 0; 1139 *DestinationLinesToRequestRowInVBlank = 0; 1140 *VRatioPrefetchY = 0; 1141 *VRatioPrefetchC = 0; 1142 *RequiredPrefetchPixDataBWLuma = 0; 1143 if (dst_y_prefetch_equ > 1) { 1144 double PrefetchBandwidth1; 1145 double PrefetchBandwidth2; 1146 double PrefetchBandwidth3; 1147 double PrefetchBandwidth4; 1148 1149 if (Tpre_rounded - *Tno_bw > 0) { 1150 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 1151 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); 1152 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 1153 } else { 1154 PrefetchBandwidth1 = 0; 1155 } 1156 1157 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 1158 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1159 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 1160 } 1161 1162 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 1163 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 1164 else 1165 PrefetchBandwidth2 = 0; 1166 1167 if (Tpre_rounded - Tvm_trips_rounded > 0) { 1168 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * 
PixelPTEBytesPerRow * HostVMInefficiencyFactor 1169 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 1170 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 1171 } else { 1172 PrefetchBandwidth3 = 0; 1173 } 1174 1175 #ifdef __DML_VBA_DEBUG__ 1176 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 1177 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 1178 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); 1179 #endif 1180 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) { 1181 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1182 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 1183 } 1184 1185 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) 1186 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 1187 else 1188 PrefetchBandwidth4 = 0; 1189 1190 { 1191 bool Case1OK; 1192 bool Case2OK; 1193 bool Case3OK; 1194 1195 if (PrefetchBandwidth1 > 0) { 1196 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded 1197 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) { 1198 Case1OK = true; 1199 } else { 1200 Case1OK = false; 1201 } 1202 } else { 1203 Case1OK = false; 1204 } 1205 1206 if (PrefetchBandwidth2 > 0) { 1207 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded 1208 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) { 1209 Case2OK = true; 1210 } else { 1211 Case2OK = false; 1212 } 1213 } else { 1214 Case2OK = false; 1215 } 1216 1217 if (PrefetchBandwidth3 > 0) { 1218 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / 
PrefetchBandwidth3 < Tvm_trips_rounded 1219 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) { 1220 Case3OK = true; 1221 } else { 1222 Case3OK = false; 1223 } 1224 } else { 1225 Case3OK = false; 1226 } 1227 1228 if (Case1OK) { 1229 prefetch_bw_equ = PrefetchBandwidth1; 1230 } else if (Case2OK) { 1231 prefetch_bw_equ = PrefetchBandwidth2; 1232 } else if (Case3OK) { 1233 prefetch_bw_equ = PrefetchBandwidth3; 1234 } else { 1235 prefetch_bw_equ = PrefetchBandwidth4; 1236 } 1237 1238 #ifdef __DML_VBA_DEBUG__ 1239 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 1240 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 1241 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 1242 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 1243 #endif 1244 1245 if (prefetch_bw_equ > 0) { 1246 if (GPUVMEnable == true) { 1247 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); 1248 } else { 1249 Tvm_equ = LineTime / 4; 1250 } 1251 1252 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1253 Tr0_equ = dml_max4( 1254 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ, 1255 Tr0_trips, 1256 (LineTime - Tvm_equ) / 2, 1257 LineTime / 4); 1258 } else { 1259 Tr0_equ = (LineTime - Tvm_equ) / 2; 1260 } 1261 } else { 1262 Tvm_equ = 0; 1263 Tr0_equ = 0; 1264 dml_print("DML: prefetch_bw_equ equals 0! 
%s:%d\n", __FILE__, __LINE__); 1265 } 1266 } 1267 1268 if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 1269 *DestinationLinesForPrefetch = dst_y_prefetch_oto; 1270 TimeForFetchingMetaPTE = Tvm_oto; 1271 TimeForFetchingRowInVBlank = Tr0_oto; 1272 *PrefetchBandwidth = prefetch_bw_oto; 1273 } else { 1274 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 1275 TimeForFetchingMetaPTE = Tvm_equ; 1276 TimeForFetchingRowInVBlank = Tr0_equ; 1277 *PrefetchBandwidth = prefetch_bw_equ; 1278 } 1279 1280 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 1281 1282 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 1283 1284 #ifdef __DML_VBA_ALLOW_DELTA__ 1285 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch 1286 // See note above dated 5/30/2018 1287 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ? 1288 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this?? 
1289 #else 1290 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 1291 #endif 1292 1293 #ifdef __DML_VBA_DEBUG__ 1294 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 1295 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1296 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 1297 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1298 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1299 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1300 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 1301 #endif 1302 1303 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) { 1304 1305 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 1306 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1307 #ifdef __DML_VBA_DEBUG__ 1308 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1309 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); 1310 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY); 1311 #endif 1312 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 1313 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 1314 *VRatioPrefetchY = dml_max( 1315 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData, 1316 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0)); 1317 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1318 } else { 1319 MyError = true; 1320 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1321 *VRatioPrefetchY = 0; 1322 } 1323 #ifdef __DML_VBA_DEBUG__ 1324 
dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1325 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1326 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); 1327 #endif 1328 } 1329 1330 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 1331 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1332 1333 #ifdef __DML_VBA_DEBUG__ 1334 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1335 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); 1336 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC); 1337 #endif 1338 if ((SwathHeightC > 4) || VInitPreFillC > 3) { 1339 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 1340 *VRatioPrefetchC = dml_max( 1341 *VRatioPrefetchC, 1342 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0)); 1343 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1344 } else { 1345 MyError = true; 1346 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1347 *VRatioPrefetchC = 0; 1348 } 1349 #ifdef __DML_VBA_DEBUG__ 1350 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1351 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 1352 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); 1353 #endif 1354 } 1355 1356 #ifdef __DML_VBA_DEBUG__ 1357 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 1358 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 1359 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1360 #endif 1361 1362 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime; 1363 1364 #ifdef __DML_VBA_DEBUG__ 1365 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, 
*RequiredPrefetchPixDataBWLuma); 1366 #endif 1367 1368 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub 1369 / LineTime; 1370 } else { 1371 MyError = true; 1372 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1373 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData); 1374 *VRatioPrefetchY = 0; 1375 *VRatioPrefetchC = 0; 1376 *RequiredPrefetchPixDataBWLuma = 0; 1377 *RequiredPrefetchPixDataBWChroma = 0; 1378 } 1379 1380 dml_print( 1381 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 1382 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 1383 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 1384 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank); 1385 dml_print( 1386 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", 1387 (double) LinesToRequestPrefetchPixelData * LineTime); 1388 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", 1389 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / 1390 (double) myPipe->HTotal)) * LineTime); 1391 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 1392 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", 1393 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank 1394 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); 1395 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); 1396 1397 } else { 1398 MyError = true; 1399 dml_print("DML: MyErr set %s:%d\n", __FILE__, 
__LINE__); 1400 } 1401 1402 { 1403 double prefetch_vm_bw; 1404 double prefetch_row_bw; 1405 1406 if (PDEAndMetaPTEBytesFrame == 0) { 1407 prefetch_vm_bw = 0; 1408 } else if (*DestinationLinesToRequestVMInVBlank > 0) { 1409 #ifdef __DML_VBA_DEBUG__ 1410 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1411 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1412 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1413 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1414 #endif 1415 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime); 1416 #ifdef __DML_VBA_DEBUG__ 1417 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 1418 #endif 1419 } else { 1420 prefetch_vm_bw = 0; 1421 MyError = true; 1422 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1423 } 1424 1425 if (MetaRowByte + PixelPTEBytesPerRow == 0) { 1426 prefetch_row_bw = 0; 1427 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 1428 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime); 1429 1430 #ifdef __DML_VBA_DEBUG__ 1431 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 1432 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1433 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1434 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 1435 #endif 1436 } else { 1437 prefetch_row_bw = 0; 1438 MyError = true; 1439 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1440 } 1441 1442 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 1443 } 1444 1445 if (MyError) { 1446 *PrefetchBandwidth = 0; 1447 TimeForFetchingMetaPTE = 0; 1448 
TimeForFetchingRowInVBlank = 0; 1449 *DestinationLinesToRequestVMInVBlank = 0; 1450 *DestinationLinesToRequestRowInVBlank = 0; 1451 *DestinationLinesForPrefetch = 0; 1452 LinesToRequestPrefetchPixelData = 0; 1453 *VRatioPrefetchY = 0; 1454 *VRatioPrefetchC = 0; 1455 *RequiredPrefetchPixDataBWLuma = 0; 1456 *RequiredPrefetchPixDataBWChroma = 0; 1457 } 1458 1459 return MyError; 1460 }

/*
 * Snap Clock to a value of the form 4 * VCOSpeed / N, where
 * N = dml_floor(4 * VCOSpeed / Clock, 1).
 *
 * NOTE(review): dml_floor(x, 1) presumably rounds down to a whole number,
 * which would make the result >= Clock - confirm against dml_inline_defs.h.
 * N is not checked against zero before the division.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double Divider = dml_floor(VCOSpeed * 4 / Clock, 1);

	return VCOSpeed * 4 / Divider;
}

/*
 * Counterpart of RoundToDFSGranularityUp(): same 4 * VCOSpeed / N form, but
 * N = dml_ceil(4 * VCOSpeed / Clock, 1).
 *
 * NOTE(review): dml_ceil(x, 1) presumably rounds up to a whole number, which
 * would make the result <= Clock - confirm against dml_inline_defs.h.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double Divider = dml_ceil(VCOSpeed * 4.0 / Clock, 1);

	return VCOSpeed * 4 / Divider;
}

/*
 * Helper for CalculateDCCConfiguration(): for one scan direction, decide
 * which planes are flagged for 128-byte requests (flag value 1) given the
 * full-swath byte counts of both planes and the DET buffer size.
 *
 *  - 2 * luma + 2 * chroma fits:                        neither plane flagged
 *  - luma <  1.5 * chroma and 2 * luma + chroma fits:   only chroma flagged
 *  - luma >= 1.5 * chroma and luma + 2 * chroma fits:   only luma flagged
 *  - otherwise:                                         both planes flagged
 *
 * The sums are evaluated in int and then compared against the double
 * DETBufferSize.
 */
static void DCCConfigSelectRequest128(
		int FullSwathBytesLuma,
		int FullSwathBytesChroma,
		double DETBufferSize,
		int *Req128Luma,
		int *Req128Chroma)
{
	*Req128Luma = 1;
	*Req128Chroma = 1;

	if (2 * FullSwathBytesLuma + 2 * FullSwathBytesChroma <= DETBufferSize) {
		*Req128Luma = 0;
		*Req128Chroma = 0;
	} else if (FullSwathBytesLuma < 1.5 * FullSwathBytesChroma) {
		if (2 * FullSwathBytesLuma + FullSwathBytesChroma <= DETBufferSize)
			*Req128Luma = 0;
	} else if (FullSwathBytesLuma + 2 * FullSwathBytesChroma <= DETBufferSize) {
		*Req128Chroma = 0;
	}
}

/*
 * Helper for CalculateDCCConfiguration(): horizontal segment-order flag of a
 * plane. Returns 0 for 2 bytes per pixel, and for 4 bytes per pixel unless
 * the tiling is dm_sw_64kb_r_x; returns 1 otherwise.
 */
static int DCCConfigHorzSegmentsContiguous(unsigned int BytePerPixel, enum dm_swizzle_mode TilingFormat)
{
	if (BytePerPixel == 2)
		return 0;
	if (BytePerPixel == 4 && TilingFormat != dm_sw_64kb_r_x)
		return 0;
	return 1;
}

/*
 * Helper for CalculateDCCConfiguration(): vertical segment-order flag of a
 * plane. Returns 0 for 8 bytes per pixel with a 64kb_d, 64kb_d_x, 64kb_d_t or
 * 64kb_r_x tiling, and for 4 bytes per pixel with 64kb_r_x; returns 1
 * otherwise.
 */
static int DCCConfigVertSegmentsContiguous(unsigned int BytePerPixel, enum dm_swizzle_mode TilingFormat)
{
	if (BytePerPixel == 8
			&& (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x
				|| TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
		return 0;
	if (BytePerPixel == 4 && TilingFormat == dm_sw_64kb_r_x)
		return 0;
	return 1;
}

/*
 * Pick the DCC request type for the luma and chroma planes and report the
 * matching block sizes through the six output pointers.
 *
 * Steps, in order:
 *  1. Derive a horizontal and a vertical viewport limit from DETBufferSize
 *     and the 256-byte request heights, cap them with fixed limits, and
 *     clamp the luma surface width/height to them.
 *  2. Compute full-swath byte counts for both planes and both directions.
 *  3. Per direction, flag the planes that need 128-byte requests
 *     (DCCConfigSelectRequest128()).
 *  4. Combine those flags with the per-plane segment-order flags into a
 *     request type. Both directions are considered when
 *     DCCProgrammingAssumesScanDirectionUnknown is set; otherwise only the
 *     direction selected by ScanOrientation is.
 *  5. Map the request type to (max uncompressed, max compressed,
 *     independent) block sizes: 256/256/0, 256/128/128 or 256/64/64.
 *  6. Zero the chroma outputs when DCC is disabled or BytePerPixelC is 0,
 *     and the luma outputs when DCC is disabled.
 *
 * SurfaceWidthChroma, SurfaceHeightChroma, BytePerPixelDETY and
 * BytePerPixelDETC are accepted but never read.
 */
static void CalculateDCCConfiguration(
		bool DCCEnabled,
		bool DCCProgrammingAssumesScanDirectionUnknown,
		enum source_format_class SourcePixelFormat,
		unsigned int SurfaceWidthLuma,
		unsigned int SurfaceWidthChroma,
		unsigned int SurfaceHeightLuma,
		unsigned int SurfaceHeightChroma,
		double DETBufferSize,
		unsigned int RequestHeight256ByteLuma,
		unsigned int RequestHeight256ByteChroma,
		enum dm_swizzle_mode TilingFormat,
		unsigned int BytePerPixelY,
		unsigned int BytePerPixelC,
		double BytePerPixelDETY,
		double BytePerPixelDETC,
		enum scan_direction_class ScanOrientation,
		unsigned int *MaxUncompressedBlockLuma,
		unsigned int *MaxUncompressedBlockChroma,
		unsigned int *MaxCompressedBlockLuma,
		unsigned int *MaxCompressedBlockChroma,
		unsigned int *IndependentBlockLuma,
		unsigned int *IndependentBlockChroma)
{
	typedef enum {
		REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
	} RequestType;

	const bool is_yuv420 = SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10
			|| SourcePixelFormat == dm_420_12;
	const bool is_yuv420_10 = SourcePixelFormat == dm_420_10;
	const bool has_chroma = BytePerPixelC > 0;
	const bool tiling_64kb_s = TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t
			|| TilingFormat == dm_sw_64kb_s_x;
	const bool use_horz = DCCProgrammingAssumesScanDirectionUnknown || ScanOrientation != dm_vert;
	const bool use_vert = DCCProgrammingAssumesScanDirectionUnknown || !(ScanOrientation != dm_vert);

	/* Divisor applied to every chroma term: 2 for the 4:2:0 formats, else 1. */
	const int yuv420_div = is_yuv420 ? 2 : 1;
	const int horz_div_l = (BytePerPixelY == 8 && tiling_64kb_s) ? 0 : 1;
	const int horz_div_c = (BytePerPixelC == 8 && tiling_64kb_s) ? 0 : 1;
	const int vert_div_l = (BytePerPixelY == 1) ? 0 : 1;
	const int vert_div_c = (BytePerPixelC == 1) ? 0 : 1;

	const int horz_contig_l = DCCConfigHorzSegmentsContiguous(BytePerPixelY, TilingFormat);
	const int vert_contig_l = DCCConfigVertSegmentsContiguous(BytePerPixelY, TilingFormat);
	const int horz_contig_c = DCCConfigHorzSegmentsContiguous(BytePerPixelC, TilingFormat);
	const int vert_contig_c = DCCConfigVertSegmentsContiguous(BytePerPixelC, TilingFormat);

	int swath_buf_size;
	double horz_denom;
	double vert_denom;
	double detile_buf_vp_horz_limit;
	double detile_buf_vp_vert_limit;

	int MAS_vp_horz_limit;
	int MAS_vp_vert_limit;
	int max_vp_horz_width;
	int max_vp_vert_height;
	int eff_width_l;
	int eff_width_c;
	int eff_height_l;
	int eff_height_c;

	int swath_horz_l;
	int swath_horz_c = 0;
	int swath_vert_l;
	int swath_vert_c = 0;
	int req128_horz_l;
	int req128_horz_c;
	int req128_vert_l;
	int req128_vert_c;

	RequestType RequestLuma;
	RequestType RequestChroma;

	/* Half the DET buffer minus 2 * 256 bytes for each plane present. */
	swath_buf_size = DETBufferSize / 2 - (has_chroma ? 2 : 1) * 2 * 256;

	horz_denom = (double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l);
	vert_denom = 256.0 / RequestHeight256ByteLuma / (1 + vert_div_l);
	if (has_chroma) {
		horz_denom = horz_denom
				+ (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / yuv420_div;
		vert_denom = vert_denom
				+ 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / yuv420_div;
	}
	detile_buf_vp_horz_limit = (double) swath_buf_size / horz_denom;
	detile_buf_vp_vert_limit = (double) swath_buf_size / vert_denom;

	if (is_yuv420_10) {
		detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
		detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
	}

	detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
	detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);

	MAS_vp_horz_limit = (SourcePixelFormat == dm_rgbe_alpha) ? 3840 : 5760;
	MAS_vp_vert_limit = has_chroma ? 2880 : 5760;
	max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
	max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);

	/*
	 * The surface sizes are unsigned, so these comparisons are carried out
	 * in unsigned arithmetic, exactly as in the reference formulation.
	 */
	eff_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
	eff_width_c = eff_width_l / yuv420_div;
	eff_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
	eff_height_c = eff_height_l / yuv420_div;

	swath_horz_l = eff_width_l * RequestHeight256ByteLuma * BytePerPixelY;
	swath_vert_l = eff_height_l * 256 / RequestHeight256ByteLuma;
	if (has_chroma) {
		swath_horz_c = eff_width_c * RequestHeight256ByteChroma * BytePerPixelC;
		swath_vert_c = eff_height_c * 256 / RequestHeight256ByteChroma;
	}

	if (is_yuv420_10) {
		/* "* 2 / 3" is integer arithmetic; the quotient is truncated before dml_ceil(). */
		swath_horz_l = dml_ceil(swath_horz_l * 2 / 3, 256);
		swath_horz_c = dml_ceil(swath_horz_c * 2 / 3, 256);
		swath_vert_l = dml_ceil(swath_vert_l * 2 / 3, 256);
		swath_vert_c = dml_ceil(swath_vert_c * 2 / 3, 256);
	}

	DCCConfigSelectRequest128(swath_horz_l, swath_horz_c, DETBufferSize, &req128_horz_l, &req128_horz_c);
	DCCConfigSelectRequest128(swath_vert_l, swath_vert_c, DETBufferSize, &req128_vert_l, &req128_vert_c);

	/*
	 * A direction that is not under consideration must not influence the
	 * request type, so its flags are cleared. With the flags limited to
	 * 0/1, the single decision below then covers all three cases
	 * (direction unknown, horizontal only, vertical only).
	 */
	if (!use_horz) {
		req128_horz_l = 0;
		req128_horz_c = 0;
	}
	if (!use_vert) {
		req128_vert_l = 0;
		req128_vert_c = 0;
	}

	if (req128_horz_l == 0 && req128_vert_l == 0)
		RequestLuma = REQ_256Bytes;
	else if ((req128_horz_l == 1 && horz_contig_l == 0) || (req128_vert_l == 1 && vert_contig_l == 0))
		RequestLuma = REQ_128BytesNonContiguous;
	else
		RequestLuma = REQ_128BytesContiguous;

	if (req128_horz_c == 0 && req128_vert_c == 0)
		RequestChroma = REQ_256Bytes;
	else if ((req128_horz_c == 1 && horz_contig_c == 0) || (req128_vert_c == 1 && vert_contig_c == 0))
		RequestChroma = REQ_128BytesNonContiguous;
	else
		RequestChroma = REQ_128BytesContiguous;

	*MaxUncompressedBlockLuma = 256;
	switch (RequestLuma) {
	case REQ_256Bytes:
		*MaxCompressedBlockLuma = 256;
		*IndependentBlockLuma = 0;
		break;
	case REQ_128BytesContiguous:
		*MaxCompressedBlockLuma = 128;
		*IndependentBlockLuma = 128;
		break;
	default:
		*MaxCompressedBlockLuma = 64;
		*IndependentBlockLuma = 64;
		break;
	}

	*MaxUncompressedBlockChroma = 256;
	switch (RequestChroma) {
	case REQ_256Bytes:
		*MaxCompressedBlockChroma = 256;
		*IndependentBlockChroma = 0;
		break;
	case REQ_128BytesContiguous:
		*MaxCompressedBlockChroma = 128;
		*IndependentBlockChroma = 128;
		break;
	default:
		*MaxCompressedBlockChroma = 64;
		*IndependentBlockChroma = 64;
		break;
	}

	if (!DCCEnabled || !has_chroma) {
		*MaxUncompressedBlockChroma = 0;
		*MaxCompressedBlockChroma = 0;
		*IndependentBlockChroma = 0;
	}

	if (!DCCEnabled) {
		*MaxUncompressedBlockLuma = 0;
		*MaxCompressedBlockLuma = 0;
		*IndependentBlockLuma = 0;
	}
}

/*
 * Compute the number of source lines to prefetch for one plane.
 *
 * Writes the initial prefill line count to *VInitPreFill and the number of
 * whole swaths to *MaxNumSwath, and returns
 * *MaxNumSwath * SwathHeight + <partial swath lines>. That sum is evaluated
 * in unsigned int before being converted to double.
 *
 * When mode_lib->vba.IgnoreViewportPositioning is set, the swath count and
 * the partial-swath remainder use an offset of 1 instead of 2, the partial
 * swath is not forced to be at least 1, and a warning is printed if
 * ViewportYStart is non-zero.
 *
 * SwathHeight is used as a divisor and a modulus and is not checked for zero.
 */
static double CalculatePrefetchSourceLines(
		struct display_mode_lib *mode_lib,
		double VRatio,
		double vtaps,
		bool Interlace,
		bool ProgressiveToInterlaceUnitInOPP,
		unsigned int SwathHeight,
		unsigned int ViewportYStart,
		double *VInitPreFill,
		unsigned int *MaxNumSwath)
{
	struct vba_vars_st *v = &mode_lib->vba;
	const bool ignore_viewport = v->IgnoreViewportPositioning;
	/* Lines taken off the prefill before the modulo: 2 normally, 1 when the viewport position is ignored. */
	const int partial_offset = ignore_viewport ? 1 : 2;
	double prefill;
	unsigned int MaxPartialSwath;
	unsigned int source_lines;

	prefill = ProgressiveToInterlaceUnitInOPP
			? dml_floor((VRatio + vtaps + 1) / 2.0, 1)
			: dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
	*VInitPreFill = prefill;

	if (ignore_viewport) {
		if (ViewportYStart != 0)
			dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");

		*MaxNumSwath = dml_ceil(prefill / SwathHeight, 1);
	} else {
		*MaxNumSwath = dml_ceil((prefill - 1.0) / SwathHeight, 1) + 1.0;
	}

	/* The cast binds to the parenthesised sum; the modulo is done in unsigned int. */
	if (prefill > 1.0)
		MaxPartialSwath = (unsigned int) (prefill - partial_offset) % SwathHeight;
	else
		MaxPartialSwath = (unsigned int) (prefill + SwathHeight - partial_offset) % SwathHeight;

	if (!ignore_viewport)
		MaxPartialSwath = dml_max(1U, MaxPartialSwath);

	source_lines = *MaxNumSwath * SwathHeight + MaxPartialSwath;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
	dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
	dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
	dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
	dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
	dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
	dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
	dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
	dml_print("DML::%s: Prefetch source lines = %d\n", __func__, source_lines);
#endif
	return source_lines;
}

1791 static unsigned int CalculateVMAndRowBytes( 1792 struct display_mode_lib *mode_lib, 1793 bool DCCEnable, 1794 unsigned int BlockHeight256Bytes, 1795 unsigned int BlockWidth256Bytes, 1796 enum source_format_class SourcePixelFormat, 1797 unsigned int SurfaceTiling, 1798 unsigned int BytePerPixel, 1799 enum scan_direction_class ScanDirection, 1800 unsigned int SwathWidth, 1801 unsigned int ViewportHeight, 1802 bool GPUVMEnable, 1803 bool HostVMEnable, 1804 unsigned int HostVMMaxNonCachedPageTableLevels, 1805 unsigned int GPUVMMinPageSize, 1806 unsigned int HostVMMinPageSize, 1807 unsigned int PTEBufferSizeInRequests, 1808 unsigned int Pitch, 1809 unsigned int DCCMetaPitch, 1810 unsigned int *MacroTileWidth, 1811 unsigned int *MetaRowByte, 1812 unsigned int *PixelPTEBytesPerRow, 1813 bool *PTEBufferSizeNotExceeded, 1814 int *dpte_row_width_ub, 1815 unsigned int *dpte_row_height, 1816 unsigned int *MetaRequestWidth, 1817 unsigned int *MetaRequestHeight, 1818 unsigned int *meta_row_width, 1819 unsigned int *meta_row_height, 1820 int *vm_group_bytes, 1821 unsigned int *dpte_group_bytes, 1822 unsigned int *PixelPTEReqWidth, 1823 unsigned int
*PixelPTEReqHeight, 1824 unsigned int *PTERequestSize, 1825 int *DPDE0BytesFrame, 1826 int *MetaPTEBytesFrame) 1827 { 1828 struct vba_vars_st *v = &mode_lib->vba; 1829 unsigned int MPDEBytesFrame; 1830 unsigned int DCCMetaSurfaceBytes; 1831 unsigned int MacroTileSizeBytes; 1832 unsigned int MacroTileHeight; 1833 unsigned int ExtraDPDEBytesFrame; 1834 unsigned int PDEAndMetaPTEBytesFrame; 1835 unsigned int PixelPTEReqHeightPTEs = 0; 1836 unsigned int HostVMDynamicLevels = 0; 1837 double FractionOfPTEReturnDrop; 1838 1839 if (GPUVMEnable == true && HostVMEnable == true) { 1840 if (HostVMMinPageSize < 2048) { 1841 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 1842 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { 1843 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 1844 } else { 1845 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 1846 } 1847 } 1848 1849 *MetaRequestHeight = 8 * BlockHeight256Bytes; 1850 *MetaRequestWidth = 8 * BlockWidth256Bytes; 1851 if (ScanDirection != dm_vert) { 1852 *meta_row_height = *MetaRequestHeight; 1853 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; 1854 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; 1855 } else { 1856 *meta_row_height = *MetaRequestWidth; 1857 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; 1858 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; 1859 } 1860 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256; 1861 if (GPUVMEnable == true) { 1862 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64; 1863 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1); 1864 } else { 1865 *MetaPTEBytesFrame = 0; 1866 MPDEBytesFrame = 0; 1867 } 1868 1869 
if (DCCEnable != true) { 1870 *MetaPTEBytesFrame = 0; 1871 MPDEBytesFrame = 0; 1872 *MetaRowByte = 0; 1873 } 1874 1875 if (SurfaceTiling == dm_sw_linear) { 1876 MacroTileSizeBytes = 256; 1877 MacroTileHeight = BlockHeight256Bytes; 1878 } else { 1879 MacroTileSizeBytes = 65536; 1880 MacroTileHeight = 16 * BlockHeight256Bytes; 1881 } 1882 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; 1883 1884 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) { 1885 if (ScanDirection != dm_vert) { 1886 *DPDE0BytesFrame = 64 1887 * (dml_ceil( 1888 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 1889 / (8 * 2097152), 1890 1) + 1); 1891 } else { 1892 *DPDE0BytesFrame = 64 1893 * (dml_ceil( 1894 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 1895 / (8 * 2097152), 1896 1) + 1); 1897 } 1898 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2); 1899 } else { 1900 *DPDE0BytesFrame = 0; 1901 ExtraDPDEBytesFrame = 0; 1902 } 1903 1904 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; 1905 1906 #ifdef __DML_VBA_DEBUG__ 1907 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); 1908 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); 1909 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); 1910 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); 1911 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1912 #endif 1913 1914 if (HostVMEnable == true) { 1915 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); 1916 } 1917 #ifdef __DML_VBA_DEBUG__ 1918 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1919 #endif 1920 1921 if (SurfaceTiling == dm_sw_linear) { 1922 
PixelPTEReqHeightPTEs = 1; 1923 *PixelPTEReqHeight = 1; 1924 *PixelPTEReqWidth = 32768.0 / BytePerPixel; 1925 *PTERequestSize = 64; 1926 FractionOfPTEReturnDrop = 0; 1927 } else if (MacroTileSizeBytes == 4096) { 1928 PixelPTEReqHeightPTEs = 1; 1929 *PixelPTEReqHeight = MacroTileHeight; 1930 *PixelPTEReqWidth = 8 * *MacroTileWidth; 1931 *PTERequestSize = 64; 1932 if (ScanDirection != dm_vert) 1933 FractionOfPTEReturnDrop = 0; 1934 else 1935 FractionOfPTEReturnDrop = 7 / 8; 1936 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) { 1937 PixelPTEReqHeightPTEs = 16; 1938 *PixelPTEReqHeight = 16 * BlockHeight256Bytes; 1939 *PixelPTEReqWidth = 16 * BlockWidth256Bytes; 1940 *PTERequestSize = 128; 1941 FractionOfPTEReturnDrop = 0; 1942 } else { 1943 PixelPTEReqHeightPTEs = 1; 1944 *PixelPTEReqHeight = MacroTileHeight; 1945 *PixelPTEReqWidth = 8 * *MacroTileWidth; 1946 *PTERequestSize = 64; 1947 FractionOfPTEReturnDrop = 0; 1948 } 1949 1950 if (SurfaceTiling == dm_sw_linear) { 1951 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); 1952 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 1953 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 1954 } else if (ScanDirection != dm_vert) { 1955 *dpte_row_height = *PixelPTEReqHeight; 1956 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 1957 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 1958 } else { 1959 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth); 1960 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight; 1961 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; 1962 } 1963 1964 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * 
PTEBufferSizeInRequests) { 1965 *PTEBufferSizeNotExceeded = true; 1966 } else { 1967 *PTEBufferSizeNotExceeded = false; 1968 } 1969 1970 if (GPUVMEnable != true) { 1971 *PixelPTEBytesPerRow = 0; 1972 *PTEBufferSizeNotExceeded = true; 1973 } 1974 1975 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame); 1976 1977 if (HostVMEnable == true) { 1978 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); 1979 } 1980 1981 if (HostVMEnable == true) { 1982 *vm_group_bytes = 512; 1983 *dpte_group_bytes = 512; 1984 } else if (GPUVMEnable == true) { 1985 *vm_group_bytes = 2048; 1986 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) { 1987 *dpte_group_bytes = 512; 1988 } else { 1989 *dpte_group_bytes = 2048; 1990 } 1991 } else { 1992 *vm_group_bytes = 0; 1993 *dpte_group_bytes = 0; 1994 } 1995 return PDEAndMetaPTEBytesFrame; 1996 } 1997 1998 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib) 1999 { 2000 struct vba_vars_st *v = &mode_lib->vba; 2001 unsigned int j, k; 2002 double HostVMInefficiencyFactor = 1.0; 2003 bool NoChromaPlanes = true; 2004 int ReorderBytes; 2005 double VMDataOnlyReturnBW; 2006 double MaxTotalRDBandwidth = 0; 2007 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb]; 2008 2009 v->WritebackDISPCLK = 0.0; 2010 v->DISPCLKWithRamping = 0; 2011 v->DISPCLKWithoutRamping = 0; 2012 v->GlobalDPPCLK = 0.0; 2013 /* DAL custom code: need to update ReturnBW in case min dcfclk is overriden */ 2014 { 2015 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 2016 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb], 2017 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn); 2018 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth; 2019 if 
(v->HostVMEnable != true) { 2020 v->ReturnBW = dml_min( 2021 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2022 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 2023 } else { 2024 v->ReturnBW = dml_min( 2025 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2026 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 2027 } 2028 } 2029 /* End DAL custom code */ 2030 2031 // DISPCLK and DPPCLK Calculation 2032 // 2033 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2034 if (v->WritebackEnable[k]) { 2035 v->WritebackDISPCLK = dml_max( 2036 v->WritebackDISPCLK, 2037 dml31_CalculateWriteBackDISPCLK( 2038 v->WritebackPixelFormat[k], 2039 v->PixelClock[k], 2040 v->WritebackHRatio[k], 2041 v->WritebackVRatio[k], 2042 v->WritebackHTaps[k], 2043 v->WritebackVTaps[k], 2044 v->WritebackSourceWidth[k], 2045 v->WritebackDestinationWidth[k], 2046 v->HTotal[k], 2047 v->WritebackLineBufferSize)); 2048 } 2049 } 2050 2051 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2052 if (v->HRatio[k] > 1) { 2053 v->PSCL_THROUGHPUT_LUMA[k] = dml_min( 2054 v->MaxDCHUBToPSCLThroughput, 2055 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1)); 2056 } else { 2057 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2058 } 2059 2060 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k] 2061 * dml_max( 2062 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 2063 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0)); 2064 2065 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) { 2066 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k]; 2067 } 2068 2069 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12 
2070 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) { 2071 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0; 2072 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma; 2073 } else { 2074 if (v->HRatioChroma[k] > 1) { 2075 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min( 2076 v->MaxDCHUBToPSCLThroughput, 2077 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); 2078 } else { 2079 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2080 } 2081 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k] 2082 * dml_max3( 2083 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), 2084 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 2085 1.0); 2086 2087 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) { 2088 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k]; 2089 } 2090 2091 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma); 2092 } 2093 } 2094 2095 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2096 if (v->BlendingAndTiming[k] != k) 2097 continue; 2098 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) { 2099 v->DISPCLKWithRamping = dml_max( 2100 v->DISPCLKWithRamping, 2101 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2102 * (1 + v->DISPCLKRampingMargin / 100)); 2103 v->DISPCLKWithoutRamping = dml_max( 2104 v->DISPCLKWithoutRamping, 2105 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2106 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2107 v->DISPCLKWithRamping = dml_max( 2108 v->DISPCLKWithRamping, 2109 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2110 * (1 + v->DISPCLKRampingMargin / 100)); 2111 v->DISPCLKWithoutRamping = dml_max( 2112 v->DISPCLKWithoutRamping, 2113 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2114 } else { 2115 v->DISPCLKWithRamping = 
dml_max( 2116 v->DISPCLKWithRamping, 2117 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100)); 2118 v->DISPCLKWithoutRamping = dml_max( 2119 v->DISPCLKWithoutRamping, 2120 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2121 } 2122 } 2123 2124 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK); 2125 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK); 2126 2127 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0); 2128 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed); 2129 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed); 2130 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( 2131 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz, 2132 v->DISPCLKDPPCLKVCOSpeed); 2133 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2134 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity; 2135 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2136 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity; 2137 } else { 2138 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity; 2139 } 2140 v->DISPCLK = v->DISPCLK_calculated; 2141 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated); 2142 2143 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2144 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2145 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]); 2146 } 2147 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed); 2148 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2149 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * 
dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1); 2150 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]); 2151 } 2152 2153 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2154 v->DPPCLK[k] = v->DPPCLK_calculated[k]; 2155 } 2156 2157 // Urgent and B P-State/DRAM Clock Change Watermark 2158 DTRACE(" dcfclk_mhz = %f", v->DCFCLK); 2159 DTRACE(" return_bus_bw = %f", v->ReturnBW); 2160 2161 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2162 dml30_CalculateBytePerPixelAnd256BBlockSizes( 2163 v->SourcePixelFormat[k], 2164 v->SurfaceTiling[k], 2165 &v->BytePerPixelY[k], 2166 &v->BytePerPixelC[k], 2167 &v->BytePerPixelDETY[k], 2168 &v->BytePerPixelDETC[k], 2169 &v->BlockHeight256BytesY[k], 2170 &v->BlockHeight256BytesC[k], 2171 &v->BlockWidth256BytesY[k], 2172 &v->BlockWidth256BytesC[k]); 2173 } 2174 2175 CalculateSwathWidth( 2176 false, 2177 v->NumberOfActivePlanes, 2178 v->SourcePixelFormat, 2179 v->SourceScan, 2180 v->ViewportWidth, 2181 v->ViewportHeight, 2182 v->SurfaceWidthY, 2183 v->SurfaceWidthC, 2184 v->SurfaceHeightY, 2185 v->SurfaceHeightC, 2186 v->ODMCombineEnabled, 2187 v->BytePerPixelY, 2188 v->BytePerPixelC, 2189 v->BlockHeight256BytesY, 2190 v->BlockHeight256BytesC, 2191 v->BlockWidth256BytesY, 2192 v->BlockWidth256BytesC, 2193 v->BlendingAndTiming, 2194 v->HActive, 2195 v->HRatio, 2196 v->DPPPerPlane, 2197 v->SwathWidthSingleDPPY, 2198 v->SwathWidthSingleDPPC, 2199 v->SwathWidthY, 2200 v->SwathWidthC, 2201 v->dummyinteger3, 2202 v->dummyinteger4, 2203 v->swath_width_luma_ub, 2204 v->swath_width_chroma_ub); 2205 2206 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2207 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) 2208 * v->VRatio[k]; 2209 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) 2210 * v->VRatioChroma[k]; 2211 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + 
v->ReadBandwidthPlaneChroma[k]); 2212 } 2213 2214 // DCFCLK Deep Sleep 2215 CalculateDCFCLKDeepSleep( 2216 mode_lib, 2217 v->NumberOfActivePlanes, 2218 v->BytePerPixelY, 2219 v->BytePerPixelC, 2220 v->VRatio, 2221 v->VRatioChroma, 2222 v->SwathWidthY, 2223 v->SwathWidthC, 2224 v->DPPPerPlane, 2225 v->HRatio, 2226 v->HRatioChroma, 2227 v->PixelClock, 2228 v->PSCL_THROUGHPUT_LUMA, 2229 v->PSCL_THROUGHPUT_CHROMA, 2230 v->DPPCLK, 2231 v->ReadBandwidthPlaneLuma, 2232 v->ReadBandwidthPlaneChroma, 2233 v->ReturnBusWidth, 2234 &v->DCFCLKDeepSleep); 2235 2236 // DSCCLK 2237 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2238 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) { 2239 v->DSCCLK_calculated[k] = 0.0; 2240 } else { 2241 if (v->OutputFormat[k] == dm_420) 2242 v->DSCFormatFactor = 2; 2243 else if (v->OutputFormat[k] == dm_444) 2244 v->DSCFormatFactor = 1; 2245 else if (v->OutputFormat[k] == dm_n422) 2246 v->DSCFormatFactor = 2; 2247 else 2248 v->DSCFormatFactor = 1; 2249 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) 2250 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor 2251 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2252 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) 2253 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor 2254 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2255 else 2256 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor 2257 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2258 } 2259 } 2260 2261 // DSC Delay 2262 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2263 double BPP = v->OutputBpp[k]; 2264 2265 if (v->DSCEnabled[k] && BPP != 0) { 2266 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) { 2267 v->DSCDelay[k] = dscceComputeDelay( 2268 v->DSCInputBitPerComponent[k], 2269 BPP, 2270 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2271 v->NumberOfDSCSlices[k], 2272 v->OutputFormat[k], 2273 
v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 2274 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2275 v->DSCDelay[k] = 2 2276 * (dscceComputeDelay( 2277 v->DSCInputBitPerComponent[k], 2278 BPP, 2279 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2280 v->NumberOfDSCSlices[k] / 2.0, 2281 v->OutputFormat[k], 2282 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2283 } else { 2284 v->DSCDelay[k] = 4 2285 * (dscceComputeDelay( 2286 v->DSCInputBitPerComponent[k], 2287 BPP, 2288 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2289 v->NumberOfDSCSlices[k] / 4.0, 2290 v->OutputFormat[k], 2291 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2292 } 2293 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 2294 } else { 2295 v->DSCDelay[k] = 0; 2296 } 2297 } 2298 2299 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2300 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes 2301 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j]) 2302 v->DSCDelay[k] = v->DSCDelay[j]; 2303 2304 // Prefetch 2305 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2306 unsigned int PDEAndMetaPTEBytesFrameY; 2307 unsigned int PixelPTEBytesPerRowY; 2308 unsigned int MetaRowByteY; 2309 unsigned int MetaRowByteC; 2310 unsigned int PDEAndMetaPTEBytesFrameC; 2311 unsigned int PixelPTEBytesPerRowC; 2312 bool PTEBufferSizeNotExceededY; 2313 bool PTEBufferSizeNotExceededC; 2314 2315 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2316 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 2317 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) { 2318 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2; 2319 v->PTEBufferSizeInRequestsForChroma = 
v->PTEBufferSizeInRequestsForLuma; 2320 } else { 2321 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 2322 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 2323 } 2324 2325 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes( 2326 mode_lib, 2327 v->DCCEnable[k], 2328 v->BlockHeight256BytesC[k], 2329 v->BlockWidth256BytesC[k], 2330 v->SourcePixelFormat[k], 2331 v->SurfaceTiling[k], 2332 v->BytePerPixelC[k], 2333 v->SourceScan[k], 2334 v->SwathWidthC[k], 2335 v->ViewportHeightChroma[k], 2336 v->GPUVMEnable, 2337 v->HostVMEnable, 2338 v->HostVMMaxNonCachedPageTableLevels, 2339 v->GPUVMMinPageSize, 2340 v->HostVMMinPageSize, 2341 v->PTEBufferSizeInRequestsForChroma, 2342 v->PitchC[k], 2343 v->DCCMetaPitchC[k], 2344 &v->MacroTileWidthC[k], 2345 &MetaRowByteC, 2346 &PixelPTEBytesPerRowC, 2347 &PTEBufferSizeNotExceededC, 2348 &v->dpte_row_width_chroma_ub[k], 2349 &v->dpte_row_height_chroma[k], 2350 &v->meta_req_width_chroma[k], 2351 &v->meta_req_height_chroma[k], 2352 &v->meta_row_width_chroma[k], 2353 &v->meta_row_height_chroma[k], 2354 &v->dummyinteger1, 2355 &v->dummyinteger2, 2356 &v->PixelPTEReqWidthC[k], 2357 &v->PixelPTEReqHeightC[k], 2358 &v->PTERequestSizeC[k], 2359 &v->dpde0_bytes_per_frame_ub_c[k], 2360 &v->meta_pte_bytes_per_frame_ub_c[k]); 2361 2362 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( 2363 mode_lib, 2364 v->VRatioChroma[k], 2365 v->VTAPsChroma[k], 2366 v->Interlace[k], 2367 v->ProgressiveToInterlaceUnitInOPP, 2368 v->SwathHeightC[k], 2369 v->ViewportYStartC[k], 2370 &v->VInitPreFillC[k], 2371 &v->MaxNumSwathC[k]); 2372 } else { 2373 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 2374 v->PTEBufferSizeInRequestsForChroma = 0; 2375 PixelPTEBytesPerRowC = 0; 2376 PDEAndMetaPTEBytesFrameC = 0; 2377 MetaRowByteC = 0; 2378 v->MaxNumSwathC[k] = 0; 2379 v->PrefetchSourceLinesC[k] = 0; 2380 } 2381 2382 PDEAndMetaPTEBytesFrameY = 
CalculateVMAndRowBytes( 2383 mode_lib, 2384 v->DCCEnable[k], 2385 v->BlockHeight256BytesY[k], 2386 v->BlockWidth256BytesY[k], 2387 v->SourcePixelFormat[k], 2388 v->SurfaceTiling[k], 2389 v->BytePerPixelY[k], 2390 v->SourceScan[k], 2391 v->SwathWidthY[k], 2392 v->ViewportHeight[k], 2393 v->GPUVMEnable, 2394 v->HostVMEnable, 2395 v->HostVMMaxNonCachedPageTableLevels, 2396 v->GPUVMMinPageSize, 2397 v->HostVMMinPageSize, 2398 v->PTEBufferSizeInRequestsForLuma, 2399 v->PitchY[k], 2400 v->DCCMetaPitchY[k], 2401 &v->MacroTileWidthY[k], 2402 &MetaRowByteY, 2403 &PixelPTEBytesPerRowY, 2404 &PTEBufferSizeNotExceededY, 2405 &v->dpte_row_width_luma_ub[k], 2406 &v->dpte_row_height[k], 2407 &v->meta_req_width[k], 2408 &v->meta_req_height[k], 2409 &v->meta_row_width[k], 2410 &v->meta_row_height[k], 2411 &v->vm_group_bytes[k], 2412 &v->dpte_group_bytes[k], 2413 &v->PixelPTEReqWidthY[k], 2414 &v->PixelPTEReqHeightY[k], 2415 &v->PTERequestSizeY[k], 2416 &v->dpde0_bytes_per_frame_ub_l[k], 2417 &v->meta_pte_bytes_per_frame_ub_l[k]); 2418 2419 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( 2420 mode_lib, 2421 v->VRatio[k], 2422 v->vtaps[k], 2423 v->Interlace[k], 2424 v->ProgressiveToInterlaceUnitInOPP, 2425 v->SwathHeightY[k], 2426 v->ViewportYStartY[k], 2427 &v->VInitPreFillY[k], 2428 &v->MaxNumSwathY[k]); 2429 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; 2430 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; 2431 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC; 2432 2433 CalculateRowBandwidth( 2434 v->GPUVMEnable, 2435 v->SourcePixelFormat[k], 2436 v->VRatio[k], 2437 v->VRatioChroma[k], 2438 v->DCCEnable[k], 2439 v->HTotal[k] / v->PixelClock[k], 2440 MetaRowByteY, 2441 MetaRowByteC, 2442 v->meta_row_height[k], 2443 v->meta_row_height_chroma[k], 2444 PixelPTEBytesPerRowY, 2445 PixelPTEBytesPerRowC, 2446 v->dpte_row_height[k], 2447 v->dpte_row_height_chroma[k], 2448 &v->meta_row_bw[k], 2449 &v->dpte_row_bw[k]); 
2450 } 2451 2452 v->TotalDCCActiveDPP = 0; 2453 v->TotalActiveDPP = 0; 2454 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2455 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k]; 2456 if (v->DCCEnable[k]) 2457 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k]; 2458 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2459 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 2460 NoChromaPlanes = false; 2461 } 2462 2463 ReorderBytes = v->NumberOfChannels 2464 * dml_max3( 2465 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 2466 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 2467 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 2468 2469 VMDataOnlyReturnBW = dml_min( 2470 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn) 2471 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2472 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth 2473 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 2474 2475 #ifdef __DML_VBA_DEBUG__ 2476 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth); 2477 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK); 2478 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock); 2479 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn); 2480 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency); 2481 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed); 2482 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels); 2483 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth); 2484 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, 
v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly); 2485 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); 2486 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW); 2487 #endif 2488 2489 if (v->GPUVMEnable && v->HostVMEnable) 2490 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW; 2491 2492 v->UrgentExtraLatency = CalculateExtraLatency( 2493 v->RoundTripPingLatencyCycles, 2494 ReorderBytes, 2495 v->DCFCLK, 2496 v->TotalActiveDPP, 2497 v->PixelChunkSizeInKByte, 2498 v->TotalDCCActiveDPP, 2499 v->MetaChunkSize, 2500 v->ReturnBW, 2501 v->GPUVMEnable, 2502 v->HostVMEnable, 2503 v->NumberOfActivePlanes, 2504 v->DPPPerPlane, 2505 v->dpte_group_bytes, 2506 HostVMInefficiencyFactor, 2507 v->HostVMMinPageSize, 2508 v->HostVMMaxNonCachedPageTableLevels); 2509 2510 v->TCalc = 24.0 / v->DCFCLKDeepSleep; 2511 2512 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2513 if (v->BlendingAndTiming[k] == k) { 2514 if (v->WritebackEnable[k] == true) { 2515 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency 2516 + CalculateWriteBackDelay( 2517 v->WritebackPixelFormat[k], 2518 v->WritebackHRatio[k], 2519 v->WritebackVRatio[k], 2520 v->WritebackVTaps[k], 2521 v->WritebackDestinationWidth[k], 2522 v->WritebackDestinationHeight[k], 2523 v->WritebackSourceHeight[k], 2524 v->HTotal[k]) / v->DISPCLK; 2525 } else 2526 v->WritebackDelay[v->VoltageLevel][k] = 0; 2527 for (j = 0; j < v->NumberOfActivePlanes; ++j) { 2528 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) { 2529 v->WritebackDelay[v->VoltageLevel][k] = dml_max( 2530 v->WritebackDelay[v->VoltageLevel][k], 2531 v->WritebackLatency 2532 + CalculateWriteBackDelay( 2533 v->WritebackPixelFormat[j], 2534 v->WritebackHRatio[j], 2535 v->WritebackVRatio[j], 2536 v->WritebackVTaps[j], 2537 v->WritebackDestinationWidth[j], 2538 v->WritebackDestinationHeight[j], 2539 v->WritebackSourceHeight[j], 2540 v->HTotal[k]) / v->DISPCLK); 2541 } 2542 } 2543 } 2544 } 2545 2546 
for (k = 0; k < v->NumberOfActivePlanes; ++k) 2547 for (j = 0; j < v->NumberOfActivePlanes; ++j) 2548 if (v->BlendingAndTiming[k] == j) 2549 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j]; 2550 2551 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2552 v->MaxVStartupLines[k] = 2553 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ? 2554 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) : 2555 v->VTotal[k] - v->VActive[k] 2556 - dml_max( 2557 1.0, 2558 dml_ceil( 2559 (double) v->WritebackDelay[v->VoltageLevel][k] 2560 / (v->HTotal[k] / v->PixelClock[k]), 2561 1)); 2562 if (v->MaxVStartupLines[k] > 1023) 2563 v->MaxVStartupLines[k] = 1023; 2564 2565 #ifdef __DML_VBA_DEBUG__ 2566 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 2567 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel); 2568 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]); 2569 #endif 2570 } 2571 2572 v->MaximumMaxVStartupLines = 0; 2573 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2574 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]); 2575 2576 // VBA_DELTA 2577 // We don't really care to iterate between the various prefetch modes 2578 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode); 2579 2580 v->UrgentLatency = CalculateUrgentLatency( 2581 v->UrgentLatencyPixelDataOnly, 2582 v->UrgentLatencyPixelMixedWithVMData, 2583 v->UrgentLatencyVMDataOnly, 2584 v->DoUrgentLatencyAdjustment, 2585 v->UrgentLatencyAdjustmentFabricClockComponent, 2586 v->UrgentLatencyAdjustmentFabricClockReference, 2587 v->FabricClock); 2588 2589 v->FractionOfUrgentBandwidth = 0.0; 2590 v->FractionOfUrgentBandwidthImmediateFlip = 0.0; 2591 2592 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__; 2593 2594 do { 2595 double 
MaxTotalRDBandwidthNoUrgentBurst = 0.0; 2596 bool DestinationLineTimesForPrefetchLessThan2 = false; 2597 bool VRatioPrefetchMoreThan4 = false; 2598 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime); 2599 MaxTotalRDBandwidth = 0; 2600 2601 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines); 2602 2603 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2604 Pipe myPipe; 2605 2606 myPipe.DPPCLK = v->DPPCLK[k]; 2607 myPipe.DISPCLK = v->DISPCLK; 2608 myPipe.PixelClock = v->PixelClock[k]; 2609 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep; 2610 myPipe.DPPPerPlane = v->DPPPerPlane[k]; 2611 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 2612 myPipe.VRatio = v->VRatio[k]; 2613 myPipe.VRatioChroma = v->VRatioChroma[k]; 2614 myPipe.SourceScan = v->SourceScan[k]; 2615 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; 2616 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; 2617 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; 2618 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; 2619 myPipe.InterlaceEnable = v->Interlace[k]; 2620 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 2621 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 2622 myPipe.HTotal = v->HTotal[k]; 2623 myPipe.DCCEnable = v->DCCEnable[k]; 2624 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1 2625 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1; 2626 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 2627 myPipe.BytePerPixelY = v->BytePerPixelY[k]; 2628 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 2629 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 2630 v->ErrorResult[k] = CalculatePrefetchSchedule( 2631 mode_lib, 2632 HostVMInefficiencyFactor, 2633 &myPipe, 2634 v->DSCDelay[k], 2635 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 2636 v->DPPCLKDelaySCL, 2637 v->DPPCLKDelaySCLLBOnly, 2638 v->DPPCLKDelayCNVCCursor, 2639 
v->DISPCLKDelaySubtotal, 2640 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), 2641 v->OutputFormat[k], 2642 v->MaxInterDCNTileRepeaters, 2643 dml_min(v->VStartupLines, v->MaxVStartupLines[k]), 2644 v->MaxVStartupLines[k], 2645 v->GPUVMMaxPageTableLevels, 2646 v->GPUVMEnable, 2647 v->HostVMEnable, 2648 v->HostVMMaxNonCachedPageTableLevels, 2649 v->HostVMMinPageSize, 2650 v->DynamicMetadataEnable[k], 2651 v->DynamicMetadataVMEnabled, 2652 v->DynamicMetadataLinesBeforeActiveRequired[k], 2653 v->DynamicMetadataTransmittedBytes[k], 2654 v->UrgentLatency, 2655 v->UrgentExtraLatency, 2656 v->TCalc, 2657 v->PDEAndMetaPTEBytesFrame[k], 2658 v->MetaRowByte[k], 2659 v->PixelPTEBytesPerRow[k], 2660 v->PrefetchSourceLinesY[k], 2661 v->SwathWidthY[k], 2662 v->VInitPreFillY[k], 2663 v->MaxNumSwathY[k], 2664 v->PrefetchSourceLinesC[k], 2665 v->SwathWidthC[k], 2666 v->VInitPreFillC[k], 2667 v->MaxNumSwathC[k], 2668 v->swath_width_luma_ub[k], 2669 v->swath_width_chroma_ub[k], 2670 v->SwathHeightY[k], 2671 v->SwathHeightC[k], 2672 TWait, 2673 &v->DSTXAfterScaler[k], 2674 &v->DSTYAfterScaler[k], 2675 &v->DestinationLinesForPrefetch[k], 2676 &v->PrefetchBandwidth[k], 2677 &v->DestinationLinesToRequestVMInVBlank[k], 2678 &v->DestinationLinesToRequestRowInVBlank[k], 2679 &v->VRatioPrefetchY[k], 2680 &v->VRatioPrefetchC[k], 2681 &v->RequiredPrefetchPixDataBWLuma[k], 2682 &v->RequiredPrefetchPixDataBWChroma[k], 2683 &v->NotEnoughTimeForDynamicMetadata[k], 2684 &v->Tno_bw[k], 2685 &v->prefetch_vmrow_bw[k], 2686 &v->Tdmdl_vm[k], 2687 &v->Tdmdl[k], 2688 &v->TSetup[k], 2689 &v->VUpdateOffsetPix[k], 2690 &v->VUpdateWidthPix[k], 2691 &v->VReadyOffsetPix[k]); 2692 2693 #ifdef __DML_VBA_DEBUG__ 2694 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]); 2695 #endif 2696 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]); 2697 } 2698 2699 v->NoEnoughUrgentLatencyHiding = false; 2700 v->NoEnoughUrgentLatencyHidingPre = false; 2701 2702 for (k = 0; k 
< v->NumberOfActivePlanes; ++k) { 2703 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2704 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 2705 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2706 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k]; 2707 2708 CalculateUrgentBurstFactor( 2709 v->swath_width_luma_ub[k], 2710 v->swath_width_chroma_ub[k], 2711 v->SwathHeightY[k], 2712 v->SwathHeightC[k], 2713 v->HTotal[k] / v->PixelClock[k], 2714 v->UrgentLatency, 2715 v->CursorBufferSize, 2716 v->CursorWidth[k][0], 2717 v->CursorBPP[k][0], 2718 v->VRatio[k], 2719 v->VRatioChroma[k], 2720 v->BytePerPixelDETY[k], 2721 v->BytePerPixelDETC[k], 2722 v->DETBufferSizeY[k], 2723 v->DETBufferSizeC[k], 2724 &v->UrgBurstFactorCursor[k], 2725 &v->UrgBurstFactorLuma[k], 2726 &v->UrgBurstFactorChroma[k], 2727 &v->NoUrgentLatencyHiding[k]); 2728 2729 CalculateUrgentBurstFactor( 2730 v->swath_width_luma_ub[k], 2731 v->swath_width_chroma_ub[k], 2732 v->SwathHeightY[k], 2733 v->SwathHeightC[k], 2734 v->HTotal[k] / v->PixelClock[k], 2735 v->UrgentLatency, 2736 v->CursorBufferSize, 2737 v->CursorWidth[k][0], 2738 v->CursorBPP[k][0], 2739 v->VRatioPrefetchY[k], 2740 v->VRatioPrefetchC[k], 2741 v->BytePerPixelDETY[k], 2742 v->BytePerPixelDETC[k], 2743 v->DETBufferSizeY[k], 2744 v->DETBufferSizeC[k], 2745 &v->UrgBurstFactorCursorPre[k], 2746 &v->UrgBurstFactorLumaPre[k], 2747 &v->UrgBurstFactorChromaPre[k], 2748 &v->NoUrgentLatencyHidingPre[k]); 2749 2750 MaxTotalRDBandwidth = MaxTotalRDBandwidth 2751 + dml_max3( 2752 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2753 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2754 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2755 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k] 2756 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2757 v->DPPPerPlane[k] 2758 * (v->RequiredPrefetchPixDataBWLuma[k] * 
v->UrgBurstFactorLumaPre[k] 2759 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2760 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2761 2762 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst 2763 + dml_max3( 2764 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2765 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k] 2766 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2767 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) 2768 + v->cursor_bw_pre[k]); 2769 2770 #ifdef __DML_VBA_DEBUG__ 2771 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]); 2772 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]); 2773 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]); 2774 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]); 2775 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]); 2776 2777 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]); 2778 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]); 2779 2780 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]); 2781 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]); 2782 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]); 2783 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]); 2784 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]); 2785 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]); 2786 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]); 2787 dml_print("DML::%s: 
k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]); 2788 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]); 2789 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst); 2790 #endif 2791 2792 if (v->DestinationLinesForPrefetch[k] < 2) 2793 DestinationLineTimesForPrefetchLessThan2 = true; 2794 2795 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4) 2796 VRatioPrefetchMoreThan4 = true; 2797 2798 if (v->NoUrgentLatencyHiding[k] == true) 2799 v->NoEnoughUrgentLatencyHiding = true; 2800 2801 if (v->NoUrgentLatencyHidingPre[k] == true) 2802 v->NoEnoughUrgentLatencyHidingPre = true; 2803 } 2804 2805 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW; 2806 2807 #ifdef __DML_VBA_DEBUG__ 2808 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst); 2809 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW); 2810 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth); 2811 #endif 2812 2813 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0 2814 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2) 2815 v->PrefetchModeSupported = true; 2816 else { 2817 v->PrefetchModeSupported = false; 2818 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__); 2819 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW); 2820 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not"); 2821 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? 
"is" : "is not"); 2822 } 2823 2824 // PREVIOUS_ERROR 2825 // This error result check was done after the PrefetchModeSupported. So we will 2826 // still try to calculate flip schedule even prefetch mode not supported 2827 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2828 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) { 2829 v->PrefetchModeSupported = false; 2830 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__); 2831 } 2832 } 2833 2834 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) { 2835 v->BandwidthAvailableForImmediateFlip = v->ReturnBW; 2836 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2837 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 2838 - dml_max( 2839 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2840 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2841 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 2842 v->DPPPerPlane[k] 2843 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2844 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2845 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2846 } 2847 2848 v->TotImmediateFlipBytes = 0; 2849 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2850 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 2851 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]); 2852 } 2853 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2854 CalculateFlipSchedule( 2855 mode_lib, 2856 k, 2857 HostVMInefficiencyFactor, 2858 v->UrgentExtraLatency, 2859 v->UrgentLatency, 2860 v->PDEAndMetaPTEBytesFrame[k], 2861 v->MetaRowByte[k], 2862 v->PixelPTEBytesPerRow[k]); 2863 } 2864 2865 v->total_dcn_read_bw_with_flip = 0.0; 2866 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0; 2867 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2868 v->total_dcn_read_bw_with_flip = 
v->total_dcn_read_bw_with_flip 2869 + dml_max3( 2870 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2871 v->DPPPerPlane[k] * v->final_flip_bw[k] 2872 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k] 2873 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k] 2874 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 2875 v->DPPPerPlane[k] 2876 * (v->final_flip_bw[k] 2877 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2878 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2879 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2880 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst 2881 + dml_max3( 2882 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2883 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] 2884 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k], 2885 v->DPPPerPlane[k] 2886 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] 2887 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]); 2888 } 2889 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW; 2890 2891 v->ImmediateFlipSupported = true; 2892 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) { 2893 #ifdef __DML_VBA_DEBUG__ 2894 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip); 2895 #endif 2896 v->ImmediateFlipSupported = false; 2897 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth; 2898 } 2899 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2900 if (v->ImmediateFlipSupportedForPipe[k] == false) { 2901 #ifdef __DML_VBA_DEBUG__ 2902 dml_print("DML::%s: Pipe %0d not supporting iflip\n", 2903 __func__, k); 2904 #endif 2905 v->ImmediateFlipSupported = false; 2906 } 2907 } 2908 } else { 2909 v->ImmediateFlipSupported = false; 2910 } 2911 2912 v->PrefetchAndImmediateFlipSupported = 2913 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && 
!v->HostVMEnable 2914 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || 2915 v->ImmediateFlipSupported)) ? true : false; 2916 #ifdef __DML_VBA_DEBUG__ 2917 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported); 2918 dml_print("DML::%s: ImmediateFlipRequirement[0] %d\n", __func__, v->ImmediateFlipRequirement[0] == dm_immediate_flip_required); 2919 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported); 2920 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport); 2921 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable); 2922 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported); 2923 #endif 2924 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines); 2925 2926 v->VStartupLines = v->VStartupLines + 1; 2927 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines); 2928 ASSERT(v->PrefetchAndImmediateFlipSupported); 2929 2930 // Unbounded Request Enabled 2931 CalculateUnboundedRequestAndCompressedBufferSize( 2932 v->DETBufferSizeInKByte[0], 2933 v->ConfigReturnBufferSizeInKByte, 2934 v->UseUnboundedRequesting, 2935 v->TotalActiveDPP, 2936 NoChromaPlanes, 2937 v->MaxNumDPP, 2938 v->CompressedBufferSegmentSizeInkByte, 2939 v->Output, 2940 &v->UnboundedRequestEnabled, 2941 &v->CompressedBufferSizeInkByte); 2942 2943 //Watermarks and NB P-State/DRAM Clock Change Support 2944 { 2945 enum clock_change_support DRAMClockChangeSupport; // dummy 2946 CalculateWatermarksAndDRAMSpeedChangeSupport( 2947 mode_lib, 2948 PrefetchMode, 2949 v->DCFCLK, 2950 v->ReturnBW, 2951 v->UrgentLatency, 2952 v->UrgentExtraLatency, 2953 v->SOCCLK, 2954 v->DCFCLKDeepSleep, 2955 v->DETBufferSizeY, 2956 v->DETBufferSizeC, 2957 v->SwathHeightY, 2958 v->SwathHeightC, 2959 v->SwathWidthY, 2960 v->SwathWidthC, 2961 
v->DPPPerPlane, 2962 v->BytePerPixelDETY, 2963 v->BytePerPixelDETC, 2964 v->UnboundedRequestEnabled, 2965 v->CompressedBufferSizeInkByte, 2966 &DRAMClockChangeSupport, 2967 &v->StutterExitWatermark, 2968 &v->StutterEnterPlusExitWatermark, 2969 &v->Z8StutterExitWatermark, 2970 &v->Z8StutterEnterPlusExitWatermark); 2971 2972 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2973 if (v->WritebackEnable[k] == true) { 2974 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max( 2975 0, 2976 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark); 2977 } else { 2978 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0; 2979 } 2980 } 2981 } 2982 2983 //Display Pipeline Delivery Time in Prefetch, Groups 2984 CalculatePixelDeliveryTimes( 2985 v->NumberOfActivePlanes, 2986 v->VRatio, 2987 v->VRatioChroma, 2988 v->VRatioPrefetchY, 2989 v->VRatioPrefetchC, 2990 v->swath_width_luma_ub, 2991 v->swath_width_chroma_ub, 2992 v->DPPPerPlane, 2993 v->HRatio, 2994 v->HRatioChroma, 2995 v->PixelClock, 2996 v->PSCL_THROUGHPUT_LUMA, 2997 v->PSCL_THROUGHPUT_CHROMA, 2998 v->DPPCLK, 2999 v->BytePerPixelC, 3000 v->SourceScan, 3001 v->NumberOfCursors, 3002 v->CursorWidth, 3003 v->CursorBPP, 3004 v->BlockWidth256BytesY, 3005 v->BlockHeight256BytesY, 3006 v->BlockWidth256BytesC, 3007 v->BlockHeight256BytesC, 3008 v->DisplayPipeLineDeliveryTimeLuma, 3009 v->DisplayPipeLineDeliveryTimeChroma, 3010 v->DisplayPipeLineDeliveryTimeLumaPrefetch, 3011 v->DisplayPipeLineDeliveryTimeChromaPrefetch, 3012 v->DisplayPipeRequestDeliveryTimeLuma, 3013 v->DisplayPipeRequestDeliveryTimeChroma, 3014 v->DisplayPipeRequestDeliveryTimeLumaPrefetch, 3015 v->DisplayPipeRequestDeliveryTimeChromaPrefetch, 3016 v->CursorRequestDeliveryTime, 3017 v->CursorRequestDeliveryTimePrefetch); 3018 3019 CalculateMetaAndPTETimes( 3020 v->NumberOfActivePlanes, 3021 v->GPUVMEnable, 3022 v->MetaChunkSize, 3023 v->MinMetaChunkSizeBytes, 3024 v->HTotal, 3025 v->VRatio, 3026 v->VRatioChroma, 3027 
v->DestinationLinesToRequestRowInVBlank, 3028 v->DestinationLinesToRequestRowInImmediateFlip, 3029 v->DCCEnable, 3030 v->PixelClock, 3031 v->BytePerPixelY, 3032 v->BytePerPixelC, 3033 v->SourceScan, 3034 v->dpte_row_height, 3035 v->dpte_row_height_chroma, 3036 v->meta_row_width, 3037 v->meta_row_width_chroma, 3038 v->meta_row_height, 3039 v->meta_row_height_chroma, 3040 v->meta_req_width, 3041 v->meta_req_width_chroma, 3042 v->meta_req_height, 3043 v->meta_req_height_chroma, 3044 v->dpte_group_bytes, 3045 v->PTERequestSizeY, 3046 v->PTERequestSizeC, 3047 v->PixelPTEReqWidthY, 3048 v->PixelPTEReqHeightY, 3049 v->PixelPTEReqWidthC, 3050 v->PixelPTEReqHeightC, 3051 v->dpte_row_width_luma_ub, 3052 v->dpte_row_width_chroma_ub, 3053 v->DST_Y_PER_PTE_ROW_NOM_L, 3054 v->DST_Y_PER_PTE_ROW_NOM_C, 3055 v->DST_Y_PER_META_ROW_NOM_L, 3056 v->DST_Y_PER_META_ROW_NOM_C, 3057 v->TimePerMetaChunkNominal, 3058 v->TimePerChromaMetaChunkNominal, 3059 v->TimePerMetaChunkVBlank, 3060 v->TimePerChromaMetaChunkVBlank, 3061 v->TimePerMetaChunkFlip, 3062 v->TimePerChromaMetaChunkFlip, 3063 v->time_per_pte_group_nom_luma, 3064 v->time_per_pte_group_vblank_luma, 3065 v->time_per_pte_group_flip_luma, 3066 v->time_per_pte_group_nom_chroma, 3067 v->time_per_pte_group_vblank_chroma, 3068 v->time_per_pte_group_flip_chroma); 3069 3070 CalculateVMGroupAndRequestTimes( 3071 v->NumberOfActivePlanes, 3072 v->GPUVMEnable, 3073 v->GPUVMMaxPageTableLevels, 3074 v->HTotal, 3075 v->BytePerPixelC, 3076 v->DestinationLinesToRequestVMInVBlank, 3077 v->DestinationLinesToRequestVMInImmediateFlip, 3078 v->DCCEnable, 3079 v->PixelClock, 3080 v->dpte_row_width_luma_ub, 3081 v->dpte_row_width_chroma_ub, 3082 v->vm_group_bytes, 3083 v->dpde0_bytes_per_frame_ub_l, 3084 v->dpde0_bytes_per_frame_ub_c, 3085 v->meta_pte_bytes_per_frame_ub_l, 3086 v->meta_pte_bytes_per_frame_ub_c, 3087 v->TimePerVMGroupVBlank, 3088 v->TimePerVMGroupFlip, 3089 v->TimePerVMRequestVBlank, 3090 v->TimePerVMRequestFlip); 3091 3092 // Min 
TTUVBlank 3093 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3094 if (PrefetchMode == 0) { 3095 v->AllowDRAMClockChangeDuringVBlank[k] = true; 3096 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3097 v->MinTTUVBlank[k] = dml_max( 3098 v->DRAMClockChangeWatermark, 3099 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark)); 3100 } else if (PrefetchMode == 1) { 3101 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3102 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3103 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark); 3104 } else { 3105 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3106 v->AllowDRAMSelfRefreshDuringVBlank[k] = false; 3107 v->MinTTUVBlank[k] = v->UrgentWatermark; 3108 } 3109 if (!v->DynamicMetadataEnable[k]) 3110 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k]; 3111 } 3112 3113 // DCC Configuration 3114 v->ActiveDPPs = 0; 3115 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3116 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown, 3117 v->SourcePixelFormat[k], 3118 v->SurfaceWidthY[k], 3119 v->SurfaceWidthC[k], 3120 v->SurfaceHeightY[k], 3121 v->SurfaceHeightC[k], 3122 v->DETBufferSizeInKByte[k] * 1024, 3123 v->BlockHeight256BytesY[k], 3124 v->BlockHeight256BytesC[k], 3125 v->SurfaceTiling[k], 3126 v->BytePerPixelY[k], 3127 v->BytePerPixelC[k], 3128 v->BytePerPixelDETY[k], 3129 v->BytePerPixelDETC[k], 3130 v->SourceScan[k], 3131 &v->DCCYMaxUncompressedBlock[k], 3132 &v->DCCCMaxUncompressedBlock[k], 3133 &v->DCCYMaxCompressedBlock[k], 3134 &v->DCCCMaxCompressedBlock[k], 3135 &v->DCCYIndependentBlock[k], 3136 &v->DCCCIndependentBlock[k]); 3137 } 3138 3139 // VStartup Adjustment 3140 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3141 bool isInterlaceTiming; 3142 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k]; 3143 #ifdef __DML_VBA_DEBUG__ 3144 dml_print("DML::%s: k=%d, 
MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]); 3145 #endif 3146 3147 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin; 3148 3149 #ifdef __DML_VBA_DEBUG__ 3150 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin); 3151 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 3152 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3153 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]); 3154 #endif 3155 3156 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin; 3157 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) { 3158 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin; 3159 } 3160 3161 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP); 3162 3163 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k]) 3164 - v->VFrontPorch[k]) 3165 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0)) 3166 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0; 3167 3168 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]); 3169 3170 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k]) 3171 <= (isInterlaceTiming ? 
3172 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) : 3173 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) { 3174 v->VREADY_AT_OR_AFTER_VSYNC[k] = true; 3175 } else { 3176 v->VREADY_AT_OR_AFTER_VSYNC[k] = false; 3177 } 3178 #ifdef __DML_VBA_DEBUG__ 3179 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]); 3180 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]); 3181 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]); 3182 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]); 3183 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]); 3184 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]); 3185 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]); 3186 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]); 3187 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3188 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]); 3189 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]); 3190 #endif 3191 } 3192 3193 { 3194 //Maximum Bandwidth Used 3195 double TotalWRBandwidth = 0; 3196 double MaxPerPlaneVActiveWRBandwidth = 0; 3197 double WRBandwidth = 0; 3198 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3199 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) { 3200 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3201 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4; 3202 } else if (v->WritebackEnable[k] == true) { 3203 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3204 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8; 3205 } 3206 
TotalWRBandwidth = TotalWRBandwidth + WRBandwidth; 3207 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth); 3208 } 3209 3210 v->TotalDataReadBandwidth = 0; 3211 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3212 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]; 3213 } 3214 } 3215 // Stutter Efficiency 3216 CalculateStutterEfficiency( 3217 mode_lib, 3218 v->CompressedBufferSizeInkByte, 3219 v->UnboundedRequestEnabled, 3220 v->ConfigReturnBufferSizeInKByte, 3221 v->MetaFIFOSizeInKEntries, 3222 v->ZeroSizeBufferEntries, 3223 v->NumberOfActivePlanes, 3224 v->ROBBufferSizeInKByte, 3225 v->TotalDataReadBandwidth, 3226 v->DCFCLK, 3227 v->ReturnBW, 3228 v->COMPBUF_RESERVED_SPACE_64B, 3229 v->COMPBUF_RESERVED_SPACE_ZS, 3230 v->SRExitTime, 3231 v->SRExitZ8Time, 3232 v->SynchronizedVBlank, 3233 v->StutterEnterPlusExitWatermark, 3234 v->Z8StutterEnterPlusExitWatermark, 3235 v->ProgressiveToInterlaceUnitInOPP, 3236 v->Interlace, 3237 v->MinTTUVBlank, 3238 v->DPPPerPlane, 3239 v->DETBufferSizeY, 3240 v->BytePerPixelY, 3241 v->BytePerPixelDETY, 3242 v->SwathWidthY, 3243 v->SwathHeightY, 3244 v->SwathHeightC, 3245 v->DCCRateLuma, 3246 v->DCCRateChroma, 3247 v->DCCFractionOfZeroSizeRequestsLuma, 3248 v->DCCFractionOfZeroSizeRequestsChroma, 3249 v->HTotal, 3250 v->VTotal, 3251 v->PixelClock, 3252 v->VRatio, 3253 v->SourceScan, 3254 v->BlockHeight256BytesY, 3255 v->BlockWidth256BytesY, 3256 v->BlockHeight256BytesC, 3257 v->BlockWidth256BytesC, 3258 v->DCCYMaxUncompressedBlock, 3259 v->DCCCMaxUncompressedBlock, 3260 v->VActive, 3261 v->DCCEnable, 3262 v->WritebackEnable, 3263 v->ReadBandwidthPlaneLuma, 3264 v->ReadBandwidthPlaneChroma, 3265 v->meta_row_bw, 3266 v->dpte_row_bw, 3267 &v->StutterEfficiencyNotIncludingVBlank, 3268 &v->StutterEfficiency, 3269 &v->NumberOfStutterBurstsPerFrame, 3270 &v->Z8StutterEfficiencyNotIncludingVBlank, 3271 &v->Z8StutterEfficiency, 3272 
&v->Z8NumberOfStutterBurstsPerFrame, 3273 &v->StutterPeriod); 3274 } 3275
/*
 * DisplayPipeConfiguration - run the per-plane byte-per-pixel helper and the
 * swath/DET configuration helper for the current mode.
 *
 * @mode_lib: display mode library instance; every input read here comes from
 *            mode_lib->vba (aliased as v) or from mode_lib->project.
 *
 * Step 1: for every active plane k (k < v->NumberOfActivePlanes) call
 * dml30_CalculateBytePerPixelAnd256BBlockSizes() with the plane's
 * SourcePixelFormat and SurfaceTiling, passing the addresses of element k of
 * the local arrays BytePerPix{Y,C}, BytePerPixDET{Y,C} and
 * Read256BytesBlock{Height,Width}{Y,C}.
 *
 * Step 2: call CalculateSwathAndDETConfiguration() once, handing it those
 * local arrays together with the viewport, surface, ODM/blending and ratio
 * arrays held in v.  The first argument is the constant false; the third is
 * true only when mode_lib->project == DML_PROJECT_DCN315 and
 * v->DETSizeOverride[0] is non-zero.
 *
 * NOTE(review): the callee prototypes are not visible in this part of the
 * file.  Presumably v->SwathHeightY/C and v->DETBufferSizeY/C receive the
 * results that are kept, while dummy1..dummy7 and dummysinglestring absorb
 * outputs this caller does not need - confirm against the prototype.
 *
 * Returns nothing.
 */
3276 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) 3277 { 3278 struct vba_vars_st *v = &mode_lib->vba; 3279 // Display Pipe Configuration 3280 double BytePerPixDETY[DC__NUM_DPP__MAX]; 3281 double BytePerPixDETC[DC__NUM_DPP__MAX]; 3282 int BytePerPixY[DC__NUM_DPP__MAX]; 3283 int BytePerPixC[DC__NUM_DPP__MAX]; 3284 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX]; 3285 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX]; 3286 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX]; 3287 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX]; 3288 double dummy1[DC__NUM_DPP__MAX]; 3289 double dummy2[DC__NUM_DPP__MAX]; 3290 double dummy3[DC__NUM_DPP__MAX]; 3291 double dummy4[DC__NUM_DPP__MAX]; 3292 int dummy5[DC__NUM_DPP__MAX]; 3293 int dummy6[DC__NUM_DPP__MAX]; 3294 bool dummy7[DC__NUM_DPP__MAX]; 3295 bool dummysinglestring; 3296 3297 unsigned int k; 3298 3299 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3300 3301 dml30_CalculateBytePerPixelAnd256BBlockSizes( 3302 v->SourcePixelFormat[k], 3303 v->SurfaceTiling[k], 3304 &BytePerPixY[k], 3305 &BytePerPixC[k], 3306 &BytePerPixDETY[k], 3307 &BytePerPixDETC[k], 3308 &Read256BytesBlockHeightY[k], 3309 &Read256BytesBlockHeightC[k], 3310 &Read256BytesBlockWidthY[k], 3311 &Read256BytesBlockWidthC[k]); 3312 } 3313 3314 CalculateSwathAndDETConfiguration( 3315 false, 3316 v->NumberOfActivePlanes, 3317 mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0], 3318 v->DETBufferSizeInKByte, 3319 dummy1, 3320 dummy2, 3321 v->SourceScan, 3322 v->SourcePixelFormat, 3323 v->SurfaceTiling, 3324 v->ViewportWidth, 3325 v->ViewportHeight, 3326 v->SurfaceWidthY, 3327 v->SurfaceWidthC, 3328 v->SurfaceHeightY, 3329 v->SurfaceHeightC, 3330 Read256BytesBlockHeightY, 3331 Read256BytesBlockHeightC, 3332 Read256BytesBlockWidthY, 3333 Read256BytesBlockWidthC, 3334 v->ODMCombineEnabled, 3335 v->BlendingAndTiming, 3336 BytePerPixY, 3337 BytePerPixC, 3338
BytePerPixDETY, 3339 BytePerPixDETC, 3340 v->HActive, 3341 v->HRatio, 3342 v->HRatioChroma, 3343 v->DPPPerPlane, 3344 dummy5, 3345 dummy6, 3346 dummy3, 3347 dummy4, 3348 v->SwathHeightY, 3349 v->SwathHeightC, 3350 v->DETBufferSizeY, 3351 v->DETBufferSizeC, 3352 dummy7, 3353 &dummysinglestring); 3354 } 3355
/*
 * CalculateTWait - choose the wait time handed to the prefetch schedule.
 *
 * @PrefetchMode:           selects which latencies are taken into account
 * @DRAMClockChangeLatency: only read when PrefetchMode == 0
 * @UrgentLatency:          considered in every mode
 * @SREnterPlusExitTime:    read when PrefetchMode is 0 or 1
 *
 * Returns (taking dml_max(), defined outside this section, to yield the
 * larger of its two arguments):
 *   PrefetchMode == 0: max(DRAMClockChangeLatency + UrgentLatency,
 *                          SREnterPlusExitTime, UrgentLatency)
 *   PrefetchMode == 1: max(SREnterPlusExitTime, UrgentLatency)
 *   otherwise:         UrgentLatency
 * The result is in the same unit as the latency arguments.
 *
 * The same three-way split on PrefetchMode appears in the "Min TTUVBlank"
 * code earlier in this file, where mode 0 allows both DRAM clock change and
 * self-refresh during VBlank, mode 1 allows self-refresh only, and any other
 * mode allows neither.  The caller earlier in this file stores the result in
 * TWait and passes it to CalculatePrefetchSchedule().
 */
3356 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime) 3357 { 3358 if (PrefetchMode == 0) { 3359 return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency)); 3360 } else if (PrefetchMode == 1) { 3361 return dml_max(SREnterPlusExitTime, UrgentLatency); 3362 } else { 3363 return UrgentLatency; 3364 } 3365 } 3366
/*
 * dml31_CalculateWriteBackDISPCLK - largest of three DISPCLK candidates
 * computed from the writeback scaler settings.
 *
 * Each candidate is PixelClock multiplied by a ratio built from tap, width
 * and buffer-size counts (so the result is presumably in PixelClock's unit):
 *   DISPCLK_H  = PixelClock * ceil(WritebackHTaps / 8) / WritebackHRatio
 *   DISPCLK_V  = PixelClock
 *                * (WritebackVTaps * ceil(WritebackDestinationWidth / 6) + 8)
 *                / HTotal
 *   DISPCLK_HB = PixelClock * WritebackVTaps
 *                * (WritebackDestinationWidth * WritebackVTaps
 *                   - WritebackLineBufferSize / 57)
 *                / 6 / WritebackSourceWidth
 * Here ceil(x) stands for dml_ceil(x, 1); that helper is defined elsewhere
 * and presumably rounds up to a whole number.
 *
 * WritebackPixelFormat and WritebackVRatio are part of the signature but are
 * not read by the body.  The function is not static, so it is visible to
 * other translation units.
 *
 * No argument is range-checked: WritebackHRatio, HTotal and
 * WritebackSourceWidth are used as divisors in floating-point expressions, so
 * callers are expected to pass non-zero values.
 *
 * NOTE(review): the constants 8, 6 and 57 are not explained in this section;
 * presumably hardware-specific figures - confirm with the HW documentation.
 *
 * Returns dml_max3() of the three candidates.
 */
3367 double dml31_CalculateWriteBackDISPCLK( 3368 enum source_format_class WritebackPixelFormat, 3369 double PixelClock, 3370 double WritebackHRatio, 3371 double WritebackVRatio, 3372 unsigned int WritebackHTaps, 3373 unsigned int WritebackVTaps, 3374 long WritebackSourceWidth, 3375 long WritebackDestinationWidth, 3376 unsigned int HTotal, 3377 unsigned int WritebackLineBufferSize) 3378 { 3379 double DISPCLK_H, DISPCLK_V, DISPCLK_HB; 3380 3381 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; 3382 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; 3383 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; 3384 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB); 3385 } 3386
/*
 * CalculateWriteBackDelay - writeback delay derived from the vertical scaler
 * geometry.
 *
 * WritebackPixelFormat and WritebackHRatio are accepted but not read.  The
 * other arguments feed the computation in the body:
 *   WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2
 *   Line_length    = max(WritebackDestinationWidth,
 *                        ceil(WritebackDestinationWidth / 6) * WritebackVTaps)
 *   lines          = WritebackDestinationHeight - 1
 *                    - ceil((WritebackSourceHeight - WritebackVInit)
 *                           / WritebackVRatio)
 * with ceil(x) standing for dml_ceil(x, 1) (helper defined elsewhere,
 * presumably rounding up to a whole number).
 *
 * Returns 0 when lines is negative, otherwise
 *   lines * Line_length + (HTotal - WritebackDestinationWidth) + 80.
 * WritebackVRatio is used as a divisor and is not checked for zero.
 *
 * The local result variable declared below has the same name as the
 * function; inside the body the identifier refers to that local double.
 */
3387 static double CalculateWriteBackDelay( 3388 enum source_format_class WritebackPixelFormat, 3389 double WritebackHRatio, 3390 double WritebackVRatio, 3391 unsigned int WritebackVTaps, 3392 int WritebackDestinationWidth, 3393 int WritebackDestinationHeight, 3394 int WritebackSourceHeight, 3395 unsigned int HTotal) 3396 { 3397 double CalculateWriteBackDelay; 3398 double Line_length; 3399
double Output_lines_last_notclamped; 3400 double WritebackVInit; 3401 3402 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; 3403 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps); 3404 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1); 3405 if (Output_lines_last_notclamped < 0) { 3406 CalculateWriteBackDelay = 0; 3407 } else { 3408 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; 3409 } 3410 return CalculateWriteBackDelay; 3411 } 3412 3413 static void CalculateVupdateAndDynamicMetadataParameters( 3414 int MaxInterDCNTileRepeaters, 3415 double DPPCLK, 3416 double DISPCLK, 3417 double DCFClkDeepSleep, 3418 double PixelClock, 3419 int HTotal, 3420 int VBlank, 3421 int DynamicMetadataTransmittedBytes, 3422 int DynamicMetadataLinesBeforeActiveRequired, 3423 int InterlaceEnable, 3424 bool ProgressiveToInterlaceUnitInOPP, 3425 double *TSetup, 3426 double *Tdmbf, 3427 double *Tdmec, 3428 double *Tdmsks, 3429 int *VUpdateOffsetPix, 3430 double *VUpdateWidthPix, 3431 double *VReadyOffsetPix) 3432 { 3433 double TotalRepeaterDelayTime; 3434 3435 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK); 3436 *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0); 3437 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0); 3438 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1); 3439 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; 3440 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK; 3441 *Tdmec = HTotal / PixelClock; 3442 if (DynamicMetadataLinesBeforeActiveRequired == 0) { 3443 *Tdmsks = VBlank * HTotal / PixelClock / 2.0; 3444 } else { 3445 *Tdmsks = 
DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; 3446 } 3447 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) { 3448 *Tdmsks = *Tdmsks / 2; 3449 } 3450 #ifdef __DML_VBA_DEBUG__ 3451 dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix); 3452 dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix); 3453 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix); 3454 #endif 3455 } 3456 3457 static void CalculateRowBandwidth( 3458 bool GPUVMEnable, 3459 enum source_format_class SourcePixelFormat, 3460 double VRatio, 3461 double VRatioChroma, 3462 bool DCCEnable, 3463 double LineTime, 3464 unsigned int MetaRowByteLuma, 3465 unsigned int MetaRowByteChroma, 3466 unsigned int meta_row_height_luma, 3467 unsigned int meta_row_height_chroma, 3468 unsigned int PixelPTEBytesPerRowLuma, 3469 unsigned int PixelPTEBytesPerRowChroma, 3470 unsigned int dpte_row_height_luma, 3471 unsigned int dpte_row_height_chroma, 3472 double *meta_row_bw, 3473 double *dpte_row_bw) 3474 { 3475 if (DCCEnable != true) { 3476 *meta_row_bw = 0; 3477 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3478 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime); 3479 } else { 3480 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 3481 } 3482 3483 if (GPUVMEnable != true) { 3484 *dpte_row_bw = 0; 3485 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3486 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) 3487 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); 3488 } else { 3489 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / 
(dpte_row_height_luma * LineTime); 3490 } 3491 } 3492 3493 static void CalculateFlipSchedule( 3494 struct display_mode_lib *mode_lib, 3495 unsigned int k, 3496 double HostVMInefficiencyFactor, 3497 double UrgentExtraLatency, 3498 double UrgentLatency, 3499 double PDEAndMetaPTEBytesPerFrame, 3500 double MetaRowBytes, 3501 double DPTEBytesPerRow) 3502 { 3503 struct vba_vars_st *v = &mode_lib->vba; 3504 double min_row_time = 0.0; 3505 unsigned int HostVMDynamicLevelsTrips; 3506 double TimeForFetchingMetaPTEImmediateFlip; 3507 double TimeForFetchingRowInVBlankImmediateFlip; 3508 double ImmediateFlipBW; 3509 double LineTime = v->HTotal[k] / v->PixelClock[k]; 3510 3511 if (v->GPUVMEnable == true && v->HostVMEnable == true) { 3512 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; 3513 } else { 3514 HostVMDynamicLevelsTrips = 0; 3515 } 3516 3517 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) { 3518 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes; 3519 } 3520 3521 if (v->GPUVMEnable == true) { 3522 TimeForFetchingMetaPTEImmediateFlip = dml_max3( 3523 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, 3524 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), 3525 LineTime / 4.0); 3526 } else { 3527 TimeForFetchingMetaPTEImmediateFlip = 0; 3528 } 3529 3530 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; 3531 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { 3532 TimeForFetchingRowInVBlankImmediateFlip = dml_max3( 3533 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, 3534 UrgentLatency * (HostVMDynamicLevelsTrips + 1), 3535 LineTime / 4); 3536 } else { 3537 TimeForFetchingRowInVBlankImmediateFlip = 0; 3538 } 3539 3540 
v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; 3541 3542 if (v->GPUVMEnable == true) { 3543 v->final_flip_bw[k] = dml_max( 3544 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime), 3545 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime)); 3546 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { 3547 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime); 3548 } else { 3549 v->final_flip_bw[k] = 0; 3550 } 3551 3552 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 3553 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { 3554 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); 3555 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { 3556 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); 3557 } else { 3558 min_row_time = dml_min4( 3559 v->dpte_row_height[k] * LineTime / v->VRatio[k], 3560 v->meta_row_height[k] * LineTime / v->VRatio[k], 3561 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k], 3562 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); 3563 } 3564 } else { 3565 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { 3566 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k]; 3567 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { 3568 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k]; 3569 } else { 3570 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / 
v->VRatio[k]); 3571 } 3572 } 3573 3574 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16 3575 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { 3576 v->ImmediateFlipSupportedForPipe[k] = false; 3577 } else { 3578 v->ImmediateFlipSupportedForPipe[k] = true; 3579 } 3580 3581 #ifdef __DML_VBA_DEBUG__ 3582 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]); 3583 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]); 3584 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); 3585 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip); 3586 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); 3587 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]); 3588 #endif 3589 3590 } 3591 3592 static double TruncToValidBPP( 3593 double LinkBitRate, 3594 int Lanes, 3595 int HTotal, 3596 int HActive, 3597 double PixelClock, 3598 double DesiredBPP, 3599 bool DSCEnable, 3600 enum output_encoder_class Output, 3601 enum output_format_class Format, 3602 unsigned int DSCInputBitPerComponent, 3603 int DSCSlices, 3604 int AudioRate, 3605 int AudioLayout, 3606 enum odm_combine_mode ODMCombine) 3607 { 3608 double MaxLinkBPP; 3609 int MinDSCBPP; 3610 double MaxDSCBPP; 3611 int NonDSCBPP0; 3612 int NonDSCBPP1; 3613 int NonDSCBPP2; 3614 3615 if (Format == dm_420) { 3616 NonDSCBPP0 = 12; 3617 NonDSCBPP1 = 15; 3618 NonDSCBPP2 = 18; 3619 MinDSCBPP = 6; 3620 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; 3621 } else if (Format == dm_444) { 3622 NonDSCBPP0 = 24; 3623 NonDSCBPP1 = 30; 3624 NonDSCBPP2 = 36; 3625 MinDSCBPP = 8; 3626 
MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; 3627 } else { 3628 3629 NonDSCBPP0 = 16; 3630 NonDSCBPP1 = 20; 3631 NonDSCBPP2 = 24; 3632 3633 if (Format == dm_n422) { 3634 MinDSCBPP = 7; 3635 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; 3636 } else { 3637 MinDSCBPP = 8; 3638 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; 3639 } 3640 } 3641 3642 if (DSCEnable && Output == dm_dp) { 3643 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); 3644 } else { 3645 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; 3646 } 3647 3648 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) { 3649 MaxLinkBPP = 16; 3650 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) { 3651 MaxLinkBPP = 32; 3652 } 3653 3654 if (DesiredBPP == 0) { 3655 if (DSCEnable) { 3656 if (MaxLinkBPP < MinDSCBPP) { 3657 return BPP_INVALID; 3658 } else if (MaxLinkBPP >= MaxDSCBPP) { 3659 return MaxDSCBPP; 3660 } else { 3661 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; 3662 } 3663 } else { 3664 if (MaxLinkBPP >= NonDSCBPP2) { 3665 return NonDSCBPP2; 3666 } else if (MaxLinkBPP >= NonDSCBPP1) { 3667 return NonDSCBPP1; 3668 } else if (MaxLinkBPP >= NonDSCBPP0) { 3669 return 16.0; 3670 } else { 3671 return BPP_INVALID; 3672 } 3673 } 3674 } else { 3675 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0)) 3676 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { 3677 return BPP_INVALID; 3678 } else { 3679 return DesiredBPP; 3680 } 3681 } 3682 return BPP_INVALID; 3683 } 3684 3685 static noinline void CalculatePrefetchSchedulePerPlane( 3686 struct display_mode_lib *mode_lib, 3687 double HostVMInefficiencyFactor, 3688 int i, 3689 unsigned j, 3690 unsigned k) 3691 { 3692 struct vba_vars_st *v = &mode_lib->vba; 3693 Pipe myPipe; 3694 3695 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k]; 3696 myPipe.DISPCLK = v->RequiredDISPCLK[i][j]; 3697 myPipe.PixelClock = 
v->PixelClock[k]; 3698 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j]; 3699 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k]; 3700 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 3701 myPipe.VRatio = mode_lib->vba.VRatio[k]; 3702 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k]; 3703 3704 myPipe.SourceScan = v->SourceScan[k]; 3705 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k]; 3706 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k]; 3707 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k]; 3708 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k]; 3709 myPipe.InterlaceEnable = v->Interlace[k]; 3710 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 3711 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 3712 myPipe.HTotal = v->HTotal[k]; 3713 myPipe.DCCEnable = v->DCCEnable[k]; 3714 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 3715 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1; 3716 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 3717 myPipe.BytePerPixelY = v->BytePerPixelY[k]; 3718 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 3719 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 3720 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( 3721 mode_lib, 3722 HostVMInefficiencyFactor, 3723 &myPipe, 3724 v->DSCDelayPerState[i][k], 3725 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 3726 v->DPPCLKDelaySCL, 3727 v->DPPCLKDelaySCLLBOnly, 3728 v->DPPCLKDelayCNVCCursor, 3729 v->DISPCLKDelaySubtotal, 3730 v->SwathWidthYThisState[k] / v->HRatio[k], 3731 v->OutputFormat[k], 3732 v->MaxInterDCNTileRepeaters, 3733 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), 3734 v->MaximumVStartup[i][j][k], 3735 v->GPUVMMaxPageTableLevels, 3736 v->GPUVMEnable, 3737 v->HostVMEnable, 3738 v->HostVMMaxNonCachedPageTableLevels, 3739 v->HostVMMinPageSize, 3740 v->DynamicMetadataEnable[k], 3741 v->DynamicMetadataVMEnabled, 3742 
v->DynamicMetadataLinesBeforeActiveRequired[k], 3743 v->DynamicMetadataTransmittedBytes[k], 3744 v->UrgLatency[i], 3745 v->ExtraLatency, 3746 v->TimeCalc, 3747 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 3748 v->MetaRowBytes[i][j][k], 3749 v->DPTEBytesPerRow[i][j][k], 3750 v->PrefetchLinesY[i][j][k], 3751 v->SwathWidthYThisState[k], 3752 v->PrefillY[k], 3753 v->MaxNumSwY[k], 3754 v->PrefetchLinesC[i][j][k], 3755 v->SwathWidthCThisState[k], 3756 v->PrefillC[k], 3757 v->MaxNumSwC[k], 3758 v->swath_width_luma_ub_this_state[k], 3759 v->swath_width_chroma_ub_this_state[k], 3760 v->SwathHeightYThisState[k], 3761 v->SwathHeightCThisState[k], 3762 v->TWait, 3763 &v->DSTXAfterScaler[k], 3764 &v->DSTYAfterScaler[k], 3765 &v->LineTimesForPrefetch[k], 3766 &v->PrefetchBW[k], 3767 &v->LinesForMetaPTE[k], 3768 &v->LinesForMetaAndDPTERow[k], 3769 &v->VRatioPreY[i][j][k], 3770 &v->VRatioPreC[i][j][k], 3771 &v->RequiredPrefetchPixelDataBWLuma[i][j][k], 3772 &v->RequiredPrefetchPixelDataBWChroma[i][j][k], 3773 &v->NoTimeForDynamicMetadata[i][j][k], 3774 &v->Tno_bw[k], 3775 &v->prefetch_vmrow_bw[k], 3776 &v->dummy7[k], 3777 &v->dummy8[k], 3778 &v->dummy13[k], 3779 &v->VUpdateOffsetPix[k], 3780 &v->VUpdateWidthPix[k], 3781 &v->VReadyOffsetPix[k]); 3782 } 3783 3784 static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int DETBufferSizeInKByte[]) 3785 { 3786 int i, total_pipes = 0; 3787 for (i = 0; i < NumberOfActivePlanes; i++) 3788 total_pipes += NoOfDPPThisState[i]; 3789 DETBufferSizeInKByte[0] = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB) / 64 / total_pipes) * 64; 3790 if (DETBufferSizeInKByte[0] > DCN3_15_MAX_DET_SIZE) 3791 DETBufferSizeInKByte[0] = DCN3_15_MAX_DET_SIZE; 3792 for (i = 1; i < NumberOfActivePlanes; i++) 3793 DETBufferSizeInKByte[i] = DETBufferSizeInKByte[0]; 3794 } 3795 3796 3797 void dml31_ModeSupportAndSystemConfigurationFull(struct 
display_mode_lib *mode_lib) 3798 { 3799 struct vba_vars_st *v = &mode_lib->vba; 3800 3801 int i, j; 3802 unsigned int k, m; 3803 int ReorderingBytes; 3804 int MinPrefetchMode = 0, MaxPrefetchMode = 2; 3805 bool NoChroma = true; 3806 bool EnoughWritebackUnits = true; 3807 bool P2IWith420 = false; 3808 bool DSCOnlyIfNecessaryWithBPP = false; 3809 bool DSC422NativeNotSupported = false; 3810 double MaxTotalVActiveRDBandwidth; 3811 bool ViewportExceedsSurface = false; 3812 bool FMTBufferExceeded = false; 3813 3814 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ 3815 3816 CalculateMinAndMaxPrefetchMode( 3817 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, 3818 &MinPrefetchMode, &MaxPrefetchMode); 3819 3820 /*Scale Ratio, taps Support Check*/ 3821 3822 v->ScaleRatioAndTapsSupport = true; 3823 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3824 if (v->ScalerEnabled[k] == false 3825 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 3826 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 3827 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 3828 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0 3829 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) { 3830 v->ScaleRatioAndTapsSupport = false; 3831 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0 3832 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio 3833 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k] 3834 || v->VRatio[k] > v->vtaps[k] 3835 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 3836 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 3837 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 3838 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1 3839 || v->HTAPsChroma[k] 
> 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1) 3840 || v->HRatioChroma[k] > v->MaxHSCLRatio 3841 || v->VRatioChroma[k] > v->MaxVSCLRatio 3842 || v->HRatioChroma[k] > v->HTAPsChroma[k] 3843 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) { 3844 v->ScaleRatioAndTapsSupport = false; 3845 } 3846 } 3847 /*Source Format, Pixel Format and Scan Support Check*/ 3848 3849 v->SourceFormatPixelAndScanSupport = true; 3850 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3851 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) 3852 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t 3853 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) { 3854 v->SourceFormatPixelAndScanSupport = false; 3855 } 3856 } 3857 /*Bandwidth Support Check*/ 3858 3859 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3860 dml30_CalculateBytePerPixelAnd256BBlockSizes( 3861 v->SourcePixelFormat[k], 3862 v->SurfaceTiling[k], 3863 &v->BytePerPixelY[k], 3864 &v->BytePerPixelC[k], 3865 &v->BytePerPixelInDETY[k], 3866 &v->BytePerPixelInDETC[k], 3867 &v->Read256BlockHeightY[k], 3868 &v->Read256BlockHeightC[k], 3869 &v->Read256BlockWidthY[k], 3870 &v->Read256BlockWidthC[k]); 3871 } 3872 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3873 if (v->SourceScan[k] != dm_vert) { 3874 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k]; 3875 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k]; 3876 } else { 3877 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k]; 3878 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k]; 3879 } 3880 } 3881 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3882 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) 3883 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 3884 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) 3885 / (v->HTotal[k] / v->PixelClock[k]) * 
v->VRatio[k] / 2.0; 3886 } 3887 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3888 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) { 3889 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3890 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0; 3891 } else if (v->WritebackEnable[k] == true) { 3892 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3893 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0; 3894 } else { 3895 v->WriteBandwidth[k] = 0.0; 3896 } 3897 } 3898 3899 /*Writeback Latency support check*/ 3900 3901 v->WritebackLatencySupport = true; 3902 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3903 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) { 3904 v->WritebackLatencySupport = false; 3905 } 3906 } 3907 3908 /*Writeback Mode Support Check*/ 3909 3910 v->TotalNumberOfActiveWriteback = 0; 3911 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3912 if (v->WritebackEnable[k] == true) { 3913 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1; 3914 } 3915 } 3916 3917 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) { 3918 EnoughWritebackUnits = false; 3919 } 3920 3921 /*Writeback Scale Ratio and Taps Support Check*/ 3922 3923 v->WritebackScaleRatioAndTapsSupport = true; 3924 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3925 if (v->WritebackEnable[k] == true) { 3926 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio 3927 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio 3928 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio 3929 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps 3930 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps 3931 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k] 3932 || 
(v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) { 3933 v->WritebackScaleRatioAndTapsSupport = false; 3934 } 3935 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) { 3936 v->WritebackScaleRatioAndTapsSupport = false; 3937 } 3938 } 3939 } 3940 /*Maximum DISPCLK/DPPCLK Support check*/ 3941 3942 v->WritebackRequiredDISPCLK = 0.0; 3943 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3944 if (v->WritebackEnable[k] == true) { 3945 v->WritebackRequiredDISPCLK = dml_max( 3946 v->WritebackRequiredDISPCLK, 3947 dml31_CalculateWriteBackDISPCLK( 3948 v->WritebackPixelFormat[k], 3949 v->PixelClock[k], 3950 v->WritebackHRatio[k], 3951 v->WritebackVRatio[k], 3952 v->WritebackHTaps[k], 3953 v->WritebackVTaps[k], 3954 v->WritebackSourceWidth[k], 3955 v->WritebackDestinationWidth[k], 3956 v->HTotal[k], 3957 v->WritebackLineBufferSize)); 3958 } 3959 } 3960 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3961 if (v->HRatio[k] > 1.0) { 3962 v->PSCL_FACTOR[k] = dml_min( 3963 v->MaxDCHUBToPSCLThroughput, 3964 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0)); 3965 } else { 3966 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 3967 } 3968 if (v->BytePerPixelC[k] == 0.0) { 3969 v->PSCL_FACTOR_CHROMA[k] = 0.0; 3970 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 3971 * dml_max3( 3972 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 3973 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 3974 1.0); 3975 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 3976 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 3977 } 3978 } else { 3979 if (v->HRatioChroma[k] > 1.0) { 3980 v->PSCL_FACTOR_CHROMA[k] = dml_min( 3981 v->MaxDCHUBToPSCLThroughput, 3982 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); 3983 } else { 3984 v->PSCL_FACTOR_CHROMA[k] = 
dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 3985 } 3986 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 3987 * dml_max5( 3988 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 3989 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 3990 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), 3991 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k], 3992 1.0); 3993 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0) 3994 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 3995 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 3996 } 3997 } 3998 } 3999 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4000 int MaximumSwathWidthSupportLuma; 4001 int MaximumSwathWidthSupportChroma; 4002 4003 if (v->SurfaceTiling[k] == dm_sw_linear) { 4004 MaximumSwathWidthSupportLuma = 8192.0; 4005 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) { 4006 MaximumSwathWidthSupportLuma = 2880.0; 4007 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4008 MaximumSwathWidthSupportLuma = 3840.0; 4009 } else { 4010 MaximumSwathWidthSupportLuma = 5760.0; 4011 } 4012 4013 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) { 4014 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0; 4015 } else { 4016 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma; 4017 } 4018 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k] 4019 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0)); 4020 if (v->BytePerPixelC[k] == 0.0) { 4021 v->MaximumSwathWidthInLineBufferChroma = 0; 4022 } else { 4023 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k] 4024 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0)); 4025 } 4026 v->MaximumSwathWidthLuma[k] = 
dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma); 4027 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma); 4028 } 4029 4030 CalculateSwathAndDETConfiguration( 4031 true, 4032 v->NumberOfActivePlanes, 4033 mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0], 4034 v->DETBufferSizeInKByte, 4035 v->MaximumSwathWidthLuma, 4036 v->MaximumSwathWidthChroma, 4037 v->SourceScan, 4038 v->SourcePixelFormat, 4039 v->SurfaceTiling, 4040 v->ViewportWidth, 4041 v->ViewportHeight, 4042 v->SurfaceWidthY, 4043 v->SurfaceWidthC, 4044 v->SurfaceHeightY, 4045 v->SurfaceHeightC, 4046 v->Read256BlockHeightY, 4047 v->Read256BlockHeightC, 4048 v->Read256BlockWidthY, 4049 v->Read256BlockWidthC, 4050 v->odm_combine_dummy, 4051 v->BlendingAndTiming, 4052 v->BytePerPixelY, 4053 v->BytePerPixelC, 4054 v->BytePerPixelInDETY, 4055 v->BytePerPixelInDETC, 4056 v->HActive, 4057 v->HRatio, 4058 v->HRatioChroma, 4059 v->NoOfDPPThisState, 4060 v->swath_width_luma_ub_this_state, 4061 v->swath_width_chroma_ub_this_state, 4062 v->SwathWidthYThisState, 4063 v->SwathWidthCThisState, 4064 v->SwathHeightYThisState, 4065 v->SwathHeightCThisState, 4066 v->DETBufferSizeYThisState, 4067 v->DETBufferSizeCThisState, 4068 v->SingleDPPViewportSizeSupportPerPlane, 4069 &v->ViewportSizeSupport[0][0]); 4070 4071 for (i = 0; i < v->soc.num_states; i++) { 4072 for (j = 0; j < 2; j++) { 4073 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed); 4074 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed); 4075 v->RequiredDISPCLK[i][j] = 0.0; 4076 v->DISPCLK_DPPCLK_Support[i][j] = true; 4077 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4078 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4079 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4080 if 
((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] 4081 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4082 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4083 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] 4084 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4085 } 4086 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4087 * (1 + v->DISPCLKRampingMargin / 100.0); 4088 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] 4089 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4090 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4091 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 4092 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4093 } 4094 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4095 * (1 + v->DISPCLKRampingMargin / 100.0); 4096 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] 4097 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4098 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4099 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 4100 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4101 } 4102 4103 if (v->ODMCombinePolicy == dm_odm_combine_policy_none 4104 || !(v->Output[k] == dm_dp || 4105 v->Output[k] == dm_dp2p0 || 4106 v->Output[k] == dm_edp)) { 4107 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4108 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4109 4110 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) 4111 FMTBufferExceeded = true; 4112 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) { 4113 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4114 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4115 } else if (v->ODMCombinePolicy == 
dm_odm_combine_policy_4to1 4116 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) { 4117 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4118 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4119 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) { 4120 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4121 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4122 } else { 4123 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4124 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4125 } 4126 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH 4127 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4128 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) { 4129 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4130 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4131 } else { 4132 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4133 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4134 } 4135 } 4136 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH 4137 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4138 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) { 4139 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4140 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4141 4142 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH) 4143 FMTBufferExceeded = true; 4144 } else { 4145 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4146 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4147 } 4148 } 4149 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4150 v->MPCCombine[i][j][k] = false; 4151 v->NoOfDPP[i][j][k] = 4; 4152 v->RequiredDPPCLK[i][j][k] = 
v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4; 4153 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4154 v->MPCCombine[i][j][k] = false; 4155 v->NoOfDPP[i][j][k] = 2; 4156 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2; 4157 } else if ((v->WhenToDoMPCCombine == dm_mpc_never 4158 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4159 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) { 4160 v->MPCCombine[i][j][k] = false; 4161 v->NoOfDPP[i][j][k] = 1; 4162 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4163 } else { 4164 v->MPCCombine[i][j][k] = true; 4165 v->NoOfDPP[i][j][k] = 2; 4166 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4167 } 4168 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4169 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4170 > v->MaxDppclkRoundedDownToDFSGranularity) 4171 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4172 v->DISPCLK_DPPCLK_Support[i][j] = false; 4173 } 4174 if (mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[k] > DCN3_15_MAX_DET_SIZE && v->NoOfDPP[i][j][k] < 2) { 4175 v->MPCCombine[i][j][k] = true; 4176 v->NoOfDPP[i][j][k] = 2; 4177 } 4178 } 4179 v->TotalNumberOfActiveDPP[i][j] = 0; 4180 v->TotalNumberOfSingleDPPPlanes[i][j] = 0; 4181 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4182 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4183 if (v->NoOfDPP[i][j][k] == 1) 4184 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1; 4185 if (v->SourcePixelFormat[k] == 
dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4186 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 4187 NoChroma = false; 4188 } 4189 4190 // UPTO 4191 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never 4192 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) { 4193 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) { 4194 double BWOfNonSplitPlaneOfMaximumBandwidth; 4195 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; 4196 BWOfNonSplitPlaneOfMaximumBandwidth = 0; 4197 NumberOfNonSplitPlaneOfMaximumBandwidth = 0; 4198 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4199 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth 4200 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) { 4201 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 4202 NumberOfNonSplitPlaneOfMaximumBandwidth = k; 4203 } 4204 } 4205 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true; 4206 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; 4207 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 4208 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] 4209 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; 4210 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1; 4211 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1; 4212 } 4213 } 4214 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) { 4215 v->RequiredDISPCLK[i][j] = 0.0; 4216 v->DISPCLK_DPPCLK_Support[i][j] = true; 4217 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4218 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4219 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) { 4220 
v->MPCCombine[i][j][k] = true; 4221 v->NoOfDPP[i][j][k] = 2; 4222 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4223 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4224 } else { 4225 v->MPCCombine[i][j][k] = false; 4226 v->NoOfDPP[i][j][k] = 1; 4227 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4228 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4229 } 4230 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4231 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4232 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4233 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4234 } else { 4235 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4236 } 4237 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4238 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4239 > v->MaxDppclkRoundedDownToDFSGranularity) 4240 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4241 v->DISPCLK_DPPCLK_Support[i][j] = false; 4242 } 4243 } 4244 v->TotalNumberOfActiveDPP[i][j] = 0.0; 4245 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4246 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4247 } 4248 } 4249 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK); 4250 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) { 4251 v->DISPCLK_DPPCLK_Support[i][j] = false; 4252 } 4253 } 4254 } 4255 4256 /*Total Available Pipes Support Check*/ 4257 4258 for (i = 0; i < v->soc.num_states; i++) { 4259 for (j = 0; j < 2; j++) { 4260 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) { 4261 v->TotalAvailablePipesSupport[i][j] = true; 4262 } else { 4263 v->TotalAvailablePipesSupport[i][j] = false; 4264 } 4265 } 4266 } 4267 
/*Display IO and DSC Support Check*/ 4268 4269 v->NonsupportedDSCInputBPC = false; 4270 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4271 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0) 4272 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) { 4273 v->NonsupportedDSCInputBPC = true; 4274 } 4275 } 4276 4277 /*Number Of DSC Slices*/ 4278 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4279 if (v->BlendingAndTiming[k] == k) { 4280 if (v->PixelClockBackEnd[k] > 3200) { 4281 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0); 4282 } else if (v->PixelClockBackEnd[k] > 1360) { 4283 v->NumberOfDSCSlices[k] = 8; 4284 } else if (v->PixelClockBackEnd[k] > 680) { 4285 v->NumberOfDSCSlices[k] = 4; 4286 } else if (v->PixelClockBackEnd[k] > 340) { 4287 v->NumberOfDSCSlices[k] = 2; 4288 } else { 4289 v->NumberOfDSCSlices[k] = 1; 4290 } 4291 } else { 4292 v->NumberOfDSCSlices[k] = 0; 4293 } 4294 } 4295 4296 for (i = 0; i < v->soc.num_states; i++) { 4297 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4298 v->RequiresDSC[i][k] = false; 4299 v->RequiresFEC[i][k] = false; 4300 if (v->BlendingAndTiming[k] == k) { 4301 if (v->Output[k] == dm_hdmi) { 4302 v->RequiresDSC[i][k] = false; 4303 v->RequiresFEC[i][k] = false; 4304 v->OutputBppPerState[i][k] = TruncToValidBPP( 4305 dml_min(600.0, v->PHYCLKPerState[i]) * 10, 4306 3, 4307 v->HTotal[k], 4308 v->HActive[k], 4309 v->PixelClockBackEnd[k], 4310 v->ForcedOutputLinkBPP[k], 4311 false, 4312 v->Output[k], 4313 v->OutputFormat[k], 4314 v->DSCInputBitPerComponent[k], 4315 v->NumberOfDSCSlices[k], 4316 v->AudioSampleRate[k], 4317 v->AudioSampleLayout[k], 4318 v->ODMCombineEnablePerState[i][k]); 4319 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) { 4320 if (v->DSCEnable[k] == true) { 4321 v->RequiresDSC[i][k] = true; 4322 v->LinkDSCEnable = true; 4323 if (v->Output[k] == dm_dp || 
v->Output[k] == dm_dp2p0) { 4324 v->RequiresFEC[i][k] = true; 4325 } else { 4326 v->RequiresFEC[i][k] = false; 4327 } 4328 } else { 4329 v->RequiresDSC[i][k] = false; 4330 v->LinkDSCEnable = false; 4331 if (v->Output[k] == dm_dp2p0) { 4332 v->RequiresFEC[i][k] = true; 4333 } else { 4334 v->RequiresFEC[i][k] = false; 4335 } 4336 } 4337 if (v->Output[k] == dm_dp2p0) { 4338 v->Outbpp = BPP_INVALID; 4339 if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) && 4340 v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) { 4341 v->Outbpp = TruncToValidBPP( 4342 (1.0 - v->Downspreading / 100.0) * 10000, 4343 v->OutputLinkDPLanes[k], 4344 v->HTotal[k], 4345 v->HActive[k], 4346 v->PixelClockBackEnd[k], 4347 v->ForcedOutputLinkBPP[k], 4348 v->LinkDSCEnable, 4349 v->Output[k], 4350 v->OutputFormat[k], 4351 v->DSCInputBitPerComponent[k], 4352 v->NumberOfDSCSlices[k], 4353 v->AudioSampleRate[k], 4354 v->AudioSampleLayout[k], 4355 v->ODMCombineEnablePerState[i][k]); 4356 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 && 4357 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) { 4358 v->RequiresDSC[i][k] = true; 4359 v->LinkDSCEnable = true; 4360 v->Outbpp = TruncToValidBPP( 4361 (1.0 - v->Downspreading / 100.0) * 10000, 4362 v->OutputLinkDPLanes[k], 4363 v->HTotal[k], 4364 v->HActive[k], 4365 v->PixelClockBackEnd[k], 4366 v->ForcedOutputLinkBPP[k], 4367 v->LinkDSCEnable, 4368 v->Output[k], 4369 v->OutputFormat[k], 4370 v->DSCInputBitPerComponent[k], 4371 v->NumberOfDSCSlices[k], 4372 v->AudioSampleRate[k], 4373 v->AudioSampleLayout[k], 4374 v->ODMCombineEnablePerState[i][k]); 4375 } 4376 v->OutputBppPerState[i][k] = v->Outbpp; 4377 // TODO: Need some other way to handle this nonsense 4378 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10" 4379 } 4380 if (v->Outbpp == BPP_INVALID && 4381 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) && 4382 
v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) { 4383 v->Outbpp = TruncToValidBPP( 4384 (1.0 - v->Downspreading / 100.0) * 13500, 4385 v->OutputLinkDPLanes[k], 4386 v->HTotal[k], 4387 v->HActive[k], 4388 v->PixelClockBackEnd[k], 4389 v->ForcedOutputLinkBPP[k], 4390 v->LinkDSCEnable, 4391 v->Output[k], 4392 v->OutputFormat[k], 4393 v->DSCInputBitPerComponent[k], 4394 v->NumberOfDSCSlices[k], 4395 v->AudioSampleRate[k], 4396 v->AudioSampleLayout[k], 4397 v->ODMCombineEnablePerState[i][k]); 4398 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 && 4399 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) { 4400 v->RequiresDSC[i][k] = true; 4401 v->LinkDSCEnable = true; 4402 v->Outbpp = TruncToValidBPP( 4403 (1.0 - v->Downspreading / 100.0) * 13500, 4404 v->OutputLinkDPLanes[k], 4405 v->HTotal[k], 4406 v->HActive[k], 4407 v->PixelClockBackEnd[k], 4408 v->ForcedOutputLinkBPP[k], 4409 v->LinkDSCEnable, 4410 v->Output[k], 4411 v->OutputFormat[k], 4412 v->DSCInputBitPerComponent[k], 4413 v->NumberOfDSCSlices[k], 4414 v->AudioSampleRate[k], 4415 v->AudioSampleLayout[k], 4416 v->ODMCombineEnablePerState[i][k]); 4417 } 4418 v->OutputBppPerState[i][k] = v->Outbpp; 4419 // TODO: Need some other way to handle this nonsense 4420 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5" 4421 } 4422 if (v->Outbpp == BPP_INVALID && 4423 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) && 4424 v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) { 4425 v->Outbpp = TruncToValidBPP( 4426 (1.0 - v->Downspreading / 100.0) * 20000, 4427 v->OutputLinkDPLanes[k], 4428 v->HTotal[k], 4429 v->HActive[k], 4430 v->PixelClockBackEnd[k], 4431 v->ForcedOutputLinkBPP[k], 4432 v->LinkDSCEnable, 4433 v->Output[k], 4434 v->OutputFormat[k], 4435 v->DSCInputBitPerComponent[k], 4436 v->NumberOfDSCSlices[k], 4437 v->AudioSampleRate[k], 4438 v->AudioSampleLayout[k], 4439 v->ODMCombineEnablePerState[i][k]); 4440 if (v->Outbpp == BPP_INVALID && 
v->DSCEnable[k] == true && 4441 v->ForcedOutputLinkBPP[k] == 0) { 4442 v->RequiresDSC[i][k] = true; 4443 v->LinkDSCEnable = true; 4444 v->Outbpp = TruncToValidBPP( 4445 (1.0 - v->Downspreading / 100.0) * 20000, 4446 v->OutputLinkDPLanes[k], 4447 v->HTotal[k], 4448 v->HActive[k], 4449 v->PixelClockBackEnd[k], 4450 v->ForcedOutputLinkBPP[k], 4451 v->LinkDSCEnable, 4452 v->Output[k], 4453 v->OutputFormat[k], 4454 v->DSCInputBitPerComponent[k], 4455 v->NumberOfDSCSlices[k], 4456 v->AudioSampleRate[k], 4457 v->AudioSampleLayout[k], 4458 v->ODMCombineEnablePerState[i][k]); 4459 } 4460 v->OutputBppPerState[i][k] = v->Outbpp; 4461 // TODO: Need some other way to handle this nonsense 4462 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20" 4463 } 4464 } else { 4465 v->Outbpp = BPP_INVALID; 4466 if (v->PHYCLKPerState[i] >= 270.0) { 4467 v->Outbpp = TruncToValidBPP( 4468 (1.0 - v->Downspreading / 100.0) * 2700, 4469 v->OutputLinkDPLanes[k], 4470 v->HTotal[k], 4471 v->HActive[k], 4472 v->PixelClockBackEnd[k], 4473 v->ForcedOutputLinkBPP[k], 4474 v->LinkDSCEnable, 4475 v->Output[k], 4476 v->OutputFormat[k], 4477 v->DSCInputBitPerComponent[k], 4478 v->NumberOfDSCSlices[k], 4479 v->AudioSampleRate[k], 4480 v->AudioSampleLayout[k], 4481 v->ODMCombineEnablePerState[i][k]); 4482 v->OutputBppPerState[i][k] = v->Outbpp; 4483 // TODO: Need some other way to handle this nonsense 4484 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR" 4485 } 4486 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) { 4487 v->Outbpp = TruncToValidBPP( 4488 (1.0 - v->Downspreading / 100.0) * 5400, 4489 v->OutputLinkDPLanes[k], 4490 v->HTotal[k], 4491 v->HActive[k], 4492 v->PixelClockBackEnd[k], 4493 v->ForcedOutputLinkBPP[k], 4494 v->LinkDSCEnable, 4495 v->Output[k], 4496 v->OutputFormat[k], 4497 v->DSCInputBitPerComponent[k], 4498 v->NumberOfDSCSlices[k], 4499 v->AudioSampleRate[k], 4500 v->AudioSampleLayout[k], 4501 v->ODMCombineEnablePerState[i][k]); 4502 
v->OutputBppPerState[i][k] = v->Outbpp; 4503 // TODO: Need some other way to handle this nonsense 4504 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2" 4505 } 4506 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) { 4507 v->Outbpp = TruncToValidBPP( 4508 (1.0 - v->Downspreading / 100.0) * 8100, 4509 v->OutputLinkDPLanes[k], 4510 v->HTotal[k], 4511 v->HActive[k], 4512 v->PixelClockBackEnd[k], 4513 v->ForcedOutputLinkBPP[k], 4514 v->LinkDSCEnable, 4515 v->Output[k], 4516 v->OutputFormat[k], 4517 v->DSCInputBitPerComponent[k], 4518 v->NumberOfDSCSlices[k], 4519 v->AudioSampleRate[k], 4520 v->AudioSampleLayout[k], 4521 v->ODMCombineEnablePerState[i][k]); 4522 v->OutputBppPerState[i][k] = v->Outbpp; 4523 // TODO: Need some other way to handle this nonsense 4524 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3" 4525 } 4526 } 4527 } 4528 } else { 4529 v->OutputBppPerState[i][k] = 0; 4530 } 4531 } 4532 } 4533 4534 for (i = 0; i < v->soc.num_states; i++) { 4535 v->LinkCapacitySupport[i] = true; 4536 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4537 if (v->BlendingAndTiming[k] == k 4538 && (v->Output[k] == dm_dp || 4539 v->Output[k] == dm_edp || 4540 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) { 4541 v->LinkCapacitySupport[i] = false; 4542 } 4543 } 4544 } 4545 4546 // UPTO 2172 4547 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4548 if (v->BlendingAndTiming[k] == k 4549 && (v->Output[k] == dm_dp || 4550 v->Output[k] == dm_edp || 4551 v->Output[k] == dm_hdmi)) { 4552 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) { 4553 P2IWith420 = true; 4554 } 4555 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422 4556 && !v->DSC422NativeSupport) { 4557 DSC422NativeNotSupported = true; 4558 } 4559 } 4560 } 4561 4562 for (i = 0; i < v->soc.num_states; ++i) { 4563 v->ODMCombine4To1SupportCheckOK[i] = true; 4564 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4565 
if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 4566 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp 4567 || v->Output[k] == dm_hdmi)) { 4568 v->ODMCombine4To1SupportCheckOK[i] = false; 4569 } 4570 } 4571 } 4572 4573 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */ 4574 4575 for (i = 0; i < v->soc.num_states; i++) { 4576 v->NotEnoughDSCUnits[i] = false; 4577 v->TotalDSCUnitsRequired = 0.0; 4578 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4579 if (v->RequiresDSC[i][k] == true) { 4580 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4581 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0; 4582 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4583 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0; 4584 } else { 4585 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0; 4586 } 4587 } 4588 } 4589 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) { 4590 v->NotEnoughDSCUnits[i] = true; 4591 } 4592 } 4593 /*DSC Delay per state*/ 4594 4595 for (i = 0; i < v->soc.num_states; i++) { 4596 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4597 if (v->OutputBppPerState[i][k] == BPP_INVALID) { 4598 v->BPP = 0.0; 4599 } else { 4600 v->BPP = v->OutputBppPerState[i][k]; 4601 } 4602 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) { 4603 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { 4604 v->DSCDelayPerState[i][k] = dscceComputeDelay( 4605 v->DSCInputBitPerComponent[k], 4606 v->BPP, 4607 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4608 v->NumberOfDSCSlices[k], 4609 v->OutputFormat[k], 4610 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 4611 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4612 v->DSCDelayPerState[i][k] = 2.0 4613 * (dscceComputeDelay( 4614 v->DSCInputBitPerComponent[k], 
4615 v->BPP, 4616 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4617 v->NumberOfDSCSlices[k] / 2, 4618 v->OutputFormat[k], 4619 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4620 } else { 4621 v->DSCDelayPerState[i][k] = 4.0 4622 * (dscceComputeDelay( 4623 v->DSCInputBitPerComponent[k], 4624 v->BPP, 4625 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4626 v->NumberOfDSCSlices[k] / 4, 4627 v->OutputFormat[k], 4628 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4629 } 4630 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 4631 } else { 4632 v->DSCDelayPerState[i][k] = 0.0; 4633 } 4634 } 4635 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4636 for (m = 0; m < v->NumberOfActivePlanes; m++) { 4637 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) { 4638 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m]; 4639 } 4640 } 4641 } 4642 } 4643 4644 //Calculate Swath, DET Configuration, DCFCLKDeepSleep 4645 // 4646 for (i = 0; i < v->soc.num_states; ++i) { 4647 for (j = 0; j <= 1; ++j) { 4648 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4649 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k]; 4650 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 4651 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k]; 4652 } 4653 4654 if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315 && !v->DETSizeOverride[0]) 4655 PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, v->DETBufferSizeInKByte); 4656 CalculateSwathAndDETConfiguration( 4657 false, 4658 v->NumberOfActivePlanes, 4659 mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0], 4660 v->DETBufferSizeInKByte, 4661 v->MaximumSwathWidthLuma, 4662 v->MaximumSwathWidthChroma, 4663 v->SourceScan, 4664 v->SourcePixelFormat, 4665 v->SurfaceTiling, 4666 v->ViewportWidth, 4667 
v->ViewportHeight, 4668 v->SurfaceWidthY, 4669 v->SurfaceWidthC, 4670 v->SurfaceHeightY, 4671 v->SurfaceHeightC, 4672 v->Read256BlockHeightY, 4673 v->Read256BlockHeightC, 4674 v->Read256BlockWidthY, 4675 v->Read256BlockWidthC, 4676 v->ODMCombineEnableThisState, 4677 v->BlendingAndTiming, 4678 v->BytePerPixelY, 4679 v->BytePerPixelC, 4680 v->BytePerPixelInDETY, 4681 v->BytePerPixelInDETC, 4682 v->HActive, 4683 v->HRatio, 4684 v->HRatioChroma, 4685 v->NoOfDPPThisState, 4686 v->swath_width_luma_ub_this_state, 4687 v->swath_width_chroma_ub_this_state, 4688 v->SwathWidthYThisState, 4689 v->SwathWidthCThisState, 4690 v->SwathHeightYThisState, 4691 v->SwathHeightCThisState, 4692 v->DETBufferSizeYThisState, 4693 v->DETBufferSizeCThisState, 4694 v->dummystring, 4695 &v->ViewportSizeSupport[i][j]); 4696 4697 CalculateDCFCLKDeepSleep( 4698 mode_lib, 4699 v->NumberOfActivePlanes, 4700 v->BytePerPixelY, 4701 v->BytePerPixelC, 4702 v->VRatio, 4703 v->VRatioChroma, 4704 v->SwathWidthYThisState, 4705 v->SwathWidthCThisState, 4706 v->NoOfDPPThisState, 4707 v->HRatio, 4708 v->HRatioChroma, 4709 v->PixelClock, 4710 v->PSCL_FACTOR, 4711 v->PSCL_FACTOR_CHROMA, 4712 v->RequiredDPPCLKThisState, 4713 v->ReadBandwidthLuma, 4714 v->ReadBandwidthChroma, 4715 v->ReturnBusWidth, 4716 &v->ProjectedDCFCLKDeepSleep[i][j]); 4717 4718 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4719 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k]; 4720 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k]; 4721 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k]; 4722 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k]; 4723 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k]; 4724 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k]; 4725 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k]; 4726 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k]; 4727 } 4728 } 4729 } 
4730 4731 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4732 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 4733 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 4734 } 4735 4736 for (i = 0; i < v->soc.num_states; i++) { 4737 for (j = 0; j < 2; j++) { 4738 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX]; 4739 4740 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4741 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 4742 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 4743 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 4744 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 4745 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 4746 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 4747 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 4748 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 4749 } 4750 4751 v->TotalNumberOfDCCActiveDPP[i][j] = 0; 4752 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4753 if (v->DCCEnable[k] == true) { 4754 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4755 } 4756 } 4757 4758 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4759 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4760 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4761 4762 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) 4763 && v->SourceScan[k] != dm_vert) { 4764 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) 4765 / 2; 4766 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; 4767 } else { 4768 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 4769 v->PTEBufferSizeInRequestsForChroma = 
v->PTEBufferSizeInRequestsChroma; 4770 } 4771 4772 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( 4773 mode_lib, 4774 v->DCCEnable[k], 4775 v->Read256BlockHeightC[k], 4776 v->Read256BlockWidthC[k], 4777 v->SourcePixelFormat[k], 4778 v->SurfaceTiling[k], 4779 v->BytePerPixelC[k], 4780 v->SourceScan[k], 4781 v->SwathWidthCThisState[k], 4782 v->ViewportHeightChroma[k], 4783 v->GPUVMEnable, 4784 v->HostVMEnable, 4785 v->HostVMMaxNonCachedPageTableLevels, 4786 v->GPUVMMinPageSize, 4787 v->HostVMMinPageSize, 4788 v->PTEBufferSizeInRequestsForChroma, 4789 v->PitchC[k], 4790 0.0, 4791 &v->MacroTileWidthC[k], 4792 &v->MetaRowBytesC, 4793 &v->DPTEBytesPerRowC, 4794 &v->PTEBufferSizeNotExceededC[i][j][k], 4795 &v->dummyinteger7, 4796 &v->dpte_row_height_chroma[k], 4797 &v->dummyinteger28, 4798 &v->dummyinteger26, 4799 &v->dummyinteger23, 4800 &v->meta_row_height_chroma[k], 4801 &v->dummyinteger8, 4802 &v->dummyinteger9, 4803 &v->dummyinteger19, 4804 &v->dummyinteger20, 4805 &v->dummyinteger17, 4806 &v->dummyinteger10, 4807 &v->dummyinteger11); 4808 4809 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines( 4810 mode_lib, 4811 v->VRatioChroma[k], 4812 v->VTAPsChroma[k], 4813 v->Interlace[k], 4814 v->ProgressiveToInterlaceUnitInOPP, 4815 v->SwathHeightCThisState[k], 4816 v->ViewportYStartC[k], 4817 &v->PrefillC[k], 4818 &v->MaxNumSwC[k]); 4819 } else { 4820 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 4821 v->PTEBufferSizeInRequestsForChroma = 0; 4822 v->PDEAndMetaPTEBytesPerFrameC = 0.0; 4823 v->MetaRowBytesC = 0.0; 4824 v->DPTEBytesPerRowC = 0.0; 4825 v->PrefetchLinesC[i][j][k] = 0.0; 4826 v->PTEBufferSizeNotExceededC[i][j][k] = true; 4827 } 4828 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( 4829 mode_lib, 4830 v->DCCEnable[k], 4831 v->Read256BlockHeightY[k], 4832 v->Read256BlockWidthY[k], 4833 v->SourcePixelFormat[k], 4834 v->SurfaceTiling[k], 4835 v->BytePerPixelY[k], 4836 v->SourceScan[k], 
4837 v->SwathWidthYThisState[k], 4838 v->ViewportHeight[k], 4839 v->GPUVMEnable, 4840 v->HostVMEnable, 4841 v->HostVMMaxNonCachedPageTableLevels, 4842 v->GPUVMMinPageSize, 4843 v->HostVMMinPageSize, 4844 v->PTEBufferSizeInRequestsForLuma, 4845 v->PitchY[k], 4846 v->DCCMetaPitchY[k], 4847 &v->MacroTileWidthY[k], 4848 &v->MetaRowBytesY, 4849 &v->DPTEBytesPerRowY, 4850 &v->PTEBufferSizeNotExceededY[i][j][k], 4851 &v->dummyinteger7, 4852 &v->dpte_row_height[k], 4853 &v->dummyinteger29, 4854 &v->dummyinteger27, 4855 &v->dummyinteger24, 4856 &v->meta_row_height[k], 4857 &v->dummyinteger25, 4858 &v->dpte_group_bytes[k], 4859 &v->dummyinteger21, 4860 &v->dummyinteger22, 4861 &v->dummyinteger18, 4862 &v->dummyinteger5, 4863 &v->dummyinteger6); 4864 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines( 4865 mode_lib, 4866 v->VRatio[k], 4867 v->vtaps[k], 4868 v->Interlace[k], 4869 v->ProgressiveToInterlaceUnitInOPP, 4870 v->SwathHeightYThisState[k], 4871 v->ViewportYStartY[k], 4872 &v->PrefillY[k], 4873 &v->MaxNumSwY[k]); 4874 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC; 4875 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC; 4876 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC; 4877 4878 CalculateRowBandwidth( 4879 v->GPUVMEnable, 4880 v->SourcePixelFormat[k], 4881 v->VRatio[k], 4882 v->VRatioChroma[k], 4883 v->DCCEnable[k], 4884 v->HTotal[k] / v->PixelClock[k], 4885 v->MetaRowBytesY, 4886 v->MetaRowBytesC, 4887 v->meta_row_height[k], 4888 v->meta_row_height_chroma[k], 4889 v->DPTEBytesPerRowY, 4890 v->DPTEBytesPerRowC, 4891 v->dpte_row_height[k], 4892 v->dpte_row_height_chroma[k], 4893 &v->meta_row_bandwidth[i][j][k], 4894 &v->dpte_row_bandwidth[i][j][k]); 4895 } 4896 /*DCCMetaBufferSizeSupport(i, j) = True 4897 For k = 0 To NumberOfActivePlanes - 1 4898 If MetaRowBytes(i, j, k) > 24064 Then 4899 DCCMetaBufferSizeSupport(i, j) = False 4900 End If 4901 Next k*/ 4902 
v->DCCMetaBufferSizeSupport[i][j] = true; 4903 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4904 if (v->MetaRowBytes[i][j][k] > 24064) 4905 v->DCCMetaBufferSizeSupport[i][j] = false; 4906 } 4907 v->UrgLatency[i] = CalculateUrgentLatency( 4908 v->UrgentLatencyPixelDataOnly, 4909 v->UrgentLatencyPixelMixedWithVMData, 4910 v->UrgentLatencyVMDataOnly, 4911 v->DoUrgentLatencyAdjustment, 4912 v->UrgentLatencyAdjustmentFabricClockComponent, 4913 v->UrgentLatencyAdjustmentFabricClockReference, 4914 v->FabricClockPerState[i]); 4915 4916 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4917 CalculateUrgentBurstFactor( 4918 v->swath_width_luma_ub_this_state[k], 4919 v->swath_width_chroma_ub_this_state[k], 4920 v->SwathHeightYThisState[k], 4921 v->SwathHeightCThisState[k], 4922 v->HTotal[k] / v->PixelClock[k], 4923 v->UrgLatency[i], 4924 v->CursorBufferSize, 4925 v->CursorWidth[k][0], 4926 v->CursorBPP[k][0], 4927 v->VRatio[k], 4928 v->VRatioChroma[k], 4929 v->BytePerPixelInDETY[k], 4930 v->BytePerPixelInDETC[k], 4931 v->DETBufferSizeYThisState[k], 4932 v->DETBufferSizeCThisState[k], 4933 &v->UrgentBurstFactorCursor[k], 4934 &v->UrgentBurstFactorLuma[k], 4935 &v->UrgentBurstFactorChroma[k], 4936 &NotUrgentLatencyHiding[k]); 4937 } 4938 4939 v->NotEnoughUrgentLatencyHidingA[i][j] = false; 4940 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4941 if (NotUrgentLatencyHiding[k]) { 4942 v->NotEnoughUrgentLatencyHidingA[i][j] = true; 4943 } 4944 } 4945 4946 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4947 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] 4948 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k]; 4949 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k]; 4950 } 4951 4952 v->TotalVActivePixelBandwidth[i][j] = 0; 4953 v->TotalVActiveCursorBandwidth[i][j] = 0; 4954 v->TotalMetaRowBandwidth[i][j] = 0; 4955 v->TotalDPTERowBandwidth[i][j] = 0; 4956 for (k = 0; k < v->NumberOfActivePlanes; 
++k) { 4957 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k]; 4958 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k]; 4959 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k]; 4960 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k]; 4961 } 4962 } 4963 } 4964 4965 //Calculate Return BW 4966 for (i = 0; i < v->soc.num_states; ++i) { 4967 for (j = 0; j <= 1; ++j) { 4968 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4969 if (v->BlendingAndTiming[k] == k) { 4970 if (v->WritebackEnable[k] == true) { 4971 v->WritebackDelayTime[k] = v->WritebackLatency 4972 + CalculateWriteBackDelay( 4973 v->WritebackPixelFormat[k], 4974 v->WritebackHRatio[k], 4975 v->WritebackVRatio[k], 4976 v->WritebackVTaps[k], 4977 v->WritebackDestinationWidth[k], 4978 v->WritebackDestinationHeight[k], 4979 v->WritebackSourceHeight[k], 4980 v->HTotal[k]) / v->RequiredDISPCLK[i][j]; 4981 } else { 4982 v->WritebackDelayTime[k] = 0.0; 4983 } 4984 for (m = 0; m < v->NumberOfActivePlanes; m++) { 4985 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) { 4986 v->WritebackDelayTime[k] = dml_max( 4987 v->WritebackDelayTime[k], 4988 v->WritebackLatency 4989 + CalculateWriteBackDelay( 4990 v->WritebackPixelFormat[m], 4991 v->WritebackHRatio[m], 4992 v->WritebackVRatio[m], 4993 v->WritebackVTaps[m], 4994 v->WritebackDestinationWidth[m], 4995 v->WritebackDestinationHeight[m], 4996 v->WritebackSourceHeight[m], 4997 v->HTotal[m]) / v->RequiredDISPCLK[i][j]); 4998 } 4999 } 5000 } 5001 } 5002 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5003 for (m = 0; m < v->NumberOfActivePlanes; m++) { 5004 if (v->BlendingAndTiming[k] == m) { 5005 v->WritebackDelayTime[k] = v->WritebackDelayTime[m]; 5006 } 5007 } 5008 } 5009 v->MaxMaxVStartup[i][j] = 0; 5010 
for (k = 0; k < v->NumberOfActivePlanes; k++) { 5011 v->MaximumVStartup[i][j][k] = 5012 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ? 5013 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) : 5014 v->VTotal[k] - v->VActive[k] 5015 - dml_max( 5016 1.0, 5017 dml_ceil( 5018 1.0 * v->WritebackDelayTime[k] 5019 / (v->HTotal[k] 5020 / v->PixelClock[k]), 5021 1.0)); 5022 if (v->MaximumVStartup[i][j][k] > 1023) 5023 v->MaximumVStartup[i][j][k] = 1023; 5024 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]); 5025 } 5026 } 5027 } 5028 5029 ReorderingBytes = v->NumberOfChannels 5030 * dml_max3( 5031 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 5032 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 5033 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 5034 5035 for (i = 0; i < v->soc.num_states; ++i) { 5036 for (j = 0; j <= 1; ++j) { 5037 v->DCFCLKState[i][j] = v->DCFCLKPerState[i]; 5038 } 5039 } 5040 5041 if (v->UseMinimumRequiredDCFCLK == true) 5042 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes); 5043 5044 for (i = 0; i < v->soc.num_states; ++i) { 5045 for (j = 0; j <= 1; ++j) { 5046 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 5047 v->ReturnBusWidth * v->DCFCLKState[i][j], 5048 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn); 5049 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth; 5050 double PixelDataOnlyReturnBWPerState = dml_min( 5051 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5052 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 5053 double PixelMixedWithVMDataReturnBWPerState = dml_min( 5054 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5055 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 
100.0); 5056 5057 if (v->HostVMEnable != true) { 5058 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState; 5059 } else { 5060 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState; 5061 } 5062 } 5063 } 5064 5065 //Re-ordering Buffer Support Check 5066 for (i = 0; i < v->soc.num_states; ++i) { 5067 for (j = 0; j <= 1; ++j) { 5068 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j] 5069 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) { 5070 v->ROBSupport[i][j] = true; 5071 } else { 5072 v->ROBSupport[i][j] = false; 5073 } 5074 } 5075 } 5076 5077 //Vertical Active BW support check 5078 5079 MaxTotalVActiveRDBandwidth = 0; 5080 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5081 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 5082 } 5083 5084 for (i = 0; i < v->soc.num_states; ++i) { 5085 for (j = 0; j <= 1; ++j) { 5086 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min( 5087 dml_min( 5088 v->ReturnBusWidth * v->DCFCLKState[i][j], 5089 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 5090 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100, 5091 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 5092 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100); 5093 5094 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) { 5095 v->TotalVerticalActiveBandwidthSupport[i][j] = true; 5096 } else { 5097 v->TotalVerticalActiveBandwidthSupport[i][j] = false; 5098 } 5099 } 5100 } 5101 5102 v->UrgentLatency = CalculateUrgentLatency( 5103 v->UrgentLatencyPixelDataOnly, 5104 v->UrgentLatencyPixelMixedWithVMData, 5105 v->UrgentLatencyVMDataOnly, 5106 v->DoUrgentLatencyAdjustment, 5107 v->UrgentLatencyAdjustmentFabricClockComponent, 5108 
v->UrgentLatencyAdjustmentFabricClockReference, 5109 v->FabricClock); 5110 //Prefetch Check 5111 for (i = 0; i < v->soc.num_states; ++i) { 5112 for (j = 0; j <= 1; ++j) { 5113 double VMDataOnlyReturnBWPerState; 5114 double HostVMInefficiencyFactor = 1; 5115 int NextPrefetchModeState = MinPrefetchMode; 5116 bool UnboundedRequestEnabledThisState = false; 5117 int CompressedBufferSizeInkByteThisState = 0; 5118 double dummy; 5119 5120 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j]; 5121 5122 v->BandwidthWithoutPrefetchSupported[i][j] = true; 5123 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] 5124 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) { 5125 v->BandwidthWithoutPrefetchSupported[i][j] = false; 5126 } 5127 5128 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5129 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 5130 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 5131 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 5132 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 5133 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 5134 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 5135 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 5136 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 5137 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 5138 } 5139 5140 VMDataOnlyReturnBWPerState = dml_min( 5141 dml_min( 5142 v->ReturnBusWidth * v->DCFCLKState[i][j], 5143 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 5144 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5145 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 5146 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 5147 if (v->GPUVMEnable && 
v->HostVMEnable) 5148 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState; 5149 5150 v->ExtraLatency = CalculateExtraLatency( 5151 v->RoundTripPingLatencyCycles, 5152 ReorderingBytes, 5153 v->DCFCLKState[i][j], 5154 v->TotalNumberOfActiveDPP[i][j], 5155 v->PixelChunkSizeInKByte, 5156 v->TotalNumberOfDCCActiveDPP[i][j], 5157 v->MetaChunkSize, 5158 v->ReturnBWPerState[i][j], 5159 v->GPUVMEnable, 5160 v->HostVMEnable, 5161 v->NumberOfActivePlanes, 5162 v->NoOfDPPThisState, 5163 v->dpte_group_bytes, 5164 HostVMInefficiencyFactor, 5165 v->HostVMMinPageSize, 5166 v->HostVMMaxNonCachedPageTableLevels); 5167 5168 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5169 do { 5170 v->PrefetchModePerState[i][j] = NextPrefetchModeState; 5171 v->MaxVStartup = v->NextMaxVStartup; 5172 5173 v->TWait = CalculateTWait( 5174 v->PrefetchModePerState[i][j], 5175 v->DRAMClockChangeLatency, 5176 v->UrgLatency[i], 5177 v->SREnterPlusExitTime); 5178 5179 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5180 CalculatePrefetchSchedulePerPlane(mode_lib, 5181 HostVMInefficiencyFactor, 5182 i, j, k); 5183 } 5184 5185 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5186 CalculateUrgentBurstFactor( 5187 v->swath_width_luma_ub_this_state[k], 5188 v->swath_width_chroma_ub_this_state[k], 5189 v->SwathHeightYThisState[k], 5190 v->SwathHeightCThisState[k], 5191 v->HTotal[k] / v->PixelClock[k], 5192 v->UrgLatency[i], 5193 v->CursorBufferSize, 5194 v->CursorWidth[k][0], 5195 v->CursorBPP[k][0], 5196 v->VRatioPreY[i][j][k], 5197 v->VRatioPreC[i][j][k], 5198 v->BytePerPixelInDETY[k], 5199 v->BytePerPixelInDETC[k], 5200 v->DETBufferSizeYThisState[k], 5201 v->DETBufferSizeCThisState[k], 5202 &v->UrgentBurstFactorCursorPre[k], 5203 &v->UrgentBurstFactorLumaPre[k], 5204 &v->UrgentBurstFactorChromaPre[k], 5205 &v->NotUrgentLatencyHidingPre[k]); 5206 } 5207 5208 v->MaximumReadBandwidthWithPrefetch = 0.0; 5209 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5210 v->cursor_bw_pre[k] = 
v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 5211 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k]; 5212 5213 v->MaximumReadBandwidthWithPrefetch = 5214 v->MaximumReadBandwidthWithPrefetch 5215 + dml_max3( 5216 v->VActivePixelBandwidth[i][j][k] 5217 + v->VActiveCursorBandwidth[i][j][k] 5218 + v->NoOfDPP[i][j][k] 5219 * (v->meta_row_bandwidth[i][j][k] 5220 + v->dpte_row_bandwidth[i][j][k]), 5221 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5222 v->NoOfDPP[i][j][k] 5223 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5224 * v->UrgentBurstFactorLumaPre[k] 5225 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5226 * v->UrgentBurstFactorChromaPre[k]) 5227 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5228 } 5229 5230 v->NotEnoughUrgentLatencyHidingPre = false; 5231 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5232 if (v->NotUrgentLatencyHidingPre[k] == true) { 5233 v->NotEnoughUrgentLatencyHidingPre = true; 5234 } 5235 } 5236 5237 v->PrefetchSupported[i][j] = true; 5238 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j] 5239 || v->NotEnoughUrgentLatencyHidingPre == 1) { 5240 v->PrefetchSupported[i][j] = false; 5241 } 5242 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5243 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0 5244 || v->NoTimeForPrefetch[i][j][k] == true) { 5245 v->PrefetchSupported[i][j] = false; 5246 } 5247 } 5248 5249 v->DynamicMetadataSupported[i][j] = true; 5250 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5251 if (v->NoTimeForDynamicMetadata[i][j][k] == true) { 5252 v->DynamicMetadataSupported[i][j] = false; 5253 } 5254 } 5255 5256 v->VRatioInPrefetchSupported[i][j] = true; 5257 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5258 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) { 5259 
v->VRatioInPrefetchSupported[i][j] = false; 5260 } 5261 } 5262 v->AnyLinesForVMOrRowTooLarge = false; 5263 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5264 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) { 5265 v->AnyLinesForVMOrRowTooLarge = true; 5266 } 5267 } 5268 5269 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5270 5271 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) { 5272 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j]; 5273 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5274 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 5275 - dml_max( 5276 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k], 5277 v->NoOfDPP[i][j][k] 5278 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5279 * v->UrgentBurstFactorLumaPre[k] 5280 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5281 * v->UrgentBurstFactorChromaPre[k]) 5282 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5283 } 5284 v->TotImmediateFlipBytes = 0.0; 5285 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5286 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 5287 + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k] 5288 + v->DPTEBytesPerRow[i][j][k]); 5289 } 5290 5291 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5292 CalculateFlipSchedule( 5293 mode_lib, 5294 k, 5295 HostVMInefficiencyFactor, 5296 v->ExtraLatency, 5297 v->UrgLatency[i], 5298 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 5299 v->MetaRowBytes[i][j][k], 5300 v->DPTEBytesPerRow[i][j][k]); 5301 } 5302 v->total_dcn_read_bw_with_flip = 0.0; 5303 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5304 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip 5305 + dml_max3( 5306 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5307 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k] 5308 + v->VActiveCursorBandwidth[i][j][k], 5309 
v->NoOfDPP[i][j][k] 5310 * (v->final_flip_bw[k] 5311 + v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5312 * v->UrgentBurstFactorLumaPre[k] 5313 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5314 * v->UrgentBurstFactorChromaPre[k]) 5315 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5316 } 5317 v->ImmediateFlipSupportedForState[i][j] = true; 5318 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) { 5319 v->ImmediateFlipSupportedForState[i][j] = false; 5320 } 5321 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5322 if (v->ImmediateFlipSupportedForPipe[k] == false) { 5323 v->ImmediateFlipSupportedForState[i][j] = false; 5324 } 5325 } 5326 } else { 5327 v->ImmediateFlipSupportedForState[i][j] = false; 5328 } 5329 5330 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) { 5331 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5332 NextPrefetchModeState = NextPrefetchModeState + 1; 5333 } else { 5334 v->NextMaxVStartup = v->NextMaxVStartup - 1; 5335 } 5336 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5337 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5338 && ((v->HostVMEnable == false && 5339 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5340 || v->ImmediateFlipSupportedForState[i][j] == true)) 5341 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode))); 5342 5343 CalculateUnboundedRequestAndCompressedBufferSize( 5344 v->DETBufferSizeInKByte[0], 5345 v->ConfigReturnBufferSizeInKByte, 5346 v->UseUnboundedRequesting, 5347 v->TotalNumberOfActiveDPP[i][j], 5348 NoChroma, 5349 v->MaxNumDPP, 5350 v->CompressedBufferSegmentSizeInkByte, 5351 v->Output, 5352 &UnboundedRequestEnabledThisState, 5353 &CompressedBufferSizeInkByteThisState); 5354 5355 CalculateWatermarksAndDRAMSpeedChangeSupport( 5356 mode_lib, 5357 v->PrefetchModePerState[i][j], 5358 v->DCFCLKState[i][j], 5359 
v->ReturnBWPerState[i][j], 5360 v->UrgLatency[i], 5361 v->ExtraLatency, 5362 v->SOCCLKPerState[i], 5363 v->ProjectedDCFCLKDeepSleep[i][j], 5364 v->DETBufferSizeYThisState, 5365 v->DETBufferSizeCThisState, 5366 v->SwathHeightYThisState, 5367 v->SwathHeightCThisState, 5368 v->SwathWidthYThisState, 5369 v->SwathWidthCThisState, 5370 v->NoOfDPPThisState, 5371 v->BytePerPixelInDETY, 5372 v->BytePerPixelInDETC, 5373 UnboundedRequestEnabledThisState, 5374 CompressedBufferSizeInkByteThisState, 5375 &v->DRAMClockChangeSupport[i][j], 5376 &dummy, 5377 &dummy, 5378 &dummy, 5379 &dummy); 5380 } 5381 } 5382 5383 /*PTE Buffer Size Check*/ 5384 for (i = 0; i < v->soc.num_states; i++) { 5385 for (j = 0; j < 2; j++) { 5386 v->PTEBufferSizeNotExceeded[i][j] = true; 5387 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5388 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) { 5389 v->PTEBufferSizeNotExceeded[i][j] = false; 5390 } 5391 } 5392 } 5393 } 5394 5395 /*Cursor Support Check*/ 5396 v->CursorSupport = true; 5397 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5398 if (v->CursorWidth[k][0] > 0.0) { 5399 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) { 5400 v->CursorSupport = false; 5401 } 5402 } 5403 } 5404 5405 /*Valid Pitch Check*/ 5406 v->PitchSupport = true; 5407 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5408 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]); 5409 if (v->DCCEnable[k] == true) { 5410 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]); 5411 } else { 5412 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k]; 5413 } 5414 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 5415 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe 5416 && v->SourcePixelFormat[k] != dm_mono_8) { 5417 
v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]); 5418 if (v->DCCEnable[k] == true) { 5419 v->AlignedDCCMetaPitchC[k] = dml_ceil( 5420 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 5421 64.0 * v->Read256BlockWidthC[k]); 5422 } else { 5423 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5424 } 5425 } else { 5426 v->AlignedCPitch[k] = v->PitchC[k]; 5427 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5428 } 5429 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] 5430 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) { 5431 v->PitchSupport = false; 5432 } 5433 } 5434 5435 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5436 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) { 5437 ViewportExceedsSurface = true; 5438 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 5439 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 5440 && v->SourcePixelFormat[k] != dm_rgbe) { 5441 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] 5442 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) { 5443 ViewportExceedsSurface = true; 5444 } 5445 } 5446 } 5447 } 5448 5449 /*Mode Support, Voltage State and SOC Configuration*/ 5450 for (i = v->soc.num_states - 1; i >= 0; i--) { 5451 for (j = 0; j < 2; j++) { 5452 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true 5453 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP 5454 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false 5455 && v->DTBCLKRequiredMoreThanSupported[i] == false 5456 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true 5457 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true 
5458 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true 5459 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false 5460 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true 5461 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5462 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false 5463 && ((v->HostVMEnable == false 5464 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5465 || v->ImmediateFlipSupportedForState[i][j] == true) 5466 && FMTBufferExceeded == false) { 5467 v->ModeSupport[i][j] = true; 5468 } else { 5469 v->ModeSupport[i][j] = false; 5470 #ifdef __DML_VBA_DEBUG__ 5471 if (v->ScaleRatioAndTapsSupport == false) 5472 dml_print("DML SUPPORT: ScaleRatioAndTapsSupport failed"); 5473 if (v->SourceFormatPixelAndScanSupport == false) 5474 dml_print("DML SUPPORT: SourceFormatPixelAndScanSupport failed"); 5475 if (v->ViewportSizeSupport[i][j] == false) 5476 dml_print("DML SUPPORT: ViewportSizeSupport failed"); 5477 if (v->LinkCapacitySupport[i] == false) 5478 dml_print("DML SUPPORT: LinkCapacitySupport failed"); 5479 if (v->ODMCombine4To1SupportCheckOK[i] == false) 5480 dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); 5481 if (v->NotEnoughDSCUnits[i] == true) 5482 dml_print("DML SUPPORT: NotEnoughDSCUnits"); 5483 if (v->DTBCLKRequiredMoreThanSupported[i] == true) 5484 dml_print("DML SUPPORT: DTBCLKRequiredMoreThanSupported"); 5485 if (v->ROBSupport[i][j] == false) 5486 dml_print("DML SUPPORT: ROBSupport failed"); 5487 if (v->DISPCLK_DPPCLK_Support[i][j] == false) 5488 dml_print("DML SUPPORT: DISPCLK_DPPCLK_Support failed"); 5489 if (v->TotalAvailablePipesSupport[i][j] == false) 5490 dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); 5491 if (EnoughWritebackUnits == false) 5492 dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); 5493 if 
(v->WritebackLatencySupport == false) 5494 dml_print("DML SUPPORT: WritebackLatencySupport failed"); 5495 if (v->WritebackScaleRatioAndTapsSupport == false) 5496 dml_print("DML SUPPORT: DSC422NativeNotSupported "); 5497 if (v->CursorSupport == false) 5498 dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); 5499 if (v->PitchSupport == false) 5500 dml_print("DML SUPPORT: PitchSupport failed"); 5501 if (ViewportExceedsSurface == true) 5502 dml_print("DML SUPPORT: ViewportExceedsSurface failed"); 5503 if (v->PrefetchSupported[i][j] == false) 5504 dml_print("DML SUPPORT: PrefetchSupported failed"); 5505 if (v->DynamicMetadataSupported[i][j] == false) 5506 dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); 5507 if (v->TotalVerticalActiveBandwidthSupport[i][j] == false) 5508 dml_print("DML SUPPORT: TotalVerticalActiveBandwidthSupport failed"); 5509 if (v->VRatioInPrefetchSupported[i][j] == false) 5510 dml_print("DML SUPPORT: VRatioInPrefetchSupported failed"); 5511 if (v->PTEBufferSizeNotExceeded[i][j] == false) 5512 dml_print("DML SUPPORT: PTEBufferSizeNotExceeded failed"); 5513 if (v->NonsupportedDSCInputBPC == true) 5514 dml_print("DML SUPPORT: NonsupportedDSCInputBPC failed"); 5515 if (!((v->HostVMEnable == false 5516 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5517 || v->ImmediateFlipSupportedForState[i][j] == true)) 5518 dml_print("DML SUPPORT: ImmediateFlipRequirement failed"); 5519 if (FMTBufferExceeded == true) 5520 dml_print("DML SUPPORT: FMTBufferExceeded failed"); 5521 #endif 5522 } 5523 } 5524 } 5525 5526 { 5527 unsigned int MaximumMPCCombine = 0; 5528 for (i = v->soc.num_states; i >= 0; i--) { 5529 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) { 5530 v->VoltageLevel = i; 5531 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true; 5532 if (v->ModeSupport[i][0] == true) { 5533 MaximumMPCCombine = 0; 5534 } else { 5535 MaximumMPCCombine = 1; 5536 } 5537 
} 5538 } 5539 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine]; 5540 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { 5541 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k]; 5542 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k]; 5543 } 5544 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine]; 5545 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel]; 5546 v->FabricClock = v->FabricClockPerState[v->VoltageLevel]; 5547 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel]; 5548 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine]; 5549 v->maxMpcComb = MaximumMPCCombine; 5550 } 5551 } 5552 5553 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 5554 struct display_mode_lib *mode_lib, 5555 unsigned int PrefetchMode, 5556 double DCFCLK, 5557 double ReturnBW, 5558 double UrgentLatency, 5559 double ExtraLatency, 5560 double SOCCLK, 5561 double DCFCLKDeepSleep, 5562 unsigned int DETBufferSizeY[], 5563 unsigned int DETBufferSizeC[], 5564 unsigned int SwathHeightY[], 5565 unsigned int SwathHeightC[], 5566 double SwathWidthY[], 5567 double SwathWidthC[], 5568 unsigned int DPPPerPlane[], 5569 double BytePerPixelDETY[], 5570 double BytePerPixelDETC[], 5571 bool UnboundedRequestEnabled, 5572 int unsigned CompressedBufferSizeInkByte, 5573 enum clock_change_support *DRAMClockChangeSupport, 5574 double *StutterExitWatermark, 5575 double *StutterEnterPlusExitWatermark, 5576 double *Z8StutterExitWatermark, 5577 double *Z8StutterEnterPlusExitWatermark) 5578 { 5579 struct vba_vars_st *v = &mode_lib->vba; 5580 double EffectiveLBLatencyHidingY; 5581 double EffectiveLBLatencyHidingC; 5582 double LinesInDETY[DC__NUM_DPP__MAX]; 5583 double LinesInDETC; 5584 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 5585 unsigned int LinesInDETCRoundedDownToSwath; 5586 double FullDETBufferingTimeY; 5587 double FullDETBufferingTimeC; 5588 double 
ActiveDRAMClockChangeLatencyMarginY; 5589 double ActiveDRAMClockChangeLatencyMarginC; 5590 double WritebackDRAMClockChangeLatencyMargin; 5591 double PlaneWithMinActiveDRAMClockChangeMargin; 5592 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank; 5593 double WritebackDRAMClockChangeLatencyHiding; 5594 double TotalPixelBW = 0.0; 5595 int k, j; 5596 5597 v->UrgentWatermark = UrgentLatency + ExtraLatency; 5598 5599 #ifdef __DML_VBA_DEBUG__ 5600 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); 5601 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency); 5602 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark); 5603 #endif 5604 5605 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark; 5606 5607 #ifdef __DML_VBA_DEBUG__ 5608 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency); 5609 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark); 5610 #endif 5611 5612 v->TotalActiveWriteback = 0; 5613 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5614 if (v->WritebackEnable[k] == true) { 5615 v->TotalActiveWriteback = v->TotalActiveWriteback + 1; 5616 } 5617 } 5618 5619 if (v->TotalActiveWriteback <= 1) { 5620 v->WritebackUrgentWatermark = v->WritebackLatency; 5621 } else { 5622 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5623 } 5624 5625 if (v->TotalActiveWriteback <= 1) { 5626 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency; 5627 } else { 5628 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5629 } 5630 5631 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5632 TotalPixelBW = TotalPixelBW 5633 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) 5634 / 
(v->HTotal[k] / v->PixelClock[k]); 5635 } 5636 5637 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5638 double EffectiveDETBufferSizeY = DETBufferSizeY[k]; 5639 5640 v->LBLatencyHidingSourceLinesY = dml_min( 5641 (double) v->MaxLineBufferLines, 5642 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); 5643 5644 v->LBLatencyHidingSourceLinesC = dml_min( 5645 (double) v->MaxLineBufferLines, 5646 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); 5647 5648 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); 5649 5650 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); 5651 5652 if (UnboundedRequestEnabled) { 5653 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY 5654 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; 5655 } 5656 5657 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 5658 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); 5659 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; 5660 if (BytePerPixelDETC[k] > 0) { 5661 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 5662 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); 5663 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k]; 5664 } else { 5665 LinesInDETC = 0; 5666 FullDETBufferingTimeC = 999999; 5667 } 5668 5669 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY 5670 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / 
v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; 5671 5672 if (v->NumberOfActivePlanes > 1) { 5673 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY 5674 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k]; 5675 } 5676 5677 if (BytePerPixelDETC[k] > 0) { 5678 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC 5679 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; 5680 5681 if (v->NumberOfActivePlanes > 1) { 5682 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC 5683 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k]; 5684 } 5685 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); 5686 } else { 5687 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; 5688 } 5689 5690 if (v->WritebackEnable[k] == true) { 5691 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024 5692 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); 5693 if (v->WritebackPixelFormat[k] == dm_444_64) { 5694 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2; 5695 } 5696 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark; 5697 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin); 5698 } 5699 } 5700 5701 v->MinActiveDRAMClockChangeMargin = 999999; 5702 PlaneWithMinActiveDRAMClockChangeMargin = 0; 5703 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5704 if 
(v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) { 5705 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k]; 5706 if (v->BlendingAndTiming[k] == k) { 5707 PlaneWithMinActiveDRAMClockChangeMargin = k; 5708 } else { 5709 for (j = 0; j < v->NumberOfActivePlanes; ++j) { 5710 if (v->BlendingAndTiming[k] == j) { 5711 PlaneWithMinActiveDRAMClockChangeMargin = j; 5712 } 5713 } 5714 } 5715 } 5716 } 5717 5718 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ; 5719 5720 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; 5721 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5722 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) 5723 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { 5724 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k]; 5725 } 5726 } 5727 5728 v->TotalNumberOfActiveOTG = 0; 5729 5730 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5731 if (v->BlendingAndTiming[k] == k) { 5732 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1; 5733 } 5734 } 5735 5736 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) { 5737 *DRAMClockChangeSupport = dm_dram_clock_change_vactive; 5738 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 5739 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) { 5740 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 5741 } else { 5742 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 5743 } 5744 5745 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; 5746 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep); 5747 *Z8StutterExitWatermark = v->SRExitZ8Time 
+ ExtraLatency + 10 / DCFCLKDeepSleep; 5748 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5749 5750 #ifdef __DML_VBA_DEBUG__ 5751 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark); 5752 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark); 5753 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark); 5754 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark); 5755 #endif 5756 } 5757 5758 static void CalculateDCFCLKDeepSleep( 5759 struct display_mode_lib *mode_lib, 5760 unsigned int NumberOfActivePlanes, 5761 int BytePerPixelY[], 5762 int BytePerPixelC[], 5763 double VRatio[], 5764 double VRatioChroma[], 5765 double SwathWidthY[], 5766 double SwathWidthC[], 5767 unsigned int DPPPerPlane[], 5768 double HRatio[], 5769 double HRatioChroma[], 5770 double PixelClock[], 5771 double PSCL_THROUGHPUT[], 5772 double PSCL_THROUGHPUT_CHROMA[], 5773 double DPPCLK[], 5774 double ReadBandwidthLuma[], 5775 double ReadBandwidthChroma[], 5776 int ReturnBusWidth, 5777 double *DCFCLKDeepSleep) 5778 { 5779 struct vba_vars_st *v = &mode_lib->vba; 5780 double DisplayPipeLineDeliveryTimeLuma; 5781 double DisplayPipeLineDeliveryTimeChroma; 5782 double ReadBandwidth = 0.0; 5783 int k; 5784 5785 for (k = 0; k < NumberOfActivePlanes; ++k) { 5786 5787 if (VRatio[k] <= 1) { 5788 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 5789 } else { 5790 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 5791 } 5792 if (BytePerPixelC[k] == 0) { 5793 DisplayPipeLineDeliveryTimeChroma = 0; 5794 } else { 5795 if (VRatioChroma[k] <= 1) { 5796 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 5797 } else { 5798 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / 
/*
 * Compute the urgent burst factors for the cursor, luma and chroma of one
 * plane.
 *
 * For each of the three buffers the amount of time the buffer covers is
 * derived from the number of lines it holds, LineTime and the relevant
 * vertical ratio. The burst factor is
 *     BufferTime / (BufferTime - UrgentLatency)
 * and, when BufferTime does not exceed UrgentLatency, the factor is set to 0
 * and *NotEnoughUrgentLatencyHiding is set.
 *
 * A factor is 1 when its vertical ratio is not positive, and also when the
 * corresponding buffer is not in use (CursorWidth == 0 for the cursor,
 * BytePerPixelInDETC == 0 for chroma), so every output is always written.
 *
 * VRatio is applied to the cursor and luma buffers, VRatioC to the chroma
 * buffer.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma;
	double LinesInDETChroma;
	unsigned int LinesInCursorBuffer;
	double CursorBufferSizeInTime;
	double DETBufferSizeInTimeLuma;
	double DETBufferSizeInTimeChroma;

	*NotEnoughUrgentLatencyHiding = 0;

	/*
	 * Neutral defaults for the outputs that the branches below may skip, so
	 * the caller never keeps a stale value from an earlier call.
	 */
	*UrgentBurstFactorCursor = 1;
	*UrgentBurstFactorChroma = 1;

	if (CursorWidth > 0) {
		/* Lines held by the cursor buffer, rounded down to a power of two. */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = 1;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
		}
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		/*
		 * The chroma buffer uses the chroma vertical ratio. This branch
		 * previously tested and divided by the luma VRatio, leaving the
		 * VRatioC parameter unused.
		 */
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorChroma = 0;
			} else {
				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency);
			}
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
PSCL_THROUGHPUT[], 5910 double PSCL_THROUGHPUT_CHROMA[], 5911 double DPPCLK[], 5912 int BytePerPixelC[], 5913 enum scan_direction_class SourceScan[], 5914 unsigned int NumberOfCursors[], 5915 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 5916 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 5917 unsigned int BlockWidth256BytesY[], 5918 unsigned int BlockHeight256BytesY[], 5919 unsigned int BlockWidth256BytesC[], 5920 unsigned int BlockHeight256BytesC[], 5921 double DisplayPipeLineDeliveryTimeLuma[], 5922 double DisplayPipeLineDeliveryTimeChroma[], 5923 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 5924 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 5925 double DisplayPipeRequestDeliveryTimeLuma[], 5926 double DisplayPipeRequestDeliveryTimeChroma[], 5927 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 5928 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 5929 double CursorRequestDeliveryTime[], 5930 double CursorRequestDeliveryTimePrefetch[]) 5931 { 5932 double req_per_swath_ub; 5933 int k; 5934 5935 for (k = 0; k < NumberOfActivePlanes; ++k) { 5936 if (VRatio[k] <= 1) { 5937 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 5938 } else { 5939 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 5940 } 5941 5942 if (BytePerPixelC[k] == 0) { 5943 DisplayPipeLineDeliveryTimeChroma[k] = 0; 5944 } else { 5945 if (VRatioChroma[k] <= 1) { 5946 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 5947 } else { 5948 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 5949 } 5950 } 5951 5952 if (VRatioPrefetchY[k] <= 1) { 5953 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 5954 } else { 5955 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] 
/ DPPCLK[k]; 5956 } 5957 5958 if (BytePerPixelC[k] == 0) { 5959 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; 5960 } else { 5961 if (VRatioPrefetchC[k] <= 1) { 5962 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 5963 } else { 5964 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 5965 } 5966 } 5967 } 5968 5969 for (k = 0; k < NumberOfActivePlanes; ++k) { 5970 if (SourceScan[k] != dm_vert) { 5971 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; 5972 } else { 5973 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; 5974 } 5975 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; 5976 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; 5977 if (BytePerPixelC[k] == 0) { 5978 DisplayPipeRequestDeliveryTimeChroma[k] = 0; 5979 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; 5980 } else { 5981 if (SourceScan[k] != dm_vert) { 5982 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; 5983 } else { 5984 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; 5985 } 5986 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; 5987 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; 5988 } 5989 #ifdef __DML_VBA_DEBUG__ 5990 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); 5991 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); 5992 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); 5993 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); 5994 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); 5995 
dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); 5996 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); 5997 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); 5998 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); 5999 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); 6000 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); 6001 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); 6002 #endif 6003 } 6004 6005 for (k = 0; k < NumberOfActivePlanes; ++k) { 6006 int cursor_req_per_width; 6007 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1); 6008 if (NumberOfCursors[k] > 0) { 6009 if (VRatio[k] <= 1) { 6010 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; 6011 } else { 6012 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; 6013 } 6014 if (VRatioPrefetchY[k] <= 1) { 6015 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; 6016 } else { 6017 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; 6018 } 6019 } else { 6020 CursorRequestDeliveryTime[k] = 0; 6021 CursorRequestDeliveryTimePrefetch[k] = 0; 6022 } 6023 #ifdef __DML_VBA_DEBUG__ 6024 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]); 6025 dml_print("DML::%s: 
k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]); 6026 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]); 6027 #endif 6028 } 6029 } 6030 6031 static void CalculateMetaAndPTETimes( 6032 int NumberOfActivePlanes, 6033 bool GPUVMEnable, 6034 int MetaChunkSize, 6035 int MinMetaChunkSizeBytes, 6036 int HTotal[], 6037 double VRatio[], 6038 double VRatioChroma[], 6039 double DestinationLinesToRequestRowInVBlank[], 6040 double DestinationLinesToRequestRowInImmediateFlip[], 6041 bool DCCEnable[], 6042 double PixelClock[], 6043 int BytePerPixelY[], 6044 int BytePerPixelC[], 6045 enum scan_direction_class SourceScan[], 6046 int dpte_row_height[], 6047 int dpte_row_height_chroma[], 6048 int meta_row_width[], 6049 int meta_row_width_chroma[], 6050 int meta_row_height[], 6051 int meta_row_height_chroma[], 6052 int meta_req_width[], 6053 int meta_req_width_chroma[], 6054 int meta_req_height[], 6055 int meta_req_height_chroma[], 6056 int dpte_group_bytes[], 6057 int PTERequestSizeY[], 6058 int PTERequestSizeC[], 6059 int PixelPTEReqWidthY[], 6060 int PixelPTEReqHeightY[], 6061 int PixelPTEReqWidthC[], 6062 int PixelPTEReqHeightC[], 6063 int dpte_row_width_luma_ub[], 6064 int dpte_row_width_chroma_ub[], 6065 double DST_Y_PER_PTE_ROW_NOM_L[], 6066 double DST_Y_PER_PTE_ROW_NOM_C[], 6067 double DST_Y_PER_META_ROW_NOM_L[], 6068 double DST_Y_PER_META_ROW_NOM_C[], 6069 double TimePerMetaChunkNominal[], 6070 double TimePerChromaMetaChunkNominal[], 6071 double TimePerMetaChunkVBlank[], 6072 double TimePerChromaMetaChunkVBlank[], 6073 double TimePerMetaChunkFlip[], 6074 double TimePerChromaMetaChunkFlip[], 6075 double time_per_pte_group_nom_luma[], 6076 double time_per_pte_group_vblank_luma[], 6077 double time_per_pte_group_flip_luma[], 6078 double time_per_pte_group_nom_chroma[], 6079 double time_per_pte_group_vblank_chroma[], 6080 double time_per_pte_group_flip_chroma[]) 6081 { 6082 
unsigned int meta_chunk_width; 6083 unsigned int min_meta_chunk_width; 6084 unsigned int meta_chunk_per_row_int; 6085 unsigned int meta_row_remainder; 6086 unsigned int meta_chunk_threshold; 6087 unsigned int meta_chunks_per_row_ub; 6088 unsigned int meta_chunk_width_chroma; 6089 unsigned int min_meta_chunk_width_chroma; 6090 unsigned int meta_chunk_per_row_int_chroma; 6091 unsigned int meta_row_remainder_chroma; 6092 unsigned int meta_chunk_threshold_chroma; 6093 unsigned int meta_chunks_per_row_ub_chroma; 6094 unsigned int dpte_group_width_luma; 6095 unsigned int dpte_groups_per_row_luma_ub; 6096 unsigned int dpte_group_width_chroma; 6097 unsigned int dpte_groups_per_row_chroma_ub; 6098 int k; 6099 6100 for (k = 0; k < NumberOfActivePlanes; ++k) { 6101 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; 6102 if (BytePerPixelC[k] == 0) { 6103 DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 6104 } else { 6105 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; 6106 } 6107 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; 6108 if (BytePerPixelC[k] == 0) { 6109 DST_Y_PER_META_ROW_NOM_C[k] = 0; 6110 } else { 6111 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; 6112 } 6113 } 6114 6115 for (k = 0; k < NumberOfActivePlanes; ++k) { 6116 if (DCCEnable[k] == true) { 6117 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; 6118 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; 6119 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; 6120 meta_row_remainder = meta_row_width[k] % meta_chunk_width; 6121 if (SourceScan[k] != dm_vert) { 6122 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; 6123 } else { 6124 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; 6125 } 6126 if (meta_row_remainder <= meta_chunk_threshold) { 6127 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 6128 } else { 6129 
meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 6130 } 6131 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6132 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6133 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6134 if (BytePerPixelC[k] == 0) { 6135 TimePerChromaMetaChunkNominal[k] = 0; 6136 TimePerChromaMetaChunkVBlank[k] = 0; 6137 TimePerChromaMetaChunkFlip[k] = 0; 6138 } else { 6139 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; 6140 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; 6141 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma; 6142 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; 6143 if (SourceScan[k] != dm_vert) { 6144 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k]; 6145 } else { 6146 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k]; 6147 } 6148 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) { 6149 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; 6150 } else { 6151 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; 6152 } 6153 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6154 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6155 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6156 } 6157 } else { 6158 TimePerMetaChunkNominal[k] = 0; 6159 
TimePerMetaChunkVBlank[k] = 0; 6160 TimePerMetaChunkFlip[k] = 0; 6161 TimePerChromaMetaChunkNominal[k] = 0; 6162 TimePerChromaMetaChunkVBlank[k] = 0; 6163 TimePerChromaMetaChunkFlip[k] = 0; 6164 } 6165 } 6166 6167 for (k = 0; k < NumberOfActivePlanes; ++k) { 6168 if (GPUVMEnable == true) { 6169 if (SourceScan[k] != dm_vert) { 6170 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k]; 6171 } else { 6172 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k]; 6173 } 6174 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1); 6175 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6176 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6177 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6178 if (BytePerPixelC[k] == 0) { 6179 time_per_pte_group_nom_chroma[k] = 0; 6180 time_per_pte_group_vblank_chroma[k] = 0; 6181 time_per_pte_group_flip_chroma[k] = 0; 6182 } else { 6183 if (SourceScan[k] != dm_vert) { 6184 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k]; 6185 } else { 6186 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k]; 6187 } 6188 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1); 6189 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6190 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6191 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / 
dpte_groups_per_row_chroma_ub; 6192 } 6193 } else { 6194 time_per_pte_group_nom_luma[k] = 0; 6195 time_per_pte_group_vblank_luma[k] = 0; 6196 time_per_pte_group_flip_luma[k] = 0; 6197 time_per_pte_group_nom_chroma[k] = 0; 6198 time_per_pte_group_vblank_chroma[k] = 0; 6199 time_per_pte_group_flip_chroma[k] = 0; 6200 } 6201 } 6202 } 6203 6204 static void CalculateVMGroupAndRequestTimes( 6205 unsigned int NumberOfActivePlanes, 6206 bool GPUVMEnable, 6207 unsigned int GPUVMMaxPageTableLevels, 6208 unsigned int HTotal[], 6209 int BytePerPixelC[], 6210 double DestinationLinesToRequestVMInVBlank[], 6211 double DestinationLinesToRequestVMInImmediateFlip[], 6212 bool DCCEnable[], 6213 double PixelClock[], 6214 int dpte_row_width_luma_ub[], 6215 int dpte_row_width_chroma_ub[], 6216 int vm_group_bytes[], 6217 unsigned int dpde0_bytes_per_frame_ub_l[], 6218 unsigned int dpde0_bytes_per_frame_ub_c[], 6219 int meta_pte_bytes_per_frame_ub_l[], 6220 int meta_pte_bytes_per_frame_ub_c[], 6221 double TimePerVMGroupVBlank[], 6222 double TimePerVMGroupFlip[], 6223 double TimePerVMRequestVBlank[], 6224 double TimePerVMRequestFlip[]) 6225 { 6226 int num_group_per_lower_vm_stage; 6227 int num_req_per_lower_vm_stage; 6228 int k; 6229 6230 for (k = 0; k < NumberOfActivePlanes; ++k) { 6231 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { 6232 if (DCCEnable[k] == false) { 6233 if (BytePerPixelC[k] > 0) { 6234 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6235 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6236 } else { 6237 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6238 } 6239 } else { 6240 if (GPUVMMaxPageTableLevels == 1) { 6241 if (BytePerPixelC[k] > 0) { 6242 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) 
(vm_group_bytes[k]), 1) 6243 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6244 } else { 6245 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6246 } 6247 } else { 6248 if (BytePerPixelC[k] > 0) { 6249 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6250 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) 6251 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6252 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6253 } else { 6254 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6255 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6256 } 6257 } 6258 } 6259 6260 if (DCCEnable[k] == false) { 6261 if (BytePerPixelC[k] > 0) { 6262 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64; 6263 } else { 6264 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64; 6265 } 6266 } else { 6267 if (GPUVMMaxPageTableLevels == 1) { 6268 if (BytePerPixelC[k] > 0) { 6269 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; 6270 } else { 6271 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64; 6272 } 6273 } else { 6274 if (BytePerPixelC[k] > 0) { 6275 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 6276 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; 6277 } else { 6278 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64; 6279 } 6280 } 6281 } 6282 6283 TimePerVMGroupVBlank[k] = 
DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 6284 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 6285 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 6286 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 6287 6288 if (GPUVMMaxPageTableLevels > 2) { 6289 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; 6290 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; 6291 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; 6292 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; 6293 } 6294 6295 } else { 6296 TimePerVMGroupVBlank[k] = 0; 6297 TimePerVMGroupFlip[k] = 0; 6298 TimePerVMRequestVBlank[k] = 0; 6299 TimePerVMRequestFlip[k] = 0; 6300 } 6301 } 6302 } 6303 6304 static void CalculateStutterEfficiency( 6305 struct display_mode_lib *mode_lib, 6306 int CompressedBufferSizeInkByte, 6307 bool UnboundedRequestEnabled, 6308 int ConfigReturnBufferSizeInKByte, 6309 int MetaFIFOSizeInKEntries, 6310 int ZeroSizeBufferEntries, 6311 int NumberOfActivePlanes, 6312 int ROBBufferSizeInKByte, 6313 double TotalDataReadBandwidth, 6314 double DCFCLK, 6315 double ReturnBW, 6316 double COMPBUF_RESERVED_SPACE_64B, 6317 double COMPBUF_RESERVED_SPACE_ZS, 6318 double SRExitTime, 6319 double SRExitZ8Time, 6320 bool SynchronizedVBlank, 6321 double Z8StutterEnterPlusExitWatermark, 6322 double StutterEnterPlusExitWatermark, 6323 bool ProgressiveToInterlaceUnitInOPP, 6324 bool Interlace[], 6325 double MinTTUVBlank[], 6326 int DPPPerPlane[], 6327 unsigned int DETBufferSizeY[], 6328 int BytePerPixelY[], 6329 double BytePerPixelDETY[], 6330 double SwathWidthY[], 6331 int SwathHeightY[], 6332 int SwathHeightC[], 6333 double NetDCCRateLuma[], 6334 double NetDCCRateChroma[], 6335 double 
DCCFractionOfZeroSizeRequestsLuma[], 6336 double DCCFractionOfZeroSizeRequestsChroma[], 6337 int HTotal[], 6338 int VTotal[], 6339 double PixelClock[], 6340 double VRatio[], 6341 enum scan_direction_class SourceScan[], 6342 int BlockHeight256BytesY[], 6343 int BlockWidth256BytesY[], 6344 int BlockHeight256BytesC[], 6345 int BlockWidth256BytesC[], 6346 int DCCYMaxUncompressedBlock[], 6347 int DCCCMaxUncompressedBlock[], 6348 int VActive[], 6349 bool DCCEnable[], 6350 bool WritebackEnable[], 6351 double ReadBandwidthPlaneLuma[], 6352 double ReadBandwidthPlaneChroma[], 6353 double meta_row_bw[], 6354 double dpte_row_bw[], 6355 double *StutterEfficiencyNotIncludingVBlank, 6356 double *StutterEfficiency, 6357 int *NumberOfStutterBurstsPerFrame, 6358 double *Z8StutterEfficiencyNotIncludingVBlank, 6359 double *Z8StutterEfficiency, 6360 int *Z8NumberOfStutterBurstsPerFrame, 6361 double *StutterPeriod) 6362 { 6363 struct vba_vars_st *v = &mode_lib->vba; 6364 6365 double DETBufferingTimeY; 6366 double SwathWidthYCriticalPlane = 0; 6367 double VActiveTimeCriticalPlane = 0; 6368 double FrameTimeCriticalPlane = 0; 6369 int BytePerPixelYCriticalPlane = 0; 6370 double LinesToFinishSwathTransferStutterCriticalPlane = 0; 6371 double MinTTUVBlankCriticalPlane = 0; 6372 double TotalCompressedReadBandwidth; 6373 double TotalRowReadBandwidth; 6374 double AverageDCCCompressionRate; 6375 double EffectiveCompressedBufferSize; 6376 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; 6377 double StutterBurstTime; 6378 int TotalActiveWriteback; 6379 double LinesInDETY; 6380 double LinesInDETYRoundedDownToSwath; 6381 double MaximumEffectiveCompressionLuma; 6382 double MaximumEffectiveCompressionChroma; 6383 double TotalZeroSizeRequestReadBandwidth; 6384 double TotalZeroSizeCompressedReadBandwidth; 6385 double AverageDCCZeroSizeFraction; 6386 double AverageZeroSizeCompressionRate; 6387 int TotalNumberOfActiveOTG = 0; 6388 double LastStutterPeriod = 0.0; 6389 double 
LastZ8StutterPeriod = 0.0; 6390 int k; 6391 6392 TotalZeroSizeRequestReadBandwidth = 0; 6393 TotalZeroSizeCompressedReadBandwidth = 0; 6394 TotalRowReadBandwidth = 0; 6395 TotalCompressedReadBandwidth = 0; 6396 6397 for (k = 0; k < NumberOfActivePlanes; ++k) { 6398 if (DCCEnable[k] == true) { 6399 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k]) 6400 || DCCYMaxUncompressedBlock[k] < 256) { 6401 MaximumEffectiveCompressionLuma = 2; 6402 } else { 6403 MaximumEffectiveCompressionLuma = 4; 6404 } 6405 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma); 6406 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; 6407 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6408 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma; 6409 if (ReadBandwidthPlaneChroma[k] > 0) { 6410 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k]) 6411 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) { 6412 MaximumEffectiveCompressionChroma = 2; 6413 } else { 6414 MaximumEffectiveCompressionChroma = 4; 6415 } 6416 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 6417 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma); 6418 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k]; 6419 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6420 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma; 6421 } 6422 } else { 6423 
TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k]; 6424 } 6425 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]); 6426 } 6427 6428 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; 6429 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; 6430 6431 #ifdef __DML_VBA_DEBUG__ 6432 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); 6433 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); 6434 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth); 6435 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); 6436 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); 6437 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6438 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); 6439 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); 6440 #endif 6441 6442 if (AverageDCCZeroSizeFraction == 1) { 6443 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6444 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate; 6445 } else if (AverageDCCZeroSizeFraction > 0) { 6446 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6447 EffectiveCompressedBufferSize = dml_min( 6448 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6449 MetaFIFOSizeInKEntries * 
1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) 6450 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate, 6451 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6452 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6453 dml_print( 6454 "DML::%s: min 2 = %f\n", 6455 __func__, 6456 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)); 6457 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate); 6458 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6459 } else { 6460 EffectiveCompressedBufferSize = dml_min( 6461 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6462 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate; 6463 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6464 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); 6465 } 6466 6467 #ifdef __DML_VBA_DEBUG__ 6468 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); 6469 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); 6470 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6471 #endif 6472 6473 *StutterPeriod = 0; 6474 for (k = 0; k < NumberOfActivePlanes; ++k) { 6475 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? 
EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth) 6476 / BytePerPixelDETY[k] / SwathWidthY[k]; 6477 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); 6478 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k]; 6479 #ifdef __DML_VBA_DEBUG__ 6480 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]); 6481 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 6482 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]); 6483 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]); 6484 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); 6485 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY); 6486 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath); 6487 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]); 6488 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6489 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]); 6490 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); 6491 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6492 #endif 6493 6494 if (k == 0 || DETBufferingTimeY < *StutterPeriod) { 6495 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; 6496 6497 *StutterPeriod = DETBufferingTimeY; 6498 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k]; 6499 VActiveTimeCriticalPlane = (isInterlaceTiming ? 
dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k]; 6500 BytePerPixelYCriticalPlane = BytePerPixelY[k]; 6501 SwathWidthYCriticalPlane = SwathWidthY[k]; 6502 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath); 6503 MinTTUVBlankCriticalPlane = MinTTUVBlank[k]; 6504 6505 #ifdef __DML_VBA_DEBUG__ 6506 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6507 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane); 6508 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane); 6509 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6510 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane); 6511 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane); 6512 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane); 6513 #endif 6514 } 6515 } 6516 6517 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize); 6518 #ifdef __DML_VBA_DEBUG__ 6519 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); 6520 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6521 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth); 6522 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize); 6523 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6524 dml_print("DML::%s: 
PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); 6525 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 6526 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); 6527 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); 6528 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 6529 #endif 6530 6531 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW 6532 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) 6533 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; 6534 #ifdef __DML_VBA_DEBUG__ 6535 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW); 6536 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth)); 6537 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); 6538 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); 6539 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6540 #endif 6541 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6542 6543 dml_print( 6544 "DML::%s: Time to finish residue swath=%f\n", 6545 __func__, 6546 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6547 6548 TotalActiveWriteback = 0; 6549 for (k = 0; k < NumberOfActivePlanes; ++k) { 6550 if (WritebackEnable[k]) { 6551 TotalActiveWriteback = TotalActiveWriteback + 1; 
6552 } 6553 } 6554 6555 if (TotalActiveWriteback == 0) { 6556 #ifdef __DML_VBA_DEBUG__ 6557 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 6558 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); 6559 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); 6560 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6561 #endif 6562 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; 6563 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; 6564 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6565 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6566 } else { 6567 *StutterEfficiencyNotIncludingVBlank = 0.; 6568 *Z8StutterEfficiencyNotIncludingVBlank = 0.; 6569 *NumberOfStutterBurstsPerFrame = 0; 6570 *Z8NumberOfStutterBurstsPerFrame = 0; 6571 } 6572 #ifdef __DML_VBA_DEBUG__ 6573 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6574 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6575 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank); 6576 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); 6577 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6578 #endif 6579 6580 for (k = 0; k < NumberOfActivePlanes; ++k) { 6581 if (v->BlendingAndTiming[k] == k) { 6582 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 6583 } 6584 } 6585 6586 if (*StutterEfficiencyNotIncludingVBlank > 0) { 6587 LastStutterPeriod = 
VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6588 6589 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) { 6590 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane 6591 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6592 } else { 6593 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank; 6594 } 6595 } else { 6596 *StutterEfficiency = 0; 6597 } 6598 6599 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { 6600 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6601 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) { 6602 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane 6603 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6604 } else { 6605 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; 6606 } 6607 } else { 6608 *Z8StutterEfficiency = 0.; 6609 } 6610 6611 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); 6612 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); 6613 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6614 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6615 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); 6616 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); 6617 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6618 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6619 } 6620 6621 static void 
CalculateSwathAndDETConfiguration( 6622 bool ForceSingleDPP, 6623 int NumberOfActivePlanes, 6624 bool DETSharedByAllDPP, 6625 unsigned int DETBufferSizeInKByteA[], 6626 double MaximumSwathWidthLuma[], 6627 double MaximumSwathWidthChroma[], 6628 enum scan_direction_class SourceScan[], 6629 enum source_format_class SourcePixelFormat[], 6630 enum dm_swizzle_mode SurfaceTiling[], 6631 int ViewportWidth[], 6632 int ViewportHeight[], 6633 int SurfaceWidthY[], 6634 int SurfaceWidthC[], 6635 int SurfaceHeightY[], 6636 int SurfaceHeightC[], 6637 int Read256BytesBlockHeightY[], 6638 int Read256BytesBlockHeightC[], 6639 int Read256BytesBlockWidthY[], 6640 int Read256BytesBlockWidthC[], 6641 enum odm_combine_mode ODMCombineEnabled[], 6642 int BlendingAndTiming[], 6643 int BytePerPixY[], 6644 int BytePerPixC[], 6645 double BytePerPixDETY[], 6646 double BytePerPixDETC[], 6647 int HActive[], 6648 double HRatio[], 6649 double HRatioChroma[], 6650 int DPPPerPlane[], 6651 int swath_width_luma_ub[], 6652 int swath_width_chroma_ub[], 6653 double SwathWidth[], 6654 double SwathWidthChroma[], 6655 int SwathHeightY[], 6656 int SwathHeightC[], 6657 unsigned int DETBufferSizeY[], 6658 unsigned int DETBufferSizeC[], 6659 bool ViewportSizeSupportPerPlane[], 6660 bool *ViewportSizeSupport) 6661 { 6662 int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 6663 int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 6664 int MinimumSwathHeightY; 6665 int MinimumSwathHeightC; 6666 int RoundedUpMaxSwathSizeBytesY; 6667 int RoundedUpMaxSwathSizeBytesC; 6668 int RoundedUpMinSwathSizeBytesY; 6669 int RoundedUpMinSwathSizeBytesC; 6670 int RoundedUpSwathSizeBytesY; 6671 int RoundedUpSwathSizeBytesC; 6672 double SwathWidthSingleDPP[DC__NUM_DPP__MAX]; 6673 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX]; 6674 int k; 6675 6676 CalculateSwathWidth( 6677 ForceSingleDPP, 6678 NumberOfActivePlanes, 6679 SourcePixelFormat, 6680 SourceScan, 6681 ViewportWidth, 6682 ViewportHeight, 6683 SurfaceWidthY, 6684 SurfaceWidthC, 6685 
SurfaceHeightY, 6686 SurfaceHeightC, 6687 ODMCombineEnabled, 6688 BytePerPixY, 6689 BytePerPixC, 6690 Read256BytesBlockHeightY, 6691 Read256BytesBlockHeightC, 6692 Read256BytesBlockWidthY, 6693 Read256BytesBlockWidthC, 6694 BlendingAndTiming, 6695 HActive, 6696 HRatio, 6697 DPPPerPlane, 6698 SwathWidthSingleDPP, 6699 SwathWidthSingleDPPChroma, 6700 SwathWidth, 6701 SwathWidthChroma, 6702 MaximumSwathHeightY, 6703 MaximumSwathHeightC, 6704 swath_width_luma_ub, 6705 swath_width_chroma_ub); 6706 6707 *ViewportSizeSupport = true; 6708 for (k = 0; k < NumberOfActivePlanes; ++k) { 6709 unsigned int DETBufferSizeInKByte = DETBufferSizeInKByteA[k]; 6710 6711 if (DETSharedByAllDPP && DPPPerPlane[k]) 6712 DETBufferSizeInKByte /= DPPPerPlane[k]; 6713 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16 6714 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) { 6715 if (SurfaceTiling[k] == dm_sw_linear 6716 || (SourcePixelFormat[k] == dm_444_64 6717 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x) 6718 && SourceScan[k] != dm_vert)) { 6719 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6720 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) { 6721 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6722 } else { 6723 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6724 } 6725 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6726 } else { 6727 if (SurfaceTiling[k] == dm_sw_linear) { 6728 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6729 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6730 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) { 6731 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6732 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6733 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) { 6734 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 
6735 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6736 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) { 6737 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6738 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6739 } else { 6740 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6741 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6742 } 6743 } 6744 6745 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; 6746 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY; 6747 if (SourcePixelFormat[k] == dm_420_10) { 6748 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256); 6749 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256); 6750 } 6751 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; 6752 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC; 6753 if (SourcePixelFormat[k] == dm_420_10) { 6754 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256); 6755 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256); 6756 } 6757 6758 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6759 SwathHeightY[k] = MaximumSwathHeightY[k]; 6760 SwathHeightC[k] = MaximumSwathHeightC[k]; 6761 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; 6762 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; 6763 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC 6764 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6765 SwathHeightY[k] = MinimumSwathHeightY; 6766 SwathHeightC[k] = MaximumSwathHeightC[k]; 6767 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; 6768 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; 6769 } else if 
(RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC 6770 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6771 SwathHeightY[k] = MaximumSwathHeightY[k]; 6772 SwathHeightC[k] = MinimumSwathHeightC; 6773 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; 6774 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; 6775 } else { 6776 SwathHeightY[k] = MinimumSwathHeightY; 6777 SwathHeightC[k] = MinimumSwathHeightC; 6778 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; 6779 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; 6780 } 6781 { 6782 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); 6783 if (SwathHeightC[k] == 0) { 6784 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024; 6785 DETBufferSizeC[k] = 0; 6786 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { 6787 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2; 6788 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2; 6789 } else { 6790 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024); 6791 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3; 6792 } 6793 6794 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k] 6795 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { 6796 *ViewportSizeSupport = false; 6797 ViewportSizeSupportPerPlane[k] = false; 6798 } else { 6799 ViewportSizeSupportPerPlane[k] = true; 6800 } 6801 } 6802 } 6803 } 6804 6805 static void CalculateSwathWidth( 6806 bool ForceSingleDPP, 6807 int NumberOfActivePlanes, 6808 enum source_format_class SourcePixelFormat[], 6809 enum scan_direction_class SourceScan[], 6810 int ViewportWidth[], 6811 int ViewportHeight[], 6812 int SurfaceWidthY[], 6813 int SurfaceWidthC[], 6814 int SurfaceHeightY[], 6815 int SurfaceHeightC[], 6816 enum odm_combine_mode ODMCombineEnabled[], 6817 int 
BytePerPixY[], 6818 int BytePerPixC[], 6819 int Read256BytesBlockHeightY[], 6820 int Read256BytesBlockHeightC[], 6821 int Read256BytesBlockWidthY[], 6822 int Read256BytesBlockWidthC[], 6823 int BlendingAndTiming[], 6824 int HActive[], 6825 double HRatio[], 6826 int DPPPerPlane[], 6827 double SwathWidthSingleDPPY[], 6828 double SwathWidthSingleDPPC[], 6829 double SwathWidthY[], 6830 double SwathWidthC[], 6831 int MaximumSwathHeightY[], 6832 int MaximumSwathHeightC[], 6833 int swath_width_luma_ub[], 6834 int swath_width_chroma_ub[]) 6835 { 6836 enum odm_combine_mode MainPlaneODMCombine; 6837 int j, k; 6838 6839 #ifdef __DML_VBA_DEBUG__ 6840 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes); 6841 #endif 6842 6843 for (k = 0; k < NumberOfActivePlanes; ++k) { 6844 if (SourceScan[k] != dm_vert) { 6845 SwathWidthSingleDPPY[k] = ViewportWidth[k]; 6846 } else { 6847 SwathWidthSingleDPPY[k] = ViewportHeight[k]; 6848 } 6849 6850 #ifdef __DML_VBA_DEBUG__ 6851 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); 6852 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); 6853 #endif 6854 6855 MainPlaneODMCombine = ODMCombineEnabled[k]; 6856 for (j = 0; j < NumberOfActivePlanes; ++j) { 6857 if (BlendingAndTiming[k] == j) { 6858 MainPlaneODMCombine = ODMCombineEnabled[j]; 6859 } 6860 } 6861 6862 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) { 6863 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k])); 6864 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) { 6865 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k])); 6866 } else if (DPPPerPlane[k] == 2) { 6867 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2; 6868 } else { 6869 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 6870 } 6871 6872 #ifdef __DML_VBA_DEBUG__ 6873 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]); 6874 
dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]); 6875 #endif 6876 6877 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) { 6878 SwathWidthC[k] = SwathWidthY[k] / 2; 6879 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2; 6880 } else { 6881 SwathWidthC[k] = SwathWidthY[k]; 6882 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k]; 6883 } 6884 6885 if (ForceSingleDPP == true) { 6886 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 6887 SwathWidthC[k] = SwathWidthSingleDPPC[k]; 6888 } 6889 { 6890 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); 6891 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); 6892 6893 #ifdef __DML_VBA_DEBUG__ 6894 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); 6895 #endif 6896 6897 if (SourceScan[k] != dm_vert) { 6898 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; 6899 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; 6900 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]); 6901 if (BytePerPixC[k] > 0) { 6902 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); 6903 6904 swath_width_chroma_ub[k] = dml_min( 6905 surface_width_ub_c, 6906 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]); 6907 } else { 6908 swath_width_chroma_ub[k] = 0; 6909 } 6910 } else { 6911 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; 6912 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; 6913 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); 6914 if (BytePerPixC[k] > 0) { 6915 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); 6916 6917 swath_width_chroma_ub[k] = dml_min( 6918 
surface_height_ub_c, 6919 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]); 6920 } else { 6921 swath_width_chroma_ub[k] = 0; 6922 } 6923 } 6924 } 6925 } 6926 } 6927 6928 static double CalculateExtraLatency( 6929 int RoundTripPingLatencyCycles, 6930 int ReorderingBytes, 6931 double DCFCLK, 6932 int TotalNumberOfActiveDPP, 6933 int PixelChunkSizeInKByte, 6934 int TotalNumberOfDCCActiveDPP, 6935 int MetaChunkSize, 6936 double ReturnBW, 6937 bool GPUVMEnable, 6938 bool HostVMEnable, 6939 int NumberOfActivePlanes, 6940 int NumberOfDPP[], 6941 int dpte_group_bytes[], 6942 double HostVMInefficiencyFactor, 6943 double HostVMMinPageSize, 6944 int HostVMMaxNonCachedPageTableLevels) 6945 { 6946 double ExtraLatencyBytes; 6947 double ExtraLatency; 6948 6949 ExtraLatencyBytes = CalculateExtraLatencyBytes( 6950 ReorderingBytes, 6951 TotalNumberOfActiveDPP, 6952 PixelChunkSizeInKByte, 6953 TotalNumberOfDCCActiveDPP, 6954 MetaChunkSize, 6955 GPUVMEnable, 6956 HostVMEnable, 6957 NumberOfActivePlanes, 6958 NumberOfDPP, 6959 dpte_group_bytes, 6960 HostVMInefficiencyFactor, 6961 HostVMMinPageSize, 6962 HostVMMaxNonCachedPageTableLevels); 6963 6964 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; 6965 6966 #ifdef __DML_VBA_DEBUG__ 6967 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); 6968 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); 6969 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes); 6970 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); 6971 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); 6972 #endif 6973 6974 return ExtraLatency; 6975 } 6976 6977 static double CalculateExtraLatencyBytes( 6978 int ReorderingBytes, 6979 int TotalNumberOfActiveDPP, 6980 int PixelChunkSizeInKByte, 6981 int TotalNumberOfDCCActiveDPP, 6982 int MetaChunkSize, 6983 bool GPUVMEnable, 6984 bool 
HostVMEnable, 6985 int NumberOfActivePlanes, 6986 int NumberOfDPP[], 6987 int dpte_group_bytes[], 6988 double HostVMInefficiencyFactor, 6989 double HostVMMinPageSize, 6990 int HostVMMaxNonCachedPageTableLevels) 6991 { 6992 double ret; 6993 int HostVMDynamicLevels = 0, k; 6994 6995 if (GPUVMEnable == true && HostVMEnable == true) { 6996 if (HostVMMinPageSize < 2048) { 6997 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 6998 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { 6999 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 7000 } else { 7001 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 7002 } 7003 } else { 7004 HostVMDynamicLevels = 0; 7005 } 7006 7007 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; 7008 7009 if (GPUVMEnable == true) { 7010 for (k = 0; k < NumberOfActivePlanes; ++k) { 7011 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; 7012 } 7013 } 7014 return ret; 7015 } 7016 7017 static double CalculateUrgentLatency( 7018 double UrgentLatencyPixelDataOnly, 7019 double UrgentLatencyPixelMixedWithVMData, 7020 double UrgentLatencyVMDataOnly, 7021 bool DoUrgentLatencyAdjustment, 7022 double UrgentLatencyAdjustmentFabricClockComponent, 7023 double UrgentLatencyAdjustmentFabricClockReference, 7024 double FabricClock) 7025 { 7026 double ret; 7027 7028 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); 7029 if (DoUrgentLatencyAdjustment == true) { 7030 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); 7031 } 7032 return ret; 7033 } 7034 7035 static void UseMinimumDCFCLK( 7036 struct display_mode_lib *mode_lib, 7037 int MaxPrefetchMode, 7038 int ReorderingBytes) 7039 { 7040 struct vba_vars_st *v = &mode_lib->vba; 
7041 int dummy1, i, j, k; 7042 double NormalEfficiency, dummy2, dummy3; 7043 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; 7044 7045 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0; 7046 for (i = 0; i < v->soc.num_states; ++i) { 7047 for (j = 0; j <= 1; ++j) { 7048 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; 7049 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; 7050 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX]; 7051 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; 7052 double MinimumTWait; 7053 double NonDPTEBandwidth; 7054 double DPTEBandwidth; 7055 double DCFCLKRequiredForAverageBandwidth; 7056 double ExtraLatencyBytes; 7057 double ExtraLatencyCycles; 7058 double DCFCLKRequiredForPeakBandwidth; 7059 int NoOfDPPState[DC__NUM_DPP__MAX]; 7060 double MinimumTvmPlus2Tr0; 7061 7062 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; 7063 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 7064 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 7065 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]); 7066 } 7067 7068 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) { 7069 NoOfDPPState[k] = v->NoOfDPP[i][j][k]; 7070 } 7071 7072 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime); 7073 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]; 7074 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ? 
7075 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j]; 7076 DCFCLKRequiredForAverageBandwidth = dml_max3( 7077 v->ProjectedDCFCLKDeepSleep[i][j], 7078 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth 7079 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100), 7080 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth); 7081 7082 ExtraLatencyBytes = CalculateExtraLatencyBytes( 7083 ReorderingBytes, 7084 v->TotalNumberOfActiveDPP[i][j], 7085 v->PixelChunkSizeInKByte, 7086 v->TotalNumberOfDCCActiveDPP[i][j], 7087 v->MetaChunkSize, 7088 v->GPUVMEnable, 7089 v->HostVMEnable, 7090 v->NumberOfActivePlanes, 7091 NoOfDPPState, 7092 v->dpte_group_bytes, 7093 1, 7094 v->HostVMMinPageSize, 7095 v->HostVMMaxNonCachedPageTableLevels); 7096 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth; 7097 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 7098 double DCFCLKCyclesRequiredInPrefetch; 7099 double ExpectedPrefetchBWAcceleration; 7100 double PrefetchTime; 7101 7102 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k] 7103 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth; 7104 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] 7105 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 
1 : 0) 7106 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth 7107 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k]; 7108 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k]; 7109 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k]) 7110 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]); 7111 DynamicMetadataVMExtraLatency[k] = 7112 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ? 7113 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; 7114 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait 7115 - v->UrgLatency[i] 7116 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2) 7117 * (v->HostVMEnable == true ? 
v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) 7118 - DynamicMetadataVMExtraLatency[k]; 7119 7120 if (PrefetchTime > 0) { 7121 double ExpectedVRatioPrefetch; 7122 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] 7123 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch); 7124 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k] 7125 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration; 7126 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) { 7127 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k] 7128 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth; 7129 } 7130 } else { 7131 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; 7132 } 7133 if (v->DynamicMetadataEnable[k] == true) { 7134 double TSetupPipe; 7135 double TdmbfPipe; 7136 double TdmsksPipe; 7137 double TdmecPipe; 7138 double AllowedTimeForUrgentExtraLatency; 7139 7140 CalculateVupdateAndDynamicMetadataParameters( 7141 v->MaxInterDCNTileRepeaters, 7142 v->RequiredDPPCLK[i][j][k], 7143 v->RequiredDISPCLK[i][j], 7144 v->ProjectedDCFCLKDeepSleep[i][j], 7145 v->PixelClock[k], 7146 v->HTotal[k], 7147 v->VTotal[k] - v->VActive[k], 7148 v->DynamicMetadataTransmittedBytes[k], 7149 v->DynamicMetadataLinesBeforeActiveRequired[k], 7150 v->Interlace[k], 7151 v->ProgressiveToInterlaceUnitInOPP, 7152 &TSetupPipe, 7153 &TdmbfPipe, 7154 &TdmecPipe, 7155 &TdmsksPipe, 7156 &dummy1, 7157 &dummy2, 7158 &dummy3); 7159 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe 7160 - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; 7161 if (AllowedTimeForUrgentExtraLatency > 0) { 7162 DCFCLKRequiredForPeakBandwidthPerPlane[k] = 
dml_max( 7163 DCFCLKRequiredForPeakBandwidthPerPlane[k], 7164 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); 7165 } else { 7166 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; 7167 } 7168 } 7169 } 7170 DCFCLKRequiredForPeakBandwidth = 0; 7171 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) { 7172 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k]; 7173 } 7174 MinimumTvmPlus2Tr0 = v->UrgLatency[i] 7175 * (v->GPUVMEnable == true ? 7176 (v->HostVMEnable == true ? 7177 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) : 7178 0); 7179 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 7180 double MaximumTvmPlus2Tr0PlusTsw; 7181 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; 7182 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { 7183 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i]; 7184 } else { 7185 DCFCLKRequiredForPeakBandwidth = dml_max3( 7186 DCFCLKRequiredForPeakBandwidth, 7187 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4), 7188 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); 7189 } 7190 } 7191 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); 7192 } 7193 } 7194 } 7195 7196 static void CalculateUnboundedRequestAndCompressedBufferSize( 7197 unsigned int DETBufferSizeInKByte, 7198 int ConfigReturnBufferSizeInKByte, 7199 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 7200 int TotalActiveDPP, 7201 bool NoChromaPlanes, 7202 int MaxNumDPP, 7203 int CompressedBufferSegmentSizeInkByteFinal, 7204 enum output_encoder_class *Output, 7205 bool 
*UnboundedRequestEnabled, 7206 int *CompressedBufferSizeInkByte) 7207 { 7208 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); 7209 7210 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]); 7211 *CompressedBufferSizeInkByte = ( 7212 *UnboundedRequestEnabled == true ? 7213 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte : 7214 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte); 7215 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; 7216 7217 #ifdef __DML_VBA_DEBUG__ 7218 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); 7219 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte); 7220 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 7221 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); 7222 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte); 7223 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); 7224 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 7225 #endif 7226 } 7227 7228 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output) 7229 { 7230 bool ret_val = false; 7231 7232 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma); 7233 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) { 7234 ret_val = false; 7235 } 7236 return (ret_val); 7237 } 7238 7239