1 /* 2 * Copyright 2022 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22 * Authors: AMD 23 * 24 */ 25 #include "display_mode_vba_util_32.h" 26 #include "../dml_inline_defs.h" 27 #include "display_mode_vba_32.h" 28 #include "../display_mode_lib.h" 29 30 unsigned int dml32_dscceComputeDelay( 31 unsigned int bpc, 32 double BPP, 33 unsigned int sliceWidth, 34 unsigned int numSlices, 35 enum output_format_class pixelFormat, 36 enum output_encoder_class Output) 37 { 38 // valid bpc = source bits per component in the set of {8, 10, 12} 39 // valid bpp = increments of 1/16 of a bit 40 // min = 6/7/8 in N420/N422/444, respectively 41 // max = such that compression is 1:1 42 //valid sliceWidth = number of pixels per slice line, 43 // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 44 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 45 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 46 47 // fixed value 48 unsigned int rcModelSize = 8192; 49 50 // N422/N420 operate at 2 pixels per clock 51 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L, 52 Delay, pixels; 53 54 if (pixelFormat == dm_420) 55 pixelsPerClock = 2; 56 else if (pixelFormat == dm_n422) 57 pixelsPerClock = 2; 58 // #all other modes operate at 1 pixel per clock 59 else 60 pixelsPerClock = 1; 61 62 //initial transmit delay as per PPS 63 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); 64 65 //compute ssm delay 66 if (bpc == 8) 67 D = 81; 68 else if (bpc == 10) 69 D = 89; 70 else 71 D = 113; 72 73 //divide by pixel per cycle to compute slice width as seen by DSC 74 w = sliceWidth / pixelsPerClock; 75 76 //422 mode has an additional cycle of delay 77 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) 78 s = 0; 79 else 80 s = 1; 81 82 //main calculation for the dscce 83 ix = initalXmitDelay + 45; 84 wx = (w + 2) / 3; 85 p = 3 * wx - w; 86 l0 = ix / w; 87 a = ix + p * l0; 88 
ax = (a + 2) / 3 + D + 6 + 1; 89 L = (ax + wx - 1) / wx; 90 if ((ix % w) == 0 && p != 0) 91 lstall = 1; 92 else 93 lstall = 0; 94 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; 95 96 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 97 pixels = Delay * 3 * pixelsPerClock; 98 99 #ifdef __DML_VBA_DEBUG__ 100 dml_print("DML::%s: bpc: %d\n", __func__, bpc); 101 dml_print("DML::%s: BPP: %f\n", __func__, BPP); 102 dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth); 103 dml_print("DML::%s: numSlices: %d\n", __func__, numSlices); 104 dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat); 105 dml_print("DML::%s: Output: %d\n", __func__, Output); 106 dml_print("DML::%s: pixels: %d\n", __func__, pixels); 107 #endif 108 109 return pixels; 110 } 111 112 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) 113 { 114 unsigned int Delay = 0; 115 116 if (pixelFormat == dm_420) { 117 // sfr 118 Delay = Delay + 2; 119 // dsccif 120 Delay = Delay + 0; 121 // dscc - input deserializer 122 Delay = Delay + 3; 123 // dscc gets pixels every other cycle 124 Delay = Delay + 2; 125 // dscc - input cdc fifo 126 Delay = Delay + 12; 127 // dscc gets pixels every other cycle 128 Delay = Delay + 13; 129 // dscc - cdc uncertainty 130 Delay = Delay + 2; 131 // dscc - output cdc fifo 132 Delay = Delay + 7; 133 // dscc gets pixels every other cycle 134 Delay = Delay + 3; 135 // dscc - cdc uncertainty 136 Delay = Delay + 2; 137 // dscc - output serializer 138 Delay = Delay + 1; 139 // sft 140 Delay = Delay + 1; 141 } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) { 142 // sfr 143 Delay = Delay + 2; 144 // dsccif 145 Delay = Delay + 1; 146 // dscc - input deserializer 147 Delay = Delay + 5; 148 // dscc - input cdc fifo 149 Delay = Delay + 25; 150 // dscc - cdc uncertainty 151 Delay = Delay + 2; 152 // dscc - output cdc fifo 153 Delay = Delay + 10; 154 // dscc - cdc 
uncertainty 155 Delay = Delay + 2; 156 // dscc - output serializer 157 Delay = Delay + 1; 158 // sft 159 Delay = Delay + 1; 160 } else { 161 // sfr 162 Delay = Delay + 2; 163 // dsccif 164 Delay = Delay + 0; 165 // dscc - input deserializer 166 Delay = Delay + 3; 167 // dscc - input cdc fifo 168 Delay = Delay + 12; 169 // dscc - cdc uncertainty 170 Delay = Delay + 2; 171 // dscc - output cdc fifo 172 Delay = Delay + 7; 173 // dscc - output serializer 174 Delay = Delay + 1; 175 // dscc - cdc uncertainty 176 Delay = Delay + 2; 177 // sft 178 Delay = Delay + 1; 179 } 180 181 return Delay; 182 } 183 184 185 bool IsVertical(enum dm_rotation_angle Scan) 186 { 187 bool is_vert = false; 188 189 if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m) 190 is_vert = true; 191 else 192 is_vert = false; 193 return is_vert; 194 } 195 196 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput( 197 double HRatio, 198 double HRatioChroma, 199 double VRatio, 200 double VRatioChroma, 201 double MaxDCHUBToPSCLThroughput, 202 double MaxPSCLToLBThroughput, 203 double PixelClock, 204 enum source_format_class SourcePixelFormat, 205 unsigned int HTaps, 206 unsigned int HTapsChroma, 207 unsigned int VTaps, 208 unsigned int VTapsChroma, 209 210 /* output */ 211 double *PSCL_THROUGHPUT, 212 double *PSCL_THROUGHPUT_CHROMA, 213 double *DPPCLKUsingSingleDPP) 214 { 215 double DPPCLKUsingSingleDPPLuma; 216 double DPPCLKUsingSingleDPPChroma; 217 218 if (HRatio > 1) { 219 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / 220 dml_ceil((double) HTaps / 6.0, 1.0)); 221 } else { 222 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); 223 } 224 225 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / 226 *PSCL_THROUGHPUT, 1); 227 228 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock) 229 DPPCLKUsingSingleDPPLuma = 2 * 
PixelClock; 230 231 if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 && 232 SourcePixelFormat != dm_rgbe_alpha)) { 233 *PSCL_THROUGHPUT_CHROMA = 0; 234 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma; 235 } else { 236 if (HRatioChroma > 1) { 237 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * 238 HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0)); 239 } else { 240 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); 241 } 242 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma), 243 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); 244 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock) 245 DPPCLKUsingSingleDPPChroma = 2 * PixelClock; 246 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma); 247 } 248 } 249 250 void dml32_CalculateBytePerPixelAndBlockSizes( 251 enum source_format_class SourcePixelFormat, 252 enum dm_swizzle_mode SurfaceTiling, 253 254 /* Output */ 255 unsigned int *BytePerPixelY, 256 unsigned int *BytePerPixelC, 257 double *BytePerPixelDETY, 258 double *BytePerPixelDETC, 259 unsigned int *BlockHeight256BytesY, 260 unsigned int *BlockHeight256BytesC, 261 unsigned int *BlockWidth256BytesY, 262 unsigned int *BlockWidth256BytesC, 263 unsigned int *MacroTileHeightY, 264 unsigned int *MacroTileHeightC, 265 unsigned int *MacroTileWidthY, 266 unsigned int *MacroTileWidthC) 267 { 268 if (SourcePixelFormat == dm_444_64) { 269 *BytePerPixelDETY = 8; 270 *BytePerPixelDETC = 0; 271 *BytePerPixelY = 8; 272 *BytePerPixelC = 0; 273 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) { 274 *BytePerPixelDETY = 4; 275 *BytePerPixelDETC = 0; 276 *BytePerPixelY = 4; 277 *BytePerPixelC = 0; 278 } else if (SourcePixelFormat == dm_444_16) { 279 *BytePerPixelDETY = 2; 280 *BytePerPixelDETC = 0; 281 
*BytePerPixelY = 2; 282 *BytePerPixelC = 0; 283 } else if (SourcePixelFormat == dm_444_8) { 284 *BytePerPixelDETY = 1; 285 *BytePerPixelDETC = 0; 286 *BytePerPixelY = 1; 287 *BytePerPixelC = 0; 288 } else if (SourcePixelFormat == dm_rgbe_alpha) { 289 *BytePerPixelDETY = 4; 290 *BytePerPixelDETC = 1; 291 *BytePerPixelY = 4; 292 *BytePerPixelC = 1; 293 } else if (SourcePixelFormat == dm_420_8) { 294 *BytePerPixelDETY = 1; 295 *BytePerPixelDETC = 2; 296 *BytePerPixelY = 1; 297 *BytePerPixelC = 2; 298 } else if (SourcePixelFormat == dm_420_12) { 299 *BytePerPixelDETY = 2; 300 *BytePerPixelDETC = 4; 301 *BytePerPixelY = 2; 302 *BytePerPixelC = 4; 303 } else { 304 *BytePerPixelDETY = 4.0 / 3; 305 *BytePerPixelDETC = 8.0 / 3; 306 *BytePerPixelY = 2; 307 *BytePerPixelC = 4; 308 } 309 #ifdef __DML_VBA_DEBUG__ 310 dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat); 311 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); 312 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); 313 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY); 314 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC); 315 #endif 316 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 317 || SourcePixelFormat == dm_444_16 318 || SourcePixelFormat == dm_444_8 319 || SourcePixelFormat == dm_mono_16 320 || SourcePixelFormat == dm_mono_8 321 || SourcePixelFormat == dm_rgbe)) { 322 if (SurfaceTiling == dm_sw_linear) 323 *BlockHeight256BytesY = 1; 324 else if (SourcePixelFormat == dm_444_64) 325 *BlockHeight256BytesY = 4; 326 else if (SourcePixelFormat == dm_444_8) 327 *BlockHeight256BytesY = 16; 328 else 329 *BlockHeight256BytesY = 8; 330 331 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 332 *BlockHeight256BytesC = 0; 333 *BlockWidth256BytesC = 0; 334 } else { 335 if (SurfaceTiling == dm_sw_linear) { 336 *BlockHeight256BytesY = 1; 337 *BlockHeight256BytesC = 1; 338 
} else if (SourcePixelFormat == dm_rgbe_alpha) { 339 *BlockHeight256BytesY = 8; 340 *BlockHeight256BytesC = 16; 341 } else if (SourcePixelFormat == dm_420_8) { 342 *BlockHeight256BytesY = 16; 343 *BlockHeight256BytesC = 8; 344 } else { 345 *BlockHeight256BytesY = 8; 346 *BlockHeight256BytesC = 8; 347 } 348 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 349 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; 350 } 351 #ifdef __DML_VBA_DEBUG__ 352 dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY); 353 dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY); 354 dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC); 355 dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC); 356 #endif 357 358 if (SurfaceTiling == dm_sw_linear) { 359 *MacroTileHeightY = *BlockHeight256BytesY; 360 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY; 361 *MacroTileHeightC = *BlockHeight256BytesC; 362 if (*MacroTileHeightC == 0) 363 *MacroTileWidthC = 0; 364 else 365 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC; 366 } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t || 367 SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) { 368 *MacroTileHeightY = 16 * *BlockHeight256BytesY; 369 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY; 370 *MacroTileHeightC = 16 * *BlockHeight256BytesC; 371 if (*MacroTileHeightC == 0) 372 *MacroTileWidthC = 0; 373 else 374 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC; 375 } else { 376 *MacroTileHeightY = 32 * *BlockHeight256BytesY; 377 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY; 378 *MacroTileHeightC = 32 * *BlockHeight256BytesC; 379 if (*MacroTileHeightC == 0) 380 *MacroTileWidthC = 0; 381 else 382 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC; 383 } 384 385 #ifdef 
__DML_VBA_DEBUG__ 386 dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY); 387 dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY); 388 dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC); 389 dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC); 390 #endif 391 } // CalculateBytePerPixelAndBlockSizes 392 393 void dml32_CalculateSwathAndDETConfiguration( 394 unsigned int DETSizeOverride[], 395 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 396 unsigned int ConfigReturnBufferSizeInKByte, 397 unsigned int MaxTotalDETInKByte, 398 unsigned int MinCompressedBufferSizeInKByte, 399 double ForceSingleDPP, 400 unsigned int NumberOfActiveSurfaces, 401 unsigned int nomDETInKByte, 402 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 403 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, 404 unsigned int PixelChunkSizeKBytes, 405 unsigned int ROBSizeKBytes, 406 unsigned int CompressedBufferSegmentSizeInkByteFinal, 407 enum output_encoder_class Output[], 408 double ReadBandwidthLuma[], 409 double ReadBandwidthChroma[], 410 double MaximumSwathWidthLuma[], 411 double MaximumSwathWidthChroma[], 412 enum dm_rotation_angle SourceRotation[], 413 bool ViewportStationary[], 414 enum source_format_class SourcePixelFormat[], 415 enum dm_swizzle_mode SurfaceTiling[], 416 unsigned int ViewportWidth[], 417 unsigned int ViewportHeight[], 418 unsigned int ViewportXStart[], 419 unsigned int ViewportYStart[], 420 unsigned int ViewportXStartC[], 421 unsigned int ViewportYStartC[], 422 unsigned int SurfaceWidthY[], 423 unsigned int SurfaceWidthC[], 424 unsigned int SurfaceHeightY[], 425 unsigned int SurfaceHeightC[], 426 unsigned int Read256BytesBlockHeightY[], 427 unsigned int Read256BytesBlockHeightC[], 428 unsigned int Read256BytesBlockWidthY[], 429 unsigned int Read256BytesBlockWidthC[], 430 enum odm_combine_mode ODMMode[], 431 unsigned int BlendingAndTiming[], 432 
unsigned int BytePerPixY[], 433 unsigned int BytePerPixC[], 434 double BytePerPixDETY[], 435 double BytePerPixDETC[], 436 unsigned int HActive[], 437 double HRatio[], 438 double HRatioChroma[], 439 unsigned int DPPPerSurface[], 440 441 /* Output */ 442 unsigned int swath_width_luma_ub[], 443 unsigned int swath_width_chroma_ub[], 444 double SwathWidth[], 445 double SwathWidthChroma[], 446 unsigned int SwathHeightY[], 447 unsigned int SwathHeightC[], 448 unsigned int DETBufferSizeInKByte[], 449 unsigned int DETBufferSizeY[], 450 unsigned int DETBufferSizeC[], 451 bool *UnboundedRequestEnabled, 452 unsigned int *CompressedBufferSizeInkByte, 453 unsigned int *CompBufReservedSpaceKBytes, 454 bool *CompBufReservedSpaceNeedAdjustment, 455 bool ViewportSizeSupportPerSurface[], 456 bool *ViewportSizeSupport) 457 { 458 unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 459 unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 460 unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; 461 unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; 462 unsigned int RoundedUpSwathSizeBytesY; 463 unsigned int RoundedUpSwathSizeBytesC; 464 double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; 465 double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; 466 unsigned int k; 467 unsigned int TotalActiveDPP = 0; 468 bool NoChromaSurfaces = true; 469 unsigned int DETBufferSizeInKByteForSwathCalculation; 470 471 #ifdef __DML_VBA_DEBUG__ 472 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 473 dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes); 474 dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes); 475 #endif 476 dml32_CalculateSwathWidth(ForceSingleDPP, 477 NumberOfActiveSurfaces, 478 SourcePixelFormat, 479 SourceRotation, 480 ViewportStationary, 481 ViewportWidth, 482 ViewportHeight, 483 ViewportXStart, 484 ViewportYStart, 485 ViewportXStartC, 486 ViewportYStartC, 487 SurfaceWidthY, 488 SurfaceWidthC, 489 SurfaceHeightY, 
490 SurfaceHeightC, 491 ODMMode, 492 BytePerPixY, 493 BytePerPixC, 494 Read256BytesBlockHeightY, 495 Read256BytesBlockHeightC, 496 Read256BytesBlockWidthY, 497 Read256BytesBlockWidthC, 498 BlendingAndTiming, 499 HActive, 500 HRatio, 501 DPPPerSurface, 502 503 /* Output */ 504 SwathWidthdoubleDPP, 505 SwathWidthdoubleDPPChroma, 506 SwathWidth, 507 SwathWidthChroma, 508 MaximumSwathHeightY, 509 MaximumSwathHeightC, 510 swath_width_luma_ub, 511 swath_width_chroma_ub); 512 513 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 514 RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; 515 RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; 516 #ifdef __DML_VBA_DEBUG__ 517 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 518 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); 519 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]); 520 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]); 521 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, 522 RoundedUpMaxSwathSizeBytesY[k]); 523 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); 524 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]); 525 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]); 526 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, 527 RoundedUpMaxSwathSizeBytesC[k]); 528 #endif 529 530 if (SourcePixelFormat[k] == dm_420_10) { 531 RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256); 532 RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256); 533 } 534 } 535 536 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 537 TotalActiveDPP = 
TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); 538 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 539 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) { 540 NoChromaSurfaces = false; 541 } 542 } 543 544 // By default, just set the reserved space to 2 pixel chunks size 545 *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2; 546 547 // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data 548 // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio] 549 // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req 550 *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512); 551 552 if (*CompBufReservedSpaceNeedAdjustment == 1) { 553 *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512; 554 } 555 556 #ifdef __DML_VBA_DEBUG__ 557 dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes); 558 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment); 559 #endif 560 561 *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); 562 563 dml32_CalculateDETBufferSize(DETSizeOverride, 564 UseMALLForPStateChange, 565 ForceSingleDPP, 566 NumberOfActiveSurfaces, 567 *UnboundedRequestEnabled, 568 nomDETInKByte, 569 MaxTotalDETInKByte, 570 ConfigReturnBufferSizeInKByte, 571 MinCompressedBufferSizeInKByte, 572 CompressedBufferSegmentSizeInkByteFinal, 573 SourcePixelFormat, 574 ReadBandwidthLuma, 575 ReadBandwidthChroma, 576 RoundedUpMaxSwathSizeBytesY, 577 
RoundedUpMaxSwathSizeBytesC, 578 DPPPerSurface, 579 580 /* Output */ 581 DETBufferSizeInKByte, // per hubp pipe 582 CompressedBufferSizeInkByte); 583 584 #ifdef __DML_VBA_DEBUG__ 585 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); 586 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 587 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 588 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); 589 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); 590 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 591 #endif 592 593 *ViewportSizeSupport = true; 594 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 595 596 DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == 597 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]); 598 #ifdef __DML_VBA_DEBUG__ 599 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k, 600 DETBufferSizeInKByteForSwathCalculation); 601 #endif 602 603 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= 604 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 605 SwathHeightY[k] = MaximumSwathHeightY[k]; 606 SwathHeightC[k] = MaximumSwathHeightC[k]; 607 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; 608 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; 609 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && 610 RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= 611 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 612 SwathHeightY[k] = MaximumSwathHeightY[k] / 2; 613 SwathHeightC[k] = MaximumSwathHeightC[k]; 614 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; 615 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; 616 } else if 
(RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && 617 RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= 618 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 619 SwathHeightY[k] = MaximumSwathHeightY[k]; 620 SwathHeightC[k] = MaximumSwathHeightC[k] / 2; 621 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; 622 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; 623 } else { 624 SwathHeightY[k] = MaximumSwathHeightY[k] / 2; 625 SwathHeightC[k] = MaximumSwathHeightC[k] / 2; 626 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; 627 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; 628 } 629 630 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > 631 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) 632 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 && 633 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { 634 *ViewportSizeSupport = false; 635 ViewportSizeSupportPerSurface[k] = false; 636 } else { 637 ViewportSizeSupportPerSurface[k] = true; 638 } 639 640 if (SwathHeightC[k] == 0) { 641 #ifdef __DML_VBA_DEBUG__ 642 dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k); 643 #endif 644 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024; 645 DETBufferSizeC[k] = 0; 646 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { 647 #ifdef __DML_VBA_DEBUG__ 648 dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k); 649 #endif 650 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2; 651 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2; 652 } else { 653 #ifdef __DML_VBA_DEBUG__ 654 dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k); 655 #endif 656 DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024); 657 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k]; 658 } 659 660 #ifdef __DML_VBA_DEBUG__ 661 
dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); 662 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]); 663 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, 664 k, RoundedUpMaxSwathSizeBytesY[k]); 665 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, 666 k, RoundedUpMaxSwathSizeBytesC[k]); 667 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY); 668 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC); 669 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 670 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 671 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]); 672 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k, 673 ViewportSizeSupportPerSurface[k]); 674 #endif 675 676 } 677 } // CalculateSwathAndDETConfiguration 678 679 void dml32_CalculateSwathWidth( 680 bool ForceSingleDPP, 681 unsigned int NumberOfActiveSurfaces, 682 enum source_format_class SourcePixelFormat[], 683 enum dm_rotation_angle SourceRotation[], 684 bool ViewportStationary[], 685 unsigned int ViewportWidth[], 686 unsigned int ViewportHeight[], 687 unsigned int ViewportXStart[], 688 unsigned int ViewportYStart[], 689 unsigned int ViewportXStartC[], 690 unsigned int ViewportYStartC[], 691 unsigned int SurfaceWidthY[], 692 unsigned int SurfaceWidthC[], 693 unsigned int SurfaceHeightY[], 694 unsigned int SurfaceHeightC[], 695 enum odm_combine_mode ODMMode[], 696 unsigned int BytePerPixY[], 697 unsigned int BytePerPixC[], 698 unsigned int Read256BytesBlockHeightY[], 699 unsigned int Read256BytesBlockHeightC[], 700 unsigned int Read256BytesBlockWidthY[], 701 unsigned int Read256BytesBlockWidthC[], 702 unsigned int BlendingAndTiming[], 703 unsigned int HActive[], 704 double 
HRatio[], 705 unsigned int DPPPerSurface[], 706 707 /* Output */ 708 double SwathWidthdoubleDPPY[], 709 double SwathWidthdoubleDPPC[], 710 double SwathWidthY[], // per-pipe 711 double SwathWidthC[], // per-pipe 712 unsigned int MaximumSwathHeightY[], 713 unsigned int MaximumSwathHeightC[], 714 unsigned int swath_width_luma_ub[], // per-pipe 715 unsigned int swath_width_chroma_ub[]) // per-pipe 716 { 717 unsigned int k, j; 718 enum odm_combine_mode MainSurfaceODMMode; 719 720 unsigned int surface_width_ub_l; 721 unsigned int surface_height_ub_l; 722 unsigned int surface_width_ub_c; 723 unsigned int surface_height_ub_c; 724 725 #ifdef __DML_VBA_DEBUG__ 726 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 727 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 728 #endif 729 730 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 731 if (!IsVertical(SourceRotation[k])) 732 SwathWidthdoubleDPPY[k] = ViewportWidth[k]; 733 else 734 SwathWidthdoubleDPPY[k] = ViewportHeight[k]; 735 736 #ifdef __DML_VBA_DEBUG__ 737 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); 738 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); 739 #endif 740 741 MainSurfaceODMMode = ODMMode[k]; 742 for (j = 0; j < NumberOfActiveSurfaces; ++j) { 743 if (BlendingAndTiming[k] == j) 744 MainSurfaceODMMode = ODMMode[j]; 745 } 746 747 if (ForceSingleDPP) { 748 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 749 } else { 750 if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) { 751 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], 752 dml_round(HActive[k] / 4.0 * HRatio[k])); 753 } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) { 754 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], 755 dml_round(HActive[k] / 2.0 * HRatio[k])); 756 } else if (DPPPerSurface[k] == 2) { 757 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2; 758 } else { 759 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 760 } 761 } 762 763 
#ifdef __DML_VBA_DEBUG__ 764 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]); 765 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]); 766 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode); 767 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]); 768 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]); 769 #endif 770 771 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 772 SourcePixelFormat[k] == dm_420_12) { 773 SwathWidthC[k] = SwathWidthY[k] / 2; 774 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2; 775 } else { 776 SwathWidthC[k] = SwathWidthY[k]; 777 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k]; 778 } 779 780 if (ForceSingleDPP == true) { 781 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 782 SwathWidthC[k] = SwathWidthdoubleDPPC[k]; 783 } 784 785 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); 786 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); 787 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); 788 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); 789 790 #ifdef __DML_VBA_DEBUG__ 791 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); 792 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l); 793 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c); 794 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c); 795 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]); 796 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]); 797 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]); 798 dml_print("DML::%s: k=%d 
Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]); 799 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]); 800 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]); 801 #endif 802 803 if (!IsVertical(SourceRotation[k])) { 804 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; 805 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; 806 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 807 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 808 dml_floor(ViewportXStart[k] + 809 SwathWidthY[k] + 810 Read256BytesBlockWidthY[k] - 1, 811 Read256BytesBlockWidthY[k]) - 812 dml_floor(ViewportXStart[k], 813 Read256BytesBlockWidthY[k])); 814 } else { 815 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 816 dml_ceil(SwathWidthY[k] - 1, 817 Read256BytesBlockWidthY[k]) + 818 Read256BytesBlockWidthY[k]); 819 } 820 if (BytePerPixC[k] > 0) { 821 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 822 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, 823 dml_floor(ViewportXStartC[k] + SwathWidthC[k] + 824 Read256BytesBlockWidthC[k] - 1, 825 Read256BytesBlockWidthC[k]) - 826 dml_floor(ViewportXStartC[k], 827 Read256BytesBlockWidthC[k])); 828 } else { 829 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, 830 dml_ceil(SwathWidthC[k] - 1, 831 Read256BytesBlockWidthC[k]) + 832 Read256BytesBlockWidthC[k]); 833 } 834 } else { 835 swath_width_chroma_ub[k] = 0; 836 } 837 } else { 838 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; 839 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; 840 841 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 842 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] + 843 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, 844 Read256BytesBlockHeightY[k]) - 845 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k])); 846 } else { 847 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, 
dml_ceil(SwathWidthY[k] - 1, 848 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); 849 } 850 if (BytePerPixC[k] > 0) { 851 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 852 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, 853 dml_floor(ViewportYStartC[k] + SwathWidthC[k] + 854 Read256BytesBlockHeightC[k] - 1, 855 Read256BytesBlockHeightC[k]) - 856 dml_floor(ViewportYStartC[k], 857 Read256BytesBlockHeightC[k])); 858 } else { 859 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, 860 dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + 861 Read256BytesBlockHeightC[k]); 862 } 863 } else { 864 swath_width_chroma_ub[k] = 0; 865 } 866 } 867 868 #ifdef __DML_VBA_DEBUG__ 869 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]); 870 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]); 871 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]); 872 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]); 873 #endif 874 875 } 876 } // CalculateSwathWidth 877 878 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, 879 unsigned int TotalNumberOfActiveDPP, 880 bool NoChroma, 881 enum output_encoder_class Output, 882 enum dm_swizzle_mode SurfaceTiling, 883 bool CompBufReservedSpaceNeedAdjustment, 884 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) 885 { 886 bool ret_val = false; 887 888 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && 889 TotalNumberOfActiveDPP == 1 && NoChroma); 890 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) 891 ret_val = false; 892 893 if (SurfaceTiling == dm_sw_linear) 894 ret_val = false; 895 896 if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) 897 ret_val = false; 898 899 #ifdef 
__DML_VBA_DEBUG__ 900 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment); 901 dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); 902 dml_print("DML::%s: ret_val = %d\n", __func__, ret_val); 903 #endif 904 905 return (ret_val); 906 } 907 908 void dml32_CalculateDETBufferSize( 909 unsigned int DETSizeOverride[], 910 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 911 bool ForceSingleDPP, 912 unsigned int NumberOfActiveSurfaces, 913 bool UnboundedRequestEnabled, 914 unsigned int nomDETInKByte, 915 unsigned int MaxTotalDETInKByte, 916 unsigned int ConfigReturnBufferSizeInKByte, 917 unsigned int MinCompressedBufferSizeInKByte, 918 unsigned int CompressedBufferSegmentSizeInkByteFinal, 919 enum source_format_class SourcePixelFormat[], 920 double ReadBandwidthLuma[], 921 double ReadBandwidthChroma[], 922 unsigned int RoundedUpMaxSwathSizeBytesY[], 923 unsigned int RoundedUpMaxSwathSizeBytesC[], 924 unsigned int DPPPerSurface[], 925 /* Output */ 926 unsigned int DETBufferSizeInKByte[], 927 unsigned int *CompressedBufferSizeInkByte) 928 { 929 unsigned int DETBufferSizePoolInKByte; 930 unsigned int NextDETBufferPieceInKByte; 931 bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX]; 932 bool NextPotentialSurfaceToAssignDETPieceFound; 933 unsigned int NextSurfaceToAssignDETPiece; 934 double TotalBandwidth; 935 double BandwidthOfSurfacesNotAssignedDETPiece; 936 unsigned int max_minDET; 937 unsigned int minDET; 938 unsigned int minDET_pipe; 939 unsigned int j, k; 940 941 #ifdef __DML_VBA_DEBUG__ 942 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 943 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 944 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 945 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", 
__func__, UnboundedRequestEnabled); 946 dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte); 947 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 948 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte); 949 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__, 950 CompressedBufferSegmentSizeInkByteFinal); 951 #endif 952 953 // Note: Will use default det size if that fits 2 swaths 954 if (UnboundedRequestEnabled) { 955 if (DETSizeOverride[0] > 0) { 956 DETBufferSizeInKByte[0] = DETSizeOverride[0]; 957 } else { 958 DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 * 959 ((double) RoundedUpMaxSwathSizeBytesY[0] + 960 (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0)); 961 } 962 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0]; 963 } else { 964 DETBufferSizePoolInKByte = MaxTotalDETInKByte; 965 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 966 DETBufferSizeInKByte[k] = nomDETInKByte; 967 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 968 SourcePixelFormat[k] == dm_420_12) { 969 max_minDET = nomDETInKByte - 64; 970 } else { 971 max_minDET = nomDETInKByte; 972 } 973 minDET = 128; 974 minDET_pipe = 0; 975 976 // add DET resource until can hold 2 full swaths 977 while (minDET <= max_minDET && minDET_pipe == 0) { 978 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] + 979 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET) 980 minDET_pipe = minDET; 981 minDET = minDET + 64; 982 } 983 984 #ifdef __DML_VBA_DEBUG__ 985 dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET); 986 dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET); 987 dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe); 988 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, 989 
RoundedUpMaxSwathSizeBytesY[k]); 990 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, 991 RoundedUpMaxSwathSizeBytesC[k]); 992 #endif 993 994 if (minDET_pipe == 0) { 995 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] + 996 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64)); 997 #ifdef __DML_VBA_DEBUG__ 998 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n", 999 __func__, k, minDET_pipe); 1000 #endif 1001 } 1002 1003 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { 1004 DETBufferSizeInKByte[k] = 0; 1005 } else if (DETSizeOverride[k] > 0) { 1006 DETBufferSizeInKByte[k] = DETSizeOverride[k]; 1007 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - 1008 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k]; 1009 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) { 1010 DETBufferSizeInKByte[k] = minDET_pipe; 1011 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - 1012 (ForceSingleDPP ? 
1 : DPPPerSurface[k]) * minDET_pipe; 1013 } 1014 1015 #ifdef __DML_VBA_DEBUG__ 1016 dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 1017 dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]); 1018 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 1019 dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte); 1020 #endif 1021 } 1022 1023 TotalBandwidth = 0; 1024 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1025 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) 1026 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 1027 } 1028 #ifdef __DML_VBA_DEBUG__ 1029 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__); 1030 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) 1031 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 1032 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__); 1033 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth); 1034 #endif 1035 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth; 1036 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1037 1038 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { 1039 DETPieceAssignedToThisSurfaceAlready[k] = true; 1040 } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 
1 : DPPPerSurface[k]) * 1041 (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >= 1042 ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) { 1043 DETPieceAssignedToThisSurfaceAlready[k] = true; 1044 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - 1045 ReadBandwidthLuma[k] - ReadBandwidthChroma[k]; 1046 } else { 1047 DETPieceAssignedToThisSurfaceAlready[k] = false; 1048 } 1049 #ifdef __DML_VBA_DEBUG__ 1050 dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k, 1051 DETPieceAssignedToThisSurfaceAlready[k]); 1052 dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, 1053 BandwidthOfSurfacesNotAssignedDETPiece); 1054 #endif 1055 } 1056 1057 for (j = 0; j < NumberOfActiveSurfaces; ++j) { 1058 NextPotentialSurfaceToAssignDETPieceFound = false; 1059 NextSurfaceToAssignDETPiece = 0; 1060 1061 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1062 #ifdef __DML_VBA_DEBUG__ 1063 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k, 1064 ReadBandwidthLuma[k]); 1065 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k, 1066 ReadBandwidthChroma[k]); 1067 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, 1068 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); 1069 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, 1070 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1071 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k, 1072 NextSurfaceToAssignDETPiece); 1073 #endif 1074 if (!DETPieceAssignedToThisSurfaceAlready[k] && 1075 (!NextPotentialSurfaceToAssignDETPieceFound || 1076 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < 1077 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1078 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) { 1079 NextSurfaceToAssignDETPiece = k; 1080 NextPotentialSurfaceToAssignDETPieceFound = 
true; 1081 } 1082 #ifdef __DML_VBA_DEBUG__ 1083 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n", 1084 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); 1085 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n", 1086 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); 1087 #endif 1088 } 1089 1090 if (NextPotentialSurfaceToAssignDETPieceFound) { 1091 // Note: To show the banker's rounding behavior in VBA and also the fact 1092 // that the DET buffer size varies due to precision issue 1093 // 1094 //double tmp1 = ((double) DETBufferSizePoolInKByte * 1095 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1096 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1097 // BandwidthOfSurfacesNotAssignedDETPiece / 1098 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1099 //double tmp2 = dml_round((double) DETBufferSizePoolInKByte * 1100 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1101 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1102 //BandwidthOfSurfacesNotAssignedDETPiece / 1103 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1104 // 1105 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1); 1106 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2); 1107 1108 NextDETBufferPieceInKByte = dml_min( 1109 dml_round((double) DETBufferSizePoolInKByte * 1110 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1111 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1112 BandwidthOfSurfacesNotAssignedDETPiece / 1113 ((ForceSingleDPP ? 1 : 1114 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) * 1115 (ForceSingleDPP ? 1 : 1116 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0, 1117 dml_floor((double) DETBufferSizePoolInKByte, 1118 (ForceSingleDPP ? 1 : 1119 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1120 1121 // Above calculation can assign the entire DET buffer allocation to a single pipe. 
1122 // We should limit the per-pipe DET size to the nominal / max per pipe. 1123 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { 1124 if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] < 1125 nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { 1126 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) - 1127 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]; 1128 } else { 1129 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] 1130 // already has the max per-pipe value 1131 NextDETBufferPieceInKByte = 0; 1132 } 1133 } 1134 1135 #ifdef __DML_VBA_DEBUG__ 1136 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j, 1137 DETBufferSizePoolInKByte); 1138 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, 1139 NextSurfaceToAssignDETPiece); 1140 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j, 1141 NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); 1142 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j, 1143 NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1144 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", 1145 __func__, j, BandwidthOfSurfacesNotAssignedDETPiece); 1146 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j, 1147 NextDETBufferPieceInKByte); 1148 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ", 1149 __func__, j, NextSurfaceToAssignDETPiece, 1150 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); 1151 #endif 1152 1153 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = 1154 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] 1155 + NextDETBufferPieceInKByte 1156 / (ForceSingleDPP ? 
1 : DPPPerSurface[NextSurfaceToAssignDETPiece]); 1157 #ifdef __DML_VBA_DEBUG__ 1158 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); 1159 #endif 1160 1161 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte; 1162 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true; 1163 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - 1164 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1165 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1166 } 1167 } 1168 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte; 1169 } 1170 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; 1171 1172 #ifdef __DML_VBA_DEBUG__ 1173 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__); 1174 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 1175 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) { 1176 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n", 1177 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); 1178 } 1179 #endif 1180 } // CalculateDETBufferSize 1181 1182 void dml32_CalculateODMMode( 1183 unsigned int MaximumPixelsPerLinePerDSCUnit, 1184 unsigned int HActive, 1185 enum output_encoder_class Output, 1186 enum odm_combine_policy ODMUse, 1187 double StateDispclk, 1188 double MaxDispclk, 1189 bool DSCEnable, 1190 unsigned int TotalNumberOfActiveDPP, 1191 unsigned int MaxNumDPP, 1192 double PixelClock, 1193 double DISPCLKDPPCLKDSCCLKDownSpreading, 1194 double DISPCLKRampingMargin, 1195 double DISPCLKDPPCLKVCOSpeed, 1196 1197 /* Output */ 1198 bool *TotalAvailablePipesSupport, 1199 unsigned int *NumberOfDPP, 1200 enum odm_combine_mode *ODMMode, 1201 double *RequiredDISPCLKPerSurface) 1202 { 1203 1204 double SurfaceRequiredDISPCLKWithoutODMCombine; 1205 double 
SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1206 double SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1207 1208 SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled, 1209 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1210 MaxDispclk); 1211 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1, 1212 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1213 MaxDispclk); 1214 SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1, 1215 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1216 MaxDispclk); 1217 *TotalAvailablePipesSupport = true; 1218 *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable 1219 1220 if (ODMUse == dm_odm_combine_policy_none) 1221 *ODMMode = dm_odm_combine_mode_disabled; 1222 1223 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine; 1224 *NumberOfDPP = 0; 1225 1226 // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care?? 
1227 // (ODMUse == "" || ODMUse == "CombineAsNeeded") 1228 1229 if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 || 1230 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk || 1231 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)))))) { 1232 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) { 1233 *ODMMode = dm_odm_combine_mode_4to1; 1234 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1235 *NumberOfDPP = 4; 1236 } else { 1237 *TotalAvailablePipesSupport = false; 1238 } 1239 } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 || 1240 (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk && 1241 SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) || 1242 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)))))) { 1243 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) { 1244 *ODMMode = dm_odm_combine_mode_2to1; 1245 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1246 *NumberOfDPP = 2; 1247 } else { 1248 *TotalAvailablePipesSupport = false; 1249 } 1250 } else { 1251 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) 1252 *NumberOfDPP = 1; 1253 else 1254 *TotalAvailablePipesSupport = false; 1255 } 1256 } 1257 1258 double dml32_CalculateRequiredDispclk( 1259 enum odm_combine_mode ODMMode, 1260 double PixelClock, 1261 double DISPCLKDPPCLKDSCCLKDownSpreading, 1262 double DISPCLKRampingMargin, 1263 double DISPCLKDPPCLKVCOSpeed, 1264 double MaxDispclk) 1265 { 1266 double RequiredDispclk = 0.; 1267 double PixelClockAfterODM; 1268 double DISPCLKWithRampingRoundedToDFSGranularity; 1269 double DISPCLKWithoutRampingRoundedToDFSGranularity; 1270 double MaxDispclkRoundedDownToDFSGranularity; 1271 1272 if (ODMMode == dm_odm_combine_mode_4to1) 1273 PixelClockAfterODM = PixelClock / 4; 1274 else if (ODMMode == dm_odm_combine_mode_2to1) 1275 PixelClockAfterODM = PixelClock / 2; 1276 else 1277 PixelClockAfterODM = PixelClock; 1278 
1279 1280 DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( 1281 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100) 1282 * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed); 1283 1284 DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( 1285 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed); 1286 1287 MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed); 1288 1289 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) 1290 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity; 1291 else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) 1292 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity; 1293 else 1294 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity; 1295 1296 return RequiredDispclk; 1297 } 1298 1299 double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed) 1300 { 1301 if (Clock <= 0.0) 1302 return 0.0; 1303 1304 if (round_up) 1305 return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0); 1306 else 1307 return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0); 1308 } 1309 1310 void dml32_CalculateOutputLink( 1311 double PHYCLKPerState, 1312 double PHYCLKD18PerState, 1313 double PHYCLKD32PerState, 1314 double Downspreading, 1315 bool IsMainSurfaceUsingTheIndicatedTiming, 1316 enum output_encoder_class Output, 1317 enum output_format_class OutputFormat, 1318 unsigned int HTotal, 1319 unsigned int HActive, 1320 double PixelClockBackEnd, 1321 double ForcedOutputLinkBPP, 1322 unsigned int DSCInputBitPerComponent, 1323 unsigned int NumberOfDSCSlices, 1324 double AudioSampleRate, 1325 unsigned int AudioSampleLayout, 1326 enum odm_combine_mode ODMModeNoDSC, 1327 enum odm_combine_mode ODMModeDSC, 1328 bool DSCEnable, 1329 unsigned int OutputLinkDPLanes, 1330 enum 
dm_output_link_dp_rate OutputLinkDPRate,

		/* Output */
		bool *RequiresDSC,
		double *RequiresFEC,
		double *OutBpp,
		enum dm_output_type *OutputType,
		enum dm_output_rate *OutputRate,
		unsigned int *RequiredSlots)
{
	bool LinkDSCEnable;
	unsigned int dummy;
	/* Defaults; they are the final result for surfaces that do not own the timing. */
	*RequiresDSC = false;
	*RequiresFEC = false;
	*OutBpp = 0;
	*OutputType = dm_output_type_unknown;
	*OutputRate = dm_output_rate_unknown;

	if (IsMainSurfaceUsingTheIndicatedTiming) {
		if (Output == dm_hdmi) {
			/*
			 * HDMI: never DSC/FEC. The link rate is min(600, PHYCLK) * 10 over
			 * 3 lanes. The slot count goes to a scratch variable, so
			 * *RequiredSlots is not written on this path.
			 */
			*RequiresDSC = false;
			*RequiresFEC = false;
			*OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
					PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
					DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
					ODMModeNoDSC, ODMModeDSC, &dummy);
			//OutputTypeAndRate = "HDMI";
			*OutputType = dm_output_type_hdmi;

		} else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
			/*
			 * Initial DSC/FEC choice: DSC follows DSCEnable. FEC is required
			 * with DSC on DP and DP2.0, and always on DP2.0; never initially
			 * on eDP.
			 */
			if (DSCEnable == true) {
				*RequiresDSC = true;
				LinkDSCEnable = true;
				if (Output == dm_dp || Output == dm_dp2p0)
					*RequiresFEC = true;
				else
					*RequiresFEC = false;
			} else {
				*RequiresDSC = false;
				LinkDSCEnable = false;
				if (Output == dm_dp2p0)
					*RequiresFEC = true;
				else
					*RequiresFEC = false;
			}
			/*
			 * Rates are tried from lowest to highest. A rate is attempted when
			 * it is the requested one (or none is requested, dm_dp_rate_na),
			 * the PHY clock of this state can drive it and, for all but the
			 * first rate, no bpp has been found yet (*OutBpp == 0). The type
			 * and rate of the last attempted rate are reported even if the
			 * resulting bpp is still 0.
			 *
			 * NOTE(review): each "retry with DSC" below requires
			 * DSCEnable == true, in which case LinkDSCEnable was already set
			 * to true above, so the second call appears to repeat the first
			 * with identical arguments - confirm intent.
			 */
			if (Output == dm_dp2p0) {
				*OutBpp = 0;
				/*
				 * NOTE(review): 10000 / 32 and 13500 / 32 are integer
				 * divisions (312 and 421, not 312.5 and 421.875).
				 */
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
						PHYCLKD32PerState >= 10000 / 32) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					/* Retry with DSC only when the next rate (UHBR13.5) is out of reach. */
					if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
							ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " UHBR10";
					*OutputType = dm_output_type_dp2p0;
					*OutputRate = dm_output_rate_dp_rate_uhbr10;
				}
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
						*OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);

					/* Retry with DSC only when the next rate (UHBR20) is out of reach. */
					if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
							ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " UHBR13p5";
					*OutputType = dm_output_type_dp2p0;
					*OutputRate = dm_output_rate_dp_rate_uhbr13p5;
				}
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
						*OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					/* Highest DP2.0 rate: no PHY clock condition on the DSC retry. */
					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " UHBR20";
					*OutputType = dm_output_type_dp2p0;
					*OutputRate = dm_output_rate_dp_rate_uhbr20;
				}
			} else {
				/*
				 * DP / eDP: same ladder with HBR, HBR2 and HBR3, gated on
				 * PHYCLKPerState (270 / 540 / 810). The DSC retry additionally
				 * sets FEC for DP but not for eDP.
				 */
				*OutBpp = 0;
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
						PHYCLKPerState >= 270) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					/* Retry with DSC only when HBR2 is out of reach. */
					if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
							ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						if (Output == dm_dp)
							*RequiresFEC = true;
						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " HBR";
					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
					*OutputRate = dm_output_rate_dp_rate_hbr;
				}
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
						*OutBpp == 0 && PHYCLKPerState >= 540) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);

					/* Retry with DSC only when HBR3 is out of reach. */
					if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
							ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						if (Output == dm_dp)
							*RequiresFEC = true;

						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " HBR2";
					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
					*OutputRate = dm_output_rate_dp_rate_hbr2;
				}
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output,
							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
							AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
							RequiredSlots);

					/* Highest DP rate: no PHY clock condition on the DSC retry. */
					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						if (Output == dm_dp)
							*RequiresFEC = true;

						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " HBR3";
					*OutputType = (Output == dm_dp) ?
dm_output_type_dp : dm_output_type_edp; 1518 *OutputRate = dm_output_rate_dp_rate_hbr3; 1519 } 1520 } 1521 } 1522 } 1523 } 1524 1525 void dml32_CalculateDPPCLK( 1526 unsigned int NumberOfActiveSurfaces, 1527 double DISPCLKDPPCLKDSCCLKDownSpreading, 1528 double DISPCLKDPPCLKVCOSpeed, 1529 double DPPCLKUsingSingleDPP[], 1530 unsigned int DPPPerSurface[], 1531 1532 /* output */ 1533 double *GlobalDPPCLK, 1534 double Dppclk[]) 1535 { 1536 unsigned int k; 1537 *GlobalDPPCLK = 0; 1538 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1539 Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100); 1540 *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]); 1541 } 1542 *GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed); 1543 for (k = 0; k < NumberOfActiveSurfaces; ++k) 1544 Dppclk[k] = *GlobalDPPCLK / 255 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0); 1545 } 1546 1547 double dml32_TruncToValidBPP( 1548 double LinkBitRate, 1549 unsigned int Lanes, 1550 unsigned int HTotal, 1551 unsigned int HActive, 1552 double PixelClock, 1553 double DesiredBPP, 1554 bool DSCEnable, 1555 enum output_encoder_class Output, 1556 enum output_format_class Format, 1557 unsigned int DSCInputBitPerComponent, 1558 unsigned int DSCSlices, 1559 unsigned int AudioRate, 1560 unsigned int AudioLayout, 1561 enum odm_combine_mode ODMModeNoDSC, 1562 enum odm_combine_mode ODMModeDSC, 1563 /* Output */ 1564 unsigned int *RequiredSlots) 1565 { 1566 double MaxLinkBPP; 1567 unsigned int MinDSCBPP; 1568 double MaxDSCBPP; 1569 unsigned int NonDSCBPP0; 1570 unsigned int NonDSCBPP1; 1571 unsigned int NonDSCBPP2; 1572 unsigned int NonDSCBPP3; 1573 1574 if (Format == dm_420) { 1575 NonDSCBPP0 = 12; 1576 NonDSCBPP1 = 15; 1577 NonDSCBPP2 = 18; 1578 MinDSCBPP = 6; 1579 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; 1580 } else if (Format == dm_444) { 1581 NonDSCBPP0 = 18; 1582 NonDSCBPP1 = 24; 1583 NonDSCBPP2 = 30; 1584 NonDSCBPP3 = 36; 
1585 MinDSCBPP = 8; 1586 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; 1587 } else { 1588 if (Output == dm_hdmi) { 1589 NonDSCBPP0 = 24; 1590 NonDSCBPP1 = 24; 1591 NonDSCBPP2 = 24; 1592 } else { 1593 NonDSCBPP0 = 16; 1594 NonDSCBPP1 = 20; 1595 NonDSCBPP2 = 24; 1596 } 1597 if (Format == dm_n422) { 1598 MinDSCBPP = 7; 1599 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; 1600 } else { 1601 MinDSCBPP = 8; 1602 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; 1603 } 1604 } 1605 if (Output == dm_dp2p0) { 1606 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540; 1607 } else if (DSCEnable && Output == dm_dp) { 1608 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); 1609 } else { 1610 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; 1611 } 1612 1613 if (DSCEnable) { 1614 if (ODMModeDSC == dm_odm_combine_mode_4to1) 1615 MaxLinkBPP = dml_min(MaxLinkBPP, 16); 1616 else if (ODMModeDSC == dm_odm_combine_mode_2to1) 1617 MaxLinkBPP = dml_min(MaxLinkBPP, 32); 1618 else if (ODMModeDSC == dm_odm_split_mode_1to2) 1619 MaxLinkBPP = 2 * MaxLinkBPP; 1620 } else { 1621 if (ODMModeNoDSC == dm_odm_combine_mode_4to1) 1622 MaxLinkBPP = dml_min(MaxLinkBPP, 16); 1623 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1) 1624 MaxLinkBPP = dml_min(MaxLinkBPP, 32); 1625 else if (ODMModeNoDSC == dm_odm_split_mode_1to2) 1626 MaxLinkBPP = 2 * MaxLinkBPP; 1627 } 1628 1629 if (DesiredBPP == 0) { 1630 if (DSCEnable) { 1631 if (MaxLinkBPP < MinDSCBPP) 1632 return BPP_INVALID; 1633 else if (MaxLinkBPP >= MaxDSCBPP) 1634 return MaxDSCBPP; 1635 else 1636 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; 1637 } else { 1638 if (MaxLinkBPP >= NonDSCBPP3) 1639 return NonDSCBPP3; 1640 else if (MaxLinkBPP >= NonDSCBPP2) 1641 return NonDSCBPP2; 1642 else if (MaxLinkBPP >= NonDSCBPP1) 1643 return NonDSCBPP1; 1644 else if (MaxLinkBPP >= NonDSCBPP0) 1645 return 16.0; 1646 else 1647 return BPP_INVALID; 1648 } 1649 } else { 1650 if 
(!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || 1651 DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) || 1652 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) 1653 return BPP_INVALID; 1654 else 1655 return DesiredBPP; 1656 } 1657 1658 *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1); 1659 1660 return BPP_INVALID; 1661 } // TruncToValidBPP 1662 1663 double dml32_RequiredDTBCLK( 1664 bool DSCEnable, 1665 double PixelClock, 1666 enum output_format_class OutputFormat, 1667 double OutputBpp, 1668 unsigned int DSCSlices, 1669 unsigned int HTotal, 1670 unsigned int HActive, 1671 unsigned int AudioRate, 1672 unsigned int AudioLayout) 1673 { 1674 double PixelWordRate; 1675 double HCActive; 1676 double HCBlank; 1677 double AverageTribyteRate; 1678 double HActiveTribyteRate; 1679 1680 if (DSCEnable != true) 1681 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); 1682 1683 PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); 1684 HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * 1685 dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); 1686 HCBlank = 64 + 32 * 1687 dml_ceil(AudioRate * (AudioLayout == 1 ? 
1 : 0.25) * HTotal / (PixelClock * 1000), 1); 1688 AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; 1689 HActiveTribyteRate = PixelWordRate * HCActive / HActive; 1690 return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; 1691 } 1692 1693 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, 1694 enum odm_combine_mode ODMMode, 1695 unsigned int DSCInputBitPerComponent, 1696 double OutputBpp, 1697 unsigned int HActive, 1698 unsigned int HTotal, 1699 unsigned int NumberOfDSCSlices, 1700 enum output_format_class OutputFormat, 1701 enum output_encoder_class Output, 1702 double PixelClock, 1703 double PixelClockBackEnd) 1704 { 1705 unsigned int DSCDelayRequirement_val; 1706 1707 if (DSCEnabled == true && OutputBpp != 0) { 1708 if (ODMMode == dm_odm_combine_mode_4to1) { 1709 DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1710 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4, 1711 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); 1712 } else if (ODMMode == dm_odm_combine_mode_2to1) { 1713 DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1714 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2, 1715 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); 1716 } else { 1717 DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1718 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices, 1719 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output); 1720 } 1721 1722 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) * 1723 dml_ceil(DSCDelayRequirement_val / HActive, 1); 1724 1725 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd; 1726 1727 } else { 1728 DSCDelayRequirement_val = 0; 1729 } 1730 1731 #ifdef __DML_VBA_DEBUG__ 1732 dml_print("DML::%s: DSCEnabled = 
%d\n", __func__, DSCEnabled); 1733 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); 1734 dml_print("DML::%s: HActive = %d\n", __func__, HActive); 1735 dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat); 1736 dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent); 1737 dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices); 1738 dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val); 1739 #endif 1740 1741 return DSCDelayRequirement_val; 1742 } 1743 1744 void dml32_CalculateSurfaceSizeInMall( 1745 unsigned int NumberOfActiveSurfaces, 1746 unsigned int MALLAllocatedForDCN, 1747 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], 1748 bool DCCEnable[], 1749 bool ViewportStationary[], 1750 unsigned int ViewportXStartY[], 1751 unsigned int ViewportYStartY[], 1752 unsigned int ViewportXStartC[], 1753 unsigned int ViewportYStartC[], 1754 unsigned int ViewportWidthY[], 1755 unsigned int ViewportHeightY[], 1756 unsigned int BytesPerPixelY[], 1757 unsigned int ViewportWidthC[], 1758 unsigned int ViewportHeightC[], 1759 unsigned int BytesPerPixelC[], 1760 unsigned int SurfaceWidthY[], 1761 unsigned int SurfaceWidthC[], 1762 unsigned int SurfaceHeightY[], 1763 unsigned int SurfaceHeightC[], 1764 unsigned int Read256BytesBlockWidthY[], 1765 unsigned int Read256BytesBlockWidthC[], 1766 unsigned int Read256BytesBlockHeightY[], 1767 unsigned int Read256BytesBlockHeightC[], 1768 unsigned int ReadBlockWidthY[], 1769 unsigned int ReadBlockWidthC[], 1770 unsigned int ReadBlockHeightY[], 1771 unsigned int ReadBlockHeightC[], 1772 1773 /* Output */ 1774 unsigned int SurfaceSizeInMALL[], 1775 bool *ExceededMALLSize) 1776 { 1777 unsigned int TotalSurfaceSizeInMALL = 0; 1778 unsigned int k; 1779 1780 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1781 if (ViewportStationary[k]) { 1782 SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]), 
1783 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1, 1784 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k], 1785 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k], 1786 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] + 1787 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - 1788 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k]; 1789 1790 if (ReadBlockWidthC[k] > 0) { 1791 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1792 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]), 1793 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 1794 ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - 1795 dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) * 1796 dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]), 1797 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 1798 ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - 1799 dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) * 1800 BytesPerPixelC[k]; 1801 } 1802 if (DCCEnable[k] == true) { 1803 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1804 dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]), 1805 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 * 1806 Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) 1807 - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k])) 1808 * dml_min(dml_ceil(SurfaceHeightY[k], 8 * 1809 Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] + 1810 ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 * 1811 Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 1812 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256; 1813 if (Read256BytesBlockWidthC[k] > 0) { 1814 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1815 dml_min(dml_ceil(SurfaceWidthC[k], 8 * 1816 Read256BytesBlockWidthC[k]), 1817 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8 1818 * Read256BytesBlockWidthC[k] - 1, 8 * 1819 Read256BytesBlockWidthC[k]) - 1820 
dml_floor(ViewportXStartC[k], 8 * 1821 Read256BytesBlockWidthC[k])) * 1822 dml_min(dml_ceil(SurfaceHeightC[k], 8 * 1823 Read256BytesBlockHeightC[k]), 1824 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 1825 8 * Read256BytesBlockHeightC[k] - 1, 8 * 1826 Read256BytesBlockHeightC[k]) - 1827 dml_floor(ViewportYStartC[k], 8 * 1828 Read256BytesBlockHeightC[k])) * 1829 BytesPerPixelC[k] / 256; 1830 } 1831 } 1832 } else { 1833 SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 1834 ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * 1835 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 1836 ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * 1837 BytesPerPixelY[k]; 1838 if (ReadBlockWidthC[k] > 0) { 1839 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1840 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 1841 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * 1842 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 1843 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * 1844 BytesPerPixelC[k]; 1845 } 1846 if (DCCEnable[k] == true) { 1847 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1848 dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 * 1849 Read256BytesBlockWidthY[k] - 1), 8 * 1850 Read256BytesBlockWidthY[k]) * 1851 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 * 1852 Read256BytesBlockHeightY[k] - 1), 8 * 1853 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256; 1854 1855 if (Read256BytesBlockWidthC[k] > 0) { 1856 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1857 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 * 1858 Read256BytesBlockWidthC[k] - 1), 8 * 1859 Read256BytesBlockWidthC[k]) * 1860 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 * 1861 Read256BytesBlockHeightC[k] - 1), 8 * 1862 Read256BytesBlockHeightC[k]) * 1863 BytesPerPixelC[k] / 256; 1864 } 1865 } 1866 } 1867 } 1868 1869 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1870 if (UseMALLForStaticScreen[k] == 
dm_use_mall_static_screen_enable) 1871 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; 1872 } 1873 *ExceededMALLSize = (TotalSurfaceSizeInMALL <= MALLAllocatedForDCN * 1024 * 1024 ? false : true); 1874 } // CalculateSurfaceSizeInMall 1875 1876 void dml32_CalculateVMRowAndSwath( 1877 unsigned int NumberOfActiveSurfaces, 1878 DmlPipe myPipe[], 1879 unsigned int SurfaceSizeInMALL[], 1880 unsigned int PTEBufferSizeInRequestsLuma, 1881 unsigned int PTEBufferSizeInRequestsChroma, 1882 unsigned int DCCMetaBufferSizeBytes, 1883 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], 1884 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 1885 unsigned int MALLAllocatedForDCN, 1886 double SwathWidthY[], 1887 double SwathWidthC[], 1888 bool GPUVMEnable, 1889 bool HostVMEnable, 1890 unsigned int HostVMMaxNonCachedPageTableLevels, 1891 unsigned int GPUVMMaxPageTableLevels, 1892 unsigned int GPUVMMinPageSizeKBytes[], 1893 unsigned int HostVMMinPageSize, 1894 1895 /* Output */ 1896 bool PTEBufferSizeNotExceeded[], 1897 bool DCCMetaBufferSizeNotExceeded[], 1898 unsigned int dpte_row_width_luma_ub[], 1899 unsigned int dpte_row_width_chroma_ub[], 1900 unsigned int dpte_row_height_luma[], 1901 unsigned int dpte_row_height_chroma[], 1902 unsigned int dpte_row_height_linear_luma[], // VBA_DELTA 1903 unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA 1904 unsigned int meta_req_width[], 1905 unsigned int meta_req_width_chroma[], 1906 unsigned int meta_req_height[], 1907 unsigned int meta_req_height_chroma[], 1908 unsigned int meta_row_width[], 1909 unsigned int meta_row_width_chroma[], 1910 unsigned int meta_row_height[], 1911 unsigned int meta_row_height_chroma[], 1912 unsigned int vm_group_bytes[], 1913 unsigned int dpte_group_bytes[], 1914 unsigned int PixelPTEReqWidthY[], 1915 unsigned int PixelPTEReqHeightY[], 1916 unsigned int PTERequestSizeY[], 1917 unsigned int PixelPTEReqWidthC[], 1918 unsigned int PixelPTEReqHeightC[], 
1919 unsigned int PTERequestSizeC[], 1920 unsigned int dpde0_bytes_per_frame_ub_l[], 1921 unsigned int meta_pte_bytes_per_frame_ub_l[], 1922 unsigned int dpde0_bytes_per_frame_ub_c[], 1923 unsigned int meta_pte_bytes_per_frame_ub_c[], 1924 double PrefetchSourceLinesY[], 1925 double PrefetchSourceLinesC[], 1926 double VInitPreFillY[], 1927 double VInitPreFillC[], 1928 unsigned int MaxNumSwathY[], 1929 unsigned int MaxNumSwathC[], 1930 double meta_row_bw[], 1931 double dpte_row_bw[], 1932 double PixelPTEBytesPerRow[], 1933 double PDEAndMetaPTEBytesFrame[], 1934 double MetaRowByte[], 1935 bool use_one_row_for_frame[], 1936 bool use_one_row_for_frame_flip[], 1937 bool UsesMALLForStaticScreen[], 1938 bool PTE_BUFFER_MODE[], 1939 unsigned int BIGK_FRAGMENT_SIZE[]) 1940 { 1941 unsigned int k; 1942 unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; 1943 unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; 1944 unsigned int PDEAndMetaPTEBytesFrameY; 1945 unsigned int PDEAndMetaPTEBytesFrameC; 1946 unsigned int MetaRowByteY[DC__NUM_DPP__MAX]; 1947 unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; 1948 unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; 1949 unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; 1950 unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; 1951 unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; 1952 unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; 1953 unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; 1954 unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; 1955 unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; 1956 bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; 1957 1958 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1959 if (HostVMEnable == true) { 1960 vm_group_bytes[k] = 512; 1961 dpte_group_bytes[k] = 512; 1962 } else if (GPUVMEnable == true) { 1963 vm_group_bytes[k] = 2048; 1964 if 
(GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation)) 1965 dpte_group_bytes[k] = 512; 1966 else 1967 dpte_group_bytes[k] = 2048; 1968 } else { 1969 vm_group_bytes[k] = 0; 1970 dpte_group_bytes[k] = 0; 1971 } 1972 1973 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 || 1974 myPipe[k].SourcePixelFormat == dm_420_12 || 1975 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) { 1976 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) && 1977 !IsVertical(myPipe[k].SourceRotation)) { 1978 PTEBufferSizeInRequestsForLuma[k] = 1979 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2; 1980 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k]; 1981 } else { 1982 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; 1983 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; 1984 } 1985 1986 PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( 1987 myPipe[k].ViewportStationary, 1988 myPipe[k].DCCEnable, 1989 myPipe[k].DPPPerSurface, 1990 myPipe[k].BlockHeight256BytesC, 1991 myPipe[k].BlockWidth256BytesC, 1992 myPipe[k].SourcePixelFormat, 1993 myPipe[k].SurfaceTiling, 1994 myPipe[k].BytePerPixelC, 1995 myPipe[k].SourceRotation, 1996 SwathWidthC[k], 1997 myPipe[k].ViewportHeightChroma, 1998 myPipe[k].ViewportXStartC, 1999 myPipe[k].ViewportYStartC, 2000 GPUVMEnable, 2001 HostVMEnable, 2002 HostVMMaxNonCachedPageTableLevels, 2003 GPUVMMaxPageTableLevels, 2004 GPUVMMinPageSizeKBytes[k], 2005 HostVMMinPageSize, 2006 PTEBufferSizeInRequestsForChroma[k], 2007 myPipe[k].PitchC, 2008 myPipe[k].DCCMetaPitchC, 2009 myPipe[k].BlockWidthC, 2010 myPipe[k].BlockHeightC, 2011 2012 /* Output */ 2013 &MetaRowByteC[k], 2014 &PixelPTEBytesPerRowC[k], 2015 &dpte_row_width_chroma_ub[k], 2016 &dpte_row_height_chroma[k], 2017 &dpte_row_height_linear_chroma[k], 2018 &PixelPTEBytesPerRowC_one_row_per_frame[k], 2019 
&dpte_row_width_chroma_ub_one_row_per_frame[k], 2020 &dpte_row_height_chroma_one_row_per_frame[k], 2021 &meta_req_width_chroma[k], 2022 &meta_req_height_chroma[k], 2023 &meta_row_width_chroma[k], 2024 &meta_row_height_chroma[k], 2025 &PixelPTEReqWidthC[k], 2026 &PixelPTEReqHeightC[k], 2027 &PTERequestSizeC[k], 2028 &dpde0_bytes_per_frame_ub_c[k], 2029 &meta_pte_bytes_per_frame_ub_c[k]); 2030 2031 PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines( 2032 myPipe[k].VRatioChroma, 2033 myPipe[k].VTapsChroma, 2034 myPipe[k].InterlaceEnable, 2035 myPipe[k].ProgressiveToInterlaceUnitInOPP, 2036 myPipe[k].SwathHeightC, 2037 myPipe[k].SourceRotation, 2038 myPipe[k].ViewportStationary, 2039 SwathWidthC[k], 2040 myPipe[k].ViewportHeightChroma, 2041 myPipe[k].ViewportXStartC, 2042 myPipe[k].ViewportYStartC, 2043 2044 /* Output */ 2045 &VInitPreFillC[k], 2046 &MaxNumSwathC[k]); 2047 } else { 2048 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; 2049 PTEBufferSizeInRequestsForChroma[k] = 0; 2050 PixelPTEBytesPerRowC[k] = 0; 2051 PDEAndMetaPTEBytesFrameC = 0; 2052 MetaRowByteC[k] = 0; 2053 MaxNumSwathC[k] = 0; 2054 PrefetchSourceLinesC[k] = 0; 2055 dpte_row_height_chroma_one_row_per_frame[k] = 0; 2056 dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; 2057 PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; 2058 } 2059 2060 PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( 2061 myPipe[k].ViewportStationary, 2062 myPipe[k].DCCEnable, 2063 myPipe[k].DPPPerSurface, 2064 myPipe[k].BlockHeight256BytesY, 2065 myPipe[k].BlockWidth256BytesY, 2066 myPipe[k].SourcePixelFormat, 2067 myPipe[k].SurfaceTiling, 2068 myPipe[k].BytePerPixelY, 2069 myPipe[k].SourceRotation, 2070 SwathWidthY[k], 2071 myPipe[k].ViewportHeight, 2072 myPipe[k].ViewportXStart, 2073 myPipe[k].ViewportYStart, 2074 GPUVMEnable, 2075 HostVMEnable, 2076 HostVMMaxNonCachedPageTableLevels, 2077 GPUVMMaxPageTableLevels, 2078 GPUVMMinPageSizeKBytes[k], 2079 
HostVMMinPageSize, 2080 PTEBufferSizeInRequestsForLuma[k], 2081 myPipe[k].PitchY, 2082 myPipe[k].DCCMetaPitchY, 2083 myPipe[k].BlockWidthY, 2084 myPipe[k].BlockHeightY, 2085 2086 /* Output */ 2087 &MetaRowByteY[k], 2088 &PixelPTEBytesPerRowY[k], 2089 &dpte_row_width_luma_ub[k], 2090 &dpte_row_height_luma[k], 2091 &dpte_row_height_linear_luma[k], 2092 &PixelPTEBytesPerRowY_one_row_per_frame[k], 2093 &dpte_row_width_luma_ub_one_row_per_frame[k], 2094 &dpte_row_height_luma_one_row_per_frame[k], 2095 &meta_req_width[k], 2096 &meta_req_height[k], 2097 &meta_row_width[k], 2098 &meta_row_height[k], 2099 &PixelPTEReqWidthY[k], 2100 &PixelPTEReqHeightY[k], 2101 &PTERequestSizeY[k], 2102 &dpde0_bytes_per_frame_ub_l[k], 2103 &meta_pte_bytes_per_frame_ub_l[k]); 2104 2105 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines( 2106 myPipe[k].VRatio, 2107 myPipe[k].VTaps, 2108 myPipe[k].InterlaceEnable, 2109 myPipe[k].ProgressiveToInterlaceUnitInOPP, 2110 myPipe[k].SwathHeightY, 2111 myPipe[k].SourceRotation, 2112 myPipe[k].ViewportStationary, 2113 SwathWidthY[k], 2114 myPipe[k].ViewportHeight, 2115 myPipe[k].ViewportXStart, 2116 myPipe[k].ViewportYStart, 2117 2118 /* Output */ 2119 &VInitPreFillY[k], 2120 &MaxNumSwathY[k]); 2121 2122 PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; 2123 MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k]; 2124 2125 if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] && 2126 PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) { 2127 PTEBufferSizeNotExceeded[k] = true; 2128 } else { 2129 PTEBufferSizeNotExceeded[k] = false; 2130 } 2131 2132 one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * 2133 PTEBufferSizeInRequestsForLuma[k] && 2134 PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]); 2135 } 2136 2137 dml32_CalculateMALLUseForStaticScreen( 2138 NumberOfActiveSurfaces, 2139 
MALLAllocatedForDCN, 2140 UseMALLForStaticScreen, // mode 2141 SurfaceSizeInMALL, 2142 one_row_per_frame_fits_in_buffer, 2143 /* Output */ 2144 UsesMALLForStaticScreen); // boolen 2145 2146 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2147 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || 2148 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || 2149 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || 2150 (GPUVMMinPageSizeKBytes[k] > 64); 2151 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12; 2152 } 2153 2154 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2155 #ifdef __DML_VBA_DEBUG__ 2156 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]); 2157 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); 2158 #endif 2159 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || 2160 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || 2161 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || 2162 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation)); 2163 2164 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] && 2165 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame); 2166 2167 if (use_one_row_for_frame[k]) { 2168 dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k]; 2169 dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k]; 2170 PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k]; 2171 dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k]; 2172 dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k]; 2173 PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k]; 2174 PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k]; 2175 } 2176 2177 if (MetaRowByte[k] 
<= DCCMetaBufferSizeBytes) 2178 DCCMetaBufferSizeNotExceeded[k] = true; 2179 else 2180 DCCMetaBufferSizeNotExceeded[k] = false; 2181 2182 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k]; 2183 if (use_one_row_for_frame[k]) 2184 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2; 2185 2186 dml32_CalculateRowBandwidth( 2187 GPUVMEnable, 2188 myPipe[k].SourcePixelFormat, 2189 myPipe[k].VRatio, 2190 myPipe[k].VRatioChroma, 2191 myPipe[k].DCCEnable, 2192 myPipe[k].HTotal / myPipe[k].PixelClock, 2193 MetaRowByteY[k], MetaRowByteC[k], 2194 meta_row_height[k], 2195 meta_row_height_chroma[k], 2196 PixelPTEBytesPerRowY[k], 2197 PixelPTEBytesPerRowC[k], 2198 dpte_row_height_luma[k], 2199 dpte_row_height_chroma[k], 2200 2201 /* Output */ 2202 &meta_row_bw[k], 2203 &dpte_row_bw[k]); 2204 #ifdef __DML_VBA_DEBUG__ 2205 dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]); 2206 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n", 2207 __func__, k, use_one_row_for_frame_flip[k]); 2208 dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n", 2209 __func__, k, UseMALLForPStateChange[k]); 2210 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]); 2211 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n", 2212 __func__, k, dpte_row_width_luma_ub[k]); 2213 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]); 2214 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n", 2215 __func__, k, dpte_row_height_chroma[k]); 2216 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n", 2217 __func__, k, dpte_row_width_chroma_ub[k]); 2218 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]); 2219 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]); 2220 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n", 2221 __func__, k, 
PTEBufferSizeNotExceeded[k]); 2222 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]); 2223 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]); 2224 #endif 2225 } 2226 } // CalculateVMRowAndSwath 2227 2228 unsigned int dml32_CalculateVMAndRowBytes( 2229 bool ViewportStationary, 2230 bool DCCEnable, 2231 unsigned int NumberOfDPPs, 2232 unsigned int BlockHeight256Bytes, 2233 unsigned int BlockWidth256Bytes, 2234 enum source_format_class SourcePixelFormat, 2235 unsigned int SurfaceTiling, 2236 unsigned int BytePerPixel, 2237 enum dm_rotation_angle SourceRotation, 2238 double SwathWidth, 2239 unsigned int ViewportHeight, 2240 unsigned int ViewportXStart, 2241 unsigned int ViewportYStart, 2242 bool GPUVMEnable, 2243 bool HostVMEnable, 2244 unsigned int HostVMMaxNonCachedPageTableLevels, 2245 unsigned int GPUVMMaxPageTableLevels, 2246 unsigned int GPUVMMinPageSizeKBytes, 2247 unsigned int HostVMMinPageSize, 2248 unsigned int PTEBufferSizeInRequests, 2249 unsigned int Pitch, 2250 unsigned int DCCMetaPitch, 2251 unsigned int MacroTileWidth, 2252 unsigned int MacroTileHeight, 2253 2254 /* Output */ 2255 unsigned int *MetaRowByte, 2256 unsigned int *PixelPTEBytesPerRow, 2257 unsigned int *dpte_row_width_ub, 2258 unsigned int *dpte_row_height, 2259 unsigned int *dpte_row_height_linear, 2260 unsigned int *PixelPTEBytesPerRow_one_row_per_frame, 2261 unsigned int *dpte_row_width_ub_one_row_per_frame, 2262 unsigned int *dpte_row_height_one_row_per_frame, 2263 unsigned int *MetaRequestWidth, 2264 unsigned int *MetaRequestHeight, 2265 unsigned int *meta_row_width, 2266 unsigned int *meta_row_height, 2267 unsigned int *PixelPTEReqWidth, 2268 unsigned int *PixelPTEReqHeight, 2269 unsigned int *PTERequestSize, 2270 unsigned int *DPDE0BytesFrame, 2271 unsigned int *MetaPTEBytesFrame) 2272 { 2273 unsigned int MPDEBytesFrame; 2274 unsigned int DCCMetaSurfaceBytes; 2275 unsigned int ExtraDPDEBytesFrame; 2276 unsigned 
int PDEAndMetaPTEBytesFrame; 2277 unsigned int HostVMDynamicLevels = 0; 2278 unsigned int MacroTileSizeBytes; 2279 unsigned int vp_height_meta_ub; 2280 unsigned int vp_height_dpte_ub; 2281 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this 2282 2283 if (GPUVMEnable == true && HostVMEnable == true) { 2284 if (HostVMMinPageSize < 2048) 2285 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 2286 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) 2287 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 2288 else 2289 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 2290 } 2291 2292 *MetaRequestHeight = 8 * BlockHeight256Bytes; 2293 *MetaRequestWidth = 8 * BlockWidth256Bytes; 2294 if (SurfaceTiling == dm_sw_linear) { 2295 *meta_row_height = 32; 2296 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) 2297 - dml_floor(ViewportXStart, *MetaRequestWidth); 2298 } else if (!IsVertical(SourceRotation)) { 2299 *meta_row_height = *MetaRequestHeight; 2300 if (ViewportStationary && NumberOfDPPs == 1) { 2301 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, 2302 *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth); 2303 } else { 2304 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; 2305 } 2306 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; 2307 } else { 2308 *meta_row_height = *MetaRequestWidth; 2309 if (ViewportStationary && NumberOfDPPs == 1) { 2310 *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, 2311 *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight); 2312 } else { 2313 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; 2314 } 2315 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; 2316 } 2317 2318 if 
(ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { 2319 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, 2320 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes); 2321 } else if (!IsVertical(SourceRotation)) { 2322 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; 2323 } else { 2324 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; 2325 } 2326 2327 DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0; 2328 2329 if (GPUVMEnable == true) { 2330 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / 2331 (8 * 4.0 * 1024), 1) + 1) * 64; 2332 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1); 2333 } else { 2334 *MetaPTEBytesFrame = 0; 2335 MPDEBytesFrame = 0; 2336 } 2337 2338 if (DCCEnable != true) { 2339 *MetaPTEBytesFrame = 0; 2340 MPDEBytesFrame = 0; 2341 *MetaRowByte = 0; 2342 } 2343 2344 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight; 2345 2346 if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) { 2347 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { 2348 vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight + 2349 MacroTileHeight - 1, MacroTileHeight) - 2350 dml_floor(ViewportYStart, MacroTileHeight); 2351 } else if (!IsVertical(SourceRotation)) { 2352 vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight; 2353 } else { 2354 vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight; 2355 } 2356 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / 2357 (8 * 2097152), 1) + 1); 2358 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2); 2359 } else { 2360 *DPDE0BytesFrame = 0; 2361 ExtraDPDEBytesFrame = 0; 2362 vp_height_dpte_ub = 0; 2363 } 2364 2365 
PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; 2366 2367 #ifdef __DML_VBA_DEBUG__ 2368 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); 2369 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 2370 dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear); 2371 dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel); 2372 dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels); 2373 dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes); 2374 dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes); 2375 dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight); 2376 dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth); 2377 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); 2378 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); 2379 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); 2380 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); 2381 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 2382 dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight); 2383 dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth); 2384 dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub); 2385 #endif 2386 2387 if (HostVMEnable == true) 2388 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); 2389 2390 if (SurfaceTiling == dm_sw_linear) { 2391 *PixelPTEReqHeight = 1; 2392 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; 2393 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; 2394 *PTERequestSize = 64; 2395 } else if (GPUVMMinPageSizeKBytes == 4) { 2396 *PixelPTEReqHeight = 16 * BlockHeight256Bytes; 2397 
*PixelPTEReqWidth = 16 * BlockWidth256Bytes; 2398 *PTERequestSize = 128; 2399 } else { 2400 *PixelPTEReqHeight = MacroTileHeight; 2401 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel); 2402 *PTERequestSize = 64; 2403 } 2404 #ifdef __DML_VBA_DEBUG__ 2405 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); 2406 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame); 2407 dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight); 2408 dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth); 2409 dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear); 2410 dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize); 2411 dml_print("DML::%s: Pitch = %d\n", __func__, Pitch); 2412 #endif 2413 2414 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub; 2415 *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame / 2416 (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) * 2417 (double) *PixelPTEReqWidth; 2418 *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth * 2419 *PTERequestSize; 2420 2421 if (SurfaceTiling == dm_sw_linear) { 2422 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2423 *PixelPTEReqWidth / Pitch), 1)); 2424 #ifdef __DML_VBA_DEBUG__ 2425 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__, 2426 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch); 2427 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__, 2428 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch)); 2429 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__, 2430 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); 2431 dml_print("DML::%s: dpte_row_height = %d (4)\n", 
__func__, 2432 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2433 *PixelPTEReqWidth / Pitch), 1)); 2434 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); 2435 #endif 2436 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1), 2437 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth; 2438 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize; 2439 2440 // VBA_DELTA, VBA doesn't have programming value for pte row height linear. 2441 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2442 PixelPTEReqWidth_linear / Pitch), 1); 2443 if (*dpte_row_height_linear > 128) 2444 *dpte_row_height_linear = 128; 2445 2446 } else if (!IsVertical(SourceRotation)) { 2447 *dpte_row_height = *PixelPTEReqHeight; 2448 2449 if (GPUVMMinPageSizeKBytes > 64) { 2450 *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) / 2451 *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 2452 } else if (ViewportStationary && (NumberOfDPPs == 1)) { 2453 *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth + 2454 *PixelPTEReqWidth - 1, *PixelPTEReqWidth) - 2455 dml_floor(ViewportXStart, *PixelPTEReqWidth); 2456 } else { 2457 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * 2458 *PixelPTEReqWidth; 2459 } 2460 2461 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 2462 } else { 2463 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth); 2464 2465 if (ViewportStationary && (NumberOfDPPs == 1)) { 2466 *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, 2467 *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight); 2468 } else { 2469 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) 2470 * *PixelPTEReqHeight; 2471 } 2472 2473 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * 
*PTERequestSize; 2474 } 2475 2476 if (GPUVMEnable != true) 2477 *PixelPTEBytesPerRow = 0; 2478 if (HostVMEnable == true) 2479 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); 2480 2481 #ifdef __DML_VBA_DEBUG__ 2482 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); 2483 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); 2484 dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear); 2485 dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub); 2486 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow); 2487 dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests); 2488 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame); 2489 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n", 2490 __func__, *dpte_row_width_ub_one_row_per_frame); 2491 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n", 2492 __func__, *PixelPTEBytesPerRow_one_row_per_frame); 2493 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", 2494 *MetaPTEBytesFrame); 2495 #endif 2496 2497 return PDEAndMetaPTEBytesFrame; 2498 } // CalculateVMAndRowBytes 2499 2500 double dml32_CalculatePrefetchSourceLines( 2501 double VRatio, 2502 unsigned int VTaps, 2503 bool Interlace, 2504 bool ProgressiveToInterlaceUnitInOPP, 2505 unsigned int SwathHeight, 2506 enum dm_rotation_angle SourceRotation, 2507 bool ViewportStationary, 2508 double SwathWidth, 2509 unsigned int ViewportHeight, 2510 unsigned int ViewportXStart, 2511 unsigned int ViewportYStart, 2512 2513 /* Output */ 2514 double *VInitPreFill, 2515 unsigned int *MaxNumSwath) 2516 { 2517 2518 unsigned int vp_start_rot; 2519 unsigned int sw0_tmp; 2520 unsigned int MaxPartialSwath; 2521 double numLines; 2522 2523 #ifdef __DML_VBA_DEBUG__ 2524 
dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); 2525 dml_print("DML::%s: VTaps = %d\n", __func__, VTaps); 2526 dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart); 2527 dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart); 2528 dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary); 2529 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); 2530 #endif 2531 if (ProgressiveToInterlaceUnitInOPP) 2532 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1); 2533 else 2534 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); 2535 2536 if (ViewportStationary) { 2537 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) { 2538 vp_start_rot = SwathHeight - 2539 (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); 2540 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) { 2541 vp_start_rot = ViewportXStart; 2542 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) { 2543 vp_start_rot = SwathHeight - 2544 (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); 2545 } else { 2546 vp_start_rot = ViewportYStart; 2547 } 2548 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); 2549 if (sw0_tmp < *VInitPreFill) 2550 *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1; 2551 else 2552 *MaxNumSwath = 1; 2553 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight); 2554 } else { 2555 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1; 2556 if (*VInitPreFill > 1) 2557 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight); 2558 else 2559 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight); 2560 } 2561 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; 2562 2563 #ifdef __DML_VBA_DEBUG__ 2564 
dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot); 2565 dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill); 2566 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); 2567 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath); 2568 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); 2569 #endif 2570 return numLines; 2571 2572 } // CalculatePrefetchSourceLines 2573 2574 void dml32_CalculateMALLUseForStaticScreen( 2575 unsigned int NumberOfActiveSurfaces, 2576 unsigned int MALLAllocatedForDCNFinal, 2577 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen, 2578 unsigned int SurfaceSizeInMALL[], 2579 bool one_row_per_frame_fits_in_buffer[], 2580 2581 /* output */ 2582 bool UsesMALLForStaticScreen[]) 2583 { 2584 unsigned int k; 2585 unsigned int SurfaceToAddToMALL; 2586 bool CanAddAnotherSurfaceToMALL; 2587 unsigned int TotalSurfaceSizeInMALL; 2588 2589 TotalSurfaceSizeInMALL = 0; 2590 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2591 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable); 2592 if (UsesMALLForStaticScreen[k]) 2593 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; 2594 #ifdef __DML_VBA_DEBUG__ 2595 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); 2596 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL); 2597 #endif 2598 } 2599 2600 SurfaceToAddToMALL = 0; 2601 CanAddAnotherSurfaceToMALL = true; 2602 while (CanAddAnotherSurfaceToMALL) { 2603 CanAddAnotherSurfaceToMALL = false; 2604 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2605 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 && 2606 !UsesMALLForStaticScreen[k] && 2607 UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable && 2608 one_row_per_frame_fits_in_buffer[k] && 2609 
(!CanAddAnotherSurfaceToMALL || 2610 SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { 2611 CanAddAnotherSurfaceToMALL = true; 2612 SurfaceToAddToMALL = k; 2613 #ifdef __DML_VBA_DEBUG__ 2614 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n", 2615 __func__, k, UseMALLForStaticScreen[k]); 2616 #endif 2617 } 2618 } 2619 if (CanAddAnotherSurfaceToMALL) { 2620 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true; 2621 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; 2622 2623 #ifdef __DML_VBA_DEBUG__ 2624 dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL); 2625 dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL); 2626 #endif 2627 2628 } 2629 } 2630 } 2631 2632 void dml32_CalculateRowBandwidth( 2633 bool GPUVMEnable, 2634 enum source_format_class SourcePixelFormat, 2635 double VRatio, 2636 double VRatioChroma, 2637 bool DCCEnable, 2638 double LineTime, 2639 unsigned int MetaRowByteLuma, 2640 unsigned int MetaRowByteChroma, 2641 unsigned int meta_row_height_luma, 2642 unsigned int meta_row_height_chroma, 2643 unsigned int PixelPTEBytesPerRowLuma, 2644 unsigned int PixelPTEBytesPerRowChroma, 2645 unsigned int dpte_row_height_luma, 2646 unsigned int dpte_row_height_chroma, 2647 /* Output */ 2648 double *meta_row_bw, 2649 double *dpte_row_bw) 2650 { 2651 if (DCCEnable != true) { 2652 *meta_row_bw = 0; 2653 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || 2654 SourcePixelFormat == dm_rgbe_alpha) { 2655 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * 2656 MetaRowByteChroma / (meta_row_height_chroma * LineTime); 2657 } else { 2658 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 2659 } 2660 2661 if (GPUVMEnable != true) { 2662 *dpte_row_bw = 0; 2663 } else if (SourcePixelFormat == dm_420_8 || 
SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || 2664 SourcePixelFormat == dm_rgbe_alpha) { 2665 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + 2666 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); 2667 } else { 2668 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 2669 } 2670 } 2671 2672 double dml32_CalculateUrgentLatency( 2673 double UrgentLatencyPixelDataOnly, 2674 double UrgentLatencyPixelMixedWithVMData, 2675 double UrgentLatencyVMDataOnly, 2676 bool DoUrgentLatencyAdjustment, 2677 double UrgentLatencyAdjustmentFabricClockComponent, 2678 double UrgentLatencyAdjustmentFabricClockReference, 2679 double FabricClock) 2680 { 2681 double ret; 2682 2683 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); 2684 if (DoUrgentLatencyAdjustment == true) { 2685 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * 2686 (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); 2687 } 2688 return ret; 2689 } 2690 2691 void dml32_CalculateUrgentBurstFactor( 2692 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, 2693 unsigned int swath_width_luma_ub, 2694 unsigned int swath_width_chroma_ub, 2695 unsigned int SwathHeightY, 2696 unsigned int SwathHeightC, 2697 double LineTime, 2698 double UrgentLatency, 2699 double CursorBufferSize, 2700 unsigned int CursorWidth, 2701 unsigned int CursorBPP, 2702 double VRatio, 2703 double VRatioC, 2704 double BytePerPixelInDETY, 2705 double BytePerPixelInDETC, 2706 unsigned int DETBufferSizeY, 2707 unsigned int DETBufferSizeC, 2708 /* Output */ 2709 double *UrgentBurstFactorCursor, 2710 double *UrgentBurstFactorLuma, 2711 double *UrgentBurstFactorChroma, 2712 bool *NotEnoughUrgentLatencyHiding) 2713 { 2714 double LinesInDETLuma; 2715 double LinesInDETChroma; 2716 unsigned int LinesInCursorBuffer; 2717 double CursorBufferSizeInTime; 2718 double 
DETBufferSizeInTimeLuma; 2719 double DETBufferSizeInTimeChroma; 2720 2721 *NotEnoughUrgentLatencyHiding = 0; 2722 2723 if (CursorWidth > 0) { 2724 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / 2725 (CursorWidth * CursorBPP / 8.0)), 1.0); 2726 if (VRatio > 0) { 2727 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; 2728 if (CursorBufferSizeInTime - UrgentLatency <= 0) { 2729 *NotEnoughUrgentLatencyHiding = 1; 2730 *UrgentBurstFactorCursor = 0; 2731 } else { 2732 *UrgentBurstFactorCursor = CursorBufferSizeInTime / 2733 (CursorBufferSizeInTime - UrgentLatency); 2734 } 2735 } else { 2736 *UrgentBurstFactorCursor = 1; 2737 } 2738 } 2739 2740 LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 : 2741 DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; 2742 2743 if (VRatio > 0) { 2744 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; 2745 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { 2746 *NotEnoughUrgentLatencyHiding = 1; 2747 *UrgentBurstFactorLuma = 0; 2748 } else { 2749 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); 2750 } 2751 } else { 2752 *UrgentBurstFactorLuma = 1; 2753 } 2754 2755 if (BytePerPixelInDETC > 0) { 2756 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 
2757 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC 2758 / swath_width_chroma_ub; 2759 2760 if (VRatio > 0) { 2761 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; 2762 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { 2763 *NotEnoughUrgentLatencyHiding = 1; 2764 *UrgentBurstFactorChroma = 0; 2765 } else { 2766 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma 2767 / (DETBufferSizeInTimeChroma - UrgentLatency); 2768 } 2769 } else { 2770 *UrgentBurstFactorChroma = 1; 2771 } 2772 } 2773 } // CalculateUrgentBurstFactor 2774 2775 void dml32_CalculateDCFCLKDeepSleep( 2776 unsigned int NumberOfActiveSurfaces, 2777 unsigned int BytePerPixelY[], 2778 unsigned int BytePerPixelC[], 2779 double VRatio[], 2780 double VRatioChroma[], 2781 double SwathWidthY[], 2782 double SwathWidthC[], 2783 unsigned int DPPPerSurface[], 2784 double HRatio[], 2785 double HRatioChroma[], 2786 double PixelClock[], 2787 double PSCL_THROUGHPUT[], 2788 double PSCL_THROUGHPUT_CHROMA[], 2789 double Dppclk[], 2790 double ReadBandwidthLuma[], 2791 double ReadBandwidthChroma[], 2792 unsigned int ReturnBusWidth, 2793 2794 /* Output */ 2795 double *DCFClkDeepSleep) 2796 { 2797 unsigned int k; 2798 double DisplayPipeLineDeliveryTimeLuma; 2799 double DisplayPipeLineDeliveryTimeChroma; 2800 double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX]; 2801 double ReadBandwidth = 0.0; 2802 2803 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2804 2805 if (VRatio[k] <= 1) { 2806 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] 2807 / PixelClock[k]; 2808 } else { 2809 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 2810 } 2811 if (BytePerPixelC[k] == 0) { 2812 DisplayPipeLineDeliveryTimeChroma = 0; 2813 } else { 2814 if (VRatioChroma[k] <= 1) { 2815 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * 2816 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 2817 } else { 2818 
DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] 2819 / Dppclk[k]; 2820 } 2821 } 2822 2823 if (BytePerPixelC[k] > 0) { 2824 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * 2825 BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 2826 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 2827 32.0 / DisplayPipeLineDeliveryTimeChroma); 2828 } else { 2829 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 2830 64.0 / DisplayPipeLineDeliveryTimeLuma; 2831 } 2832 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16); 2833 2834 #ifdef __DML_VBA_DEBUG__ 2835 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]); 2836 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); 2837 #endif 2838 } 2839 2840 for (k = 0; k < NumberOfActiveSurfaces; ++k) 2841 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 2842 2843 *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth); 2844 2845 #ifdef __DML_VBA_DEBUG__ 2846 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__); 2847 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); 2848 dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth); 2849 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); 2850 #endif 2851 2852 for (k = 0; k < NumberOfActiveSurfaces; ++k) 2853 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); 2854 #ifdef __DML_VBA_DEBUG__ 2855 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); 2856 #endif 2857 } // CalculateDCFCLKDeepSleep 2858 2859 double dml32_CalculateWriteBackDelay( 2860 enum source_format_class WritebackPixelFormat, 2861 double WritebackHRatio, 2862 double 
WritebackVRatio, 2863 unsigned int WritebackVTaps, 2864 unsigned int WritebackDestinationWidth, 2865 unsigned int WritebackDestinationHeight, 2866 unsigned int WritebackSourceHeight, 2867 unsigned int HTotal) 2868 { 2869 double CalculateWriteBackDelay; 2870 double Line_length; 2871 double Output_lines_last_notclamped; 2872 double WritebackVInit; 2873 2874 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; 2875 Line_length = dml_max((double) WritebackDestinationWidth, 2876 dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); 2877 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - 2878 dml_ceil(((double)WritebackSourceHeight - 2879 (double) WritebackVInit) / (double)WritebackVRatio, 1.0); 2880 if (Output_lines_last_notclamped < 0) { 2881 CalculateWriteBackDelay = 0; 2882 } else { 2883 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + 2884 (HTotal - WritebackDestinationWidth) + 80; 2885 } 2886 return CalculateWriteBackDelay; 2887 } 2888 2889 void dml32_UseMinimumDCFCLK( 2890 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 2891 bool DRRDisplay[], 2892 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 2893 unsigned int MaxInterDCNTileRepeaters, 2894 unsigned int MaxPrefetchMode, 2895 double DRAMClockChangeLatencyFinal, 2896 double FCLKChangeLatency, 2897 double SREnterPlusExitTime, 2898 unsigned int ReturnBusWidth, 2899 unsigned int RoundTripPingLatencyCycles, 2900 unsigned int ReorderingBytes, 2901 unsigned int PixelChunkSizeInKByte, 2902 unsigned int MetaChunkSize, 2903 bool GPUVMEnable, 2904 unsigned int GPUVMMaxPageTableLevels, 2905 bool HostVMEnable, 2906 unsigned int NumberOfActiveSurfaces, 2907 double HostVMMinPageSize, 2908 unsigned int HostVMMaxNonCachedPageTableLevels, 2909 bool DynamicMetadataVMEnabled, 2910 bool ImmediateFlipRequirement, 2911 bool ProgressiveToInterlaceUnitInOPP, 2912 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation, 2913 double 
PercentOfIdealSDPPortBWReceivedAfterUrgLatency, 2914 unsigned int VTotal[], 2915 unsigned int VActive[], 2916 unsigned int DynamicMetadataTransmittedBytes[], 2917 unsigned int DynamicMetadataLinesBeforeActiveRequired[], 2918 bool Interlace[], 2919 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX], 2920 double RequiredDISPCLK[][2], 2921 double UrgLatency[], 2922 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX], 2923 double ProjectedDCFClkDeepSleep[][2], 2924 double MaximumVStartup[][2][DC__NUM_DPP__MAX], 2925 unsigned int TotalNumberOfActiveDPP[][2], 2926 unsigned int TotalNumberOfDCCActiveDPP[][2], 2927 unsigned int dpte_group_bytes[], 2928 double PrefetchLinesY[][2][DC__NUM_DPP__MAX], 2929 double PrefetchLinesC[][2][DC__NUM_DPP__MAX], 2930 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX], 2931 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX], 2932 unsigned int BytePerPixelY[], 2933 unsigned int BytePerPixelC[], 2934 unsigned int HTotal[], 2935 double PixelClock[], 2936 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX], 2937 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX], 2938 double MetaRowBytes[][2][DC__NUM_DPP__MAX], 2939 bool DynamicMetadataEnable[], 2940 double ReadBandwidthLuma[], 2941 double ReadBandwidthChroma[], 2942 double DCFCLKPerState[], 2943 /* Output */ 2944 double DCFCLKState[][2]) 2945 { 2946 unsigned int i, j, k; 2947 unsigned int dummy1; 2948 double dummy2, dummy3; 2949 double NormalEfficiency; 2950 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; 2951 2952 NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0; 2953 for (i = 0; i < DC__VOLTAGE_STATES; ++i) { 2954 for (j = 0; j <= 1; ++j) { 2955 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; 2956 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; 2957 double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX]; 2958 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; 2959 double MinimumTWait = 
0.0; 2960 double DPTEBandwidth; 2961 double DCFCLKRequiredForAverageBandwidth; 2962 unsigned int ExtraLatencyBytes; 2963 double ExtraLatencyCycles; 2964 double DCFCLKRequiredForPeakBandwidth; 2965 unsigned int NoOfDPPState[DC__NUM_DPP__MAX]; 2966 double MinimumTvmPlus2Tr0; 2967 2968 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; 2969 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2970 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 2971 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] 2972 / (15.75 * HTotal[k] / PixelClock[k]); 2973 } 2974 2975 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) 2976 NoOfDPPState[k] = NoOfDPP[i][j][k]; 2977 2978 DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]; 2979 DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth); 2980 2981 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes, 2982 TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte, 2983 TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable, 2984 NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize, 2985 HostVMMaxNonCachedPageTableLevels); 2986 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ 2987 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth; 2988 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2989 double DCFCLKCyclesRequiredInPrefetch; 2990 double PrefetchTime; 2991 2992 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] 2993 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k] 2994 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] 2995 * BytePerPixelC[k]) / NormalEfficiency 2996 / ReturnBusWidth; 2997 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] 2998 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency 2999 / NormalEfficiency / ReturnBusWidth 3000 * (GPUVMMaxPageTableLevels > 2 ? 
1 : 0) 3001 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency 3002 / ReturnBusWidth 3003 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth 3004 + PixelDCFCLKCyclesRequiredInPrefetch[k]; 3005 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) 3006 * HTotal[k] / PixelClock[k]; 3007 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true && 3008 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ? 3009 UrgLatency[i] * GPUVMMaxPageTableLevels * 3010 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; 3011 3012 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode, 3013 UseMALLForPStateChange[k], 3014 SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 3015 DRRDisplay[k], 3016 DRAMClockChangeLatencyFinal, 3017 FCLKChangeLatency, 3018 UrgLatency[i], 3019 SREnterPlusExitTime); 3020 3021 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - 3022 MinimumTWait - UrgLatency[i] * 3023 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : 3024 GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ? 
3025 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - 3026 DynamicMetadataVMExtraLatency[k]; 3027 3028 if (PrefetchTime > 0) { 3029 double ExpectedVRatioPrefetch; 3030 3031 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * 3032 PixelDCFCLKCyclesRequiredInPrefetch[k] / 3033 DCFCLKCyclesRequiredInPrefetch); 3034 DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] * 3035 PixelDCFCLKCyclesRequiredInPrefetch[k] / 3036 PrefetchPixelLinesTime[k] * 3037 dml_max(1.0, ExpectedVRatioPrefetch) * 3038 dml_max(1.0, ExpectedVRatioPrefetch / 4); 3039 if (HostVMEnable == true || ImmediateFlipRequirement == true) { 3040 DCFCLKRequiredForPeakBandwidthPerSurface[k] = 3041 DCFCLKRequiredForPeakBandwidthPerSurface[k] + 3042 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / 3043 NormalEfficiency / ReturnBusWidth; 3044 } 3045 } else { 3046 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; 3047 } 3048 if (DynamicMetadataEnable[k] == true) { 3049 double TSetupPipe; 3050 double TdmbfPipe; 3051 double TdmsksPipe; 3052 double TdmecPipe; 3053 double AllowedTimeForUrgentExtraLatency; 3054 3055 dml32_CalculateVUpdateAndDynamicMetadataParameters( 3056 MaxInterDCNTileRepeaters, 3057 RequiredDPPCLKPerSurface[i][j][k], 3058 RequiredDISPCLK[i][j], 3059 ProjectedDCFClkDeepSleep[i][j], 3060 PixelClock[k], 3061 HTotal[k], 3062 VTotal[k] - VActive[k], 3063 DynamicMetadataTransmittedBytes[k], 3064 DynamicMetadataLinesBeforeActiveRequired[k], 3065 Interlace[k], 3066 ProgressiveToInterlaceUnitInOPP, 3067 3068 /* output */ 3069 &TSetupPipe, 3070 &TdmbfPipe, 3071 &TdmecPipe, 3072 &TdmsksPipe, 3073 &dummy1, 3074 &dummy2, 3075 &dummy3); 3076 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / 3077 PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - 3078 TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; 3079 if (AllowedTimeForUrgentExtraLatency > 0) 3080 DCFCLKRequiredForPeakBandwidthPerSurface[k] = 3081 
dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k], 3082 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); 3083 else 3084 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; 3085 } 3086 } 3087 DCFCLKRequiredForPeakBandwidth = 0; 3088 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) { 3089 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + 3090 DCFCLKRequiredForPeakBandwidthPerSurface[k]; 3091 } 3092 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ? 3093 (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) * 3094 (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0); 3095 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3096 double MaximumTvmPlus2Tr0PlusTsw; 3097 3098 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / 3099 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; 3100 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { 3101 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i]; 3102 } else { 3103 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 3104 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - 3105 MinimumTvmPlus2Tr0 - 3106 PrefetchPixelLinesTime[k] / 4), 3107 (2 * ExtraLatencyCycles + 3108 PixelDCFCLKCyclesRequiredInPrefetch[k]) / 3109 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); 3110 } 3111 } 3112 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * 3113 dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); 3114 } 3115 } 3116 } 3117 3118 unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes, 3119 unsigned int TotalNumberOfActiveDPP, 3120 unsigned int PixelChunkSizeInKByte, 3121 unsigned int TotalNumberOfDCCActiveDPP, 3122 unsigned int MetaChunkSize, 3123 bool GPUVMEnable, 3124 bool HostVMEnable, 3125 unsigned int NumberOfActiveSurfaces, 3126 unsigned int NumberOfDPP[], 3127 unsigned int dpte_group_bytes[], 3128 double 
HostVMInefficiencyFactor, 3129 double HostVMMinPageSize, 3130 unsigned int HostVMMaxNonCachedPageTableLevels) 3131 { 3132 unsigned int k; 3133 double ret; 3134 unsigned int HostVMDynamicLevels; 3135 3136 if (GPUVMEnable == true && HostVMEnable == true) { 3137 if (HostVMMinPageSize < 2048) 3138 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 3139 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) 3140 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 3141 else 3142 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 3143 } else { 3144 HostVMDynamicLevels = 0; 3145 } 3146 3147 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + 3148 TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; 3149 3150 if (GPUVMEnable == true) { 3151 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3152 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * 3153 (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; 3154 } 3155 } 3156 return ret; 3157 } 3158 3159 void dml32_CalculateVUpdateAndDynamicMetadataParameters( 3160 unsigned int MaxInterDCNTileRepeaters, 3161 double Dppclk, 3162 double Dispclk, 3163 double DCFClkDeepSleep, 3164 double PixelClock, 3165 unsigned int HTotal, 3166 unsigned int VBlank, 3167 unsigned int DynamicMetadataTransmittedBytes, 3168 unsigned int DynamicMetadataLinesBeforeActiveRequired, 3169 unsigned int InterlaceEnable, 3170 bool ProgressiveToInterlaceUnitInOPP, 3171 3172 /* output */ 3173 double *TSetup, 3174 double *Tdmbf, 3175 double *Tdmec, 3176 double *Tdmsks, 3177 unsigned int *VUpdateOffsetPix, 3178 double *VUpdateWidthPix, 3179 double *VReadyOffsetPix) 3180 { 3181 double TotalRepeaterDelayTime; 3182 3183 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk); 3184 *VUpdateWidthPix = 3185 dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0); 3186 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / 
Dppclk, 3187 TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0); 3188 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0); 3189 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; 3190 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk; 3191 *Tdmec = HTotal / PixelClock; 3192 3193 if (DynamicMetadataLinesBeforeActiveRequired == 0) 3194 *Tdmsks = VBlank * HTotal / PixelClock / 2.0; 3195 else 3196 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; 3197 3198 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) 3199 *Tdmsks = *Tdmsks / 2; 3200 #ifdef __DML_VBA_DEBUG__ 3201 dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix); 3202 dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix); 3203 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix); 3204 3205 dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n", 3206 __func__, DynamicMetadataLinesBeforeActiveRequired); 3207 dml_print("DML::%s: VBlank = %d\n", __func__, VBlank); 3208 dml_print("DML::%s: HTotal = %d\n", __func__, HTotal); 3209 dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock); 3210 dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); 3211 #endif 3212 } 3213 3214 double dml32_CalculateTWait( 3215 unsigned int PrefetchMode, 3216 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, 3217 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 3218 bool DRRDisplay, 3219 double DRAMClockChangeLatency, 3220 double FCLKChangeLatency, 3221 double UrgentLatency, 3222 double SREnterPlusExitTime) 3223 { 3224 double TWait = 0.0; 3225 3226 if (PrefetchMode == 0 && 3227 !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) && 3228 !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) && 3229 !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) && 3230 
!(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) { 3231 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); 3232 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) { 3233 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); 3234 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) { 3235 TWait = dml_max(SREnterPlusExitTime, UrgentLatency); 3236 } else { 3237 TWait = UrgentLatency; 3238 } 3239 3240 #ifdef __DML_VBA_DEBUG__ 3241 dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode); 3242 dml_print("DML::%s: TWait = %f\n", __func__, TWait); 3243 #endif 3244 return TWait; 3245 } // CalculateTWait 3246 3247 // Function: get_return_bw_mbps 3248 // Megabyte per second 3249 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc, 3250 const int VoltageLevel, 3251 const bool HostVMEnable, 3252 const double DCFCLK, 3253 const double FabricClock, 3254 const double DRAMSpeed) 3255 { 3256 double ReturnBW = 0.; 3257 double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK; 3258 double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes; 3259 double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes; 3260 double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, 3261 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, 3262 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe : 3263 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100); 3264 double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, 3265 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, 3266 IdealDRAMBandwidth * (VoltageLevel < 2 ? 
soc->pct_ideal_dram_bw_after_urgent_strobe : 3267 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100); 3268 3269 if (HostVMEnable != true) 3270 ReturnBW = PixelDataOnlyReturnBW; 3271 else 3272 ReturnBW = PixelMixedWithVMDataReturnBW; 3273 3274 #ifdef __DML_VBA_DEBUG__ 3275 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel); 3276 dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable); 3277 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 3278 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); 3279 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); 3280 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth); 3281 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth); 3282 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth); 3283 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW); 3284 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW); 3285 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW); 3286 #endif 3287 return ReturnBW; 3288 } 3289 3290 // Function: get_return_bw_mbps_vm_only 3291 // Megabyte per second 3292 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc, 3293 const int VoltageLevel, 3294 const double DCFCLK, 3295 const double FabricClock, 3296 const double DRAMSpeed) 3297 { 3298 double VMDataOnlyReturnBW = dml_min3( 3299 soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0, 3300 FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes 3301 * soc->pct_ideal_sdp_bw_after_urgent / 100.0, 3302 DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes 3303 * (VoltageLevel < 2 ? 
3304 soc->pct_ideal_dram_bw_after_urgent_strobe : 3305 soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0); 3306 #ifdef __DML_VBA_DEBUG__ 3307 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel); 3308 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 3309 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); 3310 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); 3311 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); 3312 #endif 3313 return VMDataOnlyReturnBW; 3314 } 3315 3316 double dml32_CalculateExtraLatency( 3317 unsigned int RoundTripPingLatencyCycles, 3318 unsigned int ReorderingBytes, 3319 double DCFCLK, 3320 unsigned int TotalNumberOfActiveDPP, 3321 unsigned int PixelChunkSizeInKByte, 3322 unsigned int TotalNumberOfDCCActiveDPP, 3323 unsigned int MetaChunkSize, 3324 double ReturnBW, 3325 bool GPUVMEnable, 3326 bool HostVMEnable, 3327 unsigned int NumberOfActiveSurfaces, 3328 unsigned int NumberOfDPP[], 3329 unsigned int dpte_group_bytes[], 3330 double HostVMInefficiencyFactor, 3331 double HostVMMinPageSize, 3332 unsigned int HostVMMaxNonCachedPageTableLevels) 3333 { 3334 double ExtraLatencyBytes; 3335 double ExtraLatency; 3336 3337 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes( 3338 ReorderingBytes, 3339 TotalNumberOfActiveDPP, 3340 PixelChunkSizeInKByte, 3341 TotalNumberOfDCCActiveDPP, 3342 MetaChunkSize, 3343 GPUVMEnable, 3344 HostVMEnable, 3345 NumberOfActiveSurfaces, 3346 NumberOfDPP, 3347 dpte_group_bytes, 3348 HostVMInefficiencyFactor, 3349 HostVMMinPageSize, 3350 HostVMMaxNonCachedPageTableLevels); 3351 3352 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; 3353 3354 #ifdef __DML_VBA_DEBUG__ 3355 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); 3356 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); 3357 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, 
ExtraLatencyBytes); 3358 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); 3359 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); 3360 #endif 3361 3362 return ExtraLatency; 3363 } // CalculateExtraLatency 3364 3365 bool dml32_CalculatePrefetchSchedule( 3366 double HostVMInefficiencyFactor, 3367 DmlPipe *myPipe, 3368 unsigned int DSCDelay, 3369 double DPPCLKDelaySubtotalPlusCNVCFormater, 3370 double DPPCLKDelaySCL, 3371 double DPPCLKDelaySCLLBOnly, 3372 double DPPCLKDelayCNVCCursor, 3373 double DISPCLKDelaySubtotal, 3374 unsigned int DPP_RECOUT_WIDTH, 3375 enum output_format_class OutputFormat, 3376 unsigned int MaxInterDCNTileRepeaters, 3377 unsigned int VStartup, 3378 unsigned int MaxVStartup, 3379 unsigned int GPUVMPageTableLevels, 3380 bool GPUVMEnable, 3381 bool HostVMEnable, 3382 unsigned int HostVMMaxNonCachedPageTableLevels, 3383 double HostVMMinPageSize, 3384 bool DynamicMetadataEnable, 3385 bool DynamicMetadataVMEnabled, 3386 int DynamicMetadataLinesBeforeActiveRequired, 3387 unsigned int DynamicMetadataTransmittedBytes, 3388 double UrgentLatency, 3389 double UrgentExtraLatency, 3390 double TCalc, 3391 unsigned int PDEAndMetaPTEBytesFrame, 3392 unsigned int MetaRowByte, 3393 unsigned int PixelPTEBytesPerRow, 3394 double PrefetchSourceLinesY, 3395 unsigned int SwathWidthY, 3396 unsigned int VInitPreFillY, 3397 unsigned int MaxNumSwathY, 3398 double PrefetchSourceLinesC, 3399 unsigned int SwathWidthC, 3400 unsigned int VInitPreFillC, 3401 unsigned int MaxNumSwathC, 3402 unsigned int swath_width_luma_ub, 3403 unsigned int swath_width_chroma_ub, 3404 unsigned int SwathHeightY, 3405 unsigned int SwathHeightC, 3406 double TWait, 3407 /* Output */ 3408 double *DSTXAfterScaler, 3409 double *DSTYAfterScaler, 3410 double *DestinationLinesForPrefetch, 3411 double *PrefetchBandwidth, 3412 double *DestinationLinesToRequestVMInVBlank, 3413 double *DestinationLinesToRequestRowInVBlank, 3414 double *VRatioPrefetchY, 3415 double *VRatioPrefetchC, 3416 
double *RequiredPrefetchPixDataBWLuma, 3417 double *RequiredPrefetchPixDataBWChroma, 3418 bool *NotEnoughTimeForDynamicMetadata, 3419 double *Tno_bw, 3420 double *prefetch_vmrow_bw, 3421 double *Tdmdl_vm, 3422 double *Tdmdl, 3423 double *TSetup, 3424 unsigned int *VUpdateOffsetPix, 3425 double *VUpdateWidthPix, 3426 double *VReadyOffsetPix) 3427 { 3428 bool MyError = false; 3429 unsigned int DPPCycles, DISPCLKCycles; 3430 double DSTTotalPixelsAfterScaler; 3431 double LineTime; 3432 double dst_y_prefetch_equ; 3433 double prefetch_bw_oto; 3434 double Tvm_oto; 3435 double Tr0_oto; 3436 double Tvm_oto_lines; 3437 double Tr0_oto_lines; 3438 double dst_y_prefetch_oto; 3439 double TimeForFetchingMetaPTE = 0; 3440 double TimeForFetchingRowInVBlank = 0; 3441 double LinesToRequestPrefetchPixelData = 0; 3442 unsigned int HostVMDynamicLevelsTrips; 3443 double trip_to_mem; 3444 double Tvm_trips; 3445 double Tr0_trips; 3446 double Tvm_trips_rounded; 3447 double Tr0_trips_rounded; 3448 double Lsw_oto; 3449 double Tpre_rounded; 3450 double prefetch_bw_equ; 3451 double Tvm_equ; 3452 double Tr0_equ; 3453 double Tdmbf; 3454 double Tdmec; 3455 double Tdmsks; 3456 double prefetch_sw_bytes; 3457 double bytes_pp; 3458 double dep_bytes; 3459 unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__; 3460 double min_Lsw; 3461 double Tsw_est1 = 0; 3462 double Tsw_est3 = 0; 3463 3464 if (GPUVMEnable == true && HostVMEnable == true) 3465 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 3466 else 3467 HostVMDynamicLevelsTrips = 0; 3468 #ifdef __DML_VBA_DEBUG__ 3469 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 3470 dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels); 3471 dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable); 3472 dml_print("DML::%s: HostVMEnable=%d HostVMInefficiencyFactor=%f\n", 3473 __func__, HostVMEnable, HostVMInefficiencyFactor); 3474 #endif 3475 dml32_CalculateVUpdateAndDynamicMetadataParameters( 
3476 MaxInterDCNTileRepeaters, 3477 myPipe->Dppclk, 3478 myPipe->Dispclk, 3479 myPipe->DCFClkDeepSleep, 3480 myPipe->PixelClock, 3481 myPipe->HTotal, 3482 myPipe->VBlank, 3483 DynamicMetadataTransmittedBytes, 3484 DynamicMetadataLinesBeforeActiveRequired, 3485 myPipe->InterlaceEnable, 3486 myPipe->ProgressiveToInterlaceUnitInOPP, 3487 TSetup, 3488 3489 /* output */ 3490 &Tdmbf, 3491 &Tdmec, 3492 &Tdmsks, 3493 VUpdateOffsetPix, 3494 VUpdateWidthPix, 3495 VReadyOffsetPix); 3496 3497 LineTime = myPipe->HTotal / myPipe->PixelClock; 3498 trip_to_mem = UrgentLatency; 3499 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 3500 3501 if (DynamicMetadataVMEnabled == true) 3502 *Tdmdl = TWait + Tvm_trips + trip_to_mem; 3503 else 3504 *Tdmdl = TWait + UrgentExtraLatency; 3505 3506 #ifdef __DML_VBA_ALLOW_DELTA__ 3507 if (DynamicMetadataEnable == false) 3508 *Tdmdl = 0.0; 3509 #endif 3510 3511 if (DynamicMetadataEnable == true) { 3512 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 3513 *NotEnoughTimeForDynamicMetadata = true; 3514 #ifdef __DML_VBA_DEBUG__ 3515 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 3516 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", 3517 __func__, Tdmbf); 3518 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 3519 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", 3520 __func__, Tdmsks); 3521 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", 3522 __func__, *Tdmdl); 3523 #endif 3524 } else { 3525 *NotEnoughTimeForDynamicMetadata = false; 3526 } 3527 } else { 3528 *NotEnoughTimeForDynamicMetadata = false; 3529 } 3530 3531 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && 3532 GPUVMEnable == true ? 
TWait + Tvm_trips : 0); 3533 3534 if (myPipe->ScalerEnabled) 3535 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; 3536 else 3537 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; 3538 3539 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; 3540 3541 DISPCLKCycles = DISPCLKDelaySubtotal; 3542 3543 if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) 3544 return true; 3545 3546 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles * 3547 myPipe->PixelClock / myPipe->Dispclk + DSCDelay; 3548 3549 *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0) 3550 + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH 3551 + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ? 3552 myPipe->HActive / 2 : 0) 3553 + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0); 3554 3555 #ifdef __DML_VBA_DEBUG__ 3556 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 3557 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 3558 dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk); 3559 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 3560 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk); 3561 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 3562 dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode); 3563 dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH); 3564 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); 3565 #endif 3566 3567 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) 3568 *DSTYAfterScaler = 1; 3569 else 3570 *DSTYAfterScaler = 0; 3571 3572 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 3573 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 3574 
*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 3575 #ifdef __DML_VBA_DEBUG__ 3576 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 3577 dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler); 3578 #endif 3579 3580 MyError = false; 3581 3582 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 3583 3584 if (GPUVMEnable == true) { 3585 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime; 3586 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; 3587 if (GPUVMPageTableLevels >= 3) { 3588 *Tno_bw = UrgentExtraLatency + trip_to_mem * 3589 (double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); 3590 } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) { 3591 Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) / 3592 4.0 * LineTime; // VBA_ERROR 3593 *Tno_bw = UrgentExtraLatency; 3594 } else { 3595 *Tno_bw = 0; 3596 } 3597 } else if (myPipe->DCCEnable == true) { 3598 Tvm_trips_rounded = LineTime / 4.0; 3599 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; 3600 *Tno_bw = 0; 3601 } else { 3602 Tvm_trips_rounded = LineTime / 4.0; 3603 Tr0_trips_rounded = LineTime / 2.0; 3604 *Tno_bw = 0; 3605 } 3606 Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0); 3607 Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0); 3608 3609 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 3610 || myPipe->SourcePixelFormat == dm_420_12) { 3611 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 3612 } else { 3613 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 3614 } 3615 3616 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY 3617 + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 3618 prefetch_bw_oto = dml_max(bytes_pp * 
myPipe->PixelClock / myPipe->DPPPerSurface, 3619 prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); 3620 3621 min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre; 3622 min_Lsw = dml_max(min_Lsw, 1.0); 3623 Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0; 3624 3625 if (GPUVMEnable == true) { 3626 Tvm_oto = dml_max3( 3627 Tvm_trips, 3628 *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, 3629 LineTime / 4.0); 3630 } else 3631 Tvm_oto = LineTime / 4.0; 3632 3633 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 3634 Tr0_oto = dml_max4( 3635 Tr0_trips, 3636 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, 3637 (LineTime - Tvm_oto)/2.0, 3638 LineTime / 4.0); 3639 #ifdef __DML_VBA_DEBUG__ 3640 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, 3641 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto); 3642 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips); 3643 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto); 3644 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4); 3645 #endif 3646 } else 3647 Tr0_oto = (LineTime - Tvm_oto) / 2.0; 3648 3649 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 3650 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 3651 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 3652 3653 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - 3654 (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); 3655 3656 #ifdef __DML_VBA_DEBUG__ 3657 dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); 3658 dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw); 3659 dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw); 3660 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, 
UrgentExtraLatency); 3661 dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem); 3662 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3663 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3664 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3665 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC); 3666 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 3667 dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub); 3668 dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes); 3669 dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp); 3670 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 3671 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 3672 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 3673 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 3674 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 3675 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 3676 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); 3677 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 3678 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 3679 dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines); 3680 dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines); 3681 dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto); 3682 dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto); 3683 dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ); 3684 #endif 3685 3686 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; 3687 Tpre_rounded = dst_y_prefetch_equ * LineTime; 3688 #ifdef __DML_VBA_DEBUG__ 3689 
dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ); 3690 dml_print("DML::%s: LineTime: %f\n", __func__, LineTime); 3691 dml_print("DML::%s: VStartup: %d\n", __func__, VStartup); 3692 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", 3693 __func__, VStartup * LineTime); 3694 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup); 3695 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc); 3696 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 3697 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 3698 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm); 3699 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); 3700 dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n", 3701 __func__, *DSTYAfterScaler); 3702 #endif 3703 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, 3704 MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 3705 3706 if (prefetch_sw_bytes < dep_bytes) 3707 prefetch_sw_bytes = 2 * dep_bytes; 3708 3709 *PrefetchBandwidth = 0; 3710 *DestinationLinesToRequestVMInVBlank = 0; 3711 *DestinationLinesToRequestRowInVBlank = 0; 3712 *VRatioPrefetchY = 0; 3713 *VRatioPrefetchC = 0; 3714 *RequiredPrefetchPixDataBWLuma = 0; 3715 if (dst_y_prefetch_equ > 1) { 3716 double PrefetchBandwidth1; 3717 double PrefetchBandwidth2; 3718 double PrefetchBandwidth3; 3719 double PrefetchBandwidth4; 3720 3721 if (Tpre_rounded - *Tno_bw > 0) { 3722 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3723 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3724 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); 3725 
Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 3726 } else 3727 PrefetchBandwidth1 = 0; 3728 3729 if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw) 3730 && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 3731 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3732 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3733 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 3734 } 3735 3736 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 3737 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / 3738 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 3739 else 3740 PrefetchBandwidth2 = 0; 3741 3742 if (Tpre_rounded - Tvm_trips_rounded > 0) { 3743 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3744 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 3745 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 3746 } else 3747 PrefetchBandwidth3 = 0; 3748 3749 3750 if (VStartup == MaxVStartup && 3751 (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 * 3752 LineTime - Tvm_trips_rounded > 0) { 3753 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3754 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 3755 } 3756 3757 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) { 3758 PrefetchBandwidth4 = prefetch_sw_bytes / 3759 (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 3760 } else { 3761 PrefetchBandwidth4 = 0; 3762 } 3763 3764 #ifdef __DML_VBA_DEBUG__ 3765 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 3766 dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw); 3767 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 3768 dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1); 3769 dml_print("DML::%s: Tsw_est3: %f\n", 
__func__, Tsw_est3); 3770 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1); 3771 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2); 3772 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); 3773 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4); 3774 #endif 3775 { 3776 bool Case1OK; 3777 bool Case2OK; 3778 bool Case3OK; 3779 3780 if (PrefetchBandwidth1 > 0) { 3781 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 3782 >= Tvm_trips_rounded 3783 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3784 / PrefetchBandwidth1 >= Tr0_trips_rounded) { 3785 Case1OK = true; 3786 } else { 3787 Case1OK = false; 3788 } 3789 } else { 3790 Case1OK = false; 3791 } 3792 3793 if (PrefetchBandwidth2 > 0) { 3794 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 3795 >= Tvm_trips_rounded 3796 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3797 / PrefetchBandwidth2 < Tr0_trips_rounded) { 3798 Case2OK = true; 3799 } else { 3800 Case2OK = false; 3801 } 3802 } else { 3803 Case2OK = false; 3804 } 3805 3806 if (PrefetchBandwidth3 > 0) { 3807 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < 3808 Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * 3809 HostVMInefficiencyFactor) / PrefetchBandwidth3 >= 3810 Tr0_trips_rounded) { 3811 Case3OK = true; 3812 } else { 3813 Case3OK = false; 3814 } 3815 } else { 3816 Case3OK = false; 3817 } 3818 3819 if (Case1OK) 3820 prefetch_bw_equ = PrefetchBandwidth1; 3821 else if (Case2OK) 3822 prefetch_bw_equ = PrefetchBandwidth2; 3823 else if (Case3OK) 3824 prefetch_bw_equ = PrefetchBandwidth3; 3825 else 3826 prefetch_bw_equ = PrefetchBandwidth4; 3827 3828 #ifdef __DML_VBA_DEBUG__ 3829 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 3830 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 3831 
dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 3832 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 3833 #endif 3834 3835 if (prefetch_bw_equ > 0) { 3836 if (GPUVMEnable == true) { 3837 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * 3838 HostVMInefficiencyFactor / prefetch_bw_equ, 3839 Tvm_trips, LineTime / 4); 3840 } else { 3841 Tvm_equ = LineTime / 4; 3842 } 3843 3844 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 3845 Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * 3846 HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, 3847 (LineTime - Tvm_equ) / 2, LineTime / 4); 3848 } else { 3849 Tr0_equ = (LineTime - Tvm_equ) / 2; 3850 } 3851 } else { 3852 Tvm_equ = 0; 3853 Tr0_equ = 0; 3854 #ifdef __DML_VBA_DEBUG__ 3855 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__); 3856 #endif 3857 } 3858 } 3859 3860 if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 3861 *DestinationLinesForPrefetch = dst_y_prefetch_oto; 3862 TimeForFetchingMetaPTE = Tvm_oto; 3863 TimeForFetchingRowInVBlank = Tr0_oto; 3864 *PrefetchBandwidth = prefetch_bw_oto; 3865 } else { 3866 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 3867 TimeForFetchingMetaPTE = Tvm_equ; 3868 TimeForFetchingRowInVBlank = Tr0_equ; 3869 *PrefetchBandwidth = prefetch_bw_equ; 3870 } 3871 3872 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 3873 3874 *DestinationLinesToRequestRowInVBlank = 3875 dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 3876 3877 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - 3878 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 3879 3880 #ifdef __DML_VBA_DEBUG__ 3881 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 3882 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 3883 __func__, *DestinationLinesToRequestVMInVBlank); 3884 
dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 3885 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 3886 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", 3887 __func__, *DestinationLinesToRequestRowInVBlank); 3888 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3889 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 3890 #endif 3891 3892 if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) { 3893 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 3894 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 3895 #ifdef __DML_VBA_DEBUG__ 3896 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 3897 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); 3898 dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY); 3899 #endif 3900 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 3901 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 3902 *VRatioPrefetchY = 3903 dml_max((double) PrefetchSourceLinesY / 3904 LinesToRequestPrefetchPixelData, 3905 (double) MaxNumSwathY * SwathHeightY / 3906 (LinesToRequestPrefetchPixelData - 3907 (VInitPreFillY - 3.0) / 2.0)); 3908 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 3909 } else { 3910 MyError = true; 3911 *VRatioPrefetchY = 0; 3912 } 3913 #ifdef __DML_VBA_DEBUG__ 3914 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 3915 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3916 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); 3917 #endif 3918 } 3919 3920 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 3921 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3922 3923 #ifdef __DML_VBA_DEBUG__ 3924 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, 
*VRatioPrefetchC); 3925 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); 3926 dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC); 3927 #endif 3928 if ((SwathHeightC > 4)) { 3929 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 3930 *VRatioPrefetchC = 3931 dml_max(*VRatioPrefetchC, 3932 (double) MaxNumSwathC * SwathHeightC / 3933 (LinesToRequestPrefetchPixelData - 3934 (VInitPreFillC - 3.0) / 2.0)); 3935 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3936 } else { 3937 MyError = true; 3938 *VRatioPrefetchC = 0; 3939 } 3940 #ifdef __DML_VBA_DEBUG__ 3941 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 3942 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 3943 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); 3944 #endif 3945 } 3946 3947 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY 3948 / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub 3949 / LineTime; 3950 3951 #ifdef __DML_VBA_DEBUG__ 3952 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3953 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3954 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 3955 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", 3956 __func__, *RequiredPrefetchPixDataBWLuma); 3957 #endif 3958 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / 3959 LinesToRequestPrefetchPixelData 3960 * myPipe->BytePerPixelC 3961 * swath_width_chroma_ub / LineTime; 3962 } else { 3963 MyError = true; 3964 #ifdef __DML_VBA_DEBUG__ 3965 dml_print("DML:%s: MyErr set. 
LinesToRequestPrefetchPixelData: %f, should be > 0\n", 3966 __func__, LinesToRequestPrefetchPixelData); 3967 #endif 3968 *VRatioPrefetchY = 0; 3969 *VRatioPrefetchC = 0; 3970 *RequiredPrefetchPixDataBWLuma = 0; 3971 *RequiredPrefetchPixDataBWChroma = 0; 3972 } 3973 #ifdef __DML_VBA_DEBUG__ 3974 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 3975 (double)LinesToRequestPrefetchPixelData * LineTime + 3976 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 3977 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 3978 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", 3979 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); 3980 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 3981 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - 3982 TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler + 3983 ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); 3984 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", 3985 PixelPTEBytesPerRow); 3986 #endif 3987 } else { 3988 MyError = true; 3989 #ifdef __DML_VBA_DEBUG__ 3990 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", 3991 __func__, dst_y_prefetch_equ); 3992 #endif 3993 } 3994 3995 { 3996 double prefetch_vm_bw; 3997 double prefetch_row_bw; 3998 3999 if (PDEAndMetaPTEBytesFrame == 0) { 4000 prefetch_vm_bw = 0; 4001 } else if (*DestinationLinesToRequestVMInVBlank > 0) { 4002 #ifdef __DML_VBA_DEBUG__ 4003 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 4004 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 4005 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 4006 __func__, 
*DestinationLinesToRequestVMInVBlank); 4007 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 4008 #endif 4009 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / 4010 (*DestinationLinesToRequestVMInVBlank * LineTime); 4011 #ifdef __DML_VBA_DEBUG__ 4012 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 4013 #endif 4014 } else { 4015 prefetch_vm_bw = 0; 4016 MyError = true; 4017 #ifdef __DML_VBA_DEBUG__ 4018 dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", 4019 __func__, *DestinationLinesToRequestVMInVBlank); 4020 #endif 4021 } 4022 4023 if (MetaRowByte + PixelPTEBytesPerRow == 0) { 4024 prefetch_row_bw = 0; 4025 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 4026 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / 4027 (*DestinationLinesToRequestRowInVBlank * LineTime); 4028 4029 #ifdef __DML_VBA_DEBUG__ 4030 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 4031 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 4032 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", 4033 __func__, *DestinationLinesToRequestRowInVBlank); 4034 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 4035 #endif 4036 } else { 4037 prefetch_row_bw = 0; 4038 MyError = true; 4039 #ifdef __DML_VBA_DEBUG__ 4040 dml_print("DML::%s: MyErr set. 
DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", 4041 __func__, *DestinationLinesToRequestRowInVBlank); 4042 #endif 4043 } 4044 4045 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 4046 } 4047 4048 if (MyError) { 4049 *PrefetchBandwidth = 0; 4050 TimeForFetchingMetaPTE = 0; 4051 TimeForFetchingRowInVBlank = 0; 4052 *DestinationLinesToRequestVMInVBlank = 0; 4053 *DestinationLinesToRequestRowInVBlank = 0; 4054 *DestinationLinesForPrefetch = 0; 4055 LinesToRequestPrefetchPixelData = 0; 4056 *VRatioPrefetchY = 0; 4057 *VRatioPrefetchC = 0; 4058 *RequiredPrefetchPixDataBWLuma = 0; 4059 *RequiredPrefetchPixDataBWChroma = 0; 4060 } 4061 4062 return MyError; 4063 } // CalculatePrefetchSchedule 4064 4065 void dml32_CalculateFlipSchedule( 4066 double HostVMInefficiencyFactor, 4067 double UrgentExtraLatency, 4068 double UrgentLatency, 4069 unsigned int GPUVMMaxPageTableLevels, 4070 bool HostVMEnable, 4071 unsigned int HostVMMaxNonCachedPageTableLevels, 4072 bool GPUVMEnable, 4073 double HostVMMinPageSize, 4074 double PDEAndMetaPTEBytesPerFrame, 4075 double MetaRowBytes, 4076 double DPTEBytesPerRow, 4077 double BandwidthAvailableForImmediateFlip, 4078 unsigned int TotImmediateFlipBytes, 4079 enum source_format_class SourcePixelFormat, 4080 double LineTime, 4081 double VRatio, 4082 double VRatioChroma, 4083 double Tno_bw, 4084 bool DCCEnable, 4085 unsigned int dpte_row_height, 4086 unsigned int meta_row_height, 4087 unsigned int dpte_row_height_chroma, 4088 unsigned int meta_row_height_chroma, 4089 bool use_one_row_for_frame_flip, 4090 4091 /* Output */ 4092 double *DestinationLinesToRequestVMInImmediateFlip, 4093 double *DestinationLinesToRequestRowInImmediateFlip, 4094 double *final_flip_bw, 4095 bool *ImmediateFlipSupportedForPipe) 4096 { 4097 double min_row_time = 0.0; 4098 unsigned int HostVMDynamicLevelsTrips; 4099 double TimeForFetchingMetaPTEImmediateFlip; 4100 double TimeForFetchingRowInVBlankImmediateFlip; 4101 double ImmediateFlipBW; 4102 
4103 if (GPUVMEnable == true && HostVMEnable == true) 4104 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 4105 else 4106 HostVMDynamicLevelsTrips = 0; 4107 4108 #ifdef __DML_VBA_DEBUG__ 4109 dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes); 4110 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); 4111 #endif 4112 4113 if (TotImmediateFlipBytes > 0) { 4114 if (use_one_row_for_frame_flip) { 4115 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) * 4116 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 4117 } else { 4118 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * 4119 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 4120 } 4121 if (GPUVMEnable == true) { 4122 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * 4123 HostVMInefficiencyFactor / ImmediateFlipBW, 4124 UrgentExtraLatency + UrgentLatency * 4125 (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), 4126 LineTime / 4.0); 4127 } else { 4128 TimeForFetchingMetaPTEImmediateFlip = 0; 4129 } 4130 if ((GPUVMEnable == true || DCCEnable == true)) { 4131 TimeForFetchingRowInVBlankImmediateFlip = dml_max3( 4132 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, 4133 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0); 4134 } else { 4135 TimeForFetchingRowInVBlankImmediateFlip = 0; 4136 } 4137 4138 *DestinationLinesToRequestVMInImmediateFlip = 4139 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0; 4140 *DestinationLinesToRequestRowInImmediateFlip = 4141 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0; 4142 4143 if (GPUVMEnable == true) { 4144 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / 4145 (*DestinationLinesToRequestVMInImmediateFlip * LineTime), 
4146 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / 4147 (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); 4148 } else if ((GPUVMEnable == true || DCCEnable == true)) { 4149 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / 4150 (*DestinationLinesToRequestRowInImmediateFlip * LineTime); 4151 } else { 4152 *final_flip_bw = 0; 4153 } 4154 } else { 4155 TimeForFetchingMetaPTEImmediateFlip = 0; 4156 TimeForFetchingRowInVBlankImmediateFlip = 0; 4157 *DestinationLinesToRequestVMInImmediateFlip = 0; 4158 *DestinationLinesToRequestRowInImmediateFlip = 0; 4159 *final_flip_bw = 0; 4160 } 4161 4162 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) { 4163 if (GPUVMEnable == true && DCCEnable != true) { 4164 min_row_time = dml_min(dpte_row_height * 4165 LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma); 4166 } else if (GPUVMEnable != true && DCCEnable == true) { 4167 min_row_time = dml_min(meta_row_height * 4168 LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); 4169 } else { 4170 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * 4171 LineTime / VRatio, dpte_row_height_chroma * LineTime / 4172 VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma); 4173 } 4174 } else { 4175 if (GPUVMEnable == true && DCCEnable != true) { 4176 min_row_time = dpte_row_height * LineTime / VRatio; 4177 } else if (GPUVMEnable != true && DCCEnable == true) { 4178 min_row_time = meta_row_height * LineTime / VRatio; 4179 } else { 4180 min_row_time = 4181 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); 4182 } 4183 } 4184 4185 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 4186 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip 4187 > min_row_time) { 4188 *ImmediateFlipSupportedForPipe = 
false; 4189 } else { 4190 *ImmediateFlipSupportedForPipe = true; 4191 } 4192 4193 #ifdef __DML_VBA_DEBUG__ 4194 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 4195 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); 4196 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", 4197 __func__, *DestinationLinesToRequestVMInImmediateFlip); 4198 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", 4199 __func__, *DestinationLinesToRequestRowInImmediateFlip); 4200 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); 4201 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", 4202 __func__, TimeForFetchingRowInVBlankImmediateFlip); 4203 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); 4204 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe); 4205 #endif 4206 } // CalculateFlipSchedule 4207 4208 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( 4209 bool USRRetrainingRequiredFinal, 4210 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 4211 unsigned int PrefetchMode, 4212 unsigned int NumberOfActiveSurfaces, 4213 unsigned int MaxLineBufferLines, 4214 unsigned int LineBufferSize, 4215 unsigned int WritebackInterfaceBufferSize, 4216 double DCFCLK, 4217 double ReturnBW, 4218 bool SynchronizeTimingsFinal, 4219 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 4220 bool DRRDisplay[], 4221 unsigned int dpte_group_bytes[], 4222 unsigned int meta_row_height[], 4223 unsigned int meta_row_height_chroma[], 4224 SOCParametersList mmSOCParameters, 4225 unsigned int WritebackChunkSize, 4226 double SOCCLK, 4227 double DCFClkDeepSleep, 4228 unsigned int DETBufferSizeY[], 4229 unsigned int DETBufferSizeC[], 4230 unsigned int SwathHeightY[], 4231 unsigned int SwathHeightC[], 4232 unsigned int LBBitPerPixel[], 4233 double SwathWidthY[], 4234 double 
SwathWidthC[], 4235 double HRatio[], 4236 double HRatioChroma[], 4237 unsigned int VTaps[], 4238 unsigned int VTapsChroma[], 4239 double VRatio[], 4240 double VRatioChroma[], 4241 unsigned int HTotal[], 4242 unsigned int VTotal[], 4243 unsigned int VActive[], 4244 double PixelClock[], 4245 unsigned int BlendingAndTiming[], 4246 unsigned int DPPPerSurface[], 4247 double BytePerPixelDETY[], 4248 double BytePerPixelDETC[], 4249 double DSTXAfterScaler[], 4250 double DSTYAfterScaler[], 4251 bool WritebackEnable[], 4252 enum source_format_class WritebackPixelFormat[], 4253 double WritebackDestinationWidth[], 4254 double WritebackDestinationHeight[], 4255 double WritebackSourceHeight[], 4256 bool UnboundedRequestEnabled, 4257 unsigned int CompressedBufferSizeInkByte, 4258 4259 /* Output */ 4260 Watermarks *Watermark, 4261 enum clock_change_support *DRAMClockChangeSupport, 4262 double MaxActiveDRAMClockChangeLatencySupported[], 4263 unsigned int SubViewportLinesNeededInMALL[], 4264 enum dm_fclock_change_support *FCLKChangeSupport, 4265 double *MinActiveFCLKChangeLatencySupported, 4266 bool *USRRetrainingSupport, 4267 double ActiveDRAMClockChangeLatencyMargin[]) 4268 { 4269 unsigned int i, j, k; 4270 unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0; 4271 unsigned int DRAMClockChangeSupportNumber = 0; 4272 unsigned int LastSurfaceWithoutMargin; 4273 unsigned int DRAMClockChangeMethod = 0; 4274 bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; 4275 double MinActiveFCLKChangeMargin = 0.; 4276 double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; 4277 double ActiveClockChangeLatencyHidingY; 4278 double ActiveClockChangeLatencyHidingC; 4279 double ActiveClockChangeLatencyHiding; 4280 double EffectiveDETBufferSizeY; 4281 double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; 4282 double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; 4283 double TotalPixelBW = 0.0; 4284 bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; 4285 double 
EffectiveLBLatencyHidingY; 4286 double EffectiveLBLatencyHidingC; 4287 double LinesInDETY[DC__NUM_DPP__MAX]; 4288 double LinesInDETC[DC__NUM_DPP__MAX]; 4289 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 4290 unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; 4291 double FullDETBufferingTimeY; 4292 double FullDETBufferingTimeC; 4293 double WritebackDRAMClockChangeLatencyMargin; 4294 double WritebackFCLKChangeLatencyMargin; 4295 double WritebackLatencyHiding; 4296 bool SameTimingForFCLKChange; 4297 4298 unsigned int TotalActiveWriteback = 0; 4299 unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; 4300 unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; 4301 4302 Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; 4303 Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency 4304 + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency; 4305 Watermark->DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + Watermark->UrgentWatermark; 4306 Watermark->FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + Watermark->UrgentWatermark; 4307 Watermark->StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency 4308 + 10 / DCFClkDeepSleep; 4309 Watermark->StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency 4310 + 10 / DCFClkDeepSleep; 4311 Watermark->Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency 4312 + 10 / DCFClkDeepSleep; 4313 Watermark->Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time 4314 + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep; 4315 4316 #ifdef __DML_VBA_DEBUG__ 4317 dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency); 4318 dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency); 4319 dml_print("DML::%s: 
DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency); 4320 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, Watermark->UrgentWatermark); 4321 dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, Watermark->USRRetrainingWatermark); 4322 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, Watermark->DRAMClockChangeWatermark); 4323 dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, Watermark->FCLKChangeWatermark); 4324 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, Watermark->StutterExitWatermark); 4325 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, Watermark->StutterEnterPlusExitWatermark); 4326 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, Watermark->Z8StutterExitWatermark); 4327 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", 4328 __func__, Watermark->Z8StutterEnterPlusExitWatermark); 4329 #endif 4330 4331 4332 TotalActiveWriteback = 0; 4333 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4334 if (WritebackEnable[k] == true) 4335 TotalActiveWriteback = TotalActiveWriteback + 1; 4336 } 4337 4338 if (TotalActiveWriteback <= 1) { 4339 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; 4340 } else { 4341 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency 4342 + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 4343 } 4344 if (USRRetrainingRequiredFinal) 4345 Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark 4346 + mmSOCParameters.USRRetrainingLatency; 4347 4348 if (TotalActiveWriteback <= 1) { 4349 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency 4350 + mmSOCParameters.WritebackLatency; 4351 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency 4352 + mmSOCParameters.WritebackLatency; 4353 } else { 4354 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency 4355 + mmSOCParameters.WritebackLatency 
+ WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 4356 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency 4357 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024 / 32 / SOCCLK; 4358 } 4359 4360 if (USRRetrainingRequiredFinal) 4361 Watermark->WritebackDRAMClockChangeWatermark = Watermark->WritebackDRAMClockChangeWatermark 4362 + mmSOCParameters.USRRetrainingLatency; 4363 4364 if (USRRetrainingRequiredFinal) 4365 Watermark->WritebackFCLKChangeWatermark = Watermark->WritebackFCLKChangeWatermark 4366 + mmSOCParameters.USRRetrainingLatency; 4367 4368 #ifdef __DML_VBA_DEBUG__ 4369 dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", 4370 __func__, Watermark->WritebackDRAMClockChangeWatermark); 4371 dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, Watermark->WritebackFCLKChangeWatermark); 4372 dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, Watermark->WritebackUrgentWatermark); 4373 dml_print("DML::%s: USRRetrainingRequiredFinal = %d\n", __func__, USRRetrainingRequiredFinal); 4374 dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency); 4375 #endif 4376 4377 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4378 TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + 4379 SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]); 4380 } 4381 4382 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4383 4384 LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1); 4385 LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1); 4386 4387 4388 #ifdef __DML_VBA_DEBUG__ 4389 dml_print("DML::%s: k=%d, MaxLineBufferLines = %d\n", __func__, 
k, MaxLineBufferLines); 4390 dml_print("DML::%s: k=%d, LineBufferSize = %d\n", __func__, k, LineBufferSize); 4391 dml_print("DML::%s: k=%d, LBBitPerPixel = %d\n", __func__, k, LBBitPerPixel[k]); 4392 dml_print("DML::%s: k=%d, HRatio = %f\n", __func__, k, HRatio[k]); 4393 dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]); 4394 #endif 4395 4396 EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]); 4397 EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); 4398 EffectiveDETBufferSizeY = DETBufferSizeY[k]; 4399 4400 if (UnboundedRequestEnabled) { 4401 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY 4402 + CompressedBufferSizeInkByte * 1024 4403 * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k]) 4404 / (HTotal[k] / PixelClock[k]) / TotalPixelBW; 4405 } 4406 4407 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 4408 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); 4409 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; 4410 4411 ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY 4412 - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k]; 4413 4414 if (NumberOfActiveSurfaces > 1) { 4415 ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY 4416 - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k] 4417 / PixelClock[k] / VRatio[k]; 4418 } 4419 4420 if (BytePerPixelDETC[k] > 0) { 4421 LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 4422 LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]); 4423 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) 4424 / VRatioChroma[k]; 4425 ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC 4426 - 
(DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] 4427 / PixelClock[k]; 4428 if (NumberOfActiveSurfaces > 1) { 4429 ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC 4430 - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k] 4431 / PixelClock[k] / VRatioChroma[k]; 4432 } 4433 ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY, 4434 ActiveClockChangeLatencyHidingC); 4435 } else { 4436 ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY; 4437 } 4438 4439 ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark 4440 - Watermark->DRAMClockChangeWatermark; 4441 ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark 4442 - Watermark->FCLKChangeWatermark; 4443 USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark; 4444 4445 if (WritebackEnable[k]) { 4446 WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024 4447 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] 4448 / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); 4449 if (WritebackPixelFormat[k] == dm_444_64) 4450 WritebackLatencyHiding = WritebackLatencyHiding / 2; 4451 4452 WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding 4453 - Watermark->WritebackDRAMClockChangeWatermark; 4454 4455 WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding 4456 - Watermark->WritebackFCLKChangeWatermark; 4457 4458 ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k], 4459 WritebackFCLKChangeLatencyMargin); 4460 ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k], 4461 WritebackDRAMClockChangeLatencyMargin); 4462 } 4463 MaxActiveDRAMClockChangeLatencySupported[k] = 4464 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? 
4465 0 : 4466 (ActiveDRAMClockChangeLatencyMargin[k] 4467 + mmSOCParameters.DRAMClockChangeLatency); 4468 } 4469 4470 for (i = 0; i < NumberOfActiveSurfaces; ++i) { 4471 for (j = 0; j < NumberOfActiveSurfaces; ++j) { 4472 if (i == j || 4473 (BlendingAndTiming[i] == i && BlendingAndTiming[j] == i) || 4474 (BlendingAndTiming[j] == j && BlendingAndTiming[i] == j) || 4475 (BlendingAndTiming[i] == BlendingAndTiming[j] && BlendingAndTiming[i] != i) || 4476 (SynchronizeTimingsFinal && PixelClock[i] == PixelClock[j] && 4477 HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] && 4478 VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal && 4479 (DRRDisplay[i] || DRRDisplay[j]))) { 4480 SynchronizedSurfaces[i][j] = true; 4481 } else { 4482 SynchronizedSurfaces[i][j] = false; 4483 } 4484 } 4485 } 4486 4487 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4488 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4489 (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin || 4490 ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) { 4491 FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; 4492 MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k]; 4493 SurfaceWithMinActiveFCLKChangeMargin = k; 4494 } 4495 } 4496 4497 *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; 4498 4499 SameTimingForFCLKChange = true; 4500 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4501 if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) { 4502 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4503 (SameTimingForFCLKChange || 4504 ActiveFCLKChangeLatencyMargin[k] < 4505 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { 4506 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k]; 4507 } 4508 SameTimingForFCLKChange = false; 4509 } 4510 } 4511 4512 if (MinActiveFCLKChangeMargin > 0) { 4513 *FCLKChangeSupport = 
dm_fclock_change_vactive; 4514 } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && 4515 (PrefetchMode <= 1)) { 4516 *FCLKChangeSupport = dm_fclock_change_vblank; 4517 } else { 4518 *FCLKChangeSupport = dm_fclock_change_unsupported; 4519 } 4520 4521 *USRRetrainingSupport = true; 4522 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4523 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4524 (USRRetrainingLatencyMargin[k] < 0)) { 4525 *USRRetrainingSupport = false; 4526 } 4527 } 4528 4529 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4530 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame && 4531 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport && 4532 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && 4533 ActiveDRAMClockChangeLatencyMargin[k] < 0) { 4534 if (PrefetchMode > 0) { 4535 DRAMClockChangeSupportNumber = 2; 4536 } else if (DRAMClockChangeSupportNumber == 0) { 4537 DRAMClockChangeSupportNumber = 1; 4538 LastSurfaceWithoutMargin = k; 4539 } else if (DRAMClockChangeSupportNumber == 1 && 4540 !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) { 4541 DRAMClockChangeSupportNumber = 2; 4542 } 4543 } 4544 } 4545 4546 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4547 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) 4548 DRAMClockChangeMethod = 1; 4549 else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) 4550 DRAMClockChangeMethod = 2; 4551 } 4552 4553 if (DRAMClockChangeMethod == 0) { 4554 if (DRAMClockChangeSupportNumber == 0) 4555 *DRAMClockChangeSupport = dm_dram_clock_change_vactive; 4556 else if (DRAMClockChangeSupportNumber == 1) 4557 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 4558 else 4559 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4560 } else if (DRAMClockChangeMethod == 1) { 4561 if (DRAMClockChangeSupportNumber == 0) 4562 *DRAMClockChangeSupport = 
dm_dram_clock_change_vactive_w_mall_full_frame; 4563 else if (DRAMClockChangeSupportNumber == 1) 4564 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame; 4565 else 4566 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4567 } else { 4568 if (DRAMClockChangeSupportNumber == 0) 4569 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp; 4570 else if (DRAMClockChangeSupportNumber == 1) 4571 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp; 4572 else 4573 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4574 } 4575 4576 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4577 unsigned int dst_y_pstate; 4578 unsigned int src_y_pstate_l; 4579 unsigned int src_y_pstate_c; 4580 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c; 4581 4582 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1); 4583 src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]); 4584 src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k]; 4585 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k]; 4586 4587 #ifdef __DML_VBA_DEBUG__ 4588 dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 4589 dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 4590 dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); 4591 dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); 4592 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]); 4593 dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate); 4594 dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l); 4595 dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, 
src_y_ahead_l); 4596 dml_print("DML::%s: k=%d, meta_row_height = %d\n", __func__, k, meta_row_height[k]); 4597 dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l); 4598 #endif 4599 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l; 4600 4601 if (BytePerPixelDETC[k] > 0) { 4602 src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]); 4603 src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k]; 4604 sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k]; 4605 SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c); 4606 4607 #ifdef __DML_VBA_DEBUG__ 4608 dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c); 4609 dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c); 4610 dml_print("DML::%s: k=%d, meta_row_height_chroma = %d\n", __func__, k, meta_row_height_chroma[k]); 4611 dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c); 4612 #endif 4613 } 4614 } 4615 #ifdef __DML_VBA_DEBUG__ 4616 dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport); 4617 dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport); 4618 dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n", 4619 __func__, *MinActiveFCLKChangeLatencySupported); 4620 dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport); 4621 #endif 4622 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport 4623 4624 double dml32_CalculateWriteBackDISPCLK( 4625 enum source_format_class WritebackPixelFormat, 4626 double PixelClock, 4627 double WritebackHRatio, 4628 double WritebackVRatio, 4629 unsigned int WritebackHTaps, 4630 unsigned int WritebackVTaps, 4631 unsigned int WritebackSourceWidth, 4632 unsigned int WritebackDestinationWidth, 4633 unsigned int HTotal, 4634 unsigned int WritebackLineBufferSize, 
4635 double DISPCLKDPPCLKVCOSpeed) 4636 { 4637 double DISPCLK_H, DISPCLK_V, DISPCLK_HB; 4638 4639 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; 4640 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; 4641 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * 4642 WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; 4643 return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed); 4644 } 4645 4646 void dml32_CalculateMinAndMaxPrefetchMode( 4647 enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal, 4648 unsigned int *MinPrefetchMode, 4649 unsigned int *MaxPrefetchMode) 4650 { 4651 if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) { 4652 *MinPrefetchMode = 3; 4653 *MaxPrefetchMode = 3; 4654 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) { 4655 *MinPrefetchMode = 2; 4656 *MaxPrefetchMode = 2; 4657 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) { 4658 *MinPrefetchMode = 1; 4659 *MaxPrefetchMode = 1; 4660 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) { 4661 *MinPrefetchMode = 0; 4662 *MaxPrefetchMode = 0; 4663 } else if (AllowForPStateChangeOrStutterInVBlankFinal == 4664 dm_prefetch_support_uclk_fclk_and_stutter_if_possible) { 4665 *MinPrefetchMode = 0; 4666 *MaxPrefetchMode = 3; 4667 } else { 4668 *MinPrefetchMode = 0; 4669 *MaxPrefetchMode = 3; 4670 } 4671 } // CalculateMinAndMaxPrefetchMode 4672 4673 void dml32_CalculatePixelDeliveryTimes( 4674 unsigned int NumberOfActiveSurfaces, 4675 double VRatio[], 4676 double VRatioChroma[], 4677 double VRatioPrefetchY[], 4678 double VRatioPrefetchC[], 4679 unsigned int swath_width_luma_ub[], 4680 unsigned int swath_width_chroma_ub[], 4681 unsigned int DPPPerSurface[], 4682 double 
HRatio[], 4683 double HRatioChroma[], 4684 double PixelClock[], 4685 double PSCL_THROUGHPUT[], 4686 double PSCL_THROUGHPUT_CHROMA[], 4687 double Dppclk[], 4688 unsigned int BytePerPixelC[], 4689 enum dm_rotation_angle SourceRotation[], 4690 unsigned int NumberOfCursors[], 4691 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 4692 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 4693 unsigned int BlockWidth256BytesY[], 4694 unsigned int BlockHeight256BytesY[], 4695 unsigned int BlockWidth256BytesC[], 4696 unsigned int BlockHeight256BytesC[], 4697 4698 /* Output */ 4699 double DisplayPipeLineDeliveryTimeLuma[], 4700 double DisplayPipeLineDeliveryTimeChroma[], 4701 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 4702 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 4703 double DisplayPipeRequestDeliveryTimeLuma[], 4704 double DisplayPipeRequestDeliveryTimeChroma[], 4705 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 4706 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 4707 double CursorRequestDeliveryTime[], 4708 double CursorRequestDeliveryTimePrefetch[]) 4709 { 4710 double req_per_swath_ub; 4711 unsigned int k; 4712 4713 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4714 4715 #ifdef __DML_VBA_DEBUG__ 4716 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); 4717 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); 4718 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); 4719 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); 4720 dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); 4721 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); 4722 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); 4723 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); 4724 dml_print("DML::%s: k=%d : DPPPerSurface = 
%d\n", __func__, k, DPPPerSurface[k]); 4725 dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]); 4726 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]); 4727 #endif 4728 4729 if (VRatio[k] <= 1) { 4730 DisplayPipeLineDeliveryTimeLuma[k] = 4731 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; 4732 } else { 4733 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 4734 } 4735 4736 if (BytePerPixelC[k] == 0) { 4737 DisplayPipeLineDeliveryTimeChroma[k] = 0; 4738 } else { 4739 if (VRatioChroma[k] <= 1) { 4740 DisplayPipeLineDeliveryTimeChroma[k] = 4741 swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 4742 } else { 4743 DisplayPipeLineDeliveryTimeChroma[k] = 4744 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; 4745 } 4746 } 4747 4748 if (VRatioPrefetchY[k] <= 1) { 4749 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = 4750 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; 4751 } else { 4752 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = 4753 swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 4754 } 4755 4756 if (BytePerPixelC[k] == 0) { 4757 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; 4758 } else { 4759 if (VRatioPrefetchC[k] <= 1) { 4760 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * 4761 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 4762 } else { 4763 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 4764 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; 4765 } 4766 } 4767 #ifdef __DML_VBA_DEBUG__ 4768 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", 4769 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); 4770 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", 4771 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); 4772 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", 4773 
__func__, k, DisplayPipeLineDeliveryTimeChroma[k]); 4774 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", 4775 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); 4776 #endif 4777 } 4778 4779 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4780 if (!IsVertical(SourceRotation[k])) 4781 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; 4782 else 4783 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; 4784 #ifdef __DML_VBA_DEBUG__ 4785 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub); 4786 #endif 4787 4788 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; 4789 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = 4790 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; 4791 if (BytePerPixelC[k] == 0) { 4792 DisplayPipeRequestDeliveryTimeChroma[k] = 0; 4793 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; 4794 } else { 4795 if (!IsVertical(SourceRotation[k])) 4796 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; 4797 else 4798 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; 4799 #ifdef __DML_VBA_DEBUG__ 4800 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub); 4801 #endif 4802 DisplayPipeRequestDeliveryTimeChroma[k] = 4803 DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; 4804 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 4805 DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; 4806 } 4807 #ifdef __DML_VBA_DEBUG__ 4808 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", 4809 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); 4810 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", 4811 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); 4812 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", 4813 __func__, k, 
DisplayPipeRequestDeliveryTimeChroma[k]); 4814 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", 4815 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); 4816 #endif 4817 } 4818 4819 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4820 unsigned int cursor_req_per_width; 4821 4822 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] / 4823 256.0 / 8.0, 1.0); 4824 if (NumberOfCursors[k] > 0) { 4825 if (VRatio[k] <= 1) { 4826 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] / 4827 HRatio[k] / PixelClock[k] / cursor_req_per_width; 4828 } else { 4829 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] / 4830 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; 4831 } 4832 if (VRatioPrefetchY[k] <= 1) { 4833 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] / 4834 HRatio[k] / PixelClock[k] / cursor_req_per_width; 4835 } else { 4836 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] / 4837 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; 4838 } 4839 } else { 4840 CursorRequestDeliveryTime[k] = 0; 4841 CursorRequestDeliveryTimePrefetch[k] = 0; 4842 } 4843 #ifdef __DML_VBA_DEBUG__ 4844 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", 4845 __func__, k, NumberOfCursors[k]); 4846 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", 4847 __func__, k, CursorRequestDeliveryTime[k]); 4848 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", 4849 __func__, k, CursorRequestDeliveryTimePrefetch[k]); 4850 #endif 4851 } 4852 } // CalculatePixelDeliveryTimes 4853 4854 void dml32_CalculateMetaAndPTETimes( 4855 bool use_one_row_for_frame[], 4856 unsigned int NumberOfActiveSurfaces, 4857 bool GPUVMEnable, 4858 unsigned int MetaChunkSize, 4859 unsigned int MinMetaChunkSizeBytes, 4860 unsigned int HTotal[], 4861 double VRatio[], 4862 double VRatioChroma[], 4863 double DestinationLinesToRequestRowInVBlank[], 4864 double 
DestinationLinesToRequestRowInImmediateFlip[], 4865 bool DCCEnable[], 4866 double PixelClock[], 4867 unsigned int BytePerPixelY[], 4868 unsigned int BytePerPixelC[], 4869 enum dm_rotation_angle SourceRotation[], 4870 unsigned int dpte_row_height[], 4871 unsigned int dpte_row_height_chroma[], 4872 unsigned int meta_row_width[], 4873 unsigned int meta_row_width_chroma[], 4874 unsigned int meta_row_height[], 4875 unsigned int meta_row_height_chroma[], 4876 unsigned int meta_req_width[], 4877 unsigned int meta_req_width_chroma[], 4878 unsigned int meta_req_height[], 4879 unsigned int meta_req_height_chroma[], 4880 unsigned int dpte_group_bytes[], 4881 unsigned int PTERequestSizeY[], 4882 unsigned int PTERequestSizeC[], 4883 unsigned int PixelPTEReqWidthY[], 4884 unsigned int PixelPTEReqHeightY[], 4885 unsigned int PixelPTEReqWidthC[], 4886 unsigned int PixelPTEReqHeightC[], 4887 unsigned int dpte_row_width_luma_ub[], 4888 unsigned int dpte_row_width_chroma_ub[], 4889 4890 /* Output */ 4891 double DST_Y_PER_PTE_ROW_NOM_L[], 4892 double DST_Y_PER_PTE_ROW_NOM_C[], 4893 double DST_Y_PER_META_ROW_NOM_L[], 4894 double DST_Y_PER_META_ROW_NOM_C[], 4895 double TimePerMetaChunkNominal[], 4896 double TimePerChromaMetaChunkNominal[], 4897 double TimePerMetaChunkVBlank[], 4898 double TimePerChromaMetaChunkVBlank[], 4899 double TimePerMetaChunkFlip[], 4900 double TimePerChromaMetaChunkFlip[], 4901 double time_per_pte_group_nom_luma[], 4902 double time_per_pte_group_vblank_luma[], 4903 double time_per_pte_group_flip_luma[], 4904 double time_per_pte_group_nom_chroma[], 4905 double time_per_pte_group_vblank_chroma[], 4906 double time_per_pte_group_flip_chroma[]) 4907 { 4908 unsigned int meta_chunk_width; 4909 unsigned int min_meta_chunk_width; 4910 unsigned int meta_chunk_per_row_int; 4911 unsigned int meta_row_remainder; 4912 unsigned int meta_chunk_threshold; 4913 unsigned int meta_chunks_per_row_ub; 4914 unsigned int meta_chunk_width_chroma; 4915 unsigned int 
min_meta_chunk_width_chroma; 4916 unsigned int meta_chunk_per_row_int_chroma; 4917 unsigned int meta_row_remainder_chroma; 4918 unsigned int meta_chunk_threshold_chroma; 4919 unsigned int meta_chunks_per_row_ub_chroma; 4920 unsigned int dpte_group_width_luma; 4921 unsigned int dpte_groups_per_row_luma_ub; 4922 unsigned int dpte_group_width_chroma; 4923 unsigned int dpte_groups_per_row_chroma_ub; 4924 unsigned int k; 4925 4926 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4927 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; 4928 if (BytePerPixelC[k] == 0) 4929 DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 4930 else 4931 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; 4932 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; 4933 if (BytePerPixelC[k] == 0) 4934 DST_Y_PER_META_ROW_NOM_C[k] = 0; 4935 else 4936 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; 4937 } 4938 4939 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4940 if (DCCEnable[k] == true) { 4941 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; 4942 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; 4943 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; 4944 meta_row_remainder = meta_row_width[k] % meta_chunk_width; 4945 if (!IsVertical(SourceRotation[k])) 4946 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; 4947 else 4948 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; 4949 4950 if (meta_row_remainder <= meta_chunk_threshold) 4951 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 4952 else 4953 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 4954 4955 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * 4956 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 4957 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * 4958 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 4959 
TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 4960 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 4961 if (BytePerPixelC[k] == 0) { 4962 TimePerChromaMetaChunkNominal[k] = 0; 4963 TimePerChromaMetaChunkVBlank[k] = 0; 4964 TimePerChromaMetaChunkFlip[k] = 0; 4965 } else { 4966 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / 4967 meta_row_height_chroma[k]; 4968 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / 4969 meta_row_height_chroma[k]; 4970 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / 4971 meta_chunk_width_chroma; 4972 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; 4973 if (!IsVertical(SourceRotation[k])) { 4974 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - 4975 meta_req_width_chroma[k]; 4976 } else { 4977 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - 4978 meta_req_height_chroma[k]; 4979 } 4980 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) 4981 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; 4982 else 4983 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; 4984 4985 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * 4986 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 4987 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * 4988 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 4989 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 4990 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 4991 } 4992 } else { 4993 TimePerMetaChunkNominal[k] = 0; 4994 TimePerMetaChunkVBlank[k] = 0; 4995 TimePerMetaChunkFlip[k] = 0; 4996 TimePerChromaMetaChunkNominal[k] = 0; 4997 TimePerChromaMetaChunkVBlank[k] = 0; 4998 TimePerChromaMetaChunkFlip[k] = 0; 4999 } 5000 } 5001 5002 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5003 if 
(GPUVMEnable == true) { 5004 if (!IsVertical(SourceRotation[k])) { 5005 dpte_group_width_luma = (double) dpte_group_bytes[k] / 5006 (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k]; 5007 } else { 5008 dpte_group_width_luma = (double) dpte_group_bytes[k] / 5009 (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k]; 5010 } 5011 5012 if (use_one_row_for_frame[k]) { 5013 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] / 5014 (double) dpte_group_width_luma / 2.0, 1.0); 5015 } else { 5016 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] / 5017 (double) dpte_group_width_luma, 1.0); 5018 } 5019 #ifdef __DML_VBA_DEBUG__ 5020 dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n", 5021 __func__, k, use_one_row_for_frame[k]); 5022 dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n", 5023 __func__, k, dpte_group_bytes[k]); 5024 dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n", 5025 __func__, k, PTERequestSizeY[k]); 5026 dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n", 5027 __func__, k, PixelPTEReqWidthY[k]); 5028 dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n", 5029 __func__, k, PixelPTEReqHeightY[k]); 5030 dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n", 5031 __func__, k, dpte_row_width_luma_ub[k]); 5032 dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n", 5033 __func__, k, dpte_group_width_luma); 5034 dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n", 5035 __func__, k, dpte_groups_per_row_luma_ub); 5036 #endif 5037 5038 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * 5039 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5040 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * 5041 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5042 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 5043 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5044 if (BytePerPixelC[k] == 0) { 5045 
time_per_pte_group_nom_chroma[k] = 0; 5046 time_per_pte_group_vblank_chroma[k] = 0; 5047 time_per_pte_group_flip_chroma[k] = 0; 5048 } else { 5049 if (!IsVertical(SourceRotation[k])) { 5050 dpte_group_width_chroma = (double) dpte_group_bytes[k] / 5051 (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k]; 5052 } else { 5053 dpte_group_width_chroma = (double) dpte_group_bytes[k] / 5054 (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k]; 5055 } 5056 5057 if (use_one_row_for_frame[k]) { 5058 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] / 5059 (double) dpte_group_width_chroma / 2.0, 1.0); 5060 } else { 5061 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] / 5062 (double) dpte_group_width_chroma, 1.0); 5063 } 5064 #ifdef __DML_VBA_DEBUG__ 5065 dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n", 5066 __func__, k, dpte_row_width_chroma_ub[k]); 5067 dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n", 5068 __func__, k, dpte_group_width_chroma); 5069 dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n", 5070 __func__, k, dpte_groups_per_row_chroma_ub); 5071 #endif 5072 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * 5073 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 5074 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * 5075 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 5076 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 5077 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 5078 } 5079 } else { 5080 time_per_pte_group_nom_luma[k] = 0; 5081 time_per_pte_group_vblank_luma[k] = 0; 5082 time_per_pte_group_flip_luma[k] = 0; 5083 time_per_pte_group_nom_chroma[k] = 0; 5084 time_per_pte_group_vblank_chroma[k] = 0; 5085 time_per_pte_group_flip_chroma[k] = 0; 5086 } 5087 #ifdef __DML_VBA_DEBUG__ 5088 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n", 5089 
__func__, k, DestinationLinesToRequestRowInVBlank[k]); 5090 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n", 5091 __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]); 5092 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n", 5093 __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]); 5094 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n", 5095 __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]); 5096 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n", 5097 __func__, k, DST_Y_PER_META_ROW_NOM_L[k]); 5098 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n", 5099 __func__, k, DST_Y_PER_META_ROW_NOM_C[k]); 5100 dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n", 5101 __func__, k, TimePerMetaChunkNominal[k]); 5102 dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n", 5103 __func__, k, TimePerMetaChunkVBlank[k]); 5104 dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n", 5105 __func__, k, TimePerMetaChunkFlip[k]); 5106 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n", 5107 __func__, k, TimePerChromaMetaChunkNominal[k]); 5108 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n", 5109 __func__, k, TimePerChromaMetaChunkVBlank[k]); 5110 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n", 5111 __func__, k, TimePerChromaMetaChunkFlip[k]); 5112 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n", 5113 __func__, k, time_per_pte_group_nom_luma[k]); 5114 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n", 5115 __func__, k, time_per_pte_group_vblank_luma[k]); 5116 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n", 5117 __func__, k, time_per_pte_group_flip_luma[k]); 5118 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n", 5119 __func__, k, time_per_pte_group_nom_chroma[k]); 5120 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n", 5121 __func__, k, time_per_pte_group_vblank_chroma[k]); 5122 dml_print("DML::%s: 
/*
 * dml32_CalculateVMGroupAndRequestTimes - per-surface VM group and VM request
 * time budgets for VBlank and immediate flip.
 *
 * A surface gets non-zero results only when GPUVMEnable is true and either
 * DCCEnable[k] is true or GPUVMMaxPageTableLevels > 1; otherwise all four
 * outputs for that surface are 0.
 *
 * For an eligible surface two counts are formed:
 *   - a group count: sum of dml_ceil(bytes / vm_group_bytes) terms,
 *   - a request count: sum of (bytes / 64) integer quotients,
 * where "bytes" are taken from
 *   - dpde0_bytes_per_frame_ub_* only              when DCCEnable[k] is false,
 *   - meta_pte_bytes_per_frame_ub_* only           when DCC is on and
 *                                                  GPUVMMaxPageTableLevels == 1,
 *   - both dpde0 and meta_pte                      otherwise; in this case the
 *     group count additionally gets +2 (with chroma) or +1 (luma only).
 * The chroma (_c) terms are included only when BytePerPixelC[k] > 0.
 *
 * Each output is then
 *   DestinationLinesToRequestVMIn{VBlank,ImmediateFlip} * HTotal / PixelClock
 * divided by the group count or the request count, and halved when
 * GPUVMMaxPageTableLevels > 2.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are part of the
 * interface but are not read here.
 *
 * When __DML_VBA_DEBUG__ is defined, inputs and results are logged through
 * dml_print().
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActiveSurfaces,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		unsigned int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],
		unsigned int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		unsigned int meta_pte_bytes_per_frame_ub_l[],
		unsigned int meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		unsigned int groups;
		unsigned int requests;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (!(GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1))) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		} else {
			if (DCCEnable[k] == false) {
				/* Page directory entries only. */
				groups = (BytePerPixelC[k] > 0) ?
						dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1.0) +
						dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0) :
						dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1.0);
				requests = (BytePerPixelC[k] > 0) ?
						dpde0_bytes_per_frame_ub_l[k] / 64 +
						dpde0_bytes_per_frame_ub_c[k] / 64 :
						dpde0_bytes_per_frame_ub_l[k] / 64;
			} else if (GPUVMMaxPageTableLevels == 1) {
				/* Meta PTEs only. */
				groups = (BytePerPixelC[k] > 0) ?
						dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1.0) +
						dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0) :
						dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1.0);
				requests = (BytePerPixelC[k] > 0) ?
						meta_pte_bytes_per_frame_ub_l[k] / 64 +
						meta_pte_bytes_per_frame_ub_c[k] / 64 :
						meta_pte_bytes_per_frame_ub_l[k] / 64;
			} else {
				/*
				 * Page directory entries and meta PTEs together; the
				 * group count carries a fixed +2 (with chroma) or +1.
				 */
				groups = (BytePerPixelC[k] > 0) ?
						2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1) +
						dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1) +
						dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1) +
						dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1) :
						1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1) +
						dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1);
				requests = (BytePerPixelC[k] > 0) ?
						dpde0_bytes_per_frame_ub_l[k] / 64 +
						dpde0_bytes_per_frame_ub_c[k] / 64 +
						meta_pte_bytes_per_frame_ub_l[k] / 64 +
						meta_pte_bytes_per_frame_ub_c[k] / 64 :
						dpde0_bytes_per_frame_ub_l[k] / 64 +
						meta_pte_bytes_per_frame_ub_l[k] / 64;
			}

			/* Lines * (HTotal / PixelClock), shared out per group / request. */
			TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / groups;
			TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / groups;
			TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / requests;
			TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / requests;

			/* More than two page table levels: every budget is halved. */
			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] /= 2;
				TimePerVMGroupFlip[k] /= 2;
				TimePerVMRequestVBlank[k] /= 2;
				TimePerVMRequestFlip[k] /= 2;
			}
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
%f\n", __func__, k, TimePerVMGroupVBlank[k]); 5275 dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); 5276 dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); 5277 dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); 5278 #endif 5279 } 5280 } // CalculateVMGroupAndRequestTimes 5281 5282 void dml32_CalculateDCCConfiguration( 5283 bool DCCEnabled, 5284 bool DCCProgrammingAssumesScanDirectionUnknown, 5285 enum source_format_class SourcePixelFormat, 5286 unsigned int SurfaceWidthLuma, 5287 unsigned int SurfaceWidthChroma, 5288 unsigned int SurfaceHeightLuma, 5289 unsigned int SurfaceHeightChroma, 5290 unsigned int nomDETInKByte, 5291 unsigned int RequestHeight256ByteLuma, 5292 unsigned int RequestHeight256ByteChroma, 5293 enum dm_swizzle_mode TilingFormat, 5294 unsigned int BytePerPixelY, 5295 unsigned int BytePerPixelC, 5296 double BytePerPixelDETY, 5297 double BytePerPixelDETC, 5298 enum dm_rotation_angle SourceRotation, 5299 /* Output */ 5300 unsigned int *MaxUncompressedBlockLuma, 5301 unsigned int *MaxUncompressedBlockChroma, 5302 unsigned int *MaxCompressedBlockLuma, 5303 unsigned int *MaxCompressedBlockChroma, 5304 unsigned int *IndependentBlockLuma, 5305 unsigned int *IndependentBlockChroma) 5306 { 5307 typedef enum { 5308 REQ_256Bytes, 5309 REQ_128BytesNonContiguous, 5310 REQ_128BytesContiguous, 5311 REQ_NA 5312 } RequestType; 5313 5314 RequestType RequestLuma; 5315 RequestType RequestChroma; 5316 5317 unsigned int segment_order_horz_contiguous_luma; 5318 unsigned int segment_order_horz_contiguous_chroma; 5319 unsigned int segment_order_vert_contiguous_luma; 5320 unsigned int segment_order_vert_contiguous_chroma; 5321 unsigned int req128_horz_wc_l; 5322 unsigned int req128_horz_wc_c; 5323 unsigned int req128_vert_wc_l; 5324 unsigned int req128_vert_wc_c; 5325 unsigned int MAS_vp_horz_limit; 5326 unsigned int MAS_vp_vert_limit; 
5327 unsigned int max_vp_horz_width; 5328 unsigned int max_vp_vert_height; 5329 unsigned int eff_surf_width_l; 5330 unsigned int eff_surf_width_c; 5331 unsigned int eff_surf_height_l; 5332 unsigned int eff_surf_height_c; 5333 unsigned int full_swath_bytes_horz_wc_l; 5334 unsigned int full_swath_bytes_horz_wc_c; 5335 unsigned int full_swath_bytes_vert_wc_l; 5336 unsigned int full_swath_bytes_vert_wc_c; 5337 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024; 5338 5339 unsigned int yuv420; 5340 unsigned int horz_div_l; 5341 unsigned int horz_div_c; 5342 unsigned int vert_div_l; 5343 unsigned int vert_div_c; 5344 5345 unsigned int swath_buf_size; 5346 double detile_buf_vp_horz_limit; 5347 double detile_buf_vp_vert_limit; 5348 5349 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || 5350 SourcePixelFormat == dm_420_12) ? 1 : 0); 5351 horz_div_l = 1; 5352 horz_div_c = 1; 5353 vert_div_l = 1; 5354 vert_div_c = 1; 5355 5356 if (BytePerPixelY == 1) 5357 vert_div_l = 0; 5358 if (BytePerPixelC == 1) 5359 vert_div_c = 0; 5360 5361 if (BytePerPixelC == 0) { 5362 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256; 5363 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * 5364 BytePerPixelY / (1 + horz_div_l)); 5365 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / 5366 (1 + vert_div_l)); 5367 } else { 5368 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256; 5369 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * 5370 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma * 5371 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); 5372 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / 5373 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / 5374 (1 + vert_div_c) / (1 + yuv420)); 5375 } 5376 5377 if (SourcePixelFormat == dm_420_10) { 5378 detile_buf_vp_horz_limit = 1.5 * 
detile_buf_vp_horz_limit; 5379 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; 5380 } 5381 5382 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); 5383 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); 5384 5385 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144; 5386 MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144); 5387 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); 5388 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); 5389 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); 5390 eff_surf_width_c = eff_surf_width_l / (1 + yuv420); 5391 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); 5392 eff_surf_height_c = eff_surf_height_l / (1 + yuv420); 5393 5394 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; 5395 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; 5396 if (BytePerPixelC > 0) { 5397 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; 5398 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; 5399 } else { 5400 full_swath_bytes_horz_wc_c = 0; 5401 full_swath_bytes_vert_wc_c = 0; 5402 } 5403 5404 if (SourcePixelFormat == dm_420_10) { 5405 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0); 5406 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0); 5407 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0); 5408 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0); 5409 } 5410 5411 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 
5412 req128_horz_wc_l = 0; 5413 req128_horz_wc_c = 0; 5414 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + 5415 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5416 req128_horz_wc_l = 0; 5417 req128_horz_wc_c = 1; 5418 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * 5419 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5420 req128_horz_wc_l = 1; 5421 req128_horz_wc_c = 0; 5422 } else { 5423 req128_horz_wc_l = 1; 5424 req128_horz_wc_c = 1; 5425 } 5426 5427 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5428 req128_vert_wc_l = 0; 5429 req128_vert_wc_c = 0; 5430 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * 5431 full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5432 req128_vert_wc_l = 0; 5433 req128_vert_wc_c = 1; 5434 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && 5435 full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5436 req128_vert_wc_l = 1; 5437 req128_vert_wc_c = 0; 5438 } else { 5439 req128_vert_wc_l = 1; 5440 req128_vert_wc_c = 1; 5441 } 5442 5443 if (BytePerPixelY == 2) { 5444 segment_order_horz_contiguous_luma = 0; 5445 segment_order_vert_contiguous_luma = 1; 5446 } else { 5447 segment_order_horz_contiguous_luma = 1; 5448 segment_order_vert_contiguous_luma = 0; 5449 } 5450 5451 if (BytePerPixelC == 2) { 5452 segment_order_horz_contiguous_chroma = 0; 5453 segment_order_vert_contiguous_chroma = 1; 5454 } else { 5455 segment_order_horz_contiguous_chroma = 1; 5456 segment_order_vert_contiguous_chroma = 0; 5457 } 5458 #ifdef __DML_VBA_DEBUG__ 5459 dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled); 5460 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 5461 dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, 
DETBufferSizeForDCC); 5462 dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l); 5463 dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c); 5464 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l); 5465 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c); 5466 dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma); 5467 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n", 5468 __func__, segment_order_horz_contiguous_chroma); 5469 #endif 5470 5471 if (DCCProgrammingAssumesScanDirectionUnknown == true) { 5472 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) 5473 RequestLuma = REQ_256Bytes; 5474 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || 5475 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) 5476 RequestLuma = REQ_128BytesNonContiguous; 5477 else 5478 RequestLuma = REQ_128BytesContiguous; 5479 5480 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) 5481 RequestChroma = REQ_256Bytes; 5482 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || 5483 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) 5484 RequestChroma = REQ_128BytesNonContiguous; 5485 else 5486 RequestChroma = REQ_128BytesContiguous; 5487 5488 } else if (!IsVertical(SourceRotation)) { 5489 if (req128_horz_wc_l == 0) 5490 RequestLuma = REQ_256Bytes; 5491 else if (segment_order_horz_contiguous_luma == 0) 5492 RequestLuma = REQ_128BytesNonContiguous; 5493 else 5494 RequestLuma = REQ_128BytesContiguous; 5495 5496 if (req128_horz_wc_c == 0) 5497 RequestChroma = REQ_256Bytes; 5498 else if (segment_order_horz_contiguous_chroma == 0) 5499 RequestChroma = REQ_128BytesNonContiguous; 5500 else 5501 RequestChroma = REQ_128BytesContiguous; 5502 5503 } else { 5504 if (req128_vert_wc_l == 0) 5505 RequestLuma = REQ_256Bytes; 
5506 else if (segment_order_vert_contiguous_luma == 0) 5507 RequestLuma = REQ_128BytesNonContiguous; 5508 else 5509 RequestLuma = REQ_128BytesContiguous; 5510 5511 if (req128_vert_wc_c == 0) 5512 RequestChroma = REQ_256Bytes; 5513 else if (segment_order_vert_contiguous_chroma == 0) 5514 RequestChroma = REQ_128BytesNonContiguous; 5515 else 5516 RequestChroma = REQ_128BytesContiguous; 5517 } 5518 5519 if (RequestLuma == REQ_256Bytes) { 5520 *MaxUncompressedBlockLuma = 256; 5521 *MaxCompressedBlockLuma = 256; 5522 *IndependentBlockLuma = 0; 5523 } else if (RequestLuma == REQ_128BytesContiguous) { 5524 *MaxUncompressedBlockLuma = 256; 5525 *MaxCompressedBlockLuma = 128; 5526 *IndependentBlockLuma = 128; 5527 } else { 5528 *MaxUncompressedBlockLuma = 256; 5529 *MaxCompressedBlockLuma = 64; 5530 *IndependentBlockLuma = 64; 5531 } 5532 5533 if (RequestChroma == REQ_256Bytes) { 5534 *MaxUncompressedBlockChroma = 256; 5535 *MaxCompressedBlockChroma = 256; 5536 *IndependentBlockChroma = 0; 5537 } else if (RequestChroma == REQ_128BytesContiguous) { 5538 *MaxUncompressedBlockChroma = 256; 5539 *MaxCompressedBlockChroma = 128; 5540 *IndependentBlockChroma = 128; 5541 } else { 5542 *MaxUncompressedBlockChroma = 256; 5543 *MaxCompressedBlockChroma = 64; 5544 *IndependentBlockChroma = 64; 5545 } 5546 5547 if (DCCEnabled != true || BytePerPixelC == 0) { 5548 *MaxUncompressedBlockChroma = 0; 5549 *MaxCompressedBlockChroma = 0; 5550 *IndependentBlockChroma = 0; 5551 } 5552 5553 if (DCCEnabled != true) { 5554 *MaxUncompressedBlockLuma = 0; 5555 *MaxCompressedBlockLuma = 0; 5556 *IndependentBlockLuma = 0; 5557 } 5558 5559 #ifdef __DML_VBA_DEBUG__ 5560 dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma); 5561 dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma); 5562 dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma); 5563 dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", 
__func__, *MaxUncompressedBlockChroma); 5564 dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma); 5565 dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma); 5566 #endif 5567 5568 } // CalculateDCCConfiguration 5569 5570 void dml32_CalculateStutterEfficiency( 5571 unsigned int CompressedBufferSizeInkByte, 5572 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 5573 bool UnboundedRequestEnabled, 5574 unsigned int MetaFIFOSizeInKEntries, 5575 unsigned int ZeroSizeBufferEntries, 5576 unsigned int PixelChunkSizeInKByte, 5577 unsigned int NumberOfActiveSurfaces, 5578 unsigned int ROBBufferSizeInKByte, 5579 double TotalDataReadBandwidth, 5580 double DCFCLK, 5581 double ReturnBW, 5582 unsigned int CompbufReservedSpace64B, 5583 unsigned int CompbufReservedSpaceZs, 5584 double SRExitTime, 5585 double SRExitZ8Time, 5586 bool SynchronizeTimingsFinal, 5587 unsigned int BlendingAndTiming[], 5588 double StutterEnterPlusExitWatermark, 5589 double Z8StutterEnterPlusExitWatermark, 5590 bool ProgressiveToInterlaceUnitInOPP, 5591 bool Interlace[], 5592 double MinTTUVBlank[], 5593 unsigned int DPPPerSurface[], 5594 unsigned int DETBufferSizeY[], 5595 unsigned int BytePerPixelY[], 5596 double BytePerPixelDETY[], 5597 double SwathWidthY[], 5598 unsigned int SwathHeightY[], 5599 unsigned int SwathHeightC[], 5600 double NetDCCRateLuma[], 5601 double NetDCCRateChroma[], 5602 double DCCFractionOfZeroSizeRequestsLuma[], 5603 double DCCFractionOfZeroSizeRequestsChroma[], 5604 unsigned int HTotal[], 5605 unsigned int VTotal[], 5606 double PixelClock[], 5607 double VRatio[], 5608 enum dm_rotation_angle SourceRotation[], 5609 unsigned int BlockHeight256BytesY[], 5610 unsigned int BlockWidth256BytesY[], 5611 unsigned int BlockHeight256BytesC[], 5612 unsigned int BlockWidth256BytesC[], 5613 unsigned int DCCYMaxUncompressedBlock[], 5614 unsigned int DCCCMaxUncompressedBlock[], 5615 unsigned int VActive[], 5616 
bool DCCEnable[], 5617 bool WritebackEnable[], 5618 double ReadBandwidthSurfaceLuma[], 5619 double ReadBandwidthSurfaceChroma[], 5620 double meta_row_bw[], 5621 double dpte_row_bw[], 5622 5623 /* Output */ 5624 double *StutterEfficiencyNotIncludingVBlank, 5625 double *StutterEfficiency, 5626 unsigned int *NumberOfStutterBurstsPerFrame, 5627 double *Z8StutterEfficiencyNotIncludingVBlank, 5628 double *Z8StutterEfficiency, 5629 unsigned int *Z8NumberOfStutterBurstsPerFrame, 5630 double *StutterPeriod, 5631 bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE) 5632 { 5633 5634 bool FoundCriticalSurface = false; 5635 unsigned int SwathSizeCriticalSurface = 0; 5636 unsigned int LastChunkOfSwathSize; 5637 unsigned int MissingPartOfLastSwathOfDETSize; 5638 double LastZ8StutterPeriod = 0.0; 5639 double LastStutterPeriod = 0.0; 5640 unsigned int TotalNumberOfActiveOTG = 0; 5641 double doublePixelClock; 5642 unsigned int doubleHTotal; 5643 unsigned int doubleVTotal; 5644 bool SameTiming = true; 5645 double DETBufferingTimeY; 5646 double SwathWidthYCriticalSurface = 0.0; 5647 double SwathHeightYCriticalSurface = 0.0; 5648 double VActiveTimeCriticalSurface = 0.0; 5649 double FrameTimeCriticalSurface = 0.0; 5650 unsigned int BytePerPixelYCriticalSurface = 0; 5651 double LinesToFinishSwathTransferStutterCriticalSurface = 0.0; 5652 unsigned int DETBufferSizeYCriticalSurface = 0; 5653 double MinTTUVBlankCriticalSurface = 0.0; 5654 unsigned int BlockWidth256BytesYCriticalSurface = 0; 5655 bool doublePlaneCriticalSurface = 0; 5656 bool doublePipeCriticalSurface = 0; 5657 double TotalCompressedReadBandwidth; 5658 double TotalRowReadBandwidth; 5659 double AverageDCCCompressionRate; 5660 double EffectiveCompressedBufferSize; 5661 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; 5662 double StutterBurstTime; 5663 unsigned int TotalActiveWriteback; 5664 double LinesInDETY; 5665 double LinesInDETYRoundedDownToSwath; 5666 double MaximumEffectiveCompressionLuma; 5667 double 
MaximumEffectiveCompressionChroma; 5668 double TotalZeroSizeRequestReadBandwidth; 5669 double TotalZeroSizeCompressedReadBandwidth; 5670 double AverageDCCZeroSizeFraction; 5671 double AverageZeroSizeCompressionRate; 5672 unsigned int k; 5673 5674 TotalZeroSizeRequestReadBandwidth = 0; 5675 TotalZeroSizeCompressedReadBandwidth = 0; 5676 TotalRowReadBandwidth = 0; 5677 TotalCompressedReadBandwidth = 0; 5678 5679 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5680 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5681 if (DCCEnable[k] == true) { 5682 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k]) 5683 || (!IsVertical(SourceRotation[k]) 5684 && BlockHeight256BytesY[k] > SwathHeightY[k]) 5685 || DCCYMaxUncompressedBlock[k] < 256) { 5686 MaximumEffectiveCompressionLuma = 2; 5687 } else { 5688 MaximumEffectiveCompressionLuma = 4; 5689 } 5690 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 5691 + ReadBandwidthSurfaceLuma[k] 5692 / dml_min(NetDCCRateLuma[k], 5693 MaximumEffectiveCompressionLuma); 5694 #ifdef __DML_VBA_DEBUG__ 5695 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", 5696 __func__, k, ReadBandwidthSurfaceLuma[k]); 5697 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n", 5698 __func__, k, NetDCCRateLuma[k]); 5699 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n", 5700 __func__, k, MaximumEffectiveCompressionLuma); 5701 #endif 5702 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth 5703 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; 5704 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 5705 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] 5706 / MaximumEffectiveCompressionLuma; 5707 5708 if (ReadBandwidthSurfaceChroma[k] > 0) { 5709 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k]) 5710 || (!IsVertical(SourceRotation[k]) 5711 && 
BlockHeight256BytesC[k] > SwathHeightC[k]) 5712 || DCCCMaxUncompressedBlock[k] < 256) { 5713 MaximumEffectiveCompressionChroma = 2; 5714 } else { 5715 MaximumEffectiveCompressionChroma = 4; 5716 } 5717 TotalCompressedReadBandwidth = 5718 TotalCompressedReadBandwidth 5719 + ReadBandwidthSurfaceChroma[k] 5720 / dml_min(NetDCCRateChroma[k], 5721 MaximumEffectiveCompressionChroma); 5722 #ifdef __DML_VBA_DEBUG__ 5723 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n", 5724 __func__, k, ReadBandwidthSurfaceChroma[k]); 5725 dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n", 5726 __func__, k, NetDCCRateChroma[k]); 5727 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n", 5728 __func__, k, MaximumEffectiveCompressionChroma); 5729 #endif 5730 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth 5731 + ReadBandwidthSurfaceChroma[k] 5732 * DCCFractionOfZeroSizeRequestsChroma[k]; 5733 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 5734 + ReadBandwidthSurfaceChroma[k] 5735 * DCCFractionOfZeroSizeRequestsChroma[k] 5736 / MaximumEffectiveCompressionChroma; 5737 } 5738 } else { 5739 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 5740 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k]; 5741 } 5742 TotalRowReadBandwidth = TotalRowReadBandwidth 5743 + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]); 5744 } 5745 } 5746 5747 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; 5748 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; 5749 5750 #ifdef __DML_VBA_DEBUG__ 5751 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); 5752 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); 5753 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); 5754 dml_print("DML::%s: 
TotalZeroSizeCompressedReadBandwidth = %f\n", 5755 __func__, TotalZeroSizeCompressedReadBandwidth); 5756 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); 5757 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); 5758 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 5759 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); 5760 dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B); 5761 dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs); 5762 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); 5763 #endif 5764 if (AverageDCCZeroSizeFraction == 1) { 5765 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth 5766 / TotalZeroSizeCompressedReadBandwidth; 5767 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64 5768 * AverageZeroSizeCompressionRate 5769 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 5770 * AverageZeroSizeCompressionRate; 5771 } else if (AverageDCCZeroSizeFraction > 0) { 5772 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth 5773 / TotalZeroSizeCompressedReadBandwidth; 5774 EffectiveCompressedBufferSize = dml_min( 5775 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 5776 (double) MetaFIFOSizeInKEntries * 1024 * 64 5777 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate 5778 + 1 / AverageDCCCompressionRate)) 5779 + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) 5780 * AverageDCCCompressionRate, 5781 ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 5782 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 5783 5784 #ifdef __DML_VBA_DEBUG__ 5785 dml_print("DML::%s: min 1 = %f\n", 
__func__, 5786 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 5787 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 / 5788 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / 5789 AverageDCCCompressionRate)); 5790 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 - 5791 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate); 5792 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 / 5793 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 5794 #endif 5795 } else { 5796 EffectiveCompressedBufferSize = dml_min( 5797 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 5798 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) 5799 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) 5800 * AverageDCCCompressionRate; 5801 5802 #ifdef __DML_VBA_DEBUG__ 5803 dml_print("DML::%s: min 1 = %f\n", __func__, 5804 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 5805 dml_print("DML::%s: min 2 = %f\n", __func__, 5806 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); 5807 #endif 5808 } 5809 5810 #ifdef __DML_VBA_DEBUG__ 5811 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); 5812 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); 5813 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 5814 #endif 5815 5816 *StutterPeriod = 0; 5817 5818 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5819 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5820 LinesInDETY = ((double) DETBufferSizeY[k] 5821 + (UnboundedRequestEnabled == true ? 
EffectiveCompressedBufferSize : 0) 5822 * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth) 5823 / BytePerPixelDETY[k] / SwathWidthY[k]; 5824 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); 5825 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k]) 5826 / VRatio[k]; 5827 #ifdef __DML_VBA_DEBUG__ 5828 dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 5829 dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 5830 dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); 5831 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", 5832 __func__, k, ReadBandwidthSurfaceLuma[k]); 5833 dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); 5834 dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY); 5835 dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n", 5836 __func__, k, LinesInDETYRoundedDownToSwath); 5837 dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]); 5838 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); 5839 dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]); 5840 dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); 5841 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); 5842 #endif 5843 5844 if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) { 5845 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; 5846 5847 FoundCriticalSurface = true; 5848 *StutterPeriod = DETBufferingTimeY; 5849 FrameTimeCriticalSurface = ( 5850 isInterlaceTiming ? 5851 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k]) 5852 * (double) HTotal[k] / PixelClock[k]; 5853 VActiveTimeCriticalSurface = ( 5854 isInterlaceTiming ? 
5855 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k]) 5856 * (double) HTotal[k] / PixelClock[k]; 5857 BytePerPixelYCriticalSurface = BytePerPixelY[k]; 5858 SwathWidthYCriticalSurface = SwathWidthY[k]; 5859 SwathHeightYCriticalSurface = SwathHeightY[k]; 5860 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k]; 5861 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k] 5862 - (LinesInDETY - LinesInDETYRoundedDownToSwath); 5863 DETBufferSizeYCriticalSurface = DETBufferSizeY[k]; 5864 MinTTUVBlankCriticalSurface = MinTTUVBlank[k]; 5865 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0); 5866 doublePipeCriticalSurface = (DPPPerSurface[k] == 1); 5867 5868 #ifdef __DML_VBA_DEBUG__ 5869 dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n", 5870 __func__, k, FoundCriticalSurface); 5871 dml_print("DML::%s: k=%0d, StutterPeriod = %f\n", 5872 __func__, k, *StutterPeriod); 5873 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n", 5874 __func__, k, MinTTUVBlankCriticalSurface); 5875 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n", 5876 __func__, k, FrameTimeCriticalSurface); 5877 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n", 5878 __func__, k, VActiveTimeCriticalSurface); 5879 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n", 5880 __func__, k, BytePerPixelYCriticalSurface); 5881 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n", 5882 __func__, k, SwathWidthYCriticalSurface); 5883 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n", 5884 __func__, k, SwathHeightYCriticalSurface); 5885 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n", 5886 __func__, k, BlockWidth256BytesYCriticalSurface); 5887 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n", 5888 __func__, k, doublePlaneCriticalSurface); 5889 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n", 5890 __func__, k, doublePipeCriticalSurface); 5891 dml_print("DML::%s: 
k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n", 5892 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface); 5893 #endif 5894 } 5895 } 5896 } 5897 5898 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, 5899 EffectiveCompressedBufferSize); 5900 #ifdef __DML_VBA_DEBUG__ 5901 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); 5902 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 5903 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", 5904 __func__, *StutterPeriod * TotalDataReadBandwidth); 5905 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 5906 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, 5907 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); 5908 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 5909 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); 5910 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); 5911 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 5912 #endif 5913 5914 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate 5915 / ReturnBW 5916 + (*StutterPeriod * TotalDataReadBandwidth 5917 - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) 5918 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; 5919 #ifdef __DML_VBA_DEBUG__ 5920 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 5921 AverageDCCCompressionRate / ReturnBW); 5922 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", 5923 __func__, (*StutterPeriod * TotalDataReadBandwidth)); 5924 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * 
TotalDataReadBandwidth - 5925 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); 5926 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); 5927 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 5928 #endif 5929 StutterBurstTime = dml_max(StutterBurstTime, 5930 LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface 5931 * SwathWidthYCriticalSurface / ReturnBW); 5932 5933 #ifdef __DML_VBA_DEBUG__ 5934 dml_print("DML::%s: Time to finish residue swath=%f\n", 5935 __func__, 5936 LinesToFinishSwathTransferStutterCriticalSurface * 5937 BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW); 5938 #endif 5939 5940 TotalActiveWriteback = 0; 5941 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5942 if (WritebackEnable[k]) 5943 TotalActiveWriteback = TotalActiveWriteback + 1; 5944 } 5945 5946 if (TotalActiveWriteback == 0) { 5947 #ifdef __DML_VBA_DEBUG__ 5948 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 5949 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); 5950 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); 5951 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 5952 #endif 5953 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 5954 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; 5955 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 5956 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; 5957 *NumberOfStutterBurstsPerFrame = ( 5958 *StutterEfficiencyNotIncludingVBlank > 0 ? 5959 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); 5960 *Z8NumberOfStutterBurstsPerFrame = ( 5961 *Z8StutterEfficiencyNotIncludingVBlank > 0 ? 
5962 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); 5963 } else { 5964 *StutterEfficiencyNotIncludingVBlank = 0.; 5965 *Z8StutterEfficiencyNotIncludingVBlank = 0.; 5966 *NumberOfStutterBurstsPerFrame = 0; 5967 *Z8NumberOfStutterBurstsPerFrame = 0; 5968 } 5969 #ifdef __DML_VBA_DEBUG__ 5970 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface); 5971 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", 5972 __func__, *StutterEfficiencyNotIncludingVBlank); 5973 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", 5974 __func__, *Z8StutterEfficiencyNotIncludingVBlank); 5975 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); 5976 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 5977 #endif 5978 5979 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5980 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5981 if (BlendingAndTiming[k] == k) { 5982 if (TotalNumberOfActiveOTG == 0) { 5983 doublePixelClock = PixelClock[k]; 5984 doubleHTotal = HTotal[k]; 5985 doubleVTotal = VTotal[k]; 5986 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k] 5987 || doubleVTotal != VTotal[k]) { 5988 SameTiming = false; 5989 } 5990 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 5991 } 5992 } 5993 } 5994 5995 if (*StutterEfficiencyNotIncludingVBlank > 0) { 5996 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 5997 5998 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming 5999 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) { 6000 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime 6001 + StutterBurstTime * VActiveTimeCriticalSurface 6002 / *StutterPeriod) / FrameTimeCriticalSurface) * 100; 6003 } else { 6004 *StutterEfficiency = 
*StutterEfficiencyNotIncludingVBlank; 6005 } 6006 } else { 6007 *StutterEfficiency = 0; 6008 } 6009 6010 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { 6011 LastZ8StutterPeriod = VActiveTimeCriticalSurface 6012 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6013 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod + 6014 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) { 6015 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime 6016 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100; 6017 } else { 6018 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; 6019 } 6020 } else { 6021 *Z8StutterEfficiency = 0.; 6022 } 6023 6024 #ifdef __DML_VBA_DEBUG__ 6025 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); 6026 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); 6027 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6028 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6029 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); 6030 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); 6031 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", 6032 __func__, *StutterEfficiencyNotIncludingVBlank); 6033 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6034 #endif 6035 6036 SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface 6037 * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface); 6038 LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024); 6039 MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) 6040 - DETBufferSizeYCriticalSurface; 6041 6042 
*DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1) 6043 && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0) 6044 && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) 6045 && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize)); 6046 6047 #ifdef __DML_VBA_DEBUG__ 6048 dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface); 6049 dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize); 6050 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize); 6051 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); 6052 #endif 6053 } // CalculateStutterEfficiency 6054 6055 void dml32_CalculateMaxDETAndMinCompressedBufferSize( 6056 unsigned int ConfigReturnBufferSizeInKByte, 6057 unsigned int ROBBufferSizeInKByte, 6058 unsigned int MaxNumDPP, 6059 bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size 6060 unsigned int nomDETInKByteOverrideValue, // VBA_DELTA 6061 6062 /* Output */ 6063 unsigned int *MaxTotalDETInKByte, 6064 unsigned int *nomDETInKByte, 6065 unsigned int *MinCompressedBufferSizeInKByte) 6066 { 6067 bool det_buff_size_override_en = nomDETInKByteOverrideEnable; 6068 unsigned int det_buff_size_override_val = nomDETInKByteOverrideValue; 6069 6070 *MaxTotalDETInKByte = dml_ceil(((double)ConfigReturnBufferSizeInKByte + 6071 (double) ROBBufferSizeInKByte) * 4.0 / 5.0, 64); 6072 *nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64); 6073 *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; 6074 6075 #ifdef __DML_VBA_DEBUG__ 6076 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte); 6077 dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, 
ROBBufferSizeInKByte); 6078 dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP); 6079 dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte); 6080 dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte); 6081 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte); 6082 #endif 6083 6084 if (det_buff_size_override_en) { 6085 *nomDETInKByte = det_buff_size_override_val; 6086 #ifdef __DML_VBA_DEBUG__ 6087 dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte); 6088 #endif 6089 } 6090 } // CalculateMaxDETAndMinCompressedBufferSize 6091 6092 bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces, 6093 double ReturnBW, 6094 bool NotUrgentLatencyHiding[], 6095 double ReadBandwidthLuma[], 6096 double ReadBandwidthChroma[], 6097 double cursor_bw[], 6098 double meta_row_bandwidth[], 6099 double dpte_row_bandwidth[], 6100 unsigned int NumberOfDPP[], 6101 double UrgentBurstFactorLuma[], 6102 double UrgentBurstFactorChroma[], 6103 double UrgentBurstFactorCursor[]) 6104 { 6105 unsigned int k; 6106 bool NotEnoughUrgentLatencyHiding = false; 6107 bool CalculateVActiveBandwithSupport_val = false; 6108 double VActiveBandwith = 0; 6109 6110 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6111 if (NotUrgentLatencyHiding[k]) { 6112 NotEnoughUrgentLatencyHiding = true; 6113 } 6114 } 6115 6116 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6117 VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k]; 6118 } 6119 6120 CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding; 6121 6122 #ifdef __DML_VBA_DEBUG__ 6123 dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, 
NotEnoughUrgentLatencyHiding); 6124 dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith); 6125 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 6126 dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, CalculateVActiveBandwithSupport_val); 6127 #endif 6128 return CalculateVActiveBandwithSupport_val; 6129 } 6130 6131 void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces, 6132 double ReturnBW, 6133 bool NotUrgentLatencyHiding[], 6134 double ReadBandwidthLuma[], 6135 double ReadBandwidthChroma[], 6136 double PrefetchBandwidthLuma[], 6137 double PrefetchBandwidthChroma[], 6138 double cursor_bw[], 6139 double meta_row_bandwidth[], 6140 double dpte_row_bandwidth[], 6141 double cursor_bw_pre[], 6142 double prefetch_vmrow_bw[], 6143 unsigned int NumberOfDPP[], 6144 double UrgentBurstFactorLuma[], 6145 double UrgentBurstFactorChroma[], 6146 double UrgentBurstFactorCursor[], 6147 double UrgentBurstFactorLumaPre[], 6148 double UrgentBurstFactorChromaPre[], 6149 double UrgentBurstFactorCursorPre[], 6150 6151 /* output */ 6152 double *PrefetchBandwidth, 6153 double *FractionOfUrgentBandwidth, 6154 bool *PrefetchBandwidthSupport) 6155 { 6156 unsigned int k; 6157 bool NotEnoughUrgentLatencyHiding = false; 6158 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6159 if (NotUrgentLatencyHiding[k]) { 6160 NotEnoughUrgentLatencyHiding = true; 6161 } 6162 } 6163 6164 *PrefetchBandwidth = 0; 6165 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6166 *PrefetchBandwidth = *PrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6167 ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]), 6168 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * 
UrgentBurstFactorCursorPre[k]); 6169 } 6170 6171 *PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding; 6172 *FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW; 6173 } 6174 6175 double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces, 6176 double ReturnBW, 6177 double ReadBandwidthLuma[], 6178 double ReadBandwidthChroma[], 6179 double PrefetchBandwidthLuma[], 6180 double PrefetchBandwidthChroma[], 6181 double cursor_bw[], 6182 double cursor_bw_pre[], 6183 unsigned int NumberOfDPP[], 6184 double UrgentBurstFactorLuma[], 6185 double UrgentBurstFactorChroma[], 6186 double UrgentBurstFactorCursor[], 6187 double UrgentBurstFactorLumaPre[], 6188 double UrgentBurstFactorChromaPre[], 6189 double UrgentBurstFactorCursorPre[]) 6190 { 6191 unsigned int k; 6192 double CalculateBandwidthAvailableForImmediateFlip_val = ReturnBW; 6193 6194 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6195 CalculateBandwidthAvailableForImmediateFlip_val = CalculateBandwidthAvailableForImmediateFlip_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6196 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6197 } 6198 6199 return CalculateBandwidthAvailableForImmediateFlip_val; 6200 } 6201 6202 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces, 6203 double ReturnBW, 6204 enum immediate_flip_requirement ImmediateFlipRequirement[], 6205 double final_flip_bw[], 6206 double ReadBandwidthLuma[], 6207 double ReadBandwidthChroma[], 6208 double PrefetchBandwidthLuma[], 6209 double PrefetchBandwidthChroma[], 6210 double cursor_bw[], 6211 double meta_row_bandwidth[], 6212 double dpte_row_bandwidth[], 6213 double cursor_bw_pre[], 6214 double 
prefetch_vmrow_bw[], 6215 unsigned int NumberOfDPP[], 6216 double UrgentBurstFactorLuma[], 6217 double UrgentBurstFactorChroma[], 6218 double UrgentBurstFactorCursor[], 6219 double UrgentBurstFactorLumaPre[], 6220 double UrgentBurstFactorChromaPre[], 6221 double UrgentBurstFactorCursorPre[], 6222 6223 /* output */ 6224 double *TotalBandwidth, 6225 double *FractionOfUrgentBandwidth, 6226 bool *ImmediateFlipBandwidthSupport) 6227 { 6228 unsigned int k; 6229 *TotalBandwidth = 0; 6230 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6231 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) { 6232 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6233 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6234 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6235 } else { 6236 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6237 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6238 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6239 } 6240 } 6241 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW); 6242 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW; 6243 } 6244