1 /* 2 * Copyright 2022 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 #include "display_mode_vba_util_32.h" 26 #include "../dml_inline_defs.h" 27 #include "display_mode_vba_32.h" 28 #include "../display_mode_lib.h" 29 30 #define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096 31 32 unsigned int dml32_dscceComputeDelay( 33 unsigned int bpc, 34 double BPP, 35 unsigned int sliceWidth, 36 unsigned int numSlices, 37 enum output_format_class pixelFormat, 38 enum output_encoder_class Output) 39 { 40 // valid bpc = source bits per component in the set of {8, 10, 12} 41 // valid bpp = increments of 1/16 of a bit 42 // min = 6/7/8 in N420/N422/444, respectively 43 // max = such that compression is 1:1 44 //valid sliceWidth = number of pixels per slice line, 45 // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 46 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 47 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 48 49 // fixed value 50 unsigned int rcModelSize = 8192; 51 52 // N422/N420 operate at 2 pixels per clock 53 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L, 54 Delay, pixels; 55 56 if (pixelFormat == dm_420) 57 pixelsPerClock = 2; 58 else if (pixelFormat == dm_n422) 59 pixelsPerClock = 2; 60 // #all other modes operate at 1 pixel per clock 61 else 62 pixelsPerClock = 1; 63 64 //initial transmit delay as per PPS 65 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); 66 67 //compute ssm delay 68 if (bpc == 8) 69 D = 81; 70 else if (bpc == 10) 71 D = 89; 72 else 73 D = 113; 74 75 //divide by pixel per cycle to compute slice width as seen by DSC 76 w = sliceWidth / pixelsPerClock; 77 78 //422 mode has an additional cycle of delay 79 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) 80 s = 0; 81 else 82 s = 1; 83 84 //main calculation for the dscce 85 ix = initalXmitDelay + 45; 86 wx = (w + 2) / 3; 87 p = 3 * wx - w; 88 l0 = ix / w; 89 a = ix + p * l0; 90 ax = (a + 2) / 3 + D + 6 + 1; 91 L = (ax + wx - 1) / wx; 92 if ((ix % w) == 0 && p != 0) 93 lstall = 1; 94 else 95 lstall = 0; 96 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; 97 98 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 99 pixels = Delay * 3 * pixelsPerClock; 100 101 #ifdef __DML_VBA_DEBUG__ 102 dml_print("DML::%s: bpc: %d\n", __func__, bpc); 103 dml_print("DML::%s: BPP: %f\n", __func__, BPP); 104 dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth); 105 dml_print("DML::%s: numSlices: %d\n", __func__, numSlices); 106 dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat); 107 dml_print("DML::%s: Output: %d\n", __func__, Output); 108 dml_print("DML::%s: pixels: %d\n", __func__, pixels); 109 #endif 110 111 return pixels; 112 } 113 114 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) 115 { 116 unsigned int Delay = 0; 117 118 if (pixelFormat == dm_420) { 119 // sfr 120 Delay = Delay + 2; 121 // dsccif 122 Delay = Delay + 0; 123 // dscc - input deserializer 124 Delay = Delay + 3; 125 // dscc gets pixels every other cycle 126 Delay = Delay + 2; 127 // dscc - input cdc fifo 128 Delay = Delay + 12; 129 // dscc gets pixels every other cycle 130 Delay = Delay + 13; 131 // dscc - cdc uncertainty 132 Delay = Delay + 2; 133 // dscc - output cdc fifo 134 Delay = Delay + 7; 135 // dscc gets pixels every other cycle 136 Delay = Delay + 3; 137 // dscc - cdc uncertainty 138 Delay = Delay + 2; 139 // dscc - output serializer 140 Delay = Delay + 1; 141 // sft 142 Delay = Delay + 1; 143 } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) { 144 // sfr 145 Delay = Delay + 2; 146 // dsccif 147 Delay = Delay + 1; 148 // dscc - input deserializer 149 Delay = Delay + 5; 150 // dscc - input cdc fifo 151 Delay = Delay + 25; 152 // dscc - cdc uncertainty 153 Delay = Delay + 2; 154 // dscc - output cdc fifo 155 Delay = Delay + 10; 156 // dscc - cdc uncertainty 157 Delay = Delay + 2; 158 // dscc - output serializer 159 Delay = Delay + 1; 160 // sft 161 Delay = Delay + 1; 162 } else { 163 // sfr 164 Delay = Delay + 2; 165 // dsccif 166 Delay = Delay + 0; 167 // dscc - input deserializer 168 Delay = Delay + 3; 169 // dscc - input cdc fifo 170 Delay = Delay + 12; 171 // dscc - cdc uncertainty 172 Delay = Delay + 2; 173 // dscc - output cdc fifo 174 Delay = Delay + 7; 175 // dscc - output serializer 176 Delay = Delay + 1; 177 // dscc - cdc uncertainty 178 Delay = Delay + 2; 179 // sft 180 Delay = Delay + 1; 181 } 182 183 return Delay; 184 } 185 186 187 bool IsVertical(enum dm_rotation_angle Scan) 188 { 189 bool is_vert = false; 190 191 if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m) 192 is_vert = true; 193 else 194 is_vert = false; 195 return is_vert; 196 } 197 198 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput( 199 double HRatio, 200 double HRatioChroma, 201 double VRatio, 202 double VRatioChroma, 203 double MaxDCHUBToPSCLThroughput, 204 double MaxPSCLToLBThroughput, 205 double PixelClock, 206 enum source_format_class SourcePixelFormat, 207 unsigned int HTaps, 208 unsigned int HTapsChroma, 209 unsigned int VTaps, 210 unsigned int VTapsChroma, 211 212 /* output */ 213 double *PSCL_THROUGHPUT, 214 double *PSCL_THROUGHPUT_CHROMA, 215 double *DPPCLKUsingSingleDPP) 216 { 217 double DPPCLKUsingSingleDPPLuma; 218 double DPPCLKUsingSingleDPPChroma; 219 220 if (HRatio > 1) { 221 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / 222 dml_ceil((double) HTaps / 6.0, 1.0)); 223 } else { 224 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); 225 } 226 227 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / 228 *PSCL_THROUGHPUT, 1); 229 230 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock) 231 DPPCLKUsingSingleDPPLuma = 2 * PixelClock; 232 233 if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 && 234 SourcePixelFormat != dm_rgbe_alpha)) { 235 *PSCL_THROUGHPUT_CHROMA = 0; 236 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma; 237 } else { 238 if (HRatioChroma > 1) { 239 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * 240 HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0)); 241 } else { 242 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); 243 } 244 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma), 245 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); 246 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock) 247 DPPCLKUsingSingleDPPChroma = 2 * PixelClock; 248 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma); 249 } 250 } 251 252 void dml32_CalculateBytePerPixelAndBlockSizes( 253 enum source_format_class SourcePixelFormat, 254 enum dm_swizzle_mode SurfaceTiling, 255 256 /* Output */ 257 unsigned int *BytePerPixelY, 258 unsigned int *BytePerPixelC, 259 double *BytePerPixelDETY, 260 double *BytePerPixelDETC, 261 unsigned int *BlockHeight256BytesY, 262 unsigned int *BlockHeight256BytesC, 263 unsigned int *BlockWidth256BytesY, 264 unsigned int *BlockWidth256BytesC, 265 unsigned int *MacroTileHeightY, 266 unsigned int *MacroTileHeightC, 267 unsigned int *MacroTileWidthY, 268 unsigned int *MacroTileWidthC) 269 { 270 if (SourcePixelFormat == dm_444_64) { 271 *BytePerPixelDETY = 8; 272 *BytePerPixelDETC = 0; 273 *BytePerPixelY = 8; 274 *BytePerPixelC = 0; 275 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) { 276 *BytePerPixelDETY = 4; 277 *BytePerPixelDETC = 0; 278 *BytePerPixelY = 4; 279 *BytePerPixelC = 0; 280 } else if (SourcePixelFormat == dm_444_16) { 281 *BytePerPixelDETY = 2; 282 *BytePerPixelDETC = 0; 283 *BytePerPixelY = 2; 284 *BytePerPixelC = 0; 285 } else if (SourcePixelFormat == dm_444_8) { 286 *BytePerPixelDETY = 1; 287 *BytePerPixelDETC = 0; 288 *BytePerPixelY = 1; 289 *BytePerPixelC = 0; 290 } else if (SourcePixelFormat == dm_rgbe_alpha) { 291 *BytePerPixelDETY = 4; 292 *BytePerPixelDETC = 1; 293 *BytePerPixelY = 4; 294 *BytePerPixelC = 1; 295 } else if (SourcePixelFormat == dm_420_8) { 296 *BytePerPixelDETY = 1; 297 *BytePerPixelDETC = 2; 298 *BytePerPixelY = 1; 299 *BytePerPixelC = 2; 300 } else if (SourcePixelFormat == dm_420_12) { 301 *BytePerPixelDETY = 2; 302 *BytePerPixelDETC = 4; 303 *BytePerPixelY = 2; 304 *BytePerPixelC = 4; 305 } else { 306 *BytePerPixelDETY = 4.0 / 3; 307 *BytePerPixelDETC = 8.0 / 3; 308 *BytePerPixelY = 2; 309 *BytePerPixelC = 4; 310 } 311 #ifdef __DML_VBA_DEBUG__ 312 dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat); 313 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); 314 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); 315 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY); 316 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC); 317 #endif 318 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 319 || SourcePixelFormat == dm_444_16 320 || SourcePixelFormat == dm_444_8 321 || SourcePixelFormat == dm_mono_16 322 || SourcePixelFormat == dm_mono_8 323 || SourcePixelFormat == dm_rgbe)) { 324 if (SurfaceTiling == dm_sw_linear) 325 *BlockHeight256BytesY = 1; 326 else if (SourcePixelFormat == dm_444_64) 327 *BlockHeight256BytesY = 4; 328 else if (SourcePixelFormat == dm_444_8) 329 *BlockHeight256BytesY = 16; 330 else 331 *BlockHeight256BytesY = 8; 332 333 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 334 *BlockHeight256BytesC = 0; 335 *BlockWidth256BytesC = 0; 336 } else { 337 if (SurfaceTiling == dm_sw_linear) { 338 *BlockHeight256BytesY = 1; 339 *BlockHeight256BytesC = 1; 340 } else if (SourcePixelFormat == dm_rgbe_alpha) { 341 *BlockHeight256BytesY = 8; 342 *BlockHeight256BytesC = 16; 343 } else if (SourcePixelFormat == dm_420_8) { 344 *BlockHeight256BytesY = 16; 345 *BlockHeight256BytesC = 8; 346 } else { 347 *BlockHeight256BytesY = 8; 348 *BlockHeight256BytesC = 8; 349 } 350 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 351 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; 352 } 353 #ifdef __DML_VBA_DEBUG__ 354 dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY); 355 dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY); 356 dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC); 357 dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC); 358 #endif 359 360 if (SurfaceTiling == dm_sw_linear) { 361 *MacroTileHeightY = *BlockHeight256BytesY; 362 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY; 363 *MacroTileHeightC = *BlockHeight256BytesC; 364 if (*MacroTileHeightC == 0) 365 *MacroTileWidthC = 0; 366 else 367 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC; 368 } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t || 369 SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) { 370 *MacroTileHeightY = 16 * *BlockHeight256BytesY; 371 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY; 372 *MacroTileHeightC = 16 * *BlockHeight256BytesC; 373 if (*MacroTileHeightC == 0) 374 *MacroTileWidthC = 0; 375 else 376 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC; 377 } else { 378 *MacroTileHeightY = 32 * *BlockHeight256BytesY; 379 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY; 380 *MacroTileHeightC = 32 * *BlockHeight256BytesC; 381 if (*MacroTileHeightC == 0) 382 *MacroTileWidthC = 0; 383 else 384 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC; 385 } 386 387 #ifdef __DML_VBA_DEBUG__ 388 dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY); 389 dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY); 390 dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC); 391 dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC); 392 #endif 393 } // CalculateBytePerPixelAndBlockSizes 394 395 void dml32_CalculateSwathAndDETConfiguration( 396 unsigned int DETSizeOverride[], 397 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 398 unsigned int ConfigReturnBufferSizeInKByte, 399 unsigned int MaxTotalDETInKByte, 400 unsigned int MinCompressedBufferSizeInKByte, 401 double ForceSingleDPP, 402 unsigned int NumberOfActiveSurfaces, 403 unsigned int nomDETInKByte, 404 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 405 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, 406 unsigned int PixelChunkSizeKBytes, 407 unsigned int ROBSizeKBytes, 408 unsigned int CompressedBufferSegmentSizeInkByteFinal, 409 enum output_encoder_class Output[], 410 double ReadBandwidthLuma[], 411 double ReadBandwidthChroma[], 412 double MaximumSwathWidthLuma[], 413 double MaximumSwathWidthChroma[], 414 enum dm_rotation_angle SourceRotation[], 415 bool ViewportStationary[], 416 enum source_format_class SourcePixelFormat[], 417 enum dm_swizzle_mode SurfaceTiling[], 418 unsigned int ViewportWidth[], 419 unsigned int ViewportHeight[], 420 unsigned int ViewportXStart[], 421 unsigned int ViewportYStart[], 422 unsigned int ViewportXStartC[], 423 unsigned int ViewportYStartC[], 424 unsigned int SurfaceWidthY[], 425 unsigned int SurfaceWidthC[], 426 unsigned int SurfaceHeightY[], 427 unsigned int SurfaceHeightC[], 428 unsigned int Read256BytesBlockHeightY[], 429 unsigned int Read256BytesBlockHeightC[], 430 unsigned int Read256BytesBlockWidthY[], 431 unsigned int Read256BytesBlockWidthC[], 432 enum odm_combine_mode ODMMode[], 433 unsigned int BlendingAndTiming[], 434 unsigned int BytePerPixY[], 435 unsigned int BytePerPixC[], 436 double BytePerPixDETY[], 437 double BytePerPixDETC[], 438 unsigned int HActive[], 439 double HRatio[], 440 double HRatioChroma[], 441 unsigned int DPPPerSurface[], 442 443 /* Output */ 444 unsigned int swath_width_luma_ub[], 445 unsigned int swath_width_chroma_ub[], 446 double SwathWidth[], 447 double SwathWidthChroma[], 448 unsigned int SwathHeightY[], 449 unsigned int SwathHeightC[], 450 unsigned int DETBufferSizeInKByte[], 451 unsigned int DETBufferSizeY[], 452 unsigned int DETBufferSizeC[], 453 bool *UnboundedRequestEnabled, 454 unsigned int *CompressedBufferSizeInkByte, 455 unsigned int *CompBufReservedSpaceKBytes, 456 bool *CompBufReservedSpaceNeedAdjustment, 457 bool ViewportSizeSupportPerSurface[], 458 bool *ViewportSizeSupport) 459 { 460 unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 461 unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 462 unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; 463 unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; 464 unsigned int RoundedUpSwathSizeBytesY; 465 unsigned int RoundedUpSwathSizeBytesC; 466 double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; 467 double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; 468 unsigned int k; 469 unsigned int TotalActiveDPP = 0; 470 bool NoChromaSurfaces = true; 471 unsigned int DETBufferSizeInKByteForSwathCalculation; 472 473 #ifdef __DML_VBA_DEBUG__ 474 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 475 dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes); 476 dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes); 477 #endif 478 dml32_CalculateSwathWidth(ForceSingleDPP, 479 NumberOfActiveSurfaces, 480 SourcePixelFormat, 481 SourceRotation, 482 ViewportStationary, 483 ViewportWidth, 484 ViewportHeight, 485 ViewportXStart, 486 ViewportYStart, 487 ViewportXStartC, 488 ViewportYStartC, 489 SurfaceWidthY, 490 SurfaceWidthC, 491 SurfaceHeightY, 492 SurfaceHeightC, 493 ODMMode, 494 BytePerPixY, 495 BytePerPixC, 496 Read256BytesBlockHeightY, 497 Read256BytesBlockHeightC, 498 Read256BytesBlockWidthY, 499 Read256BytesBlockWidthC, 500 BlendingAndTiming, 501 HActive, 502 HRatio, 503 DPPPerSurface, 504 505 /* Output */ 506 SwathWidthdoubleDPP, 507 SwathWidthdoubleDPPChroma, 508 SwathWidth, 509 SwathWidthChroma, 510 MaximumSwathHeightY, 511 MaximumSwathHeightC, 512 swath_width_luma_ub, 513 swath_width_chroma_ub); 514 515 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 516 RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; 517 RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; 518 #ifdef __DML_VBA_DEBUG__ 519 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 520 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); 521 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]); 522 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]); 523 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, 524 RoundedUpMaxSwathSizeBytesY[k]); 525 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); 526 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]); 527 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]); 528 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, 529 RoundedUpMaxSwathSizeBytesC[k]); 530 #endif 531 532 if (SourcePixelFormat[k] == dm_420_10) { 533 RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256); 534 RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256); 535 } 536 } 537 538 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 539 TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); 540 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 541 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) { 542 NoChromaSurfaces = false; 543 } 544 } 545 546 // By default, just set the reserved space to 2 pixel chunks size 547 *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2; 548 549 // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data 550 // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio] 551 // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req 552 *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512); 553 554 if (*CompBufReservedSpaceNeedAdjustment == 1) { 555 *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512; 556 } 557 558 #ifdef __DML_VBA_DEBUG__ 559 dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes); 560 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment); 561 #endif 562 563 *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); 564 565 dml32_CalculateDETBufferSize(DETSizeOverride, 566 UseMALLForPStateChange, 567 ForceSingleDPP, 568 NumberOfActiveSurfaces, 569 *UnboundedRequestEnabled, 570 nomDETInKByte, 571 MaxTotalDETInKByte, 572 ConfigReturnBufferSizeInKByte, 573 MinCompressedBufferSizeInKByte, 574 CompressedBufferSegmentSizeInkByteFinal, 575 SourcePixelFormat, 576 ReadBandwidthLuma, 577 ReadBandwidthChroma, 578 RoundedUpMaxSwathSizeBytesY, 579 RoundedUpMaxSwathSizeBytesC, 580 DPPPerSurface, 581 582 /* Output */ 583 DETBufferSizeInKByte, // per hubp pipe 584 CompressedBufferSizeInkByte); 585 586 #ifdef __DML_VBA_DEBUG__ 587 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); 588 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 589 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 590 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); 591 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); 592 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 593 #endif 594 595 *ViewportSizeSupport = true; 596 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 597 598 DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == 599 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]); 600 #ifdef __DML_VBA_DEBUG__ 601 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k, 602 DETBufferSizeInKByteForSwathCalculation); 603 #endif 604 605 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= 606 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 607 SwathHeightY[k] = MaximumSwathHeightY[k]; 608 SwathHeightC[k] = MaximumSwathHeightC[k]; 609 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; 610 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; 611 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && 612 RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= 613 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 614 SwathHeightY[k] = MaximumSwathHeightY[k] / 2; 615 SwathHeightC[k] = MaximumSwathHeightC[k]; 616 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; 617 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; 618 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && 619 RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= 620 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 621 SwathHeightY[k] = MaximumSwathHeightY[k]; 622 SwathHeightC[k] = MaximumSwathHeightC[k] / 2; 623 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; 624 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; 625 } else { 626 SwathHeightY[k] = MaximumSwathHeightY[k] / 2; 627 SwathHeightC[k] = MaximumSwathHeightC[k] / 2; 628 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; 629 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; 630 } 631 632 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > 633 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) 634 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 && 635 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { 636 *ViewportSizeSupport = false; 637 ViewportSizeSupportPerSurface[k] = false; 638 } else { 639 ViewportSizeSupportPerSurface[k] = true; 640 } 641 642 if (SwathHeightC[k] == 0) { 643 #ifdef __DML_VBA_DEBUG__ 644 dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k); 645 #endif 646 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024; 647 DETBufferSizeC[k] = 0; 648 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { 649 #ifdef __DML_VBA_DEBUG__ 650 dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k); 651 #endif 652 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2; 653 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2; 654 } else { 655 #ifdef __DML_VBA_DEBUG__ 656 dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k); 657 #endif 658 DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024); 659 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k]; 660 } 661 662 #ifdef __DML_VBA_DEBUG__ 663 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); 664 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]); 665 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, 666 k, RoundedUpMaxSwathSizeBytesY[k]); 667 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, 668 k, RoundedUpMaxSwathSizeBytesC[k]); 669 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY); 670 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC); 671 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 672 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 673 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]); 674 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k, 675 ViewportSizeSupportPerSurface[k]); 676 #endif 677 678 } 679 } // CalculateSwathAndDETConfiguration 680 681 void dml32_CalculateSwathWidth( 682 bool ForceSingleDPP, 683 unsigned int NumberOfActiveSurfaces, 684 enum source_format_class SourcePixelFormat[], 685 enum dm_rotation_angle SourceRotation[], 686 bool ViewportStationary[], 687 unsigned int ViewportWidth[], 688 unsigned int ViewportHeight[], 689 unsigned int ViewportXStart[], 690 unsigned int ViewportYStart[], 691 unsigned int ViewportXStartC[], 692 unsigned int ViewportYStartC[], 693 unsigned int SurfaceWidthY[], 694 unsigned int SurfaceWidthC[], 695 unsigned int SurfaceHeightY[], 696 unsigned int SurfaceHeightC[], 697 enum odm_combine_mode ODMMode[], 698 unsigned int BytePerPixY[], 699 unsigned int BytePerPixC[], 700 unsigned int Read256BytesBlockHeightY[], 701 unsigned int Read256BytesBlockHeightC[], 702 unsigned int Read256BytesBlockWidthY[], 703 unsigned int Read256BytesBlockWidthC[], 704 unsigned int BlendingAndTiming[], 705 unsigned int HActive[], 706 double HRatio[], 707 unsigned int DPPPerSurface[], 708 709 /* Output */ 710 double SwathWidthdoubleDPPY[], 711 double SwathWidthdoubleDPPC[], 712 double SwathWidthY[], // per-pipe 713 double SwathWidthC[], // per-pipe 714 unsigned int MaximumSwathHeightY[], 715 unsigned int MaximumSwathHeightC[], 716 unsigned int swath_width_luma_ub[], // per-pipe 717 unsigned int swath_width_chroma_ub[]) // per-pipe 718 { 719 unsigned int k, j; 720 enum odm_combine_mode MainSurfaceODMMode; 721 722 unsigned int surface_width_ub_l; 723 unsigned int surface_height_ub_l; 724 unsigned int surface_width_ub_c = 0; 725 unsigned int surface_height_ub_c = 0; 726 727 #ifdef __DML_VBA_DEBUG__ 728 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 729 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 730 #endif 731 732 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 733 if (!IsVertical(SourceRotation[k])) 734 SwathWidthdoubleDPPY[k] = ViewportWidth[k]; 735 else 736 SwathWidthdoubleDPPY[k] = ViewportHeight[k]; 737 738 #ifdef __DML_VBA_DEBUG__ 739 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); 740 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); 741 #endif 742 743 MainSurfaceODMMode = ODMMode[k]; 744 for (j = 0; j < NumberOfActiveSurfaces; ++j) { 745 if (BlendingAndTiming[k] == j) 746 MainSurfaceODMMode = ODMMode[j]; 747 } 748 749 if (ForceSingleDPP) { 750 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 751 } else { 752 if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) { 753 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], 754 dml_round(HActive[k] / 4.0 * HRatio[k])); 755 } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) { 756 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], 757 dml_round(HActive[k] / 2.0 * HRatio[k])); 758 } else if (DPPPerSurface[k] == 2) { 759 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2; 760 } else { 761 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 762 } 763 } 764 765 #ifdef __DML_VBA_DEBUG__ 766 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]); 767 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]); 768 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode); 769 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]); 770 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]); 771 #endif 772 773 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 774 SourcePixelFormat[k] == dm_420_12) { 775 SwathWidthC[k] = SwathWidthY[k] / 2; 776 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2; 777 } else { 778 SwathWidthC[k] = SwathWidthY[k]; 779 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k]; 780 } 781 782 if (ForceSingleDPP == true) { 783 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 784 SwathWidthC[k] = SwathWidthdoubleDPPC[k]; 785 } 786 787 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); 788 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); 789 790 if (!IsVertical(SourceRotation[k])) { 791 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; 792 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; 793 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 794 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 795 dml_floor(ViewportXStart[k] + 796 SwathWidthY[k] + 797 Read256BytesBlockWidthY[k] - 1, 798 Read256BytesBlockWidthY[k]) - 799 dml_floor(ViewportXStart[k], 800 Read256BytesBlockWidthY[k])); 801 } else { 802 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 803 dml_ceil(SwathWidthY[k] - 1, 804 Read256BytesBlockWidthY[k]) + 805 Read256BytesBlockWidthY[k]); 806 } 807 if (BytePerPixC[k] > 0) { 808 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); 809 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 810 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, 811 dml_floor(ViewportXStartC[k] + SwathWidthC[k] + 812 Read256BytesBlockWidthC[k] - 1, 813 Read256BytesBlockWidthC[k]) - 814 dml_floor(ViewportXStartC[k], 815 Read256BytesBlockWidthC[k])); 816 } else { 817 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, 818 dml_ceil(SwathWidthC[k] - 1, 819 Read256BytesBlockWidthC[k]) + 820 Read256BytesBlockWidthC[k]); 821 } 822 } else { 823 swath_width_chroma_ub[k] = 0; 824 } 825 } else { 826 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; 827 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; 828 829 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 830 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] + 831 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, 832 Read256BytesBlockHeightY[k]) - 833 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k])); 834 } else { 835 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1, 836 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); 837 } 838 if (BytePerPixC[k] > 0) { 839 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); 840 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 841 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, 842 dml_floor(ViewportYStartC[k] + SwathWidthC[k] + 843 Read256BytesBlockHeightC[k] - 1, 844 Read256BytesBlockHeightC[k]) - 845 dml_floor(ViewportYStartC[k], 846 Read256BytesBlockHeightC[k])); 847 } else { 848 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, 849 dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + 850 Read256BytesBlockHeightC[k]); 851 } 852 } else { 853 swath_width_chroma_ub[k] = 0; 854 } 855 } 856 857 #ifdef __DML_VBA_DEBUG__ 858 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); 859 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l); 860 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c); 861 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c); 862 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]); 863 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]); 864 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]); 865 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]); 866 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]); 867 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]); 868 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]); 869 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]); 870 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]); 871 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]); 872 #endif 873 874 } 875 } // CalculateSwathWidth 876 877 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, 878 unsigned int TotalNumberOfActiveDPP, 879 bool NoChroma, 880 enum output_encoder_class Output, 881 enum dm_swizzle_mode SurfaceTiling, 882 bool CompBufReservedSpaceNeedAdjustment, 883 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) 884 { 885 bool ret_val = false; 886 887 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && 888 TotalNumberOfActiveDPP == 1 && NoChroma); 889 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) 890 ret_val = false; 891 892 if (SurfaceTiling == dm_sw_linear) 893 ret_val = false; 894 895 if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) 896 ret_val = false; 897 898 #ifdef __DML_VBA_DEBUG__ 899 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment); 900 dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); 901 dml_print("DML::%s: ret_val = %d\n", __func__, ret_val); 902 #endif 903 904 return (ret_val); 905 } 906 907 void dml32_CalculateDETBufferSize( 908 unsigned int DETSizeOverride[], 909 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 910 bool ForceSingleDPP, 911 unsigned int NumberOfActiveSurfaces, 912 bool UnboundedRequestEnabled, 913 unsigned int nomDETInKByte, 914 unsigned int MaxTotalDETInKByte, 915 unsigned int ConfigReturnBufferSizeInKByte, 916 unsigned int MinCompressedBufferSizeInKByte, 917 unsigned int CompressedBufferSegmentSizeInkByteFinal, 918 enum source_format_class SourcePixelFormat[], 919 double ReadBandwidthLuma[], 920 double ReadBandwidthChroma[], 921 unsigned int RoundedUpMaxSwathSizeBytesY[], 922 unsigned int RoundedUpMaxSwathSizeBytesC[], 923 unsigned int DPPPerSurface[], 924 /* Output */ 925 unsigned int DETBufferSizeInKByte[], 926 unsigned int *CompressedBufferSizeInkByte) 927 { 928 unsigned int DETBufferSizePoolInKByte; 929 unsigned int NextDETBufferPieceInKByte; 930 bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX]; 931 bool NextPotentialSurfaceToAssignDETPieceFound; 932 unsigned int NextSurfaceToAssignDETPiece; 933 double TotalBandwidth; 934 double BandwidthOfSurfacesNotAssignedDETPiece; 935 unsigned int max_minDET; 936 unsigned int minDET; 937 unsigned int minDET_pipe; 938 unsigned int j, k; 939 940 #ifdef __DML_VBA_DEBUG__ 941 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 942 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 943 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 944 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); 945 dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte); 946 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 947 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte); 948 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__, 949 CompressedBufferSegmentSizeInkByteFinal); 950 #endif 951 952 // Note: Will use default det size if that fits 2 swaths 953 if (UnboundedRequestEnabled) { 954 if (DETSizeOverride[0] > 0) { 955 DETBufferSizeInKByte[0] = DETSizeOverride[0]; 956 } else { 957 DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 * 958 ((double) RoundedUpMaxSwathSizeBytesY[0] + 959 (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0)); 960 } 961 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0]; 962 } else { 963 DETBufferSizePoolInKByte = MaxTotalDETInKByte; 964 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 965 DETBufferSizeInKByte[k] = nomDETInKByte; 966 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 967 SourcePixelFormat[k] == dm_420_12) { 968 max_minDET = nomDETInKByte - 64; 969 } else { 970 max_minDET = nomDETInKByte; 971 } 972 minDET = 128; 973 minDET_pipe = 0; 974 975 // add DET resource until can hold 2 full swaths 976 while (minDET <= max_minDET && minDET_pipe == 0) { 977 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] + 978 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET) 979 minDET_pipe = minDET; 980 minDET = minDET + 64; 981 } 982 983 #ifdef __DML_VBA_DEBUG__ 984 dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET); 985 dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET); 986 dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe); 987 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, 988 RoundedUpMaxSwathSizeBytesY[k]); 989 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, 990 RoundedUpMaxSwathSizeBytesC[k]); 991 #endif 992 993 if (minDET_pipe == 0) { 994 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] + 995 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64)); 996 #ifdef __DML_VBA_DEBUG__ 997 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n", 998 __func__, k, minDET_pipe); 999 #endif 1000 } 1001 1002 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { 1003 DETBufferSizeInKByte[k] = 0; 1004 } else if (DETSizeOverride[k] > 0) { 1005 DETBufferSizeInKByte[k] = DETSizeOverride[k]; 1006 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - 1007 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k]; 1008 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) { 1009 DETBufferSizeInKByte[k] = minDET_pipe; 1010 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - 1011 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe; 1012 } 1013 1014 #ifdef __DML_VBA_DEBUG__ 1015 dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 1016 dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]); 1017 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 1018 dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte); 1019 #endif 1020 } 1021 1022 TotalBandwidth = 0; 1023 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1024 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) 1025 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 1026 } 1027 #ifdef __DML_VBA_DEBUG__ 1028 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__); 1029 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) 1030 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 1031 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__); 1032 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth); 1033 #endif 1034 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth; 1035 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1036 1037 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { 1038 DETPieceAssignedToThisSurfaceAlready[k] = true; 1039 } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) * 1040 (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >= 1041 ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) { 1042 DETPieceAssignedToThisSurfaceAlready[k] = true; 1043 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - 1044 ReadBandwidthLuma[k] - ReadBandwidthChroma[k]; 1045 } else { 1046 DETPieceAssignedToThisSurfaceAlready[k] = false; 1047 } 1048 #ifdef __DML_VBA_DEBUG__ 1049 dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k, 1050 DETPieceAssignedToThisSurfaceAlready[k]); 1051 dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, 1052 BandwidthOfSurfacesNotAssignedDETPiece); 1053 #endif 1054 } 1055 1056 for (j = 0; j < NumberOfActiveSurfaces; ++j) { 1057 NextPotentialSurfaceToAssignDETPieceFound = false; 1058 NextSurfaceToAssignDETPiece = 0; 1059 1060 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1061 #ifdef __DML_VBA_DEBUG__ 1062 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k, 1063 ReadBandwidthLuma[k]); 1064 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k, 1065 ReadBandwidthChroma[k]); 1066 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, 1067 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); 1068 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, 1069 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1070 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k, 1071 NextSurfaceToAssignDETPiece); 1072 #endif 1073 if (!DETPieceAssignedToThisSurfaceAlready[k] && 1074 (!NextPotentialSurfaceToAssignDETPieceFound || 1075 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < 1076 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1077 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) { 1078 NextSurfaceToAssignDETPiece = k; 1079 NextPotentialSurfaceToAssignDETPieceFound = true; 1080 } 1081 #ifdef __DML_VBA_DEBUG__ 1082 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n", 1083 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); 1084 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n", 1085 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); 1086 #endif 1087 } 1088 1089 if (NextPotentialSurfaceToAssignDETPieceFound) { 1090 // Note: To show the banker's rounding behavior in VBA and also the fact 1091 // that the DET buffer size varies due to precision issue 1092 // 1093 //double tmp1 = ((double) DETBufferSizePoolInKByte * 1094 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1095 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1096 // BandwidthOfSurfacesNotAssignedDETPiece / 1097 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1098 //double tmp2 = dml_round((double) DETBufferSizePoolInKByte * 1099 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1100 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1101 //BandwidthOfSurfacesNotAssignedDETPiece / 1102 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1103 // 1104 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1); 1105 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2); 1106 1107 NextDETBufferPieceInKByte = dml_min( 1108 dml_round((double) DETBufferSizePoolInKByte * 1109 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1110 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1111 BandwidthOfSurfacesNotAssignedDETPiece / 1112 ((ForceSingleDPP ? 1 : 1113 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) * 1114 (ForceSingleDPP ? 1 : 1115 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0, 1116 dml_floor((double) DETBufferSizePoolInKByte, 1117 (ForceSingleDPP ? 1 : 1118 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1119 1120 // Above calculation can assign the entire DET buffer allocation to a single pipe. 1121 // We should limit the per-pipe DET size to the nominal / max per pipe. 1122 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { 1123 if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] < 1124 nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { 1125 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) - 1126 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]; 1127 } else { 1128 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] 1129 // already has the max per-pipe value 1130 NextDETBufferPieceInKByte = 0; 1131 } 1132 } 1133 1134 #ifdef __DML_VBA_DEBUG__ 1135 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j, 1136 DETBufferSizePoolInKByte); 1137 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, 1138 NextSurfaceToAssignDETPiece); 1139 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j, 1140 NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); 1141 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j, 1142 NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1143 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", 1144 __func__, j, BandwidthOfSurfacesNotAssignedDETPiece); 1145 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j, 1146 NextDETBufferPieceInKByte); 1147 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ", 1148 __func__, j, NextSurfaceToAssignDETPiece, 1149 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); 1150 #endif 1151 1152 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = 1153 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] 1154 + NextDETBufferPieceInKByte 1155 / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]); 1156 #ifdef __DML_VBA_DEBUG__ 1157 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); 1158 #endif 1159 1160 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte; 1161 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true; 1162 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - 1163 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1164 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1165 } 1166 } 1167 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte; 1168 } 1169 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; 1170 1171 #ifdef __DML_VBA_DEBUG__ 1172 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__); 1173 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 1174 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) { 1175 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n", 1176 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); 1177 } 1178 #endif 1179 } // CalculateDETBufferSize 1180 1181 void dml32_CalculateODMMode( 1182 unsigned int MaximumPixelsPerLinePerDSCUnit, 1183 unsigned int HActive, 1184 enum output_format_class OutFormat, 1185 enum output_encoder_class Output, 1186 enum odm_combine_policy ODMUse, 1187 double StateDispclk, 1188 double MaxDispclk, 1189 bool DSCEnable, 1190 unsigned int TotalNumberOfActiveDPP, 1191 unsigned int MaxNumDPP, 1192 double PixelClock, 1193 double DISPCLKDPPCLKDSCCLKDownSpreading, 1194 double DISPCLKRampingMargin, 1195 double DISPCLKDPPCLKVCOSpeed, 1196 unsigned int NumberOfDSCSlices, 1197 1198 /* Output */ 1199 bool *TotalAvailablePipesSupport, 1200 unsigned int *NumberOfDPP, 1201 enum odm_combine_mode *ODMMode, 1202 double *RequiredDISPCLKPerSurface) 1203 { 1204 1205 double SurfaceRequiredDISPCLKWithoutODMCombine; 1206 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1207 double SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1208 1209 SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled, 1210 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1211 MaxDispclk); 1212 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1, 1213 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1214 MaxDispclk); 1215 SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1, 1216 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1217 MaxDispclk); 1218 *TotalAvailablePipesSupport = true; 1219 *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable 1220 1221 if (ODMUse == dm_odm_combine_policy_none) 1222 *ODMMode = dm_odm_combine_mode_disabled; 1223 1224 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine; 1225 *NumberOfDPP = 0; 1226 1227 // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care?? 1228 // (ODMUse == "" || ODMUse == "CombineAsNeeded") 1229 1230 if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 || 1231 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk || 1232 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)) 1233 || NumberOfDSCSlices > 8)))) { 1234 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) { 1235 *ODMMode = dm_odm_combine_mode_4to1; 1236 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1237 *NumberOfDPP = 4; 1238 } else { 1239 *TotalAvailablePipesSupport = false; 1240 } 1241 } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 || 1242 (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk && 1243 SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) || 1244 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)) 1245 || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) { 1246 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) { 1247 *ODMMode = dm_odm_combine_mode_2to1; 1248 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1249 *NumberOfDPP = 2; 1250 } else { 1251 *TotalAvailablePipesSupport = false; 1252 } 1253 } else { 1254 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) 1255 *NumberOfDPP = 1; 1256 else 1257 *TotalAvailablePipesSupport = false; 1258 } 1259 if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH && 1260 ODMUse != dm_odm_combine_policy_4to1) { 1261 if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) { 1262 *ODMMode = dm_odm_combine_mode_disabled; 1263 *NumberOfDPP = 0; 1264 *TotalAvailablePipesSupport = false; 1265 } else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 || 1266 *ODMMode == dm_odm_combine_mode_4to1) { 1267 *ODMMode = dm_odm_combine_mode_4to1; 1268 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1269 *NumberOfDPP = 4; 1270 } else { 1271 *ODMMode = dm_odm_combine_mode_2to1; 1272 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1273 *NumberOfDPP = 2; 1274 } 1275 } 1276 if (Output == dm_hdmi && OutFormat == dm_420 && 1277 HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) { 1278 *ODMMode = dm_odm_combine_mode_disabled; 1279 *NumberOfDPP = 0; 1280 *TotalAvailablePipesSupport = false; 1281 } 1282 } 1283 1284 double dml32_CalculateRequiredDispclk( 1285 enum odm_combine_mode ODMMode, 1286 double PixelClock, 1287 double DISPCLKDPPCLKDSCCLKDownSpreading, 1288 double DISPCLKRampingMargin, 1289 double DISPCLKDPPCLKVCOSpeed, 1290 double MaxDispclk) 1291 { 1292 double RequiredDispclk = 0.; 1293 double PixelClockAfterODM; 1294 double DISPCLKWithRampingRoundedToDFSGranularity; 1295 double DISPCLKWithoutRampingRoundedToDFSGranularity; 1296 double MaxDispclkRoundedDownToDFSGranularity; 1297 1298 if (ODMMode == dm_odm_combine_mode_4to1) 1299 PixelClockAfterODM = PixelClock / 4; 1300 else if (ODMMode == dm_odm_combine_mode_2to1) 1301 PixelClockAfterODM = PixelClock / 2; 1302 else 1303 PixelClockAfterODM = PixelClock; 1304 1305 1306 DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( 1307 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100) 1308 * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed); 1309 1310 DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( 1311 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed); 1312 1313 MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed); 1314 1315 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) 1316 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity; 1317 else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) 1318 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity; 1319 else 1320 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity; 1321 1322 return RequiredDispclk; 1323 } 1324 1325 double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed) 1326 { 1327 if (Clock <= 0.0) 1328 return 0.0; 1329 1330 if (round_up) 1331 return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0); 1332 else 1333 return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0); 1334 } 1335 1336 void dml32_CalculateOutputLink( 1337 double PHYCLKPerState, 1338 double PHYCLKD18PerState, 1339 double PHYCLKD32PerState, 1340 double Downspreading, 1341 bool IsMainSurfaceUsingTheIndicatedTiming, 1342 enum output_encoder_class Output, 1343 enum output_format_class OutputFormat, 1344 unsigned int HTotal, 1345 unsigned int HActive, 1346 double PixelClockBackEnd, 1347 double ForcedOutputLinkBPP, 1348 unsigned int DSCInputBitPerComponent, 1349 unsigned int NumberOfDSCSlices, 1350 double AudioSampleRate, 1351 unsigned int AudioSampleLayout, 1352 enum odm_combine_mode ODMModeNoDSC, 1353 enum odm_combine_mode ODMModeDSC, 1354 bool DSCEnable, 1355 unsigned int OutputLinkDPLanes, 1356 enum dm_output_link_dp_rate OutputLinkDPRate, 1357 1358 /* Output */ 1359 bool *RequiresDSC, 1360 double *RequiresFEC, 1361 double *OutBpp, 1362 enum dm_output_type *OutputType, 1363 enum dm_output_rate *OutputRate, 1364 unsigned int *RequiredSlots) 1365 { 1366 bool LinkDSCEnable; 1367 unsigned int dummy; 1368 *RequiresDSC = false; 1369 *RequiresFEC = false; 1370 *OutBpp = 0; 1371 *OutputType = dm_output_type_unknown; 1372 *OutputRate = dm_output_rate_unknown; 1373 1374 if (IsMainSurfaceUsingTheIndicatedTiming) { 1375 if (Output == dm_hdmi) { 1376 *RequiresDSC = false; 1377 *RequiresFEC = false; 1378 *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive, 1379 PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat, 1380 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1381 ODMModeNoDSC, ODMModeDSC, &dummy); 1382 //OutputTypeAndRate = "HDMI"; 1383 *OutputType = dm_output_type_hdmi; 1384 1385 } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) { 1386 if (DSCEnable == true) { 1387 *RequiresDSC = true; 1388 LinkDSCEnable = true; 1389 if (Output == dm_dp || Output == dm_dp2p0) 1390 *RequiresFEC = true; 1391 else 1392 *RequiresFEC = false; 1393 } else { 1394 *RequiresDSC = false; 1395 LinkDSCEnable = false; 1396 if (Output == dm_dp2p0) 1397 *RequiresFEC = true; 1398 else 1399 *RequiresFEC = false; 1400 } 1401 if (Output == dm_dp2p0) { 1402 *OutBpp = 0; 1403 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) && 1404 PHYCLKD32PerState >= 10000 / 32) { 1405 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000, 1406 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1407 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1408 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1409 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1410 if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true && 1411 ForcedOutputLinkBPP == 0) { 1412 *RequiresDSC = true; 1413 LinkDSCEnable = true; 1414 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000, 1415 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1416 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1417 OutputFormat, DSCInputBitPerComponent, 1418 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1419 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1420 } 1421 //OutputTypeAndRate = Output & " UHBR10"; 1422 *OutputType = dm_output_type_dp2p0; 1423 *OutputRate = dm_output_rate_dp_rate_uhbr10; 1424 } 1425 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) && 1426 *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) { 1427 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500, 1428 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1429 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1430 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1431 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1432 1433 if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true && 1434 ForcedOutputLinkBPP == 0) { 1435 *RequiresDSC = true; 1436 LinkDSCEnable = true; 1437 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500, 1438 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1439 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1440 OutputFormat, DSCInputBitPerComponent, 1441 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1442 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1443 } 1444 //OutputTypeAndRate = Output & " UHBR13p5"; 1445 *OutputType = dm_output_type_dp2p0; 1446 *OutputRate = dm_output_rate_dp_rate_uhbr13p5; 1447 } 1448 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) && 1449 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) { 1450 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000, 1451 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1452 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1453 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1454 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1455 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) { 1456 *RequiresDSC = true; 1457 LinkDSCEnable = true; 1458 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000, 1459 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1460 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1461 OutputFormat, DSCInputBitPerComponent, 1462 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1463 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1464 } 1465 //OutputTypeAndRate = Output & " UHBR20"; 1466 *OutputType = dm_output_type_dp2p0; 1467 *OutputRate = dm_output_rate_dp_rate_uhbr20; 1468 } 1469 } else { 1470 *OutBpp = 0; 1471 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) && 1472 PHYCLKPerState >= 270) { 1473 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700, 1474 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1475 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1476 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1477 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1478 if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true && 1479 ForcedOutputLinkBPP == 0) { 1480 *RequiresDSC = true; 1481 LinkDSCEnable = true; 1482 if (Output == dm_dp) 1483 *RequiresFEC = true; 1484 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700, 1485 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1486 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1487 OutputFormat, DSCInputBitPerComponent, 1488 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1489 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1490 } 1491 //OutputTypeAndRate = Output & " HBR"; 1492 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp; 1493 *OutputRate = dm_output_rate_dp_rate_hbr; 1494 } 1495 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) && 1496 *OutBpp == 0 && PHYCLKPerState >= 540) { 1497 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400, 1498 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1499 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1500 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1501 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1502 1503 if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true && 1504 ForcedOutputLinkBPP == 0) { 1505 *RequiresDSC = true; 1506 LinkDSCEnable = true; 1507 if (Output == dm_dp) 1508 *RequiresFEC = true; 1509 1510 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400, 1511 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1512 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1513 OutputFormat, DSCInputBitPerComponent, 1514 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1515 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1516 } 1517 //OutputTypeAndRate = Output & " HBR2"; 1518 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp; 1519 *OutputRate = dm_output_rate_dp_rate_hbr2; 1520 } 1521 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) { 1522 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100, 1523 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1524 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1525 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, 1526 AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, 1527 RequiredSlots); 1528 1529 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) { 1530 *RequiresDSC = true; 1531 LinkDSCEnable = true; 1532 if (Output == dm_dp) 1533 *RequiresFEC = true; 1534 1535 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100, 1536 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1537 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1538 OutputFormat, DSCInputBitPerComponent, 1539 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1540 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1541 } 1542 //OutputTypeAndRate = Output & " HBR3"; 1543 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp; 1544 *OutputRate = dm_output_rate_dp_rate_hbr3; 1545 } 1546 } 1547 } 1548 } 1549 } 1550 1551 void dml32_CalculateDPPCLK( 1552 unsigned int NumberOfActiveSurfaces, 1553 double DISPCLKDPPCLKDSCCLKDownSpreading, 1554 double DISPCLKDPPCLKVCOSpeed, 1555 double DPPCLKUsingSingleDPP[], 1556 unsigned int DPPPerSurface[], 1557 1558 /* output */ 1559 double *GlobalDPPCLK, 1560 double Dppclk[]) 1561 { 1562 unsigned int k; 1563 *GlobalDPPCLK = 0; 1564 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1565 Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100); 1566 *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]); 1567 } 1568 *GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed); 1569 for (k = 0; k < NumberOfActiveSurfaces; ++k) 1570 Dppclk[k] = *GlobalDPPCLK / 255 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0); 1571 } 1572 1573 double dml32_TruncToValidBPP( 1574 double LinkBitRate, 1575 unsigned int Lanes, 1576 unsigned int HTotal, 1577 unsigned int HActive, 1578 double PixelClock, 1579 double DesiredBPP, 1580 bool DSCEnable, 1581 enum output_encoder_class Output, 1582 enum output_format_class Format, 1583 unsigned int DSCInputBitPerComponent, 1584 unsigned int DSCSlices, 1585 unsigned int AudioRate, 1586 unsigned int AudioLayout, 1587 enum odm_combine_mode ODMModeNoDSC, 1588 enum odm_combine_mode ODMModeDSC, 1589 /* Output */ 1590 unsigned int *RequiredSlots) 1591 { 1592 double MaxLinkBPP; 1593 unsigned int MinDSCBPP; 1594 double MaxDSCBPP; 1595 unsigned int NonDSCBPP0; 1596 unsigned int NonDSCBPP1; 1597 unsigned int NonDSCBPP2; 1598 1599 if (Format == dm_420) { 1600 NonDSCBPP0 = 12; 1601 NonDSCBPP1 = 15; 1602 NonDSCBPP2 = 18; 1603 MinDSCBPP = 6; 1604 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; 1605 } else if (Format == dm_444) { 1606 NonDSCBPP0 = 24; 1607 NonDSCBPP1 = 30; 1608 NonDSCBPP2 = 36; 1609 MinDSCBPP = 8; 1610 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; 1611 } else { 1612 if (Output == dm_hdmi) { 1613 NonDSCBPP0 = 24; 1614 NonDSCBPP1 = 24; 1615 NonDSCBPP2 = 24; 1616 } else { 1617 NonDSCBPP0 = 16; 1618 NonDSCBPP1 = 20; 1619 NonDSCBPP2 = 24; 1620 } 1621 if (Format == dm_n422) { 1622 MinDSCBPP = 7; 1623 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; 1624 } else { 1625 MinDSCBPP = 8; 1626 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; 1627 } 1628 } 1629 if (Output == dm_dp2p0) { 1630 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540; 1631 } else if (DSCEnable && Output == dm_dp) { 1632 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); 1633 } else { 1634 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; 1635 } 1636 1637 if (DSCEnable) { 1638 if (ODMModeDSC == dm_odm_combine_mode_4to1) 1639 MaxLinkBPP = dml_min(MaxLinkBPP, 16); 1640 else if (ODMModeDSC == dm_odm_combine_mode_2to1) 1641 MaxLinkBPP = dml_min(MaxLinkBPP, 32); 1642 else if (ODMModeDSC == dm_odm_split_mode_1to2) 1643 MaxLinkBPP = 2 * MaxLinkBPP; 1644 } else { 1645 if (ODMModeNoDSC == dm_odm_combine_mode_4to1) 1646 MaxLinkBPP = dml_min(MaxLinkBPP, 16); 1647 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1) 1648 MaxLinkBPP = dml_min(MaxLinkBPP, 32); 1649 else if (ODMModeNoDSC == dm_odm_split_mode_1to2) 1650 MaxLinkBPP = 2 * MaxLinkBPP; 1651 } 1652 1653 if (DesiredBPP == 0) { 1654 if (DSCEnable) { 1655 if (MaxLinkBPP < MinDSCBPP) 1656 return BPP_INVALID; 1657 else if (MaxLinkBPP >= MaxDSCBPP) 1658 return MaxDSCBPP; 1659 else 1660 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; 1661 } else { 1662 if (MaxLinkBPP >= NonDSCBPP2) 1663 return NonDSCBPP2; 1664 else if (MaxLinkBPP >= NonDSCBPP1) 1665 return NonDSCBPP1; 1666 else if (MaxLinkBPP >= NonDSCBPP0) 1667 return 16.0; 1668 else 1669 return BPP_INVALID; 1670 } 1671 } else { 1672 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || 1673 DesiredBPP <= NonDSCBPP0)) || 1674 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) 1675 return BPP_INVALID; 1676 else 1677 return DesiredBPP; 1678 } 1679 1680 *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1); 1681 1682 return BPP_INVALID; 1683 } // TruncToValidBPP 1684 1685 double dml32_RequiredDTBCLK( 1686 bool DSCEnable, 1687 double PixelClock, 1688 enum output_format_class OutputFormat, 1689 double OutputBpp, 1690 unsigned int DSCSlices, 1691 unsigned int HTotal, 1692 unsigned int HActive, 1693 unsigned int AudioRate, 1694 unsigned int AudioLayout) 1695 { 1696 double PixelWordRate; 1697 double HCActive; 1698 double HCBlank; 1699 double AverageTribyteRate; 1700 double HActiveTribyteRate; 1701 1702 if (DSCEnable != true) 1703 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); 1704 1705 PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); 1706 HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * 1707 dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); 1708 HCBlank = 64 + 32 * 1709 dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1); 1710 AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; 1711 HActiveTribyteRate = PixelWordRate * HCActive / HActive; 1712 return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; 1713 } 1714 1715 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, 1716 enum odm_combine_mode ODMMode, 1717 unsigned int DSCInputBitPerComponent, 1718 double OutputBpp, 1719 unsigned int HActive, 1720 unsigned int HTotal, 1721 unsigned int NumberOfDSCSlices, 1722 enum output_format_class OutputFormat, 1723 enum output_encoder_class Output, 1724 double PixelClock, 1725 double PixelClockBackEnd, 1726 double dsc_delay_factor_wa) 1727 { 1728 unsigned int DSCDelayRequirement_val; 1729 1730 if (DSCEnabled == true && OutputBpp != 0) { 1731 if (ODMMode == dm_odm_combine_mode_4to1) { 1732 DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1733 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4, 1734 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); 1735 } else if (ODMMode == dm_odm_combine_mode_2to1) { 1736 DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1737 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2, 1738 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); 1739 } else { 1740 DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1741 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices, 1742 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output); 1743 } 1744 1745 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) * 1746 dml_ceil((double)DSCDelayRequirement_val / HActive, 1); 1747 1748 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd; 1749 1750 } else { 1751 DSCDelayRequirement_val = 0; 1752 } 1753 1754 #ifdef __DML_VBA_DEBUG__ 1755 dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled); 1756 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); 1757 dml_print("DML::%s: HActive = %d\n", __func__, HActive); 1758 dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat); 1759 dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent); 1760 dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices); 1761 dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val); 1762 #endif 1763 1764 return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1); 1765 } 1766 1767 void dml32_CalculateSurfaceSizeInMall( 1768 unsigned int NumberOfActiveSurfaces, 1769 unsigned int MALLAllocatedForDCN, 1770 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], 1771 enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[], 1772 bool DCCEnable[], 1773 bool ViewportStationary[], 1774 unsigned int ViewportXStartY[], 1775 unsigned int ViewportYStartY[], 1776 unsigned int ViewportXStartC[], 1777 unsigned int ViewportYStartC[], 1778 unsigned int ViewportWidthY[], 1779 unsigned int ViewportHeightY[], 1780 unsigned int BytesPerPixelY[], 1781 unsigned int ViewportWidthC[], 1782 unsigned int ViewportHeightC[], 1783 unsigned int BytesPerPixelC[], 1784 unsigned int SurfaceWidthY[], 1785 unsigned int SurfaceWidthC[], 1786 unsigned int SurfaceHeightY[], 1787 unsigned int SurfaceHeightC[], 1788 unsigned int Read256BytesBlockWidthY[], 1789 unsigned int Read256BytesBlockWidthC[], 1790 unsigned int Read256BytesBlockHeightY[], 1791 unsigned int Read256BytesBlockHeightC[], 1792 unsigned int ReadBlockWidthY[], 1793 unsigned int ReadBlockWidthC[], 1794 unsigned int ReadBlockHeightY[], 1795 unsigned int ReadBlockHeightC[], 1796 unsigned int DCCMetaPitchY[], 1797 unsigned int DCCMetaPitchC[], 1798 1799 /* Output */ 1800 unsigned int SurfaceSizeInMALL[], 1801 bool *ExceededMALLSize) 1802 { 1803 unsigned int k; 1804 unsigned int TotalSurfaceSizeInMALLForSS = 0; 1805 unsigned int TotalSurfaceSizeInMALLForSubVP = 0; 1806 unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024; 1807 1808 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1809 if (ViewportStationary[k]) { 1810 SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]), 1811 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1, 1812 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k], 1813 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k], 1814 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] + 1815 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - 1816 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k]; 1817 1818 if (ReadBlockWidthC[k] > 0) { 1819 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1820 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]), 1821 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 1822 ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - 1823 dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) * 1824 dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]), 1825 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 1826 ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - 1827 dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) * 1828 BytesPerPixelC[k]; 1829 } 1830 if (DCCEnable[k] == true) { 1831 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1832 (dml_min(dml_ceil(DCCMetaPitchY[k], 8 * Read256BytesBlockWidthY[k]), 1833 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 * 1834 Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) 1835 - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k])) 1836 * dml_min(dml_ceil(SurfaceHeightY[k], 8 * 1837 Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] + 1838 ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 * 1839 Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 * 1840 Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256) + (64 * 1024); 1841 if (Read256BytesBlockWidthC[k] > 0) { 1842 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1843 dml_min(dml_ceil(DCCMetaPitchC[k], 8 * 1844 Read256BytesBlockWidthC[k]), 1845 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8 1846 * Read256BytesBlockWidthC[k] - 1, 8 * 1847 Read256BytesBlockWidthC[k]) - 1848 dml_floor(ViewportXStartC[k], 8 * 1849 Read256BytesBlockWidthC[k])) * 1850 dml_min(dml_ceil(SurfaceHeightC[k], 8 * 1851 Read256BytesBlockHeightC[k]), 1852 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 1853 8 * Read256BytesBlockHeightC[k] - 1, 8 * 1854 Read256BytesBlockHeightC[k]) - 1855 dml_floor(ViewportYStartC[k], 8 * 1856 Read256BytesBlockHeightC[k])) * 1857 BytesPerPixelC[k] / 256; 1858 } 1859 } 1860 } else { 1861 SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 1862 ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * 1863 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 1864 ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * 1865 BytesPerPixelY[k]; 1866 if (ReadBlockWidthC[k] > 0) { 1867 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1868 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 1869 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * 1870 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 1871 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * 1872 BytesPerPixelC[k]; 1873 } 1874 if (DCCEnable[k] == true) { 1875 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1876 (dml_ceil(dml_min(DCCMetaPitchY[k], ViewportWidthY[k] + 8 * 1877 Read256BytesBlockWidthY[k] - 1), 8 * 1878 Read256BytesBlockWidthY[k]) * 1879 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 * 1880 Read256BytesBlockHeightY[k] - 1), 8 * 1881 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024); 1882 1883 if (Read256BytesBlockWidthC[k] > 0) { 1884 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1885 dml_ceil(dml_min(DCCMetaPitchC[k], ViewportWidthC[k] + 8 * 1886 Read256BytesBlockWidthC[k] - 1), 8 * 1887 Read256BytesBlockWidthC[k]) * 1888 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 * 1889 Read256BytesBlockHeightC[k] - 1), 8 * 1890 Read256BytesBlockHeightC[k]) * 1891 BytesPerPixelC[k] / 256; 1892 } 1893 } 1894 } 1895 } 1896 1897 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1898 /* SS and Subvp counted separate as they are never used at the same time */ 1899 if (UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) 1900 TotalSurfaceSizeInMALLForSubVP = TotalSurfaceSizeInMALLForSubVP + SurfaceSizeInMALL[k]; 1901 else if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable) 1902 TotalSurfaceSizeInMALLForSS = TotalSurfaceSizeInMALLForSS + SurfaceSizeInMALL[k]; 1903 } 1904 *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) || 1905 (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes); 1906 } // CalculateSurfaceSizeInMall 1907 1908 void dml32_CalculateVMRowAndSwath( 1909 unsigned int NumberOfActiveSurfaces, 1910 DmlPipe myPipe[], 1911 unsigned int SurfaceSizeInMALL[], 1912 unsigned int PTEBufferSizeInRequestsLuma, 1913 unsigned int PTEBufferSizeInRequestsChroma, 1914 unsigned int DCCMetaBufferSizeBytes, 1915 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], 1916 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 1917 unsigned int MALLAllocatedForDCN, 1918 double SwathWidthY[], 1919 double SwathWidthC[], 1920 bool GPUVMEnable, 1921 bool HostVMEnable, 1922 unsigned int HostVMMaxNonCachedPageTableLevels, 1923 unsigned int GPUVMMaxPageTableLevels, 1924 unsigned int GPUVMMinPageSizeKBytes[], 1925 unsigned int HostVMMinPageSize, 1926 1927 /* Output */ 1928 bool PTEBufferSizeNotExceeded[], 1929 bool DCCMetaBufferSizeNotExceeded[], 1930 unsigned int dpte_row_width_luma_ub[], 1931 unsigned int dpte_row_width_chroma_ub[], 1932 unsigned int dpte_row_height_luma[], 1933 unsigned int dpte_row_height_chroma[], 1934 unsigned int dpte_row_height_linear_luma[], // VBA_DELTA 1935 unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA 1936 unsigned int meta_req_width[], 1937 unsigned int meta_req_width_chroma[], 1938 unsigned int meta_req_height[], 1939 unsigned int meta_req_height_chroma[], 1940 unsigned int meta_row_width[], 1941 unsigned int meta_row_width_chroma[], 1942 unsigned int meta_row_height[], 1943 unsigned int meta_row_height_chroma[], 1944 unsigned int vm_group_bytes[], 1945 unsigned int dpte_group_bytes[], 1946 unsigned int PixelPTEReqWidthY[], 1947 unsigned int PixelPTEReqHeightY[], 1948 unsigned int PTERequestSizeY[], 1949 unsigned int PixelPTEReqWidthC[], 1950 unsigned int PixelPTEReqHeightC[], 1951 unsigned int PTERequestSizeC[], 1952 unsigned int dpde0_bytes_per_frame_ub_l[], 1953 unsigned int meta_pte_bytes_per_frame_ub_l[], 1954 unsigned int dpde0_bytes_per_frame_ub_c[], 1955 unsigned int meta_pte_bytes_per_frame_ub_c[], 1956 double PrefetchSourceLinesY[], 1957 double PrefetchSourceLinesC[], 1958 double VInitPreFillY[], 1959 double VInitPreFillC[], 1960 unsigned int MaxNumSwathY[], 1961 unsigned int MaxNumSwathC[], 1962 double meta_row_bw[], 1963 double dpte_row_bw[], 1964 double PixelPTEBytesPerRow[], 1965 double PDEAndMetaPTEBytesFrame[], 1966 double MetaRowByte[], 1967 bool use_one_row_for_frame[], 1968 bool use_one_row_for_frame_flip[], 1969 bool UsesMALLForStaticScreen[], 1970 bool PTE_BUFFER_MODE[], 1971 unsigned int BIGK_FRAGMENT_SIZE[]) 1972 { 1973 unsigned int k; 1974 unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; 1975 unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; 1976 unsigned int PDEAndMetaPTEBytesFrameY; 1977 unsigned int PDEAndMetaPTEBytesFrameC; 1978 unsigned int MetaRowByteY[DC__NUM_DPP__MAX]; 1979 unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; 1980 unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; 1981 unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; 1982 unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; 1983 unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; 1984 unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; 1985 unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; 1986 unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; 1987 unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; 1988 bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; 1989 1990 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1991 if (HostVMEnable == true) { 1992 vm_group_bytes[k] = 512; 1993 dpte_group_bytes[k] = 512; 1994 } else if (GPUVMEnable == true) { 1995 vm_group_bytes[k] = 2048; 1996 if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation)) 1997 dpte_group_bytes[k] = 512; 1998 else 1999 dpte_group_bytes[k] = 2048; 2000 } else { 2001 vm_group_bytes[k] = 0; 2002 dpte_group_bytes[k] = 0; 2003 } 2004 2005 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 || 2006 myPipe[k].SourcePixelFormat == dm_420_12 || 2007 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) { 2008 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) && 2009 !IsVertical(myPipe[k].SourceRotation)) { 2010 PTEBufferSizeInRequestsForLuma[k] = 2011 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2; 2012 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k]; 2013 } else { 2014 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; 2015 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; 2016 } 2017 2018 PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( 2019 myPipe[k].ViewportStationary, 2020 myPipe[k].DCCEnable, 2021 myPipe[k].DPPPerSurface, 2022 myPipe[k].BlockHeight256BytesC, 2023 myPipe[k].BlockWidth256BytesC, 2024 myPipe[k].SourcePixelFormat, 2025 myPipe[k].SurfaceTiling, 2026 myPipe[k].BytePerPixelC, 2027 myPipe[k].SourceRotation, 2028 SwathWidthC[k], 2029 myPipe[k].ViewportHeightChroma, 2030 myPipe[k].ViewportXStartC, 2031 myPipe[k].ViewportYStartC, 2032 GPUVMEnable, 2033 HostVMEnable, 2034 HostVMMaxNonCachedPageTableLevels, 2035 GPUVMMaxPageTableLevels, 2036 GPUVMMinPageSizeKBytes[k], 2037 HostVMMinPageSize, 2038 PTEBufferSizeInRequestsForChroma[k], 2039 myPipe[k].PitchC, 2040 myPipe[k].DCCMetaPitchC, 2041 myPipe[k].BlockWidthC, 2042 myPipe[k].BlockHeightC, 2043 2044 /* Output */ 2045 &MetaRowByteC[k], 2046 &PixelPTEBytesPerRowC[k], 2047 &dpte_row_width_chroma_ub[k], 2048 &dpte_row_height_chroma[k], 2049 &dpte_row_height_linear_chroma[k], 2050 &PixelPTEBytesPerRowC_one_row_per_frame[k], 2051 &dpte_row_width_chroma_ub_one_row_per_frame[k], 2052 &dpte_row_height_chroma_one_row_per_frame[k], 2053 &meta_req_width_chroma[k], 2054 &meta_req_height_chroma[k], 2055 &meta_row_width_chroma[k], 2056 &meta_row_height_chroma[k], 2057 &PixelPTEReqWidthC[k], 2058 &PixelPTEReqHeightC[k], 2059 &PTERequestSizeC[k], 2060 &dpde0_bytes_per_frame_ub_c[k], 2061 &meta_pte_bytes_per_frame_ub_c[k]); 2062 2063 PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines( 2064 myPipe[k].VRatioChroma, 2065 myPipe[k].VTapsChroma, 2066 myPipe[k].InterlaceEnable, 2067 myPipe[k].ProgressiveToInterlaceUnitInOPP, 2068 myPipe[k].SwathHeightC, 2069 myPipe[k].SourceRotation, 2070 myPipe[k].ViewportStationary, 2071 SwathWidthC[k], 2072 myPipe[k].ViewportHeightChroma, 2073 myPipe[k].ViewportXStartC, 2074 myPipe[k].ViewportYStartC, 2075 2076 /* Output */ 2077 &VInitPreFillC[k], 2078 &MaxNumSwathC[k]); 2079 } else { 2080 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; 2081 PTEBufferSizeInRequestsForChroma[k] = 0; 2082 PixelPTEBytesPerRowC[k] = 0; 2083 PDEAndMetaPTEBytesFrameC = 0; 2084 MetaRowByteC[k] = 0; 2085 MaxNumSwathC[k] = 0; 2086 PrefetchSourceLinesC[k] = 0; 2087 dpte_row_height_chroma_one_row_per_frame[k] = 0; 2088 dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; 2089 PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; 2090 } 2091 2092 PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( 2093 myPipe[k].ViewportStationary, 2094 myPipe[k].DCCEnable, 2095 myPipe[k].DPPPerSurface, 2096 myPipe[k].BlockHeight256BytesY, 2097 myPipe[k].BlockWidth256BytesY, 2098 myPipe[k].SourcePixelFormat, 2099 myPipe[k].SurfaceTiling, 2100 myPipe[k].BytePerPixelY, 2101 myPipe[k].SourceRotation, 2102 SwathWidthY[k], 2103 myPipe[k].ViewportHeight, 2104 myPipe[k].ViewportXStart, 2105 myPipe[k].ViewportYStart, 2106 GPUVMEnable, 2107 HostVMEnable, 2108 HostVMMaxNonCachedPageTableLevels, 2109 GPUVMMaxPageTableLevels, 2110 GPUVMMinPageSizeKBytes[k], 2111 HostVMMinPageSize, 2112 PTEBufferSizeInRequestsForLuma[k], 2113 myPipe[k].PitchY, 2114 myPipe[k].DCCMetaPitchY, 2115 myPipe[k].BlockWidthY, 2116 myPipe[k].BlockHeightY, 2117 2118 /* Output */ 2119 &MetaRowByteY[k], 2120 &PixelPTEBytesPerRowY[k], 2121 &dpte_row_width_luma_ub[k], 2122 &dpte_row_height_luma[k], 2123 &dpte_row_height_linear_luma[k], 2124 &PixelPTEBytesPerRowY_one_row_per_frame[k], 2125 &dpte_row_width_luma_ub_one_row_per_frame[k], 2126 &dpte_row_height_luma_one_row_per_frame[k], 2127 &meta_req_width[k], 2128 &meta_req_height[k], 2129 &meta_row_width[k], 2130 &meta_row_height[k], 2131 &PixelPTEReqWidthY[k], 2132 &PixelPTEReqHeightY[k], 2133 &PTERequestSizeY[k], 2134 &dpde0_bytes_per_frame_ub_l[k], 2135 &meta_pte_bytes_per_frame_ub_l[k]); 2136 2137 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines( 2138 myPipe[k].VRatio, 2139 myPipe[k].VTaps, 2140 myPipe[k].InterlaceEnable, 2141 myPipe[k].ProgressiveToInterlaceUnitInOPP, 2142 myPipe[k].SwathHeightY, 2143 myPipe[k].SourceRotation, 2144 myPipe[k].ViewportStationary, 2145 SwathWidthY[k], 2146 myPipe[k].ViewportHeight, 2147 myPipe[k].ViewportXStart, 2148 myPipe[k].ViewportYStart, 2149 2150 /* Output */ 2151 &VInitPreFillY[k], 2152 &MaxNumSwathY[k]); 2153 2154 PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; 2155 MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k]; 2156 2157 if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] && 2158 PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) { 2159 PTEBufferSizeNotExceeded[k] = true; 2160 } else { 2161 PTEBufferSizeNotExceeded[k] = false; 2162 } 2163 2164 one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * 2165 PTEBufferSizeInRequestsForLuma[k] && 2166 PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]); 2167 } 2168 2169 dml32_CalculateMALLUseForStaticScreen( 2170 NumberOfActiveSurfaces, 2171 MALLAllocatedForDCN, 2172 UseMALLForStaticScreen, // mode 2173 SurfaceSizeInMALL, 2174 one_row_per_frame_fits_in_buffer, 2175 /* Output */ 2176 UsesMALLForStaticScreen); // boolen 2177 2178 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2179 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || 2180 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || 2181 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || 2182 (GPUVMMinPageSizeKBytes[k] > 64); 2183 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12; 2184 } 2185 2186 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2187 #ifdef __DML_VBA_DEBUG__ 2188 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]); 2189 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); 2190 #endif 2191 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || 2192 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || 2193 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || 2194 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation)); 2195 2196 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] && 2197 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame); 2198 2199 if (use_one_row_for_frame[k]) { 2200 dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k]; 2201 dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k]; 2202 PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k]; 2203 dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k]; 2204 dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k]; 2205 PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k]; 2206 PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k]; 2207 } 2208 2209 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes) 2210 DCCMetaBufferSizeNotExceeded[k] = true; 2211 else 2212 DCCMetaBufferSizeNotExceeded[k] = false; 2213 2214 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k]; 2215 if (use_one_row_for_frame[k]) 2216 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2; 2217 2218 dml32_CalculateRowBandwidth( 2219 GPUVMEnable, 2220 myPipe[k].SourcePixelFormat, 2221 myPipe[k].VRatio, 2222 myPipe[k].VRatioChroma, 2223 myPipe[k].DCCEnable, 2224 myPipe[k].HTotal / myPipe[k].PixelClock, 2225 MetaRowByteY[k], MetaRowByteC[k], 2226 meta_row_height[k], 2227 meta_row_height_chroma[k], 2228 PixelPTEBytesPerRowY[k], 2229 PixelPTEBytesPerRowC[k], 2230 dpte_row_height_luma[k], 2231 dpte_row_height_chroma[k], 2232 2233 /* Output */ 2234 &meta_row_bw[k], 2235 &dpte_row_bw[k]); 2236 #ifdef __DML_VBA_DEBUG__ 2237 dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]); 2238 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n", 2239 __func__, k, use_one_row_for_frame_flip[k]); 2240 dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n", 2241 __func__, k, UseMALLForPStateChange[k]); 2242 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]); 2243 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n", 2244 __func__, k, dpte_row_width_luma_ub[k]); 2245 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]); 2246 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n", 2247 __func__, k, dpte_row_height_chroma[k]); 2248 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n", 2249 __func__, k, dpte_row_width_chroma_ub[k]); 2250 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]); 2251 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]); 2252 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n", 2253 __func__, k, PTEBufferSizeNotExceeded[k]); 2254 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]); 2255 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]); 2256 #endif 2257 } 2258 } // CalculateVMRowAndSwath 2259 2260 unsigned int dml32_CalculateVMAndRowBytes( 2261 bool ViewportStationary, 2262 bool DCCEnable, 2263 unsigned int NumberOfDPPs, 2264 unsigned int BlockHeight256Bytes, 2265 unsigned int BlockWidth256Bytes, 2266 enum source_format_class SourcePixelFormat, 2267 unsigned int SurfaceTiling, 2268 unsigned int BytePerPixel, 2269 enum dm_rotation_angle SourceRotation, 2270 double SwathWidth, 2271 unsigned int ViewportHeight, 2272 unsigned int ViewportXStart, 2273 unsigned int ViewportYStart, 2274 bool GPUVMEnable, 2275 bool HostVMEnable, 2276 unsigned int HostVMMaxNonCachedPageTableLevels, 2277 unsigned int GPUVMMaxPageTableLevels, 2278 unsigned int GPUVMMinPageSizeKBytes, 2279 unsigned int HostVMMinPageSize, 2280 unsigned int PTEBufferSizeInRequests, 2281 unsigned int Pitch, 2282 unsigned int DCCMetaPitch, 2283 unsigned int MacroTileWidth, 2284 unsigned int MacroTileHeight, 2285 2286 /* Output */ 2287 unsigned int *MetaRowByte, 2288 unsigned int *PixelPTEBytesPerRow, 2289 unsigned int *dpte_row_width_ub, 2290 unsigned int *dpte_row_height, 2291 unsigned int *dpte_row_height_linear, 2292 unsigned int *PixelPTEBytesPerRow_one_row_per_frame, 2293 unsigned int *dpte_row_width_ub_one_row_per_frame, 2294 unsigned int *dpte_row_height_one_row_per_frame, 2295 unsigned int *MetaRequestWidth, 2296 unsigned int *MetaRequestHeight, 2297 unsigned int *meta_row_width, 2298 unsigned int *meta_row_height, 2299 unsigned int *PixelPTEReqWidth, 2300 unsigned int *PixelPTEReqHeight, 2301 unsigned int *PTERequestSize, 2302 unsigned int *DPDE0BytesFrame, 2303 unsigned int *MetaPTEBytesFrame) 2304 { 2305 unsigned int MPDEBytesFrame; 2306 unsigned int DCCMetaSurfaceBytes; 2307 unsigned int ExtraDPDEBytesFrame; 2308 unsigned int PDEAndMetaPTEBytesFrame; 2309 unsigned int HostVMDynamicLevels = 0; 2310 unsigned int MacroTileSizeBytes; 2311 unsigned int vp_height_meta_ub; 2312 unsigned int vp_height_dpte_ub; 2313 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this 2314 2315 if (GPUVMEnable == true && HostVMEnable == true) { 2316 if (HostVMMinPageSize < 2048) 2317 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 2318 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) 2319 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 2320 else 2321 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 2322 } 2323 2324 *MetaRequestHeight = 8 * BlockHeight256Bytes; 2325 *MetaRequestWidth = 8 * BlockWidth256Bytes; 2326 if (SurfaceTiling == dm_sw_linear) { 2327 *meta_row_height = 32; 2328 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) 2329 - dml_floor(ViewportXStart, *MetaRequestWidth); 2330 } else if (!IsVertical(SourceRotation)) { 2331 *meta_row_height = *MetaRequestHeight; 2332 if (ViewportStationary && NumberOfDPPs == 1) { 2333 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, 2334 *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth); 2335 } else { 2336 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; 2337 } 2338 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; 2339 } else { 2340 *meta_row_height = *MetaRequestWidth; 2341 if (ViewportStationary && NumberOfDPPs == 1) { 2342 *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, 2343 *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight); 2344 } else { 2345 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; 2346 } 2347 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; 2348 } 2349 2350 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { 2351 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, 2352 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes); 2353 } else if (!IsVertical(SourceRotation)) { 2354 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; 2355 } else { 2356 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; 2357 } 2358 2359 DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0; 2360 2361 if (GPUVMEnable == true) { 2362 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / 2363 (8 * 4.0 * 1024), 1) + 1) * 64; 2364 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1); 2365 } else { 2366 *MetaPTEBytesFrame = 0; 2367 MPDEBytesFrame = 0; 2368 } 2369 2370 if (DCCEnable != true) { 2371 *MetaPTEBytesFrame = 0; 2372 MPDEBytesFrame = 0; 2373 *MetaRowByte = 0; 2374 } 2375 2376 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight; 2377 2378 if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) { 2379 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { 2380 vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight + 2381 MacroTileHeight - 1, MacroTileHeight) - 2382 dml_floor(ViewportYStart, MacroTileHeight); 2383 } else if (!IsVertical(SourceRotation)) { 2384 vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight; 2385 } else { 2386 vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight; 2387 } 2388 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / 2389 (8 * 2097152), 1) + 1); 2390 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2); 2391 } else { 2392 *DPDE0BytesFrame = 0; 2393 ExtraDPDEBytesFrame = 0; 2394 vp_height_dpte_ub = 0; 2395 } 2396 2397 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; 2398 2399 #ifdef __DML_VBA_DEBUG__ 2400 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); 2401 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 2402 dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear); 2403 dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel); 2404 dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels); 2405 dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes); 2406 dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes); 2407 dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight); 2408 dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth); 2409 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); 2410 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); 2411 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); 2412 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); 2413 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 2414 dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight); 2415 dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth); 2416 dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub); 2417 #endif 2418 2419 if (HostVMEnable == true) 2420 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); 2421 2422 if (SurfaceTiling == dm_sw_linear) { 2423 *PixelPTEReqHeight = 1; 2424 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; 2425 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; 2426 *PTERequestSize = 64; 2427 } else if (GPUVMMinPageSizeKBytes == 4) { 2428 *PixelPTEReqHeight = 16 * BlockHeight256Bytes; 2429 *PixelPTEReqWidth = 16 * BlockWidth256Bytes; 2430 *PTERequestSize = 128; 2431 } else { 2432 *PixelPTEReqHeight = MacroTileHeight; 2433 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel); 2434 *PTERequestSize = 64; 2435 } 2436 #ifdef __DML_VBA_DEBUG__ 2437 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); 2438 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame); 2439 dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight); 2440 dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth); 2441 dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear); 2442 dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize); 2443 dml_print("DML::%s: Pitch = %d\n", __func__, Pitch); 2444 #endif 2445 2446 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub; 2447 *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame / 2448 (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) * 2449 (double) *PixelPTEReqWidth; 2450 *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth * 2451 *PTERequestSize; 2452 2453 if (SurfaceTiling == dm_sw_linear) { 2454 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2455 *PixelPTEReqWidth / Pitch), 1)); 2456 #ifdef __DML_VBA_DEBUG__ 2457 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__, 2458 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch); 2459 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__, 2460 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch)); 2461 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__, 2462 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); 2463 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__, 2464 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2465 *PixelPTEReqWidth / Pitch), 1)); 2466 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); 2467 #endif 2468 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1), 2469 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth; 2470 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize; 2471 2472 // VBA_DELTA, VBA doesn't have programming value for pte row height linear. 2473 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2474 PixelPTEReqWidth_linear / Pitch), 1); 2475 if (*dpte_row_height_linear > 128) 2476 *dpte_row_height_linear = 128; 2477 2478 } else if (!IsVertical(SourceRotation)) { 2479 *dpte_row_height = *PixelPTEReqHeight; 2480 2481 if (GPUVMMinPageSizeKBytes > 64) { 2482 *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) / 2483 *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 2484 } else if (ViewportStationary && (NumberOfDPPs == 1)) { 2485 *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth + 2486 *PixelPTEReqWidth - 1, *PixelPTEReqWidth) - 2487 dml_floor(ViewportXStart, *PixelPTEReqWidth); 2488 } else { 2489 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * 2490 *PixelPTEReqWidth; 2491 } 2492 2493 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 2494 } else { 2495 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth); 2496 2497 if (ViewportStationary && (NumberOfDPPs == 1)) { 2498 *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, 2499 *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight); 2500 } else { 2501 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) 2502 * *PixelPTEReqHeight; 2503 } 2504 2505 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; 2506 } 2507 2508 if (GPUVMEnable != true) 2509 *PixelPTEBytesPerRow = 0; 2510 if (HostVMEnable == true) 2511 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); 2512 2513 #ifdef __DML_VBA_DEBUG__ 2514 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); 2515 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); 2516 dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear); 2517 dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub); 2518 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow); 2519 dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests); 2520 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame); 2521 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n", 2522 __func__, *dpte_row_width_ub_one_row_per_frame); 2523 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n", 2524 __func__, *PixelPTEBytesPerRow_one_row_per_frame); 2525 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", 2526 *MetaPTEBytesFrame); 2527 #endif 2528 2529 return PDEAndMetaPTEBytesFrame; 2530 } // CalculateVMAndRowBytes 2531 2532 double dml32_CalculatePrefetchSourceLines( 2533 double VRatio, 2534 unsigned int VTaps, 2535 bool Interlace, 2536 bool ProgressiveToInterlaceUnitInOPP, 2537 unsigned int SwathHeight, 2538 enum dm_rotation_angle SourceRotation, 2539 bool ViewportStationary, 2540 double SwathWidth, 2541 unsigned int ViewportHeight, 2542 unsigned int ViewportXStart, 2543 unsigned int ViewportYStart, 2544 2545 /* Output */ 2546 double *VInitPreFill, 2547 unsigned int *MaxNumSwath) 2548 { 2549 2550 unsigned int vp_start_rot; 2551 unsigned int sw0_tmp; 2552 unsigned int MaxPartialSwath; 2553 double numLines; 2554 2555 #ifdef __DML_VBA_DEBUG__ 2556 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); 2557 dml_print("DML::%s: VTaps = %d\n", __func__, VTaps); 2558 dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart); 2559 dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart); 2560 dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary); 2561 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); 2562 #endif 2563 if (ProgressiveToInterlaceUnitInOPP) 2564 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1); 2565 else 2566 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); 2567 2568 if (ViewportStationary) { 2569 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) { 2570 vp_start_rot = SwathHeight - 2571 (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); 2572 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) { 2573 vp_start_rot = ViewportXStart; 2574 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) { 2575 vp_start_rot = SwathHeight - 2576 (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); 2577 } else { 2578 vp_start_rot = ViewportYStart; 2579 } 2580 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); 2581 if (sw0_tmp < *VInitPreFill) 2582 *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1; 2583 else 2584 *MaxNumSwath = 1; 2585 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight); 2586 } else { 2587 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1; 2588 if (*VInitPreFill > 1) 2589 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight); 2590 else 2591 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight); 2592 } 2593 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; 2594 2595 #ifdef __DML_VBA_DEBUG__ 2596 dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot); 2597 dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill); 2598 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); 2599 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath); 2600 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); 2601 #endif 2602 return numLines; 2603 2604 } // CalculatePrefetchSourceLines 2605 2606 void dml32_CalculateMALLUseForStaticScreen( 2607 unsigned int NumberOfActiveSurfaces, 2608 unsigned int MALLAllocatedForDCNFinal, 2609 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen, 2610 unsigned int SurfaceSizeInMALL[], 2611 bool one_row_per_frame_fits_in_buffer[], 2612 2613 /* output */ 2614 bool UsesMALLForStaticScreen[]) 2615 { 2616 unsigned int k; 2617 unsigned int SurfaceToAddToMALL; 2618 bool CanAddAnotherSurfaceToMALL; 2619 unsigned int TotalSurfaceSizeInMALL; 2620 2621 TotalSurfaceSizeInMALL = 0; 2622 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2623 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable); 2624 if (UsesMALLForStaticScreen[k]) 2625 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; 2626 #ifdef __DML_VBA_DEBUG__ 2627 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); 2628 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL); 2629 #endif 2630 } 2631 2632 SurfaceToAddToMALL = 0; 2633 CanAddAnotherSurfaceToMALL = true; 2634 while (CanAddAnotherSurfaceToMALL) { 2635 CanAddAnotherSurfaceToMALL = false; 2636 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2637 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 && 2638 !UsesMALLForStaticScreen[k] && 2639 UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable && 2640 one_row_per_frame_fits_in_buffer[k] && 2641 (!CanAddAnotherSurfaceToMALL || 2642 SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { 2643 CanAddAnotherSurfaceToMALL = true; 2644 SurfaceToAddToMALL = k; 2645 #ifdef __DML_VBA_DEBUG__ 2646 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n", 2647 __func__, k, UseMALLForStaticScreen[k]); 2648 #endif 2649 } 2650 } 2651 if (CanAddAnotherSurfaceToMALL) { 2652 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true; 2653 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; 2654 2655 #ifdef __DML_VBA_DEBUG__ 2656 dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL); 2657 dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL); 2658 #endif 2659 2660 } 2661 } 2662 } 2663 2664 void dml32_CalculateRowBandwidth( 2665 bool GPUVMEnable, 2666 enum source_format_class SourcePixelFormat, 2667 double VRatio, 2668 double VRatioChroma, 2669 bool DCCEnable, 2670 double LineTime, 2671 unsigned int MetaRowByteLuma, 2672 unsigned int MetaRowByteChroma, 2673 unsigned int meta_row_height_luma, 2674 unsigned int meta_row_height_chroma, 2675 unsigned int PixelPTEBytesPerRowLuma, 2676 unsigned int PixelPTEBytesPerRowChroma, 2677 unsigned int dpte_row_height_luma, 2678 unsigned int dpte_row_height_chroma, 2679 /* Output */ 2680 double *meta_row_bw, 2681 double *dpte_row_bw) 2682 { 2683 if (DCCEnable != true) { 2684 *meta_row_bw = 0; 2685 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || 2686 SourcePixelFormat == dm_rgbe_alpha) { 2687 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * 2688 MetaRowByteChroma / (meta_row_height_chroma * LineTime); 2689 } else { 2690 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 2691 } 2692 2693 if (GPUVMEnable != true) { 2694 *dpte_row_bw = 0; 2695 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || 2696 SourcePixelFormat == dm_rgbe_alpha) { 2697 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + 2698 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); 2699 } else { 2700 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 2701 } 2702 } 2703 2704 double dml32_CalculateUrgentLatency( 2705 double UrgentLatencyPixelDataOnly, 2706 double UrgentLatencyPixelMixedWithVMData, 2707 double UrgentLatencyVMDataOnly, 2708 bool DoUrgentLatencyAdjustment, 2709 double UrgentLatencyAdjustmentFabricClockComponent, 2710 double UrgentLatencyAdjustmentFabricClockReference, 2711 double FabricClock) 2712 { 2713 double ret; 2714 2715 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); 2716 if (DoUrgentLatencyAdjustment == true) { 2717 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * 2718 (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); 2719 } 2720 return ret; 2721 } 2722 2723 void dml32_CalculateUrgentBurstFactor( 2724 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, 2725 unsigned int swath_width_luma_ub, 2726 unsigned int swath_width_chroma_ub, 2727 unsigned int SwathHeightY, 2728 unsigned int SwathHeightC, 2729 double LineTime, 2730 double UrgentLatency, 2731 double CursorBufferSize, 2732 unsigned int CursorWidth, 2733 unsigned int CursorBPP, 2734 double VRatio, 2735 double VRatioC, 2736 double BytePerPixelInDETY, 2737 double BytePerPixelInDETC, 2738 unsigned int DETBufferSizeY, 2739 unsigned int DETBufferSizeC, 2740 /* Output */ 2741 double *UrgentBurstFactorCursor, 2742 double *UrgentBurstFactorLuma, 2743 double *UrgentBurstFactorChroma, 2744 bool *NotEnoughUrgentLatencyHiding) 2745 { 2746 double LinesInDETLuma; 2747 double LinesInDETChroma; 2748 unsigned int LinesInCursorBuffer; 2749 double CursorBufferSizeInTime; 2750 double DETBufferSizeInTimeLuma; 2751 double DETBufferSizeInTimeChroma; 2752 2753 *NotEnoughUrgentLatencyHiding = 0; 2754 2755 if (CursorWidth > 0) { 2756 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / 2757 (CursorWidth * CursorBPP / 8.0)), 1.0); 2758 if (VRatio > 0) { 2759 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; 2760 if (CursorBufferSizeInTime - UrgentLatency <= 0) { 2761 *NotEnoughUrgentLatencyHiding = 1; 2762 *UrgentBurstFactorCursor = 0; 2763 } else { 2764 *UrgentBurstFactorCursor = CursorBufferSizeInTime / 2765 (CursorBufferSizeInTime - UrgentLatency); 2766 } 2767 } else { 2768 *UrgentBurstFactorCursor = 1; 2769 } 2770 } 2771 2772 LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 : 2773 DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; 2774 2775 if (VRatio > 0) { 2776 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; 2777 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { 2778 *NotEnoughUrgentLatencyHiding = 1; 2779 *UrgentBurstFactorLuma = 0; 2780 } else { 2781 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); 2782 } 2783 } else { 2784 *UrgentBurstFactorLuma = 1; 2785 } 2786 2787 if (BytePerPixelInDETC > 0) { 2788 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 2789 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC 2790 / swath_width_chroma_ub; 2791 2792 if (VRatio > 0) { 2793 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; 2794 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { 2795 *NotEnoughUrgentLatencyHiding = 1; 2796 *UrgentBurstFactorChroma = 0; 2797 } else { 2798 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma 2799 / (DETBufferSizeInTimeChroma - UrgentLatency); 2800 } 2801 } else { 2802 *UrgentBurstFactorChroma = 1; 2803 } 2804 } 2805 } // CalculateUrgentBurstFactor 2806 2807 void dml32_CalculateDCFCLKDeepSleep( 2808 unsigned int NumberOfActiveSurfaces, 2809 unsigned int BytePerPixelY[], 2810 unsigned int BytePerPixelC[], 2811 double VRatio[], 2812 double VRatioChroma[], 2813 double SwathWidthY[], 2814 double SwathWidthC[], 2815 unsigned int DPPPerSurface[], 2816 double HRatio[], 2817 double HRatioChroma[], 2818 double PixelClock[], 2819 double PSCL_THROUGHPUT[], 2820 double PSCL_THROUGHPUT_CHROMA[], 2821 double Dppclk[], 2822 double ReadBandwidthLuma[], 2823 double ReadBandwidthChroma[], 2824 unsigned int ReturnBusWidth, 2825 2826 /* Output */ 2827 double *DCFClkDeepSleep) 2828 { 2829 unsigned int k; 2830 double DisplayPipeLineDeliveryTimeLuma; 2831 double DisplayPipeLineDeliveryTimeChroma; 2832 double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX]; 2833 double ReadBandwidth = 0.0; 2834 2835 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2836 2837 if (VRatio[k] <= 1) { 2838 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] 2839 / PixelClock[k]; 2840 } else { 2841 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 2842 } 2843 if (BytePerPixelC[k] == 0) { 2844 DisplayPipeLineDeliveryTimeChroma = 0; 2845 } else { 2846 if (VRatioChroma[k] <= 1) { 2847 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * 2848 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 2849 } else { 2850 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] 2851 / Dppclk[k]; 2852 } 2853 } 2854 2855 if (BytePerPixelC[k] > 0) { 2856 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * 2857 BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 2858 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 2859 32.0 / DisplayPipeLineDeliveryTimeChroma); 2860 } else { 2861 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 2862 64.0 / DisplayPipeLineDeliveryTimeLuma; 2863 } 2864 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16); 2865 2866 #ifdef __DML_VBA_DEBUG__ 2867 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]); 2868 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); 2869 #endif 2870 } 2871 2872 for (k = 0; k < NumberOfActiveSurfaces; ++k) 2873 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 2874 2875 *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth); 2876 2877 #ifdef __DML_VBA_DEBUG__ 2878 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__); 2879 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); 2880 dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth); 2881 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); 2882 #endif 2883 2884 for (k = 0; k < NumberOfActiveSurfaces; ++k) 2885 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); 2886 #ifdef __DML_VBA_DEBUG__ 2887 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); 2888 #endif 2889 } // CalculateDCFCLKDeepSleep 2890 2891 double dml32_CalculateWriteBackDelay( 2892 enum source_format_class WritebackPixelFormat, 2893 double WritebackHRatio, 2894 double WritebackVRatio, 2895 unsigned int WritebackVTaps, 2896 unsigned int WritebackDestinationWidth, 2897 unsigned int WritebackDestinationHeight, 2898 unsigned int WritebackSourceHeight, 2899 unsigned int HTotal) 2900 { 2901 double CalculateWriteBackDelay; 2902 double Line_length; 2903 double Output_lines_last_notclamped; 2904 double WritebackVInit; 2905 2906 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; 2907 Line_length = dml_max((double) WritebackDestinationWidth, 2908 dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); 2909 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - 2910 dml_ceil(((double)WritebackSourceHeight - 2911 (double) WritebackVInit) / (double)WritebackVRatio, 1.0); 2912 if (Output_lines_last_notclamped < 0) { 2913 CalculateWriteBackDelay = 0; 2914 } else { 2915 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + 2916 (HTotal - WritebackDestinationWidth) + 80; 2917 } 2918 return CalculateWriteBackDelay; 2919 } 2920 2921 void dml32_UseMinimumDCFCLK( 2922 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 2923 bool DRRDisplay[], 2924 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 2925 unsigned int MaxInterDCNTileRepeaters, 2926 unsigned int MaxPrefetchMode, 2927 double DRAMClockChangeLatencyFinal, 2928 double FCLKChangeLatency, 2929 double SREnterPlusExitTime, 2930 unsigned int ReturnBusWidth, 2931 unsigned int RoundTripPingLatencyCycles, 2932 unsigned int ReorderingBytes, 2933 unsigned int PixelChunkSizeInKByte, 2934 unsigned int MetaChunkSize, 2935 bool GPUVMEnable, 2936 unsigned int GPUVMMaxPageTableLevels, 2937 bool HostVMEnable, 2938 unsigned int NumberOfActiveSurfaces, 2939 double HostVMMinPageSize, 2940 unsigned int HostVMMaxNonCachedPageTableLevels, 2941 bool DynamicMetadataVMEnabled, 2942 bool ImmediateFlipRequirement, 2943 bool ProgressiveToInterlaceUnitInOPP, 2944 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation, 2945 double PercentOfIdealSDPPortBWReceivedAfterUrgLatency, 2946 unsigned int VTotal[], 2947 unsigned int VActive[], 2948 unsigned int DynamicMetadataTransmittedBytes[], 2949 unsigned int DynamicMetadataLinesBeforeActiveRequired[], 2950 bool Interlace[], 2951 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX], 2952 double RequiredDISPCLK[][2], 2953 double UrgLatency[], 2954 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX], 2955 double ProjectedDCFClkDeepSleep[][2], 2956 double MaximumVStartup[][2][DC__NUM_DPP__MAX], 2957 unsigned int TotalNumberOfActiveDPP[][2], 2958 unsigned int TotalNumberOfDCCActiveDPP[][2], 2959 unsigned int dpte_group_bytes[], 2960 double PrefetchLinesY[][2][DC__NUM_DPP__MAX], 2961 double PrefetchLinesC[][2][DC__NUM_DPP__MAX], 2962 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX], 2963 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX], 2964 unsigned int BytePerPixelY[], 2965 unsigned int BytePerPixelC[], 2966 unsigned int HTotal[], 2967 double PixelClock[], 2968 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX], 2969 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX], 2970 double MetaRowBytes[][2][DC__NUM_DPP__MAX], 2971 bool DynamicMetadataEnable[], 2972 double ReadBandwidthLuma[], 2973 double ReadBandwidthChroma[], 2974 double DCFCLKPerState[], 2975 /* Output */ 2976 double DCFCLKState[][2]) 2977 { 2978 unsigned int i, j, k; 2979 unsigned int dummy1; 2980 double dummy2, dummy3; 2981 double NormalEfficiency; 2982 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; 2983 2984 NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0; 2985 for (i = 0; i < DC__VOLTAGE_STATES; ++i) { 2986 for (j = 0; j <= 1; ++j) { 2987 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; 2988 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; 2989 double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX]; 2990 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; 2991 double MinimumTWait = 0.0; 2992 double DPTEBandwidth; 2993 double DCFCLKRequiredForAverageBandwidth; 2994 unsigned int ExtraLatencyBytes; 2995 double ExtraLatencyCycles; 2996 double DCFCLKRequiredForPeakBandwidth; 2997 unsigned int NoOfDPPState[DC__NUM_DPP__MAX]; 2998 double MinimumTvmPlus2Tr0; 2999 3000 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; 3001 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3002 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 3003 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] 3004 / (15.75 * HTotal[k] / PixelClock[k]); 3005 } 3006 3007 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) 3008 NoOfDPPState[k] = NoOfDPP[i][j][k]; 3009 3010 DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]; 3011 DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth); 3012 3013 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes, 3014 TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte, 3015 TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable, 3016 NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize, 3017 HostVMMaxNonCachedPageTableLevels); 3018 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ 3019 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth; 3020 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3021 double DCFCLKCyclesRequiredInPrefetch; 3022 double PrefetchTime; 3023 3024 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] 3025 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k] 3026 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] 3027 * BytePerPixelC[k]) / NormalEfficiency 3028 / ReturnBusWidth; 3029 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] 3030 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency 3031 / NormalEfficiency / ReturnBusWidth 3032 * (GPUVMMaxPageTableLevels > 2 ? 1 : 0) 3033 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency 3034 / ReturnBusWidth 3035 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth 3036 + PixelDCFCLKCyclesRequiredInPrefetch[k]; 3037 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) 3038 * HTotal[k] / PixelClock[k]; 3039 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true && 3040 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ? 3041 UrgLatency[i] * GPUVMMaxPageTableLevels * 3042 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; 3043 3044 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode, 3045 UseMALLForPStateChange[k], 3046 SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 3047 DRRDisplay[k], 3048 DRAMClockChangeLatencyFinal, 3049 FCLKChangeLatency, 3050 UrgLatency[i], 3051 SREnterPlusExitTime); 3052 3053 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - 3054 MinimumTWait - UrgLatency[i] * 3055 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : 3056 GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ? 3057 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - 3058 DynamicMetadataVMExtraLatency[k]; 3059 3060 if (PrefetchTime > 0) { 3061 double ExpectedVRatioPrefetch; 3062 3063 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * 3064 PixelDCFCLKCyclesRequiredInPrefetch[k] / 3065 DCFCLKCyclesRequiredInPrefetch); 3066 DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] * 3067 PixelDCFCLKCyclesRequiredInPrefetch[k] / 3068 PrefetchPixelLinesTime[k] * 3069 dml_max(1.0, ExpectedVRatioPrefetch) * 3070 dml_max(1.0, ExpectedVRatioPrefetch / 4); 3071 if (HostVMEnable == true || ImmediateFlipRequirement == true) { 3072 DCFCLKRequiredForPeakBandwidthPerSurface[k] = 3073 DCFCLKRequiredForPeakBandwidthPerSurface[k] + 3074 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / 3075 NormalEfficiency / ReturnBusWidth; 3076 } 3077 } else { 3078 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; 3079 } 3080 if (DynamicMetadataEnable[k] == true) { 3081 double TSetupPipe; 3082 double TdmbfPipe; 3083 double TdmsksPipe; 3084 double TdmecPipe; 3085 double AllowedTimeForUrgentExtraLatency; 3086 3087 dml32_CalculateVUpdateAndDynamicMetadataParameters( 3088 MaxInterDCNTileRepeaters, 3089 RequiredDPPCLKPerSurface[i][j][k], 3090 RequiredDISPCLK[i][j], 3091 ProjectedDCFClkDeepSleep[i][j], 3092 PixelClock[k], 3093 HTotal[k], 3094 VTotal[k] - VActive[k], 3095 DynamicMetadataTransmittedBytes[k], 3096 DynamicMetadataLinesBeforeActiveRequired[k], 3097 Interlace[k], 3098 ProgressiveToInterlaceUnitInOPP, 3099 3100 /* output */ 3101 &TSetupPipe, 3102 &TdmbfPipe, 3103 &TdmecPipe, 3104 &TdmsksPipe, 3105 &dummy1, 3106 &dummy2, 3107 &dummy3); 3108 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / 3109 PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - 3110 TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; 3111 if (AllowedTimeForUrgentExtraLatency > 0) 3112 DCFCLKRequiredForPeakBandwidthPerSurface[k] = 3113 dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k], 3114 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); 3115 else 3116 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; 3117 } 3118 } 3119 DCFCLKRequiredForPeakBandwidth = 0; 3120 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) { 3121 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + 3122 DCFCLKRequiredForPeakBandwidthPerSurface[k]; 3123 } 3124 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ? 3125 (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) * 3126 (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0); 3127 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3128 double MaximumTvmPlus2Tr0PlusTsw; 3129 3130 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / 3131 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; 3132 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { 3133 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i]; 3134 } else { 3135 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 3136 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - 3137 MinimumTvmPlus2Tr0 - 3138 PrefetchPixelLinesTime[k] / 4), 3139 (2 * ExtraLatencyCycles + 3140 PixelDCFCLKCyclesRequiredInPrefetch[k]) / 3141 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); 3142 } 3143 } 3144 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * 3145 dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); 3146 } 3147 } 3148 } 3149 3150 unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes, 3151 unsigned int TotalNumberOfActiveDPP, 3152 unsigned int PixelChunkSizeInKByte, 3153 unsigned int TotalNumberOfDCCActiveDPP, 3154 unsigned int MetaChunkSize, 3155 bool GPUVMEnable, 3156 bool HostVMEnable, 3157 unsigned int NumberOfActiveSurfaces, 3158 unsigned int NumberOfDPP[], 3159 unsigned int dpte_group_bytes[], 3160 double HostVMInefficiencyFactor, 3161 double HostVMMinPageSize, 3162 unsigned int HostVMMaxNonCachedPageTableLevels) 3163 { 3164 unsigned int k; 3165 double ret; 3166 unsigned int HostVMDynamicLevels; 3167 3168 if (GPUVMEnable == true && HostVMEnable == true) { 3169 if (HostVMMinPageSize < 2048) 3170 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 3171 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) 3172 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 3173 else 3174 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 3175 } else { 3176 HostVMDynamicLevels = 0; 3177 } 3178 3179 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + 3180 TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; 3181 3182 if (GPUVMEnable == true) { 3183 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3184 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * 3185 (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; 3186 } 3187 } 3188 return ret; 3189 } 3190 3191 void dml32_CalculateVUpdateAndDynamicMetadataParameters( 3192 unsigned int MaxInterDCNTileRepeaters, 3193 double Dppclk, 3194 double Dispclk, 3195 double DCFClkDeepSleep, 3196 double PixelClock, 3197 unsigned int HTotal, 3198 unsigned int VBlank, 3199 unsigned int DynamicMetadataTransmittedBytes, 3200 unsigned int DynamicMetadataLinesBeforeActiveRequired, 3201 unsigned int InterlaceEnable, 3202 bool ProgressiveToInterlaceUnitInOPP, 3203 3204 /* output */ 3205 double *TSetup, 3206 double *Tdmbf, 3207 double *Tdmec, 3208 double *Tdmsks, 3209 unsigned int *VUpdateOffsetPix, 3210 double *VUpdateWidthPix, 3211 double *VReadyOffsetPix) 3212 { 3213 double TotalRepeaterDelayTime; 3214 3215 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk); 3216 *VUpdateWidthPix = 3217 dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0); 3218 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / Dppclk, 3219 TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0); 3220 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0); 3221 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; 3222 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk; 3223 *Tdmec = HTotal / PixelClock; 3224 3225 if (DynamicMetadataLinesBeforeActiveRequired == 0) 3226 *Tdmsks = VBlank * HTotal / PixelClock / 2.0; 3227 else 3228 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; 3229 3230 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) 3231 *Tdmsks = *Tdmsks / 2; 3232 #ifdef __DML_VBA_DEBUG__ 3233 dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix); 3234 dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix); 3235 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix); 3236 3237 dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n", 3238 __func__, DynamicMetadataLinesBeforeActiveRequired); 3239 dml_print("DML::%s: VBlank = %d\n", __func__, VBlank); 3240 dml_print("DML::%s: HTotal = %d\n", __func__, HTotal); 3241 dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock); 3242 dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); 3243 #endif 3244 } 3245 3246 double dml32_CalculateTWait( 3247 unsigned int PrefetchMode, 3248 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, 3249 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 3250 bool DRRDisplay, 3251 double DRAMClockChangeLatency, 3252 double FCLKChangeLatency, 3253 double UrgentLatency, 3254 double SREnterPlusExitTime) 3255 { 3256 double TWait = 0.0; 3257 3258 if (PrefetchMode == 0 && 3259 !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) && 3260 !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) && 3261 !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) && 3262 !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) { 3263 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); 3264 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) { 3265 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); 3266 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) { 3267 TWait = dml_max(SREnterPlusExitTime, UrgentLatency); 3268 } else { 3269 TWait = UrgentLatency; 3270 } 3271 3272 #ifdef __DML_VBA_DEBUG__ 3273 dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode); 3274 dml_print("DML::%s: TWait = %f\n", __func__, TWait); 3275 #endif 3276 return TWait; 3277 } // CalculateTWait 3278 3279 // Function: get_return_bw_mbps 3280 // Megabyte per second 3281 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc, 3282 const int VoltageLevel, 3283 const bool HostVMEnable, 3284 const double DCFCLK, 3285 const double FabricClock, 3286 const double DRAMSpeed) 3287 { 3288 double ReturnBW = 0.; 3289 double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK; 3290 double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes; 3291 double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes; 3292 double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, 3293 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, 3294 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe : 3295 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100); 3296 double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, 3297 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, 3298 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe : 3299 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100); 3300 3301 if (HostVMEnable != true) 3302 ReturnBW = PixelDataOnlyReturnBW; 3303 else 3304 ReturnBW = PixelMixedWithVMDataReturnBW; 3305 3306 #ifdef __DML_VBA_DEBUG__ 3307 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel); 3308 dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable); 3309 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 3310 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); 3311 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); 3312 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth); 3313 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth); 3314 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth); 3315 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW); 3316 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW); 3317 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW); 3318 #endif 3319 return ReturnBW; 3320 } 3321 3322 // Function: get_return_bw_mbps_vm_only 3323 // Megabyte per second 3324 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc, 3325 const int VoltageLevel, 3326 const double DCFCLK, 3327 const double FabricClock, 3328 const double DRAMSpeed) 3329 { 3330 double VMDataOnlyReturnBW = dml_min3( 3331 soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0, 3332 FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes 3333 * soc->pct_ideal_sdp_bw_after_urgent / 100.0, 3334 DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes 3335 * (VoltageLevel < 2 ? 3336 soc->pct_ideal_dram_bw_after_urgent_strobe : 3337 soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0); 3338 #ifdef __DML_VBA_DEBUG__ 3339 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel); 3340 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 3341 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); 3342 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); 3343 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); 3344 #endif 3345 return VMDataOnlyReturnBW; 3346 } 3347 3348 double dml32_CalculateExtraLatency( 3349 unsigned int RoundTripPingLatencyCycles, 3350 unsigned int ReorderingBytes, 3351 double DCFCLK, 3352 unsigned int TotalNumberOfActiveDPP, 3353 unsigned int PixelChunkSizeInKByte, 3354 unsigned int TotalNumberOfDCCActiveDPP, 3355 unsigned int MetaChunkSize, 3356 double ReturnBW, 3357 bool GPUVMEnable, 3358 bool HostVMEnable, 3359 unsigned int NumberOfActiveSurfaces, 3360 unsigned int NumberOfDPP[], 3361 unsigned int dpte_group_bytes[], 3362 double HostVMInefficiencyFactor, 3363 double HostVMMinPageSize, 3364 unsigned int HostVMMaxNonCachedPageTableLevels) 3365 { 3366 double ExtraLatencyBytes; 3367 double ExtraLatency; 3368 3369 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes( 3370 ReorderingBytes, 3371 TotalNumberOfActiveDPP, 3372 PixelChunkSizeInKByte, 3373 TotalNumberOfDCCActiveDPP, 3374 MetaChunkSize, 3375 GPUVMEnable, 3376 HostVMEnable, 3377 NumberOfActiveSurfaces, 3378 NumberOfDPP, 3379 dpte_group_bytes, 3380 HostVMInefficiencyFactor, 3381 HostVMMinPageSize, 3382 HostVMMaxNonCachedPageTableLevels); 3383 3384 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; 3385 3386 #ifdef __DML_VBA_DEBUG__ 3387 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); 3388 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); 3389 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes); 3390 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); 3391 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); 3392 #endif 3393 3394 return ExtraLatency; 3395 } // CalculateExtraLatency 3396 3397 bool dml32_CalculatePrefetchSchedule( 3398 struct vba_vars_st *v, 3399 unsigned int k, 3400 double HostVMInefficiencyFactor, 3401 DmlPipe *myPipe, 3402 unsigned int DSCDelay, 3403 unsigned int DPP_RECOUT_WIDTH, 3404 unsigned int VStartup, 3405 unsigned int MaxVStartup, 3406 double UrgentLatency, 3407 double UrgentExtraLatency, 3408 double TCalc, 3409 unsigned int PDEAndMetaPTEBytesFrame, 3410 unsigned int MetaRowByte, 3411 unsigned int PixelPTEBytesPerRow, 3412 double PrefetchSourceLinesY, 3413 unsigned int SwathWidthY, 3414 unsigned int VInitPreFillY, 3415 unsigned int MaxNumSwathY, 3416 double PrefetchSourceLinesC, 3417 unsigned int SwathWidthC, 3418 unsigned int VInitPreFillC, 3419 unsigned int MaxNumSwathC, 3420 unsigned int swath_width_luma_ub, 3421 unsigned int swath_width_chroma_ub, 3422 unsigned int SwathHeightY, 3423 unsigned int SwathHeightC, 3424 double TWait, 3425 double TPreReq, 3426 /* Output */ 3427 double *DSTXAfterScaler, 3428 double *DSTYAfterScaler, 3429 double *DestinationLinesForPrefetch, 3430 double *PrefetchBandwidth, 3431 double *DestinationLinesToRequestVMInVBlank, 3432 double *DestinationLinesToRequestRowInVBlank, 3433 double *VRatioPrefetchY, 3434 double *VRatioPrefetchC, 3435 double *RequiredPrefetchPixDataBWLuma, 3436 double *RequiredPrefetchPixDataBWChroma, 3437 bool *NotEnoughTimeForDynamicMetadata, 3438 double *Tno_bw, 3439 double *prefetch_vmrow_bw, 3440 double *Tdmdl_vm, 3441 double *Tdmdl, 3442 double *TSetup, 3443 unsigned int *VUpdateOffsetPix, 3444 double *VUpdateWidthPix, 3445 double *VReadyOffsetPix) 3446 { 3447 double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater; 3448 bool MyError = false; 3449 unsigned int DPPCycles, DISPCLKCycles; 3450 double DSTTotalPixelsAfterScaler; 3451 double LineTime; 3452 double dst_y_prefetch_equ; 3453 double prefetch_bw_oto; 3454 double Tvm_oto; 3455 double Tr0_oto; 3456 double Tvm_oto_lines; 3457 double Tr0_oto_lines; 3458 double dst_y_prefetch_oto; 3459 double TimeForFetchingMetaPTE = 0; 3460 double TimeForFetchingRowInVBlank = 0; 3461 double LinesToRequestPrefetchPixelData = 0; 3462 unsigned int HostVMDynamicLevelsTrips; 3463 double trip_to_mem; 3464 double Tvm_trips; 3465 double Tr0_trips; 3466 double Tvm_trips_rounded; 3467 double Tr0_trips_rounded; 3468 double Lsw_oto; 3469 double Tpre_rounded; 3470 double prefetch_bw_equ; 3471 double Tvm_equ; 3472 double Tr0_equ; 3473 double Tdmbf; 3474 double Tdmec; 3475 double Tdmsks; 3476 double prefetch_sw_bytes; 3477 double bytes_pp; 3478 double dep_bytes; 3479 unsigned int max_vratio_pre = v->MaxVRatioPre; 3480 double min_Lsw; 3481 double Tsw_est1 = 0; 3482 double Tsw_est3 = 0; 3483 3484 if (v->GPUVMEnable == true && v->HostVMEnable == true) 3485 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; 3486 else 3487 HostVMDynamicLevelsTrips = 0; 3488 #ifdef __DML_VBA_DEBUG__ 3489 dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable); 3490 dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels); 3491 dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable); 3492 dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n", 3493 __func__, v->HostVMEnable, HostVMInefficiencyFactor); 3494 #endif 3495 dml32_CalculateVUpdateAndDynamicMetadataParameters( 3496 v->MaxInterDCNTileRepeaters, 3497 myPipe->Dppclk, 3498 myPipe->Dispclk, 3499 myPipe->DCFClkDeepSleep, 3500 myPipe->PixelClock, 3501 myPipe->HTotal, 3502 myPipe->VBlank, 3503 v->DynamicMetadataTransmittedBytes[k], 3504 v->DynamicMetadataLinesBeforeActiveRequired[k], 3505 myPipe->InterlaceEnable, 3506 myPipe->ProgressiveToInterlaceUnitInOPP, 3507 TSetup, 3508 3509 /* output */ 3510 &Tdmbf, 3511 &Tdmec, 3512 &Tdmsks, 3513 VUpdateOffsetPix, 3514 VUpdateWidthPix, 3515 VReadyOffsetPix); 3516 3517 LineTime = myPipe->HTotal / myPipe->PixelClock; 3518 trip_to_mem = UrgentLatency; 3519 Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 3520 3521 if (v->DynamicMetadataVMEnabled == true) 3522 *Tdmdl = TWait + Tvm_trips + trip_to_mem; 3523 else 3524 *Tdmdl = TWait + UrgentExtraLatency; 3525 3526 #ifdef __DML_VBA_ALLOW_DELTA__ 3527 if (v->DynamicMetadataEnable[k] == false) 3528 *Tdmdl = 0.0; 3529 #endif 3530 3531 if (v->DynamicMetadataEnable[k] == true) { 3532 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 3533 *NotEnoughTimeForDynamicMetadata = true; 3534 #ifdef __DML_VBA_DEBUG__ 3535 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 3536 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", 3537 __func__, Tdmbf); 3538 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 3539 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", 3540 __func__, Tdmsks); 3541 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", 3542 __func__, *Tdmdl); 3543 #endif 3544 } else { 3545 *NotEnoughTimeForDynamicMetadata = false; 3546 } 3547 } else { 3548 *NotEnoughTimeForDynamicMetadata = false; 3549 } 3550 3551 *Tdmdl_vm = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true && 3552 v->GPUVMEnable == true ? TWait + Tvm_trips : 0); 3553 3554 if (myPipe->ScalerEnabled) 3555 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL; 3556 else 3557 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly; 3558 3559 DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor; 3560 3561 DISPCLKCycles = v->DISPCLKDelaySubtotal; 3562 3563 if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) 3564 return true; 3565 3566 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles * 3567 myPipe->PixelClock / myPipe->Dispclk + DSCDelay; 3568 3569 *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0) 3570 + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH 3571 + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ? 3572 myPipe->HActive / 2 : 0) 3573 + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0); 3574 3575 #ifdef __DML_VBA_DEBUG__ 3576 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 3577 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 3578 dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk); 3579 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 3580 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk); 3581 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 3582 dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode); 3583 dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH); 3584 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); 3585 #endif 3586 3587 if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) 3588 *DSTYAfterScaler = 1; 3589 else 3590 *DSTYAfterScaler = 0; 3591 3592 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 3593 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 3594 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 3595 #ifdef __DML_VBA_DEBUG__ 3596 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 3597 dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler); 3598 #endif 3599 3600 MyError = false; 3601 3602 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 3603 3604 if (v->GPUVMEnable == true) { 3605 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime; 3606 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; 3607 if (v->GPUVMMaxPageTableLevels >= 3) { 3608 *Tno_bw = UrgentExtraLatency + trip_to_mem * 3609 (double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); 3610 } else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) { 3611 Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) / 3612 4.0 * LineTime; // VBA_ERROR 3613 *Tno_bw = UrgentExtraLatency; 3614 } else { 3615 *Tno_bw = 0; 3616 } 3617 } else if (myPipe->DCCEnable == true) { 3618 Tvm_trips_rounded = LineTime / 4.0; 3619 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; 3620 *Tno_bw = 0; 3621 } else { 3622 Tvm_trips_rounded = LineTime / 4.0; 3623 Tr0_trips_rounded = LineTime / 2.0; 3624 *Tno_bw = 0; 3625 } 3626 Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0); 3627 Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0); 3628 3629 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 3630 || myPipe->SourcePixelFormat == dm_420_12) { 3631 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 3632 } else { 3633 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 3634 } 3635 3636 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY 3637 + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 3638 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, 3639 prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); 3640 3641 min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre; 3642 min_Lsw = dml_max(min_Lsw, 1.0); 3643 Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0; 3644 3645 if (v->GPUVMEnable == true) { 3646 Tvm_oto = dml_max3( 3647 Tvm_trips, 3648 *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, 3649 LineTime / 4.0); 3650 } else 3651 Tvm_oto = LineTime / 4.0; 3652 3653 if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { 3654 Tr0_oto = dml_max4( 3655 Tr0_trips, 3656 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, 3657 (LineTime - Tvm_oto)/2.0, 3658 LineTime / 4.0); 3659 #ifdef __DML_VBA_DEBUG__ 3660 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, 3661 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto); 3662 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips); 3663 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto); 3664 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4); 3665 #endif 3666 } else 3667 Tr0_oto = (LineTime - Tvm_oto) / 2.0; 3668 3669 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 3670 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 3671 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 3672 3673 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - 3674 (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); 3675 3676 dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__); 3677 #ifdef __DML_VBA_DEBUG__ 3678 dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); 3679 dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw); 3680 dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw); 3681 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency); 3682 dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem); 3683 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3684 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3685 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3686 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC); 3687 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 3688 dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub); 3689 dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes); 3690 dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp); 3691 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 3692 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 3693 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 3694 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 3695 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 3696 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 3697 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); 3698 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 3699 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 3700 dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines); 3701 dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines); 3702 dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto); 3703 dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto); 3704 dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ); 3705 #endif 3706 3707 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; 3708 Tpre_rounded = dst_y_prefetch_equ * LineTime; 3709 #ifdef __DML_VBA_DEBUG__ 3710 dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ); 3711 dml_print("DML::%s: LineTime: %f\n", __func__, LineTime); 3712 dml_print("DML::%s: VStartup: %d\n", __func__, VStartup); 3713 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", 3714 __func__, VStartup * LineTime); 3715 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup); 3716 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc); 3717 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 3718 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 3719 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm); 3720 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); 3721 dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n", 3722 __func__, *DSTYAfterScaler); 3723 #endif 3724 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, 3725 MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 3726 3727 if (prefetch_sw_bytes < dep_bytes) 3728 prefetch_sw_bytes = 2 * dep_bytes; 3729 3730 *PrefetchBandwidth = 0; 3731 *DestinationLinesToRequestVMInVBlank = 0; 3732 *DestinationLinesToRequestRowInVBlank = 0; 3733 *VRatioPrefetchY = 0; 3734 *VRatioPrefetchC = 0; 3735 *RequiredPrefetchPixDataBWLuma = 0; 3736 if (dst_y_prefetch_equ > 1 && 3737 (Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) { 3738 double PrefetchBandwidth1; 3739 double PrefetchBandwidth2; 3740 double PrefetchBandwidth3; 3741 double PrefetchBandwidth4; 3742 3743 if (Tpre_rounded - *Tno_bw > 0) { 3744 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3745 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3746 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); 3747 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 3748 } else 3749 PrefetchBandwidth1 = 0; 3750 3751 if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw) 3752 && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 3753 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3754 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3755 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 3756 } 3757 3758 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 3759 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / 3760 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 3761 else 3762 PrefetchBandwidth2 = 0; 3763 3764 if (Tpre_rounded - Tvm_trips_rounded > 0) { 3765 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3766 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 3767 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 3768 } else 3769 PrefetchBandwidth3 = 0; 3770 3771 3772 if (VStartup == MaxVStartup && 3773 (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 * 3774 LineTime - Tvm_trips_rounded > 0) { 3775 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3776 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 3777 } 3778 3779 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) { 3780 PrefetchBandwidth4 = prefetch_sw_bytes / 3781 (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 3782 } else { 3783 PrefetchBandwidth4 = 0; 3784 } 3785 3786 #ifdef __DML_VBA_DEBUG__ 3787 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 3788 dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw); 3789 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 3790 dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1); 3791 dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3); 3792 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1); 3793 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2); 3794 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); 3795 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4); 3796 #endif 3797 { 3798 bool Case1OK; 3799 bool Case2OK; 3800 bool Case3OK; 3801 3802 if (PrefetchBandwidth1 > 0) { 3803 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 3804 >= Tvm_trips_rounded 3805 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3806 / PrefetchBandwidth1 >= Tr0_trips_rounded) { 3807 Case1OK = true; 3808 } else { 3809 Case1OK = false; 3810 } 3811 } else { 3812 Case1OK = false; 3813 } 3814 3815 if (PrefetchBandwidth2 > 0) { 3816 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 3817 >= Tvm_trips_rounded 3818 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3819 / PrefetchBandwidth2 < Tr0_trips_rounded) { 3820 Case2OK = true; 3821 } else { 3822 Case2OK = false; 3823 } 3824 } else { 3825 Case2OK = false; 3826 } 3827 3828 if (PrefetchBandwidth3 > 0) { 3829 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < 3830 Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * 3831 HostVMInefficiencyFactor) / PrefetchBandwidth3 >= 3832 Tr0_trips_rounded) { 3833 Case3OK = true; 3834 } else { 3835 Case3OK = false; 3836 } 3837 } else { 3838 Case3OK = false; 3839 } 3840 3841 if (Case1OK) 3842 prefetch_bw_equ = PrefetchBandwidth1; 3843 else if (Case2OK) 3844 prefetch_bw_equ = PrefetchBandwidth2; 3845 else if (Case3OK) 3846 prefetch_bw_equ = PrefetchBandwidth3; 3847 else 3848 prefetch_bw_equ = PrefetchBandwidth4; 3849 3850 #ifdef __DML_VBA_DEBUG__ 3851 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 3852 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 3853 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 3854 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 3855 #endif 3856 3857 if (prefetch_bw_equ > 0) { 3858 if (v->GPUVMEnable == true) { 3859 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * 3860 HostVMInefficiencyFactor / prefetch_bw_equ, 3861 Tvm_trips, LineTime / 4); 3862 } else { 3863 Tvm_equ = LineTime / 4; 3864 } 3865 3866 if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { 3867 Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * 3868 HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, 3869 (LineTime - Tvm_equ) / 2, LineTime / 4); 3870 } else { 3871 Tr0_equ = (LineTime - Tvm_equ) / 2; 3872 } 3873 } else { 3874 Tvm_equ = 0; 3875 Tr0_equ = 0; 3876 #ifdef __DML_VBA_DEBUG__ 3877 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__); 3878 #endif 3879 } 3880 } 3881 3882 if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 3883 if (dst_y_prefetch_oto * LineTime < TPreReq) { 3884 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 3885 } else { 3886 *DestinationLinesForPrefetch = dst_y_prefetch_oto; 3887 } 3888 TimeForFetchingMetaPTE = Tvm_oto; 3889 TimeForFetchingRowInVBlank = Tr0_oto; 3890 *PrefetchBandwidth = prefetch_bw_oto; 3891 } else { 3892 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 3893 TimeForFetchingMetaPTE = Tvm_equ; 3894 TimeForFetchingRowInVBlank = Tr0_equ; 3895 *PrefetchBandwidth = prefetch_bw_equ; 3896 } 3897 3898 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 3899 3900 *DestinationLinesToRequestRowInVBlank = 3901 dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 3902 3903 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - 3904 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 3905 3906 #ifdef __DML_VBA_DEBUG__ 3907 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 3908 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 3909 __func__, *DestinationLinesToRequestVMInVBlank); 3910 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 3911 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 3912 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", 3913 __func__, *DestinationLinesToRequestRowInVBlank); 3914 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3915 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 3916 #endif 3917 3918 if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) { 3919 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 3920 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 3921 #ifdef __DML_VBA_DEBUG__ 3922 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 3923 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); 3924 dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY); 3925 #endif 3926 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 3927 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 3928 *VRatioPrefetchY = 3929 dml_max((double) PrefetchSourceLinesY / 3930 LinesToRequestPrefetchPixelData, 3931 (double) MaxNumSwathY * SwathHeightY / 3932 (LinesToRequestPrefetchPixelData - 3933 (VInitPreFillY - 3.0) / 2.0)); 3934 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 3935 } else { 3936 MyError = true; 3937 *VRatioPrefetchY = 0; 3938 } 3939 #ifdef __DML_VBA_DEBUG__ 3940 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 3941 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3942 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); 3943 #endif 3944 } 3945 3946 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 3947 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3948 3949 #ifdef __DML_VBA_DEBUG__ 3950 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 3951 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); 3952 dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC); 3953 #endif 3954 if ((SwathHeightC > 4)) { 3955 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 3956 *VRatioPrefetchC = 3957 dml_max(*VRatioPrefetchC, 3958 (double) MaxNumSwathC * SwathHeightC / 3959 (LinesToRequestPrefetchPixelData - 3960 (VInitPreFillC - 3.0) / 2.0)); 3961 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3962 } else { 3963 MyError = true; 3964 *VRatioPrefetchC = 0; 3965 } 3966 #ifdef __DML_VBA_DEBUG__ 3967 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 3968 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 3969 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); 3970 #endif 3971 } 3972 3973 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY 3974 / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub 3975 / LineTime; 3976 3977 #ifdef __DML_VBA_DEBUG__ 3978 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3979 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3980 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 3981 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", 3982 __func__, *RequiredPrefetchPixDataBWLuma); 3983 #endif 3984 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / 3985 LinesToRequestPrefetchPixelData 3986 * myPipe->BytePerPixelC 3987 * swath_width_chroma_ub / LineTime; 3988 } else { 3989 MyError = true; 3990 #ifdef __DML_VBA_DEBUG__ 3991 dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n", 3992 __func__, LinesToRequestPrefetchPixelData); 3993 #endif 3994 *VRatioPrefetchY = 0; 3995 *VRatioPrefetchC = 0; 3996 *RequiredPrefetchPixDataBWLuma = 0; 3997 *RequiredPrefetchPixDataBWChroma = 0; 3998 } 3999 #ifdef __DML_VBA_DEBUG__ 4000 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 4001 (double)LinesToRequestPrefetchPixelData * LineTime + 4002 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 4003 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 4004 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", 4005 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); 4006 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 4007 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - 4008 TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler + 4009 ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); 4010 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", 4011 PixelPTEBytesPerRow); 4012 #endif 4013 } else { 4014 MyError = true; 4015 #ifdef __DML_VBA_DEBUG__ 4016 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", 4017 __func__, dst_y_prefetch_equ); 4018 #endif 4019 } 4020 4021 { 4022 double prefetch_vm_bw; 4023 double prefetch_row_bw; 4024 4025 if (PDEAndMetaPTEBytesFrame == 0) { 4026 prefetch_vm_bw = 0; 4027 } else if (*DestinationLinesToRequestVMInVBlank > 0) { 4028 #ifdef __DML_VBA_DEBUG__ 4029 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 4030 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 4031 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 4032 __func__, *DestinationLinesToRequestVMInVBlank); 4033 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 4034 #endif 4035 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / 4036 (*DestinationLinesToRequestVMInVBlank * LineTime); 4037 #ifdef __DML_VBA_DEBUG__ 4038 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 4039 #endif 4040 } else { 4041 prefetch_vm_bw = 0; 4042 MyError = true; 4043 #ifdef __DML_VBA_DEBUG__ 4044 dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", 4045 __func__, *DestinationLinesToRequestVMInVBlank); 4046 #endif 4047 } 4048 4049 if (MetaRowByte + PixelPTEBytesPerRow == 0) { 4050 prefetch_row_bw = 0; 4051 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 4052 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / 4053 (*DestinationLinesToRequestRowInVBlank * LineTime); 4054 4055 #ifdef __DML_VBA_DEBUG__ 4056 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 4057 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 4058 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", 4059 __func__, *DestinationLinesToRequestRowInVBlank); 4060 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 4061 #endif 4062 } else { 4063 prefetch_row_bw = 0; 4064 MyError = true; 4065 #ifdef __DML_VBA_DEBUG__ 4066 dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", 4067 __func__, *DestinationLinesToRequestRowInVBlank); 4068 #endif 4069 } 4070 4071 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 4072 } 4073 4074 if (MyError) { 4075 *PrefetchBandwidth = 0; 4076 TimeForFetchingMetaPTE = 0; 4077 TimeForFetchingRowInVBlank = 0; 4078 *DestinationLinesToRequestVMInVBlank = 0; 4079 *DestinationLinesToRequestRowInVBlank = 0; 4080 *DestinationLinesForPrefetch = 0; 4081 LinesToRequestPrefetchPixelData = 0; 4082 *VRatioPrefetchY = 0; 4083 *VRatioPrefetchC = 0; 4084 *RequiredPrefetchPixDataBWLuma = 0; 4085 *RequiredPrefetchPixDataBWChroma = 0; 4086 } 4087 4088 return MyError; 4089 } // CalculatePrefetchSchedule 4090 4091 void dml32_CalculateFlipSchedule( 4092 double HostVMInefficiencyFactor, 4093 double UrgentExtraLatency, 4094 double UrgentLatency, 4095 unsigned int GPUVMMaxPageTableLevels, 4096 bool HostVMEnable, 4097 unsigned int HostVMMaxNonCachedPageTableLevels, 4098 bool GPUVMEnable, 4099 double HostVMMinPageSize, 4100 double PDEAndMetaPTEBytesPerFrame, 4101 double MetaRowBytes, 4102 double DPTEBytesPerRow, 4103 double BandwidthAvailableForImmediateFlip, 4104 unsigned int TotImmediateFlipBytes, 4105 enum source_format_class SourcePixelFormat, 4106 double LineTime, 4107 double VRatio, 4108 double VRatioChroma, 4109 double Tno_bw, 4110 bool DCCEnable, 4111 unsigned int dpte_row_height, 4112 unsigned int meta_row_height, 4113 unsigned int dpte_row_height_chroma, 4114 unsigned int meta_row_height_chroma, 4115 bool use_one_row_for_frame_flip, 4116 4117 /* Output */ 4118 double *DestinationLinesToRequestVMInImmediateFlip, 4119 double *DestinationLinesToRequestRowInImmediateFlip, 4120 double *final_flip_bw, 4121 bool *ImmediateFlipSupportedForPipe) 4122 { 4123 double min_row_time = 0.0; 4124 unsigned int HostVMDynamicLevelsTrips; 4125 double TimeForFetchingMetaPTEImmediateFlip; 4126 double TimeForFetchingRowInVBlankImmediateFlip; 4127 double ImmediateFlipBW; 4128 4129 if (GPUVMEnable == true && HostVMEnable == true) 4130 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 4131 else 4132 HostVMDynamicLevelsTrips = 0; 4133 4134 #ifdef __DML_VBA_DEBUG__ 4135 dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes); 4136 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); 4137 #endif 4138 4139 if (TotImmediateFlipBytes > 0) { 4140 if (use_one_row_for_frame_flip) { 4141 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) * 4142 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 4143 } else { 4144 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * 4145 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 4146 } 4147 if (GPUVMEnable == true) { 4148 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * 4149 HostVMInefficiencyFactor / ImmediateFlipBW, 4150 UrgentExtraLatency + UrgentLatency * 4151 (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), 4152 LineTime / 4.0); 4153 } else { 4154 TimeForFetchingMetaPTEImmediateFlip = 0; 4155 } 4156 if ((GPUVMEnable == true || DCCEnable == true)) { 4157 TimeForFetchingRowInVBlankImmediateFlip = dml_max3( 4158 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, 4159 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0); 4160 } else { 4161 TimeForFetchingRowInVBlankImmediateFlip = 0; 4162 } 4163 4164 *DestinationLinesToRequestVMInImmediateFlip = 4165 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0; 4166 *DestinationLinesToRequestRowInImmediateFlip = 4167 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0; 4168 4169 if (GPUVMEnable == true) { 4170 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / 4171 (*DestinationLinesToRequestVMInImmediateFlip * LineTime), 4172 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / 4173 (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); 4174 } else if ((GPUVMEnable == true || DCCEnable == true)) { 4175 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / 4176 (*DestinationLinesToRequestRowInImmediateFlip * LineTime); 4177 } else { 4178 *final_flip_bw = 0; 4179 } 4180 } else { 4181 TimeForFetchingMetaPTEImmediateFlip = 0; 4182 TimeForFetchingRowInVBlankImmediateFlip = 0; 4183 *DestinationLinesToRequestVMInImmediateFlip = 0; 4184 *DestinationLinesToRequestRowInImmediateFlip = 0; 4185 *final_flip_bw = 0; 4186 } 4187 4188 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) { 4189 if (GPUVMEnable == true && DCCEnable != true) { 4190 min_row_time = dml_min(dpte_row_height * 4191 LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma); 4192 } else if (GPUVMEnable != true && DCCEnable == true) { 4193 min_row_time = dml_min(meta_row_height * 4194 LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); 4195 } else { 4196 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * 4197 LineTime / VRatio, dpte_row_height_chroma * LineTime / 4198 VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma); 4199 } 4200 } else { 4201 if (GPUVMEnable == true && DCCEnable != true) { 4202 min_row_time = dpte_row_height * LineTime / VRatio; 4203 } else if (GPUVMEnable != true && DCCEnable == true) { 4204 min_row_time = meta_row_height * LineTime / VRatio; 4205 } else { 4206 min_row_time = 4207 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); 4208 } 4209 } 4210 4211 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 4212 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip 4213 > min_row_time) { 4214 *ImmediateFlipSupportedForPipe = false; 4215 } else { 4216 *ImmediateFlipSupportedForPipe = true; 4217 } 4218 4219 #ifdef __DML_VBA_DEBUG__ 4220 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 4221 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); 4222 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", 4223 __func__, *DestinationLinesToRequestVMInImmediateFlip); 4224 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", 4225 __func__, *DestinationLinesToRequestRowInImmediateFlip); 4226 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); 4227 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", 4228 __func__, TimeForFetchingRowInVBlankImmediateFlip); 4229 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); 4230 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe); 4231 #endif 4232 } // CalculateFlipSchedule 4233 4234 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( 4235 struct vba_vars_st *v, 4236 unsigned int PrefetchMode, 4237 double DCFCLK, 4238 double ReturnBW, 4239 SOCParametersList mmSOCParameters, 4240 double SOCCLK, 4241 double DCFClkDeepSleep, 4242 unsigned int DETBufferSizeY[], 4243 unsigned int DETBufferSizeC[], 4244 unsigned int SwathHeightY[], 4245 unsigned int SwathHeightC[], 4246 double SwathWidthY[], 4247 double SwathWidthC[], 4248 unsigned int DPPPerSurface[], 4249 double BytePerPixelDETY[], 4250 double BytePerPixelDETC[], 4251 double DSTXAfterScaler[], 4252 double DSTYAfterScaler[], 4253 bool UnboundedRequestEnabled, 4254 unsigned int CompressedBufferSizeInkByte, 4255 4256 /* Output */ 4257 enum clock_change_support *DRAMClockChangeSupport, 4258 double MaxActiveDRAMClockChangeLatencySupported[], 4259 unsigned int SubViewportLinesNeededInMALL[], 4260 enum dm_fclock_change_support *FCLKChangeSupport, 4261 double *MinActiveFCLKChangeLatencySupported, 4262 bool *USRRetrainingSupport, 4263 double ActiveDRAMClockChangeLatencyMargin[]) 4264 { 4265 unsigned int i, j, k; 4266 unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0; 4267 unsigned int DRAMClockChangeSupportNumber = 0; 4268 unsigned int LastSurfaceWithoutMargin; 4269 unsigned int DRAMClockChangeMethod = 0; 4270 bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; 4271 double MinActiveFCLKChangeMargin = 0.; 4272 double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; 4273 double ActiveClockChangeLatencyHidingY; 4274 double ActiveClockChangeLatencyHidingC; 4275 double ActiveClockChangeLatencyHiding; 4276 double EffectiveDETBufferSizeY; 4277 double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; 4278 double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; 4279 double TotalPixelBW = 0.0; 4280 bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; 4281 double EffectiveLBLatencyHidingY; 4282 double EffectiveLBLatencyHidingC; 4283 double LinesInDETY[DC__NUM_DPP__MAX]; 4284 double LinesInDETC[DC__NUM_DPP__MAX]; 4285 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 4286 unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; 4287 double FullDETBufferingTimeY; 4288 double FullDETBufferingTimeC; 4289 double WritebackDRAMClockChangeLatencyMargin; 4290 double WritebackFCLKChangeLatencyMargin; 4291 double WritebackLatencyHiding; 4292 bool SameTimingForFCLKChange; 4293 4294 unsigned int TotalActiveWriteback = 0; 4295 unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; 4296 unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; 4297 4298 v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; 4299 v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency 4300 + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency; 4301 v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark; 4302 v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark; 4303 v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency 4304 + 10 / DCFClkDeepSleep; 4305 v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency 4306 + 10 / DCFClkDeepSleep; 4307 v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency 4308 + 10 / DCFClkDeepSleep; 4309 v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time 4310 + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep; 4311 4312 #ifdef __DML_VBA_DEBUG__ 4313 dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency); 4314 dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency); 4315 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency); 4316 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark); 4317 dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark); 4318 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark); 4319 dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark); 4320 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark); 4321 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark); 4322 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark); 4323 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", 4324 __func__, v->Watermark.Z8StutterEnterPlusExitWatermark); 4325 #endif 4326 4327 4328 TotalActiveWriteback = 0; 4329 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4330 if (v->WritebackEnable[k] == true) 4331 TotalActiveWriteback = TotalActiveWriteback + 1; 4332 } 4333 4334 if (TotalActiveWriteback <= 1) { 4335 v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; 4336 } else { 4337 v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency 4338 + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 4339 } 4340 if (v->USRRetrainingRequiredFinal) 4341 v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark 4342 + mmSOCParameters.USRRetrainingLatency; 4343 4344 if (TotalActiveWriteback <= 1) { 4345 v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency 4346 + mmSOCParameters.WritebackLatency; 4347 v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency 4348 + mmSOCParameters.WritebackLatency; 4349 } else { 4350 v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency 4351 + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 4352 v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency 4353 + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK; 4354 } 4355 4356 if (v->USRRetrainingRequiredFinal) 4357 v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark 4358 + mmSOCParameters.USRRetrainingLatency; 4359 4360 if (v->USRRetrainingRequiredFinal) 4361 v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark 4362 + mmSOCParameters.USRRetrainingLatency; 4363 4364 #ifdef __DML_VBA_DEBUG__ 4365 dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", 4366 __func__, v->Watermark.WritebackDRAMClockChangeWatermark); 4367 dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark); 4368 dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark); 4369 dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal); 4370 dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency); 4371 #endif 4372 4373 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4374 TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + 4375 SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]); 4376 } 4377 4378 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4379 4380 LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); 4381 LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); 4382 4383 4384 #ifdef __DML_VBA_DEBUG__ 4385 dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines); 4386 dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal); 4387 dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]); 4388 dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]); 4389 dml_print("DML::%s: k=%d, v->vtaps = %d\n", __func__, k, v->vtaps[k]); 4390 #endif 4391 4392 EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); 4393 EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); 4394 EffectiveDETBufferSizeY = DETBufferSizeY[k]; 4395 4396 if (UnboundedRequestEnabled) { 4397 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY 4398 + CompressedBufferSizeInkByte * 1024 4399 * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k]) 4400 / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; 4401 } 4402 4403 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 4404 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); 4405 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; 4406 4407 ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY 4408 - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k]; 4409 4410 if (v->NumberOfActiveSurfaces > 1) { 4411 ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY 4412 - (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k] 4413 / v->PixelClock[k] / v->VRatio[k]; 4414 } 4415 4416 if (BytePerPixelDETC[k] > 0) { 4417 LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 4418 LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]); 4419 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) 4420 / v->VRatioChroma[k]; 4421 ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC 4422 - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] 4423 / v->PixelClock[k]; 4424 if (v->NumberOfActiveSurfaces > 1) { 4425 ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC 4426 - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k] 4427 / v->PixelClock[k] / v->VRatioChroma[k]; 4428 } 4429 ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY, 4430 ActiveClockChangeLatencyHidingC); 4431 } else { 4432 ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY; 4433 } 4434 4435 ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark 4436 - v->Watermark.DRAMClockChangeWatermark; 4437 ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark 4438 - v->Watermark.FCLKChangeWatermark; 4439 USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark; 4440 4441 if (v->WritebackEnable[k]) { 4442 WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024 4443 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 4444 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); 4445 if (v->WritebackPixelFormat[k] == dm_444_64) 4446 WritebackLatencyHiding = WritebackLatencyHiding / 2; 4447 4448 WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding 4449 - v->Watermark.WritebackDRAMClockChangeWatermark; 4450 4451 WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding 4452 - v->Watermark.WritebackFCLKChangeWatermark; 4453 4454 ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k], 4455 WritebackFCLKChangeLatencyMargin); 4456 ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k], 4457 WritebackDRAMClockChangeLatencyMargin); 4458 } 4459 MaxActiveDRAMClockChangeLatencySupported[k] = 4460 (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? 4461 0 : 4462 (ActiveDRAMClockChangeLatencyMargin[k] 4463 + mmSOCParameters.DRAMClockChangeLatency); 4464 } 4465 4466 for (i = 0; i < v->NumberOfActiveSurfaces; ++i) { 4467 for (j = 0; j < v->NumberOfActiveSurfaces; ++j) { 4468 if (i == j || 4469 (v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) || 4470 (v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) || 4471 (v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) || 4472 (v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] && 4473 v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] && 4474 v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && 4475 (v->DRRDisplay[i] || v->DRRDisplay[j]))) { 4476 SynchronizedSurfaces[i][j] = true; 4477 } else { 4478 SynchronizedSurfaces[i][j] = false; 4479 } 4480 } 4481 } 4482 4483 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4484 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4485 (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin || 4486 ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) { 4487 FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; 4488 MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k]; 4489 SurfaceWithMinActiveFCLKChangeMargin = k; 4490 } 4491 } 4492 4493 *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; 4494 4495 SameTimingForFCLKChange = true; 4496 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4497 if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) { 4498 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4499 (SameTimingForFCLKChange || 4500 ActiveFCLKChangeLatencyMargin[k] < 4501 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { 4502 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k]; 4503 } 4504 SameTimingForFCLKChange = false; 4505 } 4506 } 4507 4508 if (MinActiveFCLKChangeMargin > 0) { 4509 *FCLKChangeSupport = dm_fclock_change_vactive; 4510 } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && 4511 (PrefetchMode <= 1)) { 4512 *FCLKChangeSupport = dm_fclock_change_vblank; 4513 } else { 4514 *FCLKChangeSupport = dm_fclock_change_unsupported; 4515 } 4516 4517 *USRRetrainingSupport = true; 4518 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4519 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4520 (USRRetrainingLatencyMargin[k] < 0)) { 4521 *USRRetrainingSupport = false; 4522 } 4523 } 4524 4525 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4526 if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame && 4527 v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport && 4528 v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && 4529 ActiveDRAMClockChangeLatencyMargin[k] < 0) { 4530 if (PrefetchMode > 0) { 4531 DRAMClockChangeSupportNumber = 2; 4532 } else if (DRAMClockChangeSupportNumber == 0) { 4533 DRAMClockChangeSupportNumber = 1; 4534 LastSurfaceWithoutMargin = k; 4535 } else if (DRAMClockChangeSupportNumber == 1 && 4536 !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) { 4537 DRAMClockChangeSupportNumber = 2; 4538 } 4539 } 4540 } 4541 4542 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4543 if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) 4544 DRAMClockChangeMethod = 1; 4545 else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) 4546 DRAMClockChangeMethod = 2; 4547 } 4548 4549 if (DRAMClockChangeMethod == 0) { 4550 if (DRAMClockChangeSupportNumber == 0) 4551 *DRAMClockChangeSupport = dm_dram_clock_change_vactive; 4552 else if (DRAMClockChangeSupportNumber == 1) 4553 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 4554 else 4555 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4556 } else if (DRAMClockChangeMethod == 1) { 4557 if (DRAMClockChangeSupportNumber == 0) 4558 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame; 4559 else if (DRAMClockChangeSupportNumber == 1) 4560 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame; 4561 else 4562 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4563 } else { 4564 if (DRAMClockChangeSupportNumber == 0) 4565 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp; 4566 else if (DRAMClockChangeSupportNumber == 1) 4567 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp; 4568 else 4569 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4570 } 4571 4572 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4573 unsigned int dst_y_pstate; 4574 unsigned int src_y_pstate_l; 4575 unsigned int src_y_pstate_c; 4576 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c; 4577 4578 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1); 4579 src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]); 4580 src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k]; 4581 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k]; 4582 4583 #ifdef __DML_VBA_DEBUG__ 4584 dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 4585 dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 4586 dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); 4587 dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); 4588 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]); 4589 dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate); 4590 dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l); 4591 dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l); 4592 dml_print("DML::%s: k=%d, v->meta_row_height = %d\n", __func__, k, v->meta_row_height[k]); 4593 dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l); 4594 #endif 4595 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l; 4596 4597 if (BytePerPixelDETC[k] > 0) { 4598 src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]); 4599 src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k]; 4600 sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k]; 4601 SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c); 4602 4603 #ifdef __DML_VBA_DEBUG__ 4604 dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c); 4605 dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c); 4606 dml_print("DML::%s: k=%d, v->meta_row_height_chroma = %d\n", __func__, k, v->meta_row_height_chroma[k]); 4607 dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c); 4608 #endif 4609 } 4610 } 4611 #ifdef __DML_VBA_DEBUG__ 4612 dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport); 4613 dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport); 4614 dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n", 4615 __func__, *MinActiveFCLKChangeLatencySupported); 4616 dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport); 4617 #endif 4618 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport 4619 4620 double dml32_CalculateWriteBackDISPCLK( 4621 enum source_format_class WritebackPixelFormat, 4622 double PixelClock, 4623 double WritebackHRatio, 4624 double WritebackVRatio, 4625 unsigned int WritebackHTaps, 4626 unsigned int WritebackVTaps, 4627 unsigned int WritebackSourceWidth, 4628 unsigned int WritebackDestinationWidth, 4629 unsigned int HTotal, 4630 unsigned int WritebackLineBufferSize, 4631 double DISPCLKDPPCLKVCOSpeed) 4632 { 4633 double DISPCLK_H, DISPCLK_V, DISPCLK_HB; 4634 4635 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; 4636 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; 4637 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * 4638 WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; 4639 return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed); 4640 } 4641 4642 void dml32_CalculateMinAndMaxPrefetchMode( 4643 enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal, 4644 unsigned int *MinPrefetchMode, 4645 unsigned int *MaxPrefetchMode) 4646 { 4647 if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) { 4648 *MinPrefetchMode = 3; 4649 *MaxPrefetchMode = 3; 4650 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) { 4651 *MinPrefetchMode = 2; 4652 *MaxPrefetchMode = 2; 4653 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) { 4654 *MinPrefetchMode = 1; 4655 *MaxPrefetchMode = 1; 4656 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) { 4657 *MinPrefetchMode = 0; 4658 *MaxPrefetchMode = 0; 4659 } else if (AllowForPStateChangeOrStutterInVBlankFinal == 4660 dm_prefetch_support_uclk_fclk_and_stutter_if_possible) { 4661 *MinPrefetchMode = 0; 4662 *MaxPrefetchMode = 3; 4663 } else { 4664 *MinPrefetchMode = 0; 4665 *MaxPrefetchMode = 3; 4666 } 4667 } // CalculateMinAndMaxPrefetchMode 4668 4669 void dml32_CalculatePixelDeliveryTimes( 4670 unsigned int NumberOfActiveSurfaces, 4671 double VRatio[], 4672 double VRatioChroma[], 4673 double VRatioPrefetchY[], 4674 double VRatioPrefetchC[], 4675 unsigned int swath_width_luma_ub[], 4676 unsigned int swath_width_chroma_ub[], 4677 unsigned int DPPPerSurface[], 4678 double HRatio[], 4679 double HRatioChroma[], 4680 double PixelClock[], 4681 double PSCL_THROUGHPUT[], 4682 double PSCL_THROUGHPUT_CHROMA[], 4683 double Dppclk[], 4684 unsigned int BytePerPixelC[], 4685 enum dm_rotation_angle SourceRotation[], 4686 unsigned int NumberOfCursors[], 4687 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 4688 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 4689 unsigned int BlockWidth256BytesY[], 4690 unsigned int BlockHeight256BytesY[], 4691 unsigned int BlockWidth256BytesC[], 4692 unsigned int BlockHeight256BytesC[], 4693 4694 /* Output */ 4695 double DisplayPipeLineDeliveryTimeLuma[], 4696 double DisplayPipeLineDeliveryTimeChroma[], 4697 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 4698 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 4699 double DisplayPipeRequestDeliveryTimeLuma[], 4700 double DisplayPipeRequestDeliveryTimeChroma[], 4701 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 4702 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 4703 double CursorRequestDeliveryTime[], 4704 double CursorRequestDeliveryTimePrefetch[]) 4705 { 4706 double req_per_swath_ub; 4707 unsigned int k; 4708 4709 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4710 4711 #ifdef __DML_VBA_DEBUG__ 4712 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); 4713 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); 4714 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); 4715 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); 4716 dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); 4717 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); 4718 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); 4719 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); 4720 dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 4721 dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]); 4722 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]); 4723 #endif 4724 4725 if (VRatio[k] <= 1) { 4726 DisplayPipeLineDeliveryTimeLuma[k] = 4727 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; 4728 } else { 4729 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 4730 } 4731 4732 if (BytePerPixelC[k] == 0) { 4733 DisplayPipeLineDeliveryTimeChroma[k] = 0; 4734 } else { 4735 if (VRatioChroma[k] <= 1) { 4736 DisplayPipeLineDeliveryTimeChroma[k] = 4737 swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 4738 } else { 4739 DisplayPipeLineDeliveryTimeChroma[k] = 4740 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; 4741 } 4742 } 4743 4744 if (VRatioPrefetchY[k] <= 1) { 4745 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = 4746 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; 4747 } else { 4748 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = 4749 swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 4750 } 4751 4752 if (BytePerPixelC[k] == 0) { 4753 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; 4754 } else { 4755 if (VRatioPrefetchC[k] <= 1) { 4756 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * 4757 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 4758 } else { 4759 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 4760 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; 4761 } 4762 } 4763 #ifdef __DML_VBA_DEBUG__ 4764 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", 4765 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); 4766 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", 4767 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); 4768 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", 4769 __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); 4770 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", 4771 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); 4772 #endif 4773 } 4774 4775 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4776 if (!IsVertical(SourceRotation[k])) 4777 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; 4778 else 4779 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; 4780 #ifdef __DML_VBA_DEBUG__ 4781 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub); 4782 #endif 4783 4784 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; 4785 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = 4786 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; 4787 if (BytePerPixelC[k] == 0) { 4788 DisplayPipeRequestDeliveryTimeChroma[k] = 0; 4789 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; 4790 } else { 4791 if (!IsVertical(SourceRotation[k])) 4792 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; 4793 else 4794 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; 4795 #ifdef __DML_VBA_DEBUG__ 4796 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub); 4797 #endif 4798 DisplayPipeRequestDeliveryTimeChroma[k] = 4799 DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; 4800 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 4801 DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; 4802 } 4803 #ifdef __DML_VBA_DEBUG__ 4804 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", 4805 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); 4806 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", 4807 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); 4808 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", 4809 __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); 4810 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", 4811 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); 4812 #endif 4813 } 4814 4815 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4816 unsigned int cursor_req_per_width; 4817 4818 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] / 4819 256.0 / 8.0, 1.0); 4820 if (NumberOfCursors[k] > 0) { 4821 if (VRatio[k] <= 1) { 4822 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] / 4823 HRatio[k] / PixelClock[k] / cursor_req_per_width; 4824 } else { 4825 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] / 4826 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; 4827 } 4828 if (VRatioPrefetchY[k] <= 1) { 4829 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] / 4830 HRatio[k] / PixelClock[k] / cursor_req_per_width; 4831 } else { 4832 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] / 4833 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; 4834 } 4835 } else { 4836 CursorRequestDeliveryTime[k] = 0; 4837 CursorRequestDeliveryTimePrefetch[k] = 0; 4838 } 4839 #ifdef __DML_VBA_DEBUG__ 4840 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", 4841 __func__, k, NumberOfCursors[k]); 4842 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", 4843 __func__, k, CursorRequestDeliveryTime[k]); 4844 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", 4845 __func__, k, CursorRequestDeliveryTimePrefetch[k]); 4846 #endif 4847 } 4848 } // CalculatePixelDeliveryTimes 4849 4850 void dml32_CalculateMetaAndPTETimes( 4851 bool use_one_row_for_frame[], 4852 unsigned int NumberOfActiveSurfaces, 4853 bool GPUVMEnable, 4854 unsigned int MetaChunkSize, 4855 unsigned int MinMetaChunkSizeBytes, 4856 unsigned int HTotal[], 4857 double VRatio[], 4858 double VRatioChroma[], 4859 double DestinationLinesToRequestRowInVBlank[], 4860 double DestinationLinesToRequestRowInImmediateFlip[], 4861 bool DCCEnable[], 4862 double PixelClock[], 4863 unsigned int BytePerPixelY[], 4864 unsigned int BytePerPixelC[], 4865 enum dm_rotation_angle SourceRotation[], 4866 unsigned int dpte_row_height[], 4867 unsigned int dpte_row_height_chroma[], 4868 unsigned int meta_row_width[], 4869 unsigned int meta_row_width_chroma[], 4870 unsigned int meta_row_height[], 4871 unsigned int meta_row_height_chroma[], 4872 unsigned int meta_req_width[], 4873 unsigned int meta_req_width_chroma[], 4874 unsigned int meta_req_height[], 4875 unsigned int meta_req_height_chroma[], 4876 unsigned int dpte_group_bytes[], 4877 unsigned int PTERequestSizeY[], 4878 unsigned int PTERequestSizeC[], 4879 unsigned int PixelPTEReqWidthY[], 4880 unsigned int PixelPTEReqHeightY[], 4881 unsigned int PixelPTEReqWidthC[], 4882 unsigned int PixelPTEReqHeightC[], 4883 unsigned int dpte_row_width_luma_ub[], 4884 unsigned int dpte_row_width_chroma_ub[], 4885 4886 /* Output */ 4887 double DST_Y_PER_PTE_ROW_NOM_L[], 4888 double DST_Y_PER_PTE_ROW_NOM_C[], 4889 double DST_Y_PER_META_ROW_NOM_L[], 4890 double DST_Y_PER_META_ROW_NOM_C[], 4891 double TimePerMetaChunkNominal[], 4892 double TimePerChromaMetaChunkNominal[], 4893 double TimePerMetaChunkVBlank[], 4894 double TimePerChromaMetaChunkVBlank[], 4895 double TimePerMetaChunkFlip[], 4896 double TimePerChromaMetaChunkFlip[], 4897 double time_per_pte_group_nom_luma[], 4898 double time_per_pte_group_vblank_luma[], 4899 double time_per_pte_group_flip_luma[], 4900 double time_per_pte_group_nom_chroma[], 4901 double time_per_pte_group_vblank_chroma[], 4902 double time_per_pte_group_flip_chroma[]) 4903 { 4904 unsigned int meta_chunk_width; 4905 unsigned int min_meta_chunk_width; 4906 unsigned int meta_chunk_per_row_int; 4907 unsigned int meta_row_remainder; 4908 unsigned int meta_chunk_threshold; 4909 unsigned int meta_chunks_per_row_ub; 4910 unsigned int meta_chunk_width_chroma; 4911 unsigned int min_meta_chunk_width_chroma; 4912 unsigned int meta_chunk_per_row_int_chroma; 4913 unsigned int meta_row_remainder_chroma; 4914 unsigned int meta_chunk_threshold_chroma; 4915 unsigned int meta_chunks_per_row_ub_chroma; 4916 unsigned int dpte_group_width_luma; 4917 unsigned int dpte_groups_per_row_luma_ub; 4918 unsigned int dpte_group_width_chroma; 4919 unsigned int dpte_groups_per_row_chroma_ub; 4920 unsigned int k; 4921 4922 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4923 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; 4924 if (BytePerPixelC[k] == 0) 4925 DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 4926 else 4927 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; 4928 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; 4929 if (BytePerPixelC[k] == 0) 4930 DST_Y_PER_META_ROW_NOM_C[k] = 0; 4931 else 4932 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; 4933 } 4934 4935 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4936 if (DCCEnable[k] == true) { 4937 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; 4938 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; 4939 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; 4940 meta_row_remainder = meta_row_width[k] % meta_chunk_width; 4941 if (!IsVertical(SourceRotation[k])) 4942 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; 4943 else 4944 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; 4945 4946 if (meta_row_remainder <= meta_chunk_threshold) 4947 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 4948 else 4949 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 4950 4951 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * 4952 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 4953 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * 4954 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 4955 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 4956 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 4957 if (BytePerPixelC[k] == 0) { 4958 TimePerChromaMetaChunkNominal[k] = 0; 4959 TimePerChromaMetaChunkVBlank[k] = 0; 4960 TimePerChromaMetaChunkFlip[k] = 0; 4961 } else { 4962 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / 4963 meta_row_height_chroma[k]; 4964 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / 4965 meta_row_height_chroma[k]; 4966 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / 4967 meta_chunk_width_chroma; 4968 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; 4969 if (!IsVertical(SourceRotation[k])) { 4970 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - 4971 meta_req_width_chroma[k]; 4972 } else { 4973 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - 4974 meta_req_height_chroma[k]; 4975 } 4976 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) 4977 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; 4978 else 4979 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; 4980 4981 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * 4982 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 4983 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * 4984 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 4985 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 4986 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 4987 } 4988 } else { 4989 TimePerMetaChunkNominal[k] = 0; 4990 TimePerMetaChunkVBlank[k] = 0; 4991 TimePerMetaChunkFlip[k] = 0; 4992 TimePerChromaMetaChunkNominal[k] = 0; 4993 TimePerChromaMetaChunkVBlank[k] = 0; 4994 TimePerChromaMetaChunkFlip[k] = 0; 4995 } 4996 } 4997 4998 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4999 if (GPUVMEnable == true) { 5000 if (!IsVertical(SourceRotation[k])) { 5001 dpte_group_width_luma = (double) dpte_group_bytes[k] / 5002 (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k]; 5003 } else { 5004 dpte_group_width_luma = (double) dpte_group_bytes[k] / 5005 (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k]; 5006 } 5007 5008 if (use_one_row_for_frame[k]) { 5009 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] / 5010 (double) dpte_group_width_luma / 2.0, 1.0); 5011 } else { 5012 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] / 5013 (double) dpte_group_width_luma, 1.0); 5014 } 5015 #ifdef __DML_VBA_DEBUG__ 5016 dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n", 5017 __func__, k, use_one_row_for_frame[k]); 5018 dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n", 5019 __func__, k, dpte_group_bytes[k]); 5020 dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n", 5021 __func__, k, PTERequestSizeY[k]); 5022 dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n", 5023 __func__, k, PixelPTEReqWidthY[k]); 5024 dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n", 5025 __func__, k, PixelPTEReqHeightY[k]); 5026 dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n", 5027 __func__, k, dpte_row_width_luma_ub[k]); 5028 dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n", 5029 __func__, k, dpte_group_width_luma); 5030 dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n", 5031 __func__, k, dpte_groups_per_row_luma_ub); 5032 #endif 5033 5034 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * 5035 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5036 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * 5037 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5038 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 5039 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5040 if (BytePerPixelC[k] == 0) { 5041 time_per_pte_group_nom_chroma[k] = 0; 5042 time_per_pte_group_vblank_chroma[k] = 0; 5043 time_per_pte_group_flip_chroma[k] = 0; 5044 } else { 5045 if (!IsVertical(SourceRotation[k])) { 5046 dpte_group_width_chroma = (double) dpte_group_bytes[k] / 5047 (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k]; 5048 } else { 5049 dpte_group_width_chroma = (double) dpte_group_bytes[k] / 5050 (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k]; 5051 } 5052 5053 if (use_one_row_for_frame[k]) { 5054 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] / 5055 (double) dpte_group_width_chroma / 2.0, 1.0); 5056 } else { 5057 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] / 5058 (double) dpte_group_width_chroma, 1.0); 5059 } 5060 #ifdef __DML_VBA_DEBUG__ 5061 dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n", 5062 __func__, k, dpte_row_width_chroma_ub[k]); 5063 dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n", 5064 __func__, k, dpte_group_width_chroma); 5065 dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n", 5066 __func__, k, dpte_groups_per_row_chroma_ub); 5067 #endif 5068 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * 5069 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 5070 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * 5071 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 5072 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 5073 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 5074 } 5075 } else { 5076 time_per_pte_group_nom_luma[k] = 0; 5077 time_per_pte_group_vblank_luma[k] = 0; 5078 time_per_pte_group_flip_luma[k] = 0; 5079 time_per_pte_group_nom_chroma[k] = 0; 5080 time_per_pte_group_vblank_chroma[k] = 0; 5081 time_per_pte_group_flip_chroma[k] = 0; 5082 } 5083 #ifdef __DML_VBA_DEBUG__ 5084 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n", 5085 __func__, k, DestinationLinesToRequestRowInVBlank[k]); 5086 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n", 5087 __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]); 5088 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n", 5089 __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]); 5090 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n", 5091 __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]); 5092 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n", 5093 __func__, k, DST_Y_PER_META_ROW_NOM_L[k]); 5094 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n", 5095 __func__, k, DST_Y_PER_META_ROW_NOM_C[k]); 5096 dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n", 5097 __func__, k, TimePerMetaChunkNominal[k]); 5098 dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n", 5099 __func__, k, TimePerMetaChunkVBlank[k]); 5100 dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n", 5101 __func__, k, TimePerMetaChunkFlip[k]); 5102 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n", 5103 __func__, k, TimePerChromaMetaChunkNominal[k]); 5104 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n", 5105 __func__, k, TimePerChromaMetaChunkVBlank[k]); 5106 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n", 5107 __func__, k, TimePerChromaMetaChunkFlip[k]); 5108 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n", 5109 __func__, k, time_per_pte_group_nom_luma[k]); 5110 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n", 5111 __func__, k, time_per_pte_group_vblank_luma[k]); 5112 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n", 5113 __func__, k, time_per_pte_group_flip_luma[k]); 5114 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n", 5115 __func__, k, time_per_pte_group_nom_chroma[k]); 5116 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n", 5117 __func__, k, time_per_pte_group_vblank_chroma[k]); 5118 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n", 5119 __func__, k, time_per_pte_group_flip_chroma[k]); 5120 #endif 5121 } 5122 } // CalculateMetaAndPTETimes 5123 5124 void dml32_CalculateVMGroupAndRequestTimes( 5125 unsigned int NumberOfActiveSurfaces, 5126 bool GPUVMEnable, 5127 unsigned int GPUVMMaxPageTableLevels, 5128 unsigned int HTotal[], 5129 unsigned int BytePerPixelC[], 5130 double DestinationLinesToRequestVMInVBlank[], 5131 double DestinationLinesToRequestVMInImmediateFlip[], 5132 bool DCCEnable[], 5133 double PixelClock[], 5134 unsigned int dpte_row_width_luma_ub[], 5135 unsigned int dpte_row_width_chroma_ub[], 5136 unsigned int vm_group_bytes[], 5137 unsigned int dpde0_bytes_per_frame_ub_l[], 5138 unsigned int dpde0_bytes_per_frame_ub_c[], 5139 unsigned int meta_pte_bytes_per_frame_ub_l[], 5140 unsigned int meta_pte_bytes_per_frame_ub_c[], 5141 5142 /* Output */ 5143 double TimePerVMGroupVBlank[], 5144 double TimePerVMGroupFlip[], 5145 double TimePerVMRequestVBlank[], 5146 double TimePerVMRequestFlip[]) 5147 { 5148 unsigned int k; 5149 unsigned int num_group_per_lower_vm_stage; 5150 unsigned int num_req_per_lower_vm_stage; 5151 5152 #ifdef __DML_VBA_DEBUG__ 5153 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 5154 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 5155 #endif 5156 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5157 5158 #ifdef __DML_VBA_DEBUG__ 5159 dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]); 5160 dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]); 5161 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n", 5162 __func__, k, dpde0_bytes_per_frame_ub_l[k]); 5163 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n", 5164 __func__, k, dpde0_bytes_per_frame_ub_c[k]); 5165 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n", 5166 __func__, k, meta_pte_bytes_per_frame_ub_l[k]); 5167 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n", 5168 __func__, k, meta_pte_bytes_per_frame_ub_c[k]); 5169 #endif 5170 5171 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { 5172 if (DCCEnable[k] == false) { 5173 if (BytePerPixelC[k] > 0) { 5174 num_group_per_lower_vm_stage = dml_ceil( 5175 (double) (dpde0_bytes_per_frame_ub_l[k]) / 5176 (double) (vm_group_bytes[k]), 1.0) + 5177 dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / 5178 (double) (vm_group_bytes[k]), 1.0); 5179 } else { 5180 num_group_per_lower_vm_stage = dml_ceil( 5181 (double) (dpde0_bytes_per_frame_ub_l[k]) / 5182 (double) (vm_group_bytes[k]), 1.0); 5183 } 5184 } else { 5185 if (GPUVMMaxPageTableLevels == 1) { 5186 if (BytePerPixelC[k] > 0) { 5187 num_group_per_lower_vm_stage = dml_ceil( 5188 (double) (meta_pte_bytes_per_frame_ub_l[k]) / 5189 (double) (vm_group_bytes[k]), 1.0) + 5190 dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / 5191 (double) (vm_group_bytes[k]), 1.0); 5192 } else { 5193 num_group_per_lower_vm_stage = dml_ceil( 5194 (double) (meta_pte_bytes_per_frame_ub_l[k]) / 5195 (double) (vm_group_bytes[k]), 1.0); 5196 } 5197 } else { 5198 if (BytePerPixelC[k] > 0) { 5199 num_group_per_lower_vm_stage = 2 + dml_ceil( 5200 (double) (dpde0_bytes_per_frame_ub_l[k]) / 5201 (double) (vm_group_bytes[k]), 1) + 5202 dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / 5203 (double) (vm_group_bytes[k]), 1) + 5204 dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / 5205 (double) (vm_group_bytes[k]), 1) + 5206 dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / 5207 (double) (vm_group_bytes[k]), 1); 5208 } else { 5209 num_group_per_lower_vm_stage = 1 + dml_ceil( 5210 (double) (dpde0_bytes_per_frame_ub_l[k]) / 5211 (double) (vm_group_bytes[k]), 1) + dml_ceil( 5212 (double) (meta_pte_bytes_per_frame_ub_l[k]) / 5213 (double) (vm_group_bytes[k]), 1); 5214 } 5215 } 5216 } 5217 5218 if (DCCEnable[k] == false) { 5219 if (BytePerPixelC[k] > 0) { 5220 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + 5221 dpde0_bytes_per_frame_ub_c[k] / 64; 5222 } else { 5223 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64; 5224 } 5225 } else { 5226 if (GPUVMMaxPageTableLevels == 1) { 5227 if (BytePerPixelC[k] > 0) { 5228 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + 5229 meta_pte_bytes_per_frame_ub_c[k] / 64; 5230 } else { 5231 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64; 5232 } 5233 } else { 5234 if (BytePerPixelC[k] > 0) { 5235 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 5236 64 + dpde0_bytes_per_frame_ub_c[k] / 64 + 5237 meta_pte_bytes_per_frame_ub_l[k] / 64 + 5238 meta_pte_bytes_per_frame_ub_c[k] / 64; 5239 } else { 5240 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 5241 64 + meta_pte_bytes_per_frame_ub_l[k] / 64; 5242 } 5243 } 5244 } 5245 5246 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * 5247 HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 5248 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * 5249 HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 5250 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * 5251 HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 5252 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * 5253 HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 5254 5255 if (GPUVMMaxPageTableLevels > 2) { 5256 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; 5257 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; 5258 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; 5259 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; 5260 } 5261 5262 } else { 5263 TimePerVMGroupVBlank[k] = 0; 5264 TimePerVMGroupFlip[k] = 0; 5265 TimePerVMRequestVBlank[k] = 0; 5266 TimePerVMRequestFlip[k] = 0; 5267 } 5268 5269 #ifdef __DML_VBA_DEBUG__ 5270 dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); 5271 dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); 5272 dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); 5273 dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); 5274 #endif 5275 } 5276 } // CalculateVMGroupAndRequestTimes 5277 5278 void dml32_CalculateDCCConfiguration( 5279 bool DCCEnabled, 5280 bool DCCProgrammingAssumesScanDirectionUnknown, 5281 enum source_format_class SourcePixelFormat, 5282 unsigned int SurfaceWidthLuma, 5283 unsigned int SurfaceWidthChroma, 5284 unsigned int SurfaceHeightLuma, 5285 unsigned int SurfaceHeightChroma, 5286 unsigned int nomDETInKByte, 5287 unsigned int RequestHeight256ByteLuma, 5288 unsigned int RequestHeight256ByteChroma, 5289 enum dm_swizzle_mode TilingFormat, 5290 unsigned int BytePerPixelY, 5291 unsigned int BytePerPixelC, 5292 double BytePerPixelDETY, 5293 double BytePerPixelDETC, 5294 enum dm_rotation_angle SourceRotation, 5295 /* Output */ 5296 unsigned int *MaxUncompressedBlockLuma, 5297 unsigned int *MaxUncompressedBlockChroma, 5298 unsigned int *MaxCompressedBlockLuma, 5299 unsigned int *MaxCompressedBlockChroma, 5300 unsigned int *IndependentBlockLuma, 5301 unsigned int *IndependentBlockChroma) 5302 { 5303 typedef enum { 5304 REQ_256Bytes, 5305 REQ_128BytesNonContiguous, 5306 REQ_128BytesContiguous, 5307 REQ_NA 5308 } RequestType; 5309 5310 RequestType RequestLuma; 5311 RequestType RequestChroma; 5312 5313 unsigned int segment_order_horz_contiguous_luma; 5314 unsigned int segment_order_horz_contiguous_chroma; 5315 unsigned int segment_order_vert_contiguous_luma; 5316 unsigned int segment_order_vert_contiguous_chroma; 5317 unsigned int req128_horz_wc_l; 5318 unsigned int req128_horz_wc_c; 5319 unsigned int req128_vert_wc_l; 5320 unsigned int req128_vert_wc_c; 5321 unsigned int MAS_vp_horz_limit; 5322 unsigned int MAS_vp_vert_limit; 5323 unsigned int max_vp_horz_width; 5324 unsigned int max_vp_vert_height; 5325 unsigned int eff_surf_width_l; 5326 unsigned int eff_surf_width_c; 5327 unsigned int eff_surf_height_l; 5328 unsigned int eff_surf_height_c; 5329 unsigned int full_swath_bytes_horz_wc_l; 5330 unsigned int full_swath_bytes_horz_wc_c; 5331 unsigned int full_swath_bytes_vert_wc_l; 5332 unsigned int full_swath_bytes_vert_wc_c; 5333 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024; 5334 5335 unsigned int yuv420; 5336 unsigned int horz_div_l; 5337 unsigned int horz_div_c; 5338 unsigned int vert_div_l; 5339 unsigned int vert_div_c; 5340 5341 unsigned int swath_buf_size; 5342 double detile_buf_vp_horz_limit; 5343 double detile_buf_vp_vert_limit; 5344 5345 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || 5346 SourcePixelFormat == dm_420_12) ? 1 : 0); 5347 horz_div_l = 1; 5348 horz_div_c = 1; 5349 vert_div_l = 1; 5350 vert_div_c = 1; 5351 5352 if (BytePerPixelY == 1) 5353 vert_div_l = 0; 5354 if (BytePerPixelC == 1) 5355 vert_div_c = 0; 5356 5357 if (BytePerPixelC == 0) { 5358 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256; 5359 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * 5360 BytePerPixelY / (1 + horz_div_l)); 5361 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / 5362 (1 + vert_div_l)); 5363 } else { 5364 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256; 5365 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * 5366 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma * 5367 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); 5368 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / 5369 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / 5370 (1 + vert_div_c) / (1 + yuv420)); 5371 } 5372 5373 if (SourcePixelFormat == dm_420_10) { 5374 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; 5375 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; 5376 } 5377 5378 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); 5379 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); 5380 5381 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144; 5382 MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144); 5383 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); 5384 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); 5385 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); 5386 eff_surf_width_c = eff_surf_width_l / (1 + yuv420); 5387 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); 5388 eff_surf_height_c = eff_surf_height_l / (1 + yuv420); 5389 5390 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; 5391 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; 5392 if (BytePerPixelC > 0) { 5393 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; 5394 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; 5395 } else { 5396 full_swath_bytes_horz_wc_c = 0; 5397 full_swath_bytes_vert_wc_c = 0; 5398 } 5399 5400 if (SourcePixelFormat == dm_420_10) { 5401 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0); 5402 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0); 5403 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0); 5404 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0); 5405 } 5406 5407 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5408 req128_horz_wc_l = 0; 5409 req128_horz_wc_c = 0; 5410 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + 5411 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5412 req128_horz_wc_l = 0; 5413 req128_horz_wc_c = 1; 5414 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * 5415 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5416 req128_horz_wc_l = 1; 5417 req128_horz_wc_c = 0; 5418 } else { 5419 req128_horz_wc_l = 1; 5420 req128_horz_wc_c = 1; 5421 } 5422 5423 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5424 req128_vert_wc_l = 0; 5425 req128_vert_wc_c = 0; 5426 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * 5427 full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5428 req128_vert_wc_l = 0; 5429 req128_vert_wc_c = 1; 5430 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && 5431 full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5432 req128_vert_wc_l = 1; 5433 req128_vert_wc_c = 0; 5434 } else { 5435 req128_vert_wc_l = 1; 5436 req128_vert_wc_c = 1; 5437 } 5438 5439 if (BytePerPixelY == 2) { 5440 segment_order_horz_contiguous_luma = 0; 5441 segment_order_vert_contiguous_luma = 1; 5442 } else { 5443 segment_order_horz_contiguous_luma = 1; 5444 segment_order_vert_contiguous_luma = 0; 5445 } 5446 5447 if (BytePerPixelC == 2) { 5448 segment_order_horz_contiguous_chroma = 0; 5449 segment_order_vert_contiguous_chroma = 1; 5450 } else { 5451 segment_order_horz_contiguous_chroma = 1; 5452 segment_order_vert_contiguous_chroma = 0; 5453 } 5454 #ifdef __DML_VBA_DEBUG__ 5455 dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled); 5456 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 5457 dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC); 5458 dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l); 5459 dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c); 5460 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l); 5461 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c); 5462 dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma); 5463 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n", 5464 __func__, segment_order_horz_contiguous_chroma); 5465 #endif 5466 5467 if (DCCProgrammingAssumesScanDirectionUnknown == true) { 5468 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) 5469 RequestLuma = REQ_256Bytes; 5470 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || 5471 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) 5472 RequestLuma = REQ_128BytesNonContiguous; 5473 else 5474 RequestLuma = REQ_128BytesContiguous; 5475 5476 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) 5477 RequestChroma = REQ_256Bytes; 5478 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || 5479 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) 5480 RequestChroma = REQ_128BytesNonContiguous; 5481 else 5482 RequestChroma = REQ_128BytesContiguous; 5483 5484 } else if (!IsVertical(SourceRotation)) { 5485 if (req128_horz_wc_l == 0) 5486 RequestLuma = REQ_256Bytes; 5487 else if (segment_order_horz_contiguous_luma == 0) 5488 RequestLuma = REQ_128BytesNonContiguous; 5489 else 5490 RequestLuma = REQ_128BytesContiguous; 5491 5492 if (req128_horz_wc_c == 0) 5493 RequestChroma = REQ_256Bytes; 5494 else if (segment_order_horz_contiguous_chroma == 0) 5495 RequestChroma = REQ_128BytesNonContiguous; 5496 else 5497 RequestChroma = REQ_128BytesContiguous; 5498 5499 } else { 5500 if (req128_vert_wc_l == 0) 5501 RequestLuma = REQ_256Bytes; 5502 else if (segment_order_vert_contiguous_luma == 0) 5503 RequestLuma = REQ_128BytesNonContiguous; 5504 else 5505 RequestLuma = REQ_128BytesContiguous; 5506 5507 if (req128_vert_wc_c == 0) 5508 RequestChroma = REQ_256Bytes; 5509 else if (segment_order_vert_contiguous_chroma == 0) 5510 RequestChroma = REQ_128BytesNonContiguous; 5511 else 5512 RequestChroma = REQ_128BytesContiguous; 5513 } 5514 5515 if (RequestLuma == REQ_256Bytes) { 5516 *MaxUncompressedBlockLuma = 256; 5517 *MaxCompressedBlockLuma = 256; 5518 *IndependentBlockLuma = 0; 5519 } else if (RequestLuma == REQ_128BytesContiguous) { 5520 *MaxUncompressedBlockLuma = 256; 5521 *MaxCompressedBlockLuma = 128; 5522 *IndependentBlockLuma = 128; 5523 } else { 5524 *MaxUncompressedBlockLuma = 256; 5525 *MaxCompressedBlockLuma = 64; 5526 *IndependentBlockLuma = 64; 5527 } 5528 5529 if (RequestChroma == REQ_256Bytes) { 5530 *MaxUncompressedBlockChroma = 256; 5531 *MaxCompressedBlockChroma = 256; 5532 *IndependentBlockChroma = 0; 5533 } else if (RequestChroma == REQ_128BytesContiguous) { 5534 *MaxUncompressedBlockChroma = 256; 5535 *MaxCompressedBlockChroma = 128; 5536 *IndependentBlockChroma = 128; 5537 } else { 5538 *MaxUncompressedBlockChroma = 256; 5539 *MaxCompressedBlockChroma = 64; 5540 *IndependentBlockChroma = 64; 5541 } 5542 5543 if (DCCEnabled != true || BytePerPixelC == 0) { 5544 *MaxUncompressedBlockChroma = 0; 5545 *MaxCompressedBlockChroma = 0; 5546 *IndependentBlockChroma = 0; 5547 } 5548 5549 if (DCCEnabled != true) { 5550 *MaxUncompressedBlockLuma = 0; 5551 *MaxCompressedBlockLuma = 0; 5552 *IndependentBlockLuma = 0; 5553 } 5554 5555 #ifdef __DML_VBA_DEBUG__ 5556 dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma); 5557 dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma); 5558 dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma); 5559 dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma); 5560 dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma); 5561 dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma); 5562 #endif 5563 5564 } // CalculateDCCConfiguration 5565 5566 void dml32_CalculateStutterEfficiency( 5567 unsigned int CompressedBufferSizeInkByte, 5568 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 5569 bool UnboundedRequestEnabled, 5570 unsigned int MetaFIFOSizeInKEntries, 5571 unsigned int ZeroSizeBufferEntries, 5572 unsigned int PixelChunkSizeInKByte, 5573 unsigned int NumberOfActiveSurfaces, 5574 unsigned int ROBBufferSizeInKByte, 5575 double TotalDataReadBandwidth, 5576 double DCFCLK, 5577 double ReturnBW, 5578 unsigned int CompbufReservedSpace64B, 5579 unsigned int CompbufReservedSpaceZs, 5580 double SRExitTime, 5581 double SRExitZ8Time, 5582 bool SynchronizeTimingsFinal, 5583 unsigned int BlendingAndTiming[], 5584 double StutterEnterPlusExitWatermark, 5585 double Z8StutterEnterPlusExitWatermark, 5586 bool ProgressiveToInterlaceUnitInOPP, 5587 bool Interlace[], 5588 double MinTTUVBlank[], 5589 unsigned int DPPPerSurface[], 5590 unsigned int DETBufferSizeY[], 5591 unsigned int BytePerPixelY[], 5592 double BytePerPixelDETY[], 5593 double SwathWidthY[], 5594 unsigned int SwathHeightY[], 5595 unsigned int SwathHeightC[], 5596 double NetDCCRateLuma[], 5597 double NetDCCRateChroma[], 5598 double DCCFractionOfZeroSizeRequestsLuma[], 5599 double DCCFractionOfZeroSizeRequestsChroma[], 5600 unsigned int HTotal[], 5601 unsigned int VTotal[], 5602 double PixelClock[], 5603 double VRatio[], 5604 enum dm_rotation_angle SourceRotation[], 5605 unsigned int BlockHeight256BytesY[], 5606 unsigned int BlockWidth256BytesY[], 5607 unsigned int BlockHeight256BytesC[], 5608 unsigned int BlockWidth256BytesC[], 5609 unsigned int DCCYMaxUncompressedBlock[], 5610 unsigned int DCCCMaxUncompressedBlock[], 5611 unsigned int VActive[], 5612 bool DCCEnable[], 5613 bool WritebackEnable[], 5614 double ReadBandwidthSurfaceLuma[], 5615 double ReadBandwidthSurfaceChroma[], 5616 double meta_row_bw[], 5617 double dpte_row_bw[], 5618 5619 /* Output */ 5620 double *StutterEfficiencyNotIncludingVBlank, 5621 double *StutterEfficiency, 5622 unsigned int *NumberOfStutterBurstsPerFrame, 5623 double *Z8StutterEfficiencyNotIncludingVBlank, 5624 double *Z8StutterEfficiency, 5625 unsigned int *Z8NumberOfStutterBurstsPerFrame, 5626 double *StutterPeriod, 5627 bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE) 5628 { 5629 5630 bool FoundCriticalSurface = false; 5631 unsigned int SwathSizeCriticalSurface = 0; 5632 unsigned int LastChunkOfSwathSize; 5633 unsigned int MissingPartOfLastSwathOfDETSize; 5634 double LastZ8StutterPeriod = 0.0; 5635 double LastStutterPeriod = 0.0; 5636 unsigned int TotalNumberOfActiveOTG = 0; 5637 double doublePixelClock; 5638 unsigned int doubleHTotal; 5639 unsigned int doubleVTotal; 5640 bool SameTiming = true; 5641 double DETBufferingTimeY; 5642 double SwathWidthYCriticalSurface = 0.0; 5643 double SwathHeightYCriticalSurface = 0.0; 5644 double VActiveTimeCriticalSurface = 0.0; 5645 double FrameTimeCriticalSurface = 0.0; 5646 unsigned int BytePerPixelYCriticalSurface = 0; 5647 double LinesToFinishSwathTransferStutterCriticalSurface = 0.0; 5648 unsigned int DETBufferSizeYCriticalSurface = 0; 5649 double MinTTUVBlankCriticalSurface = 0.0; 5650 unsigned int BlockWidth256BytesYCriticalSurface = 0; 5651 bool doublePlaneCriticalSurface = 0; 5652 bool doublePipeCriticalSurface = 0; 5653 double TotalCompressedReadBandwidth; 5654 double TotalRowReadBandwidth; 5655 double AverageDCCCompressionRate; 5656 double EffectiveCompressedBufferSize; 5657 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; 5658 double StutterBurstTime; 5659 unsigned int TotalActiveWriteback; 5660 double LinesInDETY; 5661 double LinesInDETYRoundedDownToSwath; 5662 double MaximumEffectiveCompressionLuma; 5663 double MaximumEffectiveCompressionChroma; 5664 double TotalZeroSizeRequestReadBandwidth; 5665 double TotalZeroSizeCompressedReadBandwidth; 5666 double AverageDCCZeroSizeFraction; 5667 double AverageZeroSizeCompressionRate; 5668 unsigned int k; 5669 5670 TotalZeroSizeRequestReadBandwidth = 0; 5671 TotalZeroSizeCompressedReadBandwidth = 0; 5672 TotalRowReadBandwidth = 0; 5673 TotalCompressedReadBandwidth = 0; 5674 5675 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5676 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5677 if (DCCEnable[k] == true) { 5678 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k]) 5679 || (!IsVertical(SourceRotation[k]) 5680 && BlockHeight256BytesY[k] > SwathHeightY[k]) 5681 || DCCYMaxUncompressedBlock[k] < 256) { 5682 MaximumEffectiveCompressionLuma = 2; 5683 } else { 5684 MaximumEffectiveCompressionLuma = 4; 5685 } 5686 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 5687 + ReadBandwidthSurfaceLuma[k] 5688 / dml_min(NetDCCRateLuma[k], 5689 MaximumEffectiveCompressionLuma); 5690 #ifdef __DML_VBA_DEBUG__ 5691 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", 5692 __func__, k, ReadBandwidthSurfaceLuma[k]); 5693 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n", 5694 __func__, k, NetDCCRateLuma[k]); 5695 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n", 5696 __func__, k, MaximumEffectiveCompressionLuma); 5697 #endif 5698 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth 5699 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; 5700 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 5701 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] 5702 / MaximumEffectiveCompressionLuma; 5703 5704 if (ReadBandwidthSurfaceChroma[k] > 0) { 5705 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k]) 5706 || (!IsVertical(SourceRotation[k]) 5707 && BlockHeight256BytesC[k] > SwathHeightC[k]) 5708 || DCCCMaxUncompressedBlock[k] < 256) { 5709 MaximumEffectiveCompressionChroma = 2; 5710 } else { 5711 MaximumEffectiveCompressionChroma = 4; 5712 } 5713 TotalCompressedReadBandwidth = 5714 TotalCompressedReadBandwidth 5715 + ReadBandwidthSurfaceChroma[k] 5716 / dml_min(NetDCCRateChroma[k], 5717 MaximumEffectiveCompressionChroma); 5718 #ifdef __DML_VBA_DEBUG__ 5719 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n", 5720 __func__, k, ReadBandwidthSurfaceChroma[k]); 5721 dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n", 5722 __func__, k, NetDCCRateChroma[k]); 5723 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n", 5724 __func__, k, MaximumEffectiveCompressionChroma); 5725 #endif 5726 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth 5727 + ReadBandwidthSurfaceChroma[k] 5728 * DCCFractionOfZeroSizeRequestsChroma[k]; 5729 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 5730 + ReadBandwidthSurfaceChroma[k] 5731 * DCCFractionOfZeroSizeRequestsChroma[k] 5732 / MaximumEffectiveCompressionChroma; 5733 } 5734 } else { 5735 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 5736 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k]; 5737 } 5738 TotalRowReadBandwidth = TotalRowReadBandwidth 5739 + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]); 5740 } 5741 } 5742 5743 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; 5744 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; 5745 5746 #ifdef __DML_VBA_DEBUG__ 5747 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); 5748 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); 5749 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); 5750 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", 5751 __func__, TotalZeroSizeCompressedReadBandwidth); 5752 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); 5753 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); 5754 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 5755 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); 5756 dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B); 5757 dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs); 5758 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); 5759 #endif 5760 if (AverageDCCZeroSizeFraction == 1) { 5761 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth 5762 / TotalZeroSizeCompressedReadBandwidth; 5763 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64 5764 * AverageZeroSizeCompressionRate 5765 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 5766 * AverageZeroSizeCompressionRate; 5767 } else if (AverageDCCZeroSizeFraction > 0) { 5768 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth 5769 / TotalZeroSizeCompressedReadBandwidth; 5770 EffectiveCompressedBufferSize = dml_min( 5771 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 5772 (double) MetaFIFOSizeInKEntries * 1024 * 64 5773 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate 5774 + 1 / AverageDCCCompressionRate)) 5775 + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) 5776 * AverageDCCCompressionRate, 5777 ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 5778 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 5779 5780 #ifdef __DML_VBA_DEBUG__ 5781 dml_print("DML::%s: min 1 = %f\n", __func__, 5782 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 5783 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 / 5784 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / 5785 AverageDCCCompressionRate)); 5786 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 - 5787 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate); 5788 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 / 5789 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 5790 #endif 5791 } else { 5792 EffectiveCompressedBufferSize = dml_min( 5793 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 5794 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) 5795 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) 5796 * AverageDCCCompressionRate; 5797 5798 #ifdef __DML_VBA_DEBUG__ 5799 dml_print("DML::%s: min 1 = %f\n", __func__, 5800 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 5801 dml_print("DML::%s: min 2 = %f\n", __func__, 5802 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); 5803 #endif 5804 } 5805 5806 #ifdef __DML_VBA_DEBUG__ 5807 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); 5808 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); 5809 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 5810 #endif 5811 5812 *StutterPeriod = 0; 5813 5814 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5815 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5816 LinesInDETY = ((double) DETBufferSizeY[k] 5817 + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) 5818 * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth) 5819 / BytePerPixelDETY[k] / SwathWidthY[k]; 5820 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); 5821 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k]) 5822 / VRatio[k]; 5823 #ifdef __DML_VBA_DEBUG__ 5824 dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 5825 dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 5826 dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); 5827 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", 5828 __func__, k, ReadBandwidthSurfaceLuma[k]); 5829 dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); 5830 dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY); 5831 dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n", 5832 __func__, k, LinesInDETYRoundedDownToSwath); 5833 dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]); 5834 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); 5835 dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]); 5836 dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); 5837 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); 5838 #endif 5839 5840 if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) { 5841 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; 5842 5843 FoundCriticalSurface = true; 5844 *StutterPeriod = DETBufferingTimeY; 5845 FrameTimeCriticalSurface = ( 5846 isInterlaceTiming ? 5847 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k]) 5848 * (double) HTotal[k] / PixelClock[k]; 5849 VActiveTimeCriticalSurface = ( 5850 isInterlaceTiming ? 5851 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k]) 5852 * (double) HTotal[k] / PixelClock[k]; 5853 BytePerPixelYCriticalSurface = BytePerPixelY[k]; 5854 SwathWidthYCriticalSurface = SwathWidthY[k]; 5855 SwathHeightYCriticalSurface = SwathHeightY[k]; 5856 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k]; 5857 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k] 5858 - (LinesInDETY - LinesInDETYRoundedDownToSwath); 5859 DETBufferSizeYCriticalSurface = DETBufferSizeY[k]; 5860 MinTTUVBlankCriticalSurface = MinTTUVBlank[k]; 5861 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0); 5862 doublePipeCriticalSurface = (DPPPerSurface[k] == 1); 5863 5864 #ifdef __DML_VBA_DEBUG__ 5865 dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n", 5866 __func__, k, FoundCriticalSurface); 5867 dml_print("DML::%s: k=%0d, StutterPeriod = %f\n", 5868 __func__, k, *StutterPeriod); 5869 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n", 5870 __func__, k, MinTTUVBlankCriticalSurface); 5871 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n", 5872 __func__, k, FrameTimeCriticalSurface); 5873 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n", 5874 __func__, k, VActiveTimeCriticalSurface); 5875 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n", 5876 __func__, k, BytePerPixelYCriticalSurface); 5877 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n", 5878 __func__, k, SwathWidthYCriticalSurface); 5879 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n", 5880 __func__, k, SwathHeightYCriticalSurface); 5881 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n", 5882 __func__, k, BlockWidth256BytesYCriticalSurface); 5883 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n", 5884 __func__, k, doublePlaneCriticalSurface); 5885 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n", 5886 __func__, k, doublePipeCriticalSurface); 5887 dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n", 5888 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface); 5889 #endif 5890 } 5891 } 5892 } 5893 5894 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, 5895 EffectiveCompressedBufferSize); 5896 #ifdef __DML_VBA_DEBUG__ 5897 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); 5898 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 5899 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", 5900 __func__, *StutterPeriod * TotalDataReadBandwidth); 5901 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 5902 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, 5903 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); 5904 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 5905 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); 5906 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); 5907 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 5908 #endif 5909 5910 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate 5911 / ReturnBW 5912 + (*StutterPeriod * TotalDataReadBandwidth 5913 - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) 5914 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; 5915 #ifdef __DML_VBA_DEBUG__ 5916 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 5917 AverageDCCCompressionRate / ReturnBW); 5918 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", 5919 __func__, (*StutterPeriod * TotalDataReadBandwidth)); 5920 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - 5921 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); 5922 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); 5923 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 5924 #endif 5925 StutterBurstTime = dml_max(StutterBurstTime, 5926 LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface 5927 * SwathWidthYCriticalSurface / ReturnBW); 5928 5929 #ifdef __DML_VBA_DEBUG__ 5930 dml_print("DML::%s: Time to finish residue swath=%f\n", 5931 __func__, 5932 LinesToFinishSwathTransferStutterCriticalSurface * 5933 BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW); 5934 #endif 5935 5936 TotalActiveWriteback = 0; 5937 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5938 if (WritebackEnable[k]) 5939 TotalActiveWriteback = TotalActiveWriteback + 1; 5940 } 5941 5942 if (TotalActiveWriteback == 0) { 5943 #ifdef __DML_VBA_DEBUG__ 5944 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 5945 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); 5946 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); 5947 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 5948 #endif 5949 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 5950 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; 5951 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 5952 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; 5953 *NumberOfStutterBurstsPerFrame = ( 5954 *StutterEfficiencyNotIncludingVBlank > 0 ? 5955 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); 5956 *Z8NumberOfStutterBurstsPerFrame = ( 5957 *Z8StutterEfficiencyNotIncludingVBlank > 0 ? 5958 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); 5959 } else { 5960 *StutterEfficiencyNotIncludingVBlank = 0.; 5961 *Z8StutterEfficiencyNotIncludingVBlank = 0.; 5962 *NumberOfStutterBurstsPerFrame = 0; 5963 *Z8NumberOfStutterBurstsPerFrame = 0; 5964 } 5965 #ifdef __DML_VBA_DEBUG__ 5966 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface); 5967 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", 5968 __func__, *StutterEfficiencyNotIncludingVBlank); 5969 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", 5970 __func__, *Z8StutterEfficiencyNotIncludingVBlank); 5971 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); 5972 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 5973 #endif 5974 5975 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5976 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5977 if (BlendingAndTiming[k] == k) { 5978 if (TotalNumberOfActiveOTG == 0) { 5979 doublePixelClock = PixelClock[k]; 5980 doubleHTotal = HTotal[k]; 5981 doubleVTotal = VTotal[k]; 5982 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k] 5983 || doubleVTotal != VTotal[k]) { 5984 SameTiming = false; 5985 } 5986 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 5987 } 5988 } 5989 } 5990 5991 if (*StutterEfficiencyNotIncludingVBlank > 0) { 5992 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 5993 5994 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming 5995 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) { 5996 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime 5997 + StutterBurstTime * VActiveTimeCriticalSurface 5998 / *StutterPeriod) / FrameTimeCriticalSurface) * 100; 5999 } else { 6000 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank; 6001 } 6002 } else { 6003 *StutterEfficiency = 0; 6004 } 6005 6006 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { 6007 LastZ8StutterPeriod = VActiveTimeCriticalSurface 6008 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6009 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod + 6010 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) { 6011 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime 6012 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100; 6013 } else { 6014 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; 6015 } 6016 } else { 6017 *Z8StutterEfficiency = 0.; 6018 } 6019 6020 #ifdef __DML_VBA_DEBUG__ 6021 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); 6022 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); 6023 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6024 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6025 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); 6026 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); 6027 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", 6028 __func__, *StutterEfficiencyNotIncludingVBlank); 6029 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6030 #endif 6031 6032 SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface 6033 * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface); 6034 LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024); 6035 MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) 6036 - DETBufferSizeYCriticalSurface; 6037 6038 *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1) 6039 && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0) 6040 && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) 6041 && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize)); 6042 6043 #ifdef __DML_VBA_DEBUG__ 6044 dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface); 6045 dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize); 6046 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize); 6047 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); 6048 #endif 6049 } // CalculateStutterEfficiency 6050 6051 void dml32_CalculateMaxDETAndMinCompressedBufferSize( 6052 unsigned int ConfigReturnBufferSizeInKByte, 6053 unsigned int ROBBufferSizeInKByte, 6054 unsigned int MaxNumDPP, 6055 bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size 6056 unsigned int nomDETInKByteOverrideValue, // VBA_DELTA 6057 6058 /* Output */ 6059 unsigned int *MaxTotalDETInKByte, 6060 unsigned int *nomDETInKByte, 6061 unsigned int *MinCompressedBufferSizeInKByte) 6062 { 6063 bool det_buff_size_override_en = nomDETInKByteOverrideEnable; 6064 unsigned int det_buff_size_override_val = nomDETInKByteOverrideValue; 6065 6066 *MaxTotalDETInKByte = dml_ceil(((double)ConfigReturnBufferSizeInKByte + 6067 (double) ROBBufferSizeInKByte) * 4.0 / 5.0, 64); 6068 *nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64); 6069 *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; 6070 6071 #ifdef __DML_VBA_DEBUG__ 6072 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte); 6073 dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte); 6074 dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP); 6075 dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte); 6076 dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte); 6077 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte); 6078 #endif 6079 6080 if (det_buff_size_override_en) { 6081 *nomDETInKByte = det_buff_size_override_val; 6082 #ifdef __DML_VBA_DEBUG__ 6083 dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte); 6084 #endif 6085 } 6086 } // CalculateMaxDETAndMinCompressedBufferSize 6087 6088 bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces, 6089 double ReturnBW, 6090 bool NotUrgentLatencyHiding[], 6091 double ReadBandwidthLuma[], 6092 double ReadBandwidthChroma[], 6093 double cursor_bw[], 6094 double meta_row_bandwidth[], 6095 double dpte_row_bandwidth[], 6096 unsigned int NumberOfDPP[], 6097 double UrgentBurstFactorLuma[], 6098 double UrgentBurstFactorChroma[], 6099 double UrgentBurstFactorCursor[]) 6100 { 6101 unsigned int k; 6102 bool NotEnoughUrgentLatencyHiding = false; 6103 bool CalculateVActiveBandwithSupport_val = false; 6104 double VActiveBandwith = 0; 6105 6106 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6107 if (NotUrgentLatencyHiding[k]) { 6108 NotEnoughUrgentLatencyHiding = true; 6109 } 6110 } 6111 6112 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6113 VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k]; 6114 } 6115 6116 CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding; 6117 6118 #ifdef __DML_VBA_DEBUG__ 6119 dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, NotEnoughUrgentLatencyHiding); 6120 dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith); 6121 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 6122 dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, CalculateVActiveBandwithSupport_val); 6123 #endif 6124 return CalculateVActiveBandwithSupport_val; 6125 } 6126 6127 void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces, 6128 double ReturnBW, 6129 bool NotUrgentLatencyHiding[], 6130 double ReadBandwidthLuma[], 6131 double ReadBandwidthChroma[], 6132 double PrefetchBandwidthLuma[], 6133 double PrefetchBandwidthChroma[], 6134 double cursor_bw[], 6135 double meta_row_bandwidth[], 6136 double dpte_row_bandwidth[], 6137 double cursor_bw_pre[], 6138 double prefetch_vmrow_bw[], 6139 unsigned int NumberOfDPP[], 6140 double UrgentBurstFactorLuma[], 6141 double UrgentBurstFactorChroma[], 6142 double UrgentBurstFactorCursor[], 6143 double UrgentBurstFactorLumaPre[], 6144 double UrgentBurstFactorChromaPre[], 6145 double UrgentBurstFactorCursorPre[], 6146 double PrefetchBW[], 6147 double VRatio[], 6148 double MaxVRatioPre, 6149 6150 /* output */ 6151 double *MaxPrefetchBandwidth, 6152 double *FractionOfUrgentBandwidth, 6153 bool *PrefetchBandwidthSupport) 6154 { 6155 unsigned int k; 6156 double ActiveBandwidthPerSurface; 6157 bool NotEnoughUrgentLatencyHiding = false; 6158 double TotalActiveBandwidth = 0; 6159 double TotalPrefetchBandwidth = 0; 6160 6161 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6162 if (NotUrgentLatencyHiding[k]) { 6163 NotEnoughUrgentLatencyHiding = true; 6164 } 6165 } 6166 6167 *MaxPrefetchBandwidth = 0; 6168 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6169 ActiveBandwidthPerSurface = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]); 6170 6171 TotalActiveBandwidth += ActiveBandwidthPerSurface; 6172 6173 TotalPrefetchBandwidth = TotalPrefetchBandwidth + PrefetchBW[k] * VRatio[k]; 6174 6175 *MaxPrefetchBandwidth = *MaxPrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6176 ActiveBandwidthPerSurface, 6177 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6178 } 6179 6180 if (MaxVRatioPre == __DML_MAX_VRATIO_PRE__) 6181 *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && (TotalPrefetchBandwidth <= TotalActiveBandwidth * __DML_MAX_BW_RATIO_PRE__) && !NotEnoughUrgentLatencyHiding; 6182 else 6183 *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding; 6184 6185 *FractionOfUrgentBandwidth = *MaxPrefetchBandwidth / ReturnBW; 6186 } 6187 6188 double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces, 6189 double ReturnBW, 6190 double ReadBandwidthLuma[], 6191 double ReadBandwidthChroma[], 6192 double PrefetchBandwidthLuma[], 6193 double PrefetchBandwidthChroma[], 6194 double cursor_bw[], 6195 double cursor_bw_pre[], 6196 unsigned int NumberOfDPP[], 6197 double UrgentBurstFactorLuma[], 6198 double UrgentBurstFactorChroma[], 6199 double UrgentBurstFactorCursor[], 6200 double UrgentBurstFactorLumaPre[], 6201 double UrgentBurstFactorChromaPre[], 6202 double UrgentBurstFactorCursorPre[]) 6203 { 6204 unsigned int k; 6205 double CalculateBandwidthAvailableForImmediateFlip_val = ReturnBW; 6206 6207 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6208 CalculateBandwidthAvailableForImmediateFlip_val = CalculateBandwidthAvailableForImmediateFlip_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6209 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6210 } 6211 6212 return CalculateBandwidthAvailableForImmediateFlip_val; 6213 } 6214 6215 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces, 6216 double ReturnBW, 6217 enum immediate_flip_requirement ImmediateFlipRequirement[], 6218 double final_flip_bw[], 6219 double ReadBandwidthLuma[], 6220 double ReadBandwidthChroma[], 6221 double PrefetchBandwidthLuma[], 6222 double PrefetchBandwidthChroma[], 6223 double cursor_bw[], 6224 double meta_row_bandwidth[], 6225 double dpte_row_bandwidth[], 6226 double cursor_bw_pre[], 6227 double prefetch_vmrow_bw[], 6228 unsigned int NumberOfDPP[], 6229 double UrgentBurstFactorLuma[], 6230 double UrgentBurstFactorChroma[], 6231 double UrgentBurstFactorCursor[], 6232 double UrgentBurstFactorLumaPre[], 6233 double UrgentBurstFactorChromaPre[], 6234 double UrgentBurstFactorCursorPre[], 6235 6236 /* output */ 6237 double *TotalBandwidth, 6238 double *FractionOfUrgentBandwidth, 6239 bool *ImmediateFlipBandwidthSupport) 6240 { 6241 unsigned int k; 6242 *TotalBandwidth = 0; 6243 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6244 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) { 6245 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6246 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6247 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6248 } else { 6249 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6250 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6251 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6252 } 6253 } 6254 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW); 6255 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW; 6256 } 6257 6258 bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces, 6259 double ReturnBW, 6260 double UrgentLatency, 6261 unsigned int SwathHeightY[], 6262 unsigned int SwathHeightC[], 6263 unsigned int SwathWidthY[], 6264 unsigned int SwathWidthC[], 6265 double BytePerPixelInDETY[], 6266 double BytePerPixelInDETC[], 6267 unsigned int DETBufferSizeY[], 6268 unsigned int DETBufferSizeC[], 6269 unsigned int NumOfDPP[], 6270 unsigned int HTotal[], 6271 double PixelClock[], 6272 double VRatioY[], 6273 double VRatioC[], 6274 enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[], 6275 enum unbounded_requesting_policy UseUnboundedRequesting) 6276 { 6277 int k; 6278 double SwathSizeAllSurfaces = 0; 6279 double SwathSizeAllSurfacesInFetchTimeUs; 6280 double DETSwathLatencyHidingUs; 6281 double DETSwathLatencyHidingYUs; 6282 double DETSwathLatencyHidingCUs; 6283 double SwathSizePerSurfaceY[DC__NUM_DPP__MAX]; 6284 double SwathSizePerSurfaceC[DC__NUM_DPP__MAX]; 6285 bool NotEnoughDETSwathFillLatencyHiding = false; 6286 6287 if (UseUnboundedRequesting == dm_unbounded_requesting) 6288 return false; 6289 6290 /* calculate sum of single swath size for all pipes in bytes */ 6291 for (k = 0; k < NumberOfActiveSurfaces; k++) { 6292 SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k]; 6293 6294 if (SwathHeightC[k] != 0) 6295 SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k]; 6296 else 6297 SwathSizePerSurfaceC[k] = 0; 6298 6299 SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k]; 6300 } 6301 6302 SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency; 6303 6304 /* ensure all DET - 1 swath can hide a fetch for all surfaces */ 6305 for (k = 0; k < NumberOfActiveSurfaces; k++) { 6306 double LineTime = HTotal[k] / PixelClock[k]; 6307 6308 /* only care if surface is not phantom */ 6309 if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 6310 DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime; 6311 6312 if (SwathHeightC[k] != 0) { 6313 DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime; 6314 6315 DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs); 6316 } else { 6317 DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs; 6318 } 6319 6320 /* DET must be able to hide time to fetch 1 swath for each surface */ 6321 if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) { 6322 NotEnoughDETSwathFillLatencyHiding = true; 6323 break; 6324 } 6325 } 6326 } 6327 6328 return NotEnoughDETSwathFillLatencyHiding; 6329 } 6330