1 /* 2 * Copyright 2022 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22 * Authors: AMD 23 * 24 */ 25 #include "display_mode_vba_util_32.h" 26 #include "../dml_inline_defs.h" 27 #include "display_mode_vba_32.h" 28 #include "../display_mode_lib.h" 29 30 #define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096 31 32 unsigned int dml32_dscceComputeDelay( 33 unsigned int bpc, 34 double BPP, 35 unsigned int sliceWidth, 36 unsigned int numSlices, 37 enum output_format_class pixelFormat, 38 enum output_encoder_class Output) 39 { 40 // valid bpc = source bits per component in the set of {8, 10, 12} 41 // valid bpp = increments of 1/16 of a bit 42 // min = 6/7/8 in N420/N422/444, respectively 43 // max = such that compression is 1:1 44 //valid sliceWidth = number of pixels per slice line, 45 // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 46 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 47 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 48 49 // fixed value 50 unsigned int rcModelSize = 8192; 51 52 // N422/N420 operate at 2 pixels per clock 53 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L, 54 Delay, pixels; 55 56 if (pixelFormat == dm_420) 57 pixelsPerClock = 2; 58 else if (pixelFormat == dm_n422) 59 pixelsPerClock = 2; 60 // #all other modes operate at 1 pixel per clock 61 else 62 pixelsPerClock = 1; 63 64 //initial transmit delay as per PPS 65 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); 66 67 //compute ssm delay 68 if (bpc == 8) 69 D = 81; 70 else if (bpc == 10) 71 D = 89; 72 else 73 D = 113; 74 75 //divide by pixel per cycle to compute slice width as seen by DSC 76 w = sliceWidth / pixelsPerClock; 77 78 //422 mode has an additional cycle of delay 79 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) 80 s = 0; 81 else 82 s = 1; 83 84 //main calculation for the dscce 85 ix = initalXmitDelay + 45; 86 wx = (w + 2) / 3; 87 p = 
3 * wx - w; 88 l0 = ix / w; 89 a = ix + p * l0; 90 ax = (a + 2) / 3 + D + 6 + 1; 91 L = (ax + wx - 1) / wx; 92 if ((ix % w) == 0 && p != 0) 93 lstall = 1; 94 else 95 lstall = 0; 96 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; 97 98 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 99 pixels = Delay * 3 * pixelsPerClock; 100 101 #ifdef __DML_VBA_DEBUG__ 102 dml_print("DML::%s: bpc: %d\n", __func__, bpc); 103 dml_print("DML::%s: BPP: %f\n", __func__, BPP); 104 dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth); 105 dml_print("DML::%s: numSlices: %d\n", __func__, numSlices); 106 dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat); 107 dml_print("DML::%s: Output: %d\n", __func__, Output); 108 dml_print("DML::%s: pixels: %d\n", __func__, pixels); 109 #endif 110 111 return pixels; 112 } 113 114 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) 115 { 116 unsigned int Delay = 0; 117 118 if (pixelFormat == dm_420) { 119 // sfr 120 Delay = Delay + 2; 121 // dsccif 122 Delay = Delay + 0; 123 // dscc - input deserializer 124 Delay = Delay + 3; 125 // dscc gets pixels every other cycle 126 Delay = Delay + 2; 127 // dscc - input cdc fifo 128 Delay = Delay + 12; 129 // dscc gets pixels every other cycle 130 Delay = Delay + 13; 131 // dscc - cdc uncertainty 132 Delay = Delay + 2; 133 // dscc - output cdc fifo 134 Delay = Delay + 7; 135 // dscc gets pixels every other cycle 136 Delay = Delay + 3; 137 // dscc - cdc uncertainty 138 Delay = Delay + 2; 139 // dscc - output serializer 140 Delay = Delay + 1; 141 // sft 142 Delay = Delay + 1; 143 } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) { 144 // sfr 145 Delay = Delay + 2; 146 // dsccif 147 Delay = Delay + 1; 148 // dscc - input deserializer 149 Delay = Delay + 5; 150 // dscc - input cdc fifo 151 Delay = Delay + 25; 152 // dscc - cdc uncertainty 153 Delay = Delay + 2; 154 // dscc - output cdc 
fifo 155 Delay = Delay + 10; 156 // dscc - cdc uncertainty 157 Delay = Delay + 2; 158 // dscc - output serializer 159 Delay = Delay + 1; 160 // sft 161 Delay = Delay + 1; 162 } else { 163 // sfr 164 Delay = Delay + 2; 165 // dsccif 166 Delay = Delay + 0; 167 // dscc - input deserializer 168 Delay = Delay + 3; 169 // dscc - input cdc fifo 170 Delay = Delay + 12; 171 // dscc - cdc uncertainty 172 Delay = Delay + 2; 173 // dscc - output cdc fifo 174 Delay = Delay + 7; 175 // dscc - output serializer 176 Delay = Delay + 1; 177 // dscc - cdc uncertainty 178 Delay = Delay + 2; 179 // sft 180 Delay = Delay + 1; 181 } 182 183 return Delay; 184 } 185 186 187 bool IsVertical(enum dm_rotation_angle Scan) 188 { 189 bool is_vert = false; 190 191 if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m) 192 is_vert = true; 193 else 194 is_vert = false; 195 return is_vert; 196 } 197 198 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput( 199 double HRatio, 200 double HRatioChroma, 201 double VRatio, 202 double VRatioChroma, 203 double MaxDCHUBToPSCLThroughput, 204 double MaxPSCLToLBThroughput, 205 double PixelClock, 206 enum source_format_class SourcePixelFormat, 207 unsigned int HTaps, 208 unsigned int HTapsChroma, 209 unsigned int VTaps, 210 unsigned int VTapsChroma, 211 212 /* output */ 213 double *PSCL_THROUGHPUT, 214 double *PSCL_THROUGHPUT_CHROMA, 215 double *DPPCLKUsingSingleDPP) 216 { 217 double DPPCLKUsingSingleDPPLuma; 218 double DPPCLKUsingSingleDPPChroma; 219 220 if (HRatio > 1) { 221 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / 222 dml_ceil((double) HTaps / 6.0, 1.0)); 223 } else { 224 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); 225 } 226 227 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / 228 *PSCL_THROUGHPUT, 1); 229 230 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * 
PixelClock) 231 DPPCLKUsingSingleDPPLuma = 2 * PixelClock; 232 233 if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 && 234 SourcePixelFormat != dm_rgbe_alpha)) { 235 *PSCL_THROUGHPUT_CHROMA = 0; 236 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma; 237 } else { 238 if (HRatioChroma > 1) { 239 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * 240 HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0)); 241 } else { 242 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); 243 } 244 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma), 245 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); 246 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock) 247 DPPCLKUsingSingleDPPChroma = 2 * PixelClock; 248 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma); 249 } 250 } 251 252 void dml32_CalculateBytePerPixelAndBlockSizes( 253 enum source_format_class SourcePixelFormat, 254 enum dm_swizzle_mode SurfaceTiling, 255 256 /* Output */ 257 unsigned int *BytePerPixelY, 258 unsigned int *BytePerPixelC, 259 double *BytePerPixelDETY, 260 double *BytePerPixelDETC, 261 unsigned int *BlockHeight256BytesY, 262 unsigned int *BlockHeight256BytesC, 263 unsigned int *BlockWidth256BytesY, 264 unsigned int *BlockWidth256BytesC, 265 unsigned int *MacroTileHeightY, 266 unsigned int *MacroTileHeightC, 267 unsigned int *MacroTileWidthY, 268 unsigned int *MacroTileWidthC) 269 { 270 if (SourcePixelFormat == dm_444_64) { 271 *BytePerPixelDETY = 8; 272 *BytePerPixelDETC = 0; 273 *BytePerPixelY = 8; 274 *BytePerPixelC = 0; 275 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) { 276 *BytePerPixelDETY = 4; 277 *BytePerPixelDETC = 0; 278 *BytePerPixelY = 4; 279 *BytePerPixelC = 0; 280 } else if (SourcePixelFormat == dm_444_16) { 281 
*BytePerPixelDETY = 2; 282 *BytePerPixelDETC = 0; 283 *BytePerPixelY = 2; 284 *BytePerPixelC = 0; 285 } else if (SourcePixelFormat == dm_444_8) { 286 *BytePerPixelDETY = 1; 287 *BytePerPixelDETC = 0; 288 *BytePerPixelY = 1; 289 *BytePerPixelC = 0; 290 } else if (SourcePixelFormat == dm_rgbe_alpha) { 291 *BytePerPixelDETY = 4; 292 *BytePerPixelDETC = 1; 293 *BytePerPixelY = 4; 294 *BytePerPixelC = 1; 295 } else if (SourcePixelFormat == dm_420_8) { 296 *BytePerPixelDETY = 1; 297 *BytePerPixelDETC = 2; 298 *BytePerPixelY = 1; 299 *BytePerPixelC = 2; 300 } else if (SourcePixelFormat == dm_420_12) { 301 *BytePerPixelDETY = 2; 302 *BytePerPixelDETC = 4; 303 *BytePerPixelY = 2; 304 *BytePerPixelC = 4; 305 } else { 306 *BytePerPixelDETY = 4.0 / 3; 307 *BytePerPixelDETC = 8.0 / 3; 308 *BytePerPixelY = 2; 309 *BytePerPixelC = 4; 310 } 311 #ifdef __DML_VBA_DEBUG__ 312 dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat); 313 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); 314 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); 315 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY); 316 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC); 317 #endif 318 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 319 || SourcePixelFormat == dm_444_16 320 || SourcePixelFormat == dm_444_8 321 || SourcePixelFormat == dm_mono_16 322 || SourcePixelFormat == dm_mono_8 323 || SourcePixelFormat == dm_rgbe)) { 324 if (SurfaceTiling == dm_sw_linear) 325 *BlockHeight256BytesY = 1; 326 else if (SourcePixelFormat == dm_444_64) 327 *BlockHeight256BytesY = 4; 328 else if (SourcePixelFormat == dm_444_8) 329 *BlockHeight256BytesY = 16; 330 else 331 *BlockHeight256BytesY = 8; 332 333 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 334 *BlockHeight256BytesC = 0; 335 *BlockWidth256BytesC = 0; 336 } else { 337 if (SurfaceTiling == dm_sw_linear) { 338 
*BlockHeight256BytesY = 1; 339 *BlockHeight256BytesC = 1; 340 } else if (SourcePixelFormat == dm_rgbe_alpha) { 341 *BlockHeight256BytesY = 8; 342 *BlockHeight256BytesC = 16; 343 } else if (SourcePixelFormat == dm_420_8) { 344 *BlockHeight256BytesY = 16; 345 *BlockHeight256BytesC = 8; 346 } else { 347 *BlockHeight256BytesY = 8; 348 *BlockHeight256BytesC = 8; 349 } 350 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 351 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; 352 } 353 #ifdef __DML_VBA_DEBUG__ 354 dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY); 355 dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY); 356 dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC); 357 dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC); 358 #endif 359 360 if (SurfaceTiling == dm_sw_linear) { 361 *MacroTileHeightY = *BlockHeight256BytesY; 362 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY; 363 *MacroTileHeightC = *BlockHeight256BytesC; 364 if (*MacroTileHeightC == 0) 365 *MacroTileWidthC = 0; 366 else 367 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC; 368 } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t || 369 SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) { 370 *MacroTileHeightY = 16 * *BlockHeight256BytesY; 371 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY; 372 *MacroTileHeightC = 16 * *BlockHeight256BytesC; 373 if (*MacroTileHeightC == 0) 374 *MacroTileWidthC = 0; 375 else 376 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC; 377 } else { 378 *MacroTileHeightY = 32 * *BlockHeight256BytesY; 379 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY; 380 *MacroTileHeightC = 32 * *BlockHeight256BytesC; 381 if (*MacroTileHeightC == 0) 382 *MacroTileWidthC = 0; 383 else 384 *MacroTileWidthC = 65536 * 4 / 
*BytePerPixelC / *MacroTileHeightC; 385 } 386 387 #ifdef __DML_VBA_DEBUG__ 388 dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY); 389 dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY); 390 dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC); 391 dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC); 392 #endif 393 } // CalculateBytePerPixelAndBlockSizes 394 395 void dml32_CalculateSwathAndDETConfiguration( 396 unsigned int DETSizeOverride[], 397 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 398 unsigned int ConfigReturnBufferSizeInKByte, 399 unsigned int MaxTotalDETInKByte, 400 unsigned int MinCompressedBufferSizeInKByte, 401 double ForceSingleDPP, 402 unsigned int NumberOfActiveSurfaces, 403 unsigned int nomDETInKByte, 404 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 405 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, 406 unsigned int PixelChunkSizeKBytes, 407 unsigned int ROBSizeKBytes, 408 unsigned int CompressedBufferSegmentSizeInkByteFinal, 409 enum output_encoder_class Output[], 410 double ReadBandwidthLuma[], 411 double ReadBandwidthChroma[], 412 double MaximumSwathWidthLuma[], 413 double MaximumSwathWidthChroma[], 414 enum dm_rotation_angle SourceRotation[], 415 bool ViewportStationary[], 416 enum source_format_class SourcePixelFormat[], 417 enum dm_swizzle_mode SurfaceTiling[], 418 unsigned int ViewportWidth[], 419 unsigned int ViewportHeight[], 420 unsigned int ViewportXStart[], 421 unsigned int ViewportYStart[], 422 unsigned int ViewportXStartC[], 423 unsigned int ViewportYStartC[], 424 unsigned int SurfaceWidthY[], 425 unsigned int SurfaceWidthC[], 426 unsigned int SurfaceHeightY[], 427 unsigned int SurfaceHeightC[], 428 unsigned int Read256BytesBlockHeightY[], 429 unsigned int Read256BytesBlockHeightC[], 430 unsigned int Read256BytesBlockWidthY[], 431 unsigned int Read256BytesBlockWidthC[], 432 enum 
odm_combine_mode ODMMode[], 433 unsigned int BlendingAndTiming[], 434 unsigned int BytePerPixY[], 435 unsigned int BytePerPixC[], 436 double BytePerPixDETY[], 437 double BytePerPixDETC[], 438 unsigned int HActive[], 439 double HRatio[], 440 double HRatioChroma[], 441 unsigned int DPPPerSurface[], 442 443 /* Output */ 444 unsigned int swath_width_luma_ub[], 445 unsigned int swath_width_chroma_ub[], 446 double SwathWidth[], 447 double SwathWidthChroma[], 448 unsigned int SwathHeightY[], 449 unsigned int SwathHeightC[], 450 unsigned int DETBufferSizeInKByte[], 451 unsigned int DETBufferSizeY[], 452 unsigned int DETBufferSizeC[], 453 bool *UnboundedRequestEnabled, 454 unsigned int *CompressedBufferSizeInkByte, 455 unsigned int *CompBufReservedSpaceKBytes, 456 bool *CompBufReservedSpaceNeedAdjustment, 457 bool ViewportSizeSupportPerSurface[], 458 bool *ViewportSizeSupport) 459 { 460 unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 461 unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 462 unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; 463 unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; 464 unsigned int RoundedUpSwathSizeBytesY; 465 unsigned int RoundedUpSwathSizeBytesC; 466 double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; 467 double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; 468 unsigned int k; 469 unsigned int TotalActiveDPP = 0; 470 bool NoChromaSurfaces = true; 471 unsigned int DETBufferSizeInKByteForSwathCalculation; 472 473 #ifdef __DML_VBA_DEBUG__ 474 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 475 dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes); 476 dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes); 477 #endif 478 dml32_CalculateSwathWidth(ForceSingleDPP, 479 NumberOfActiveSurfaces, 480 SourcePixelFormat, 481 SourceRotation, 482 ViewportStationary, 483 ViewportWidth, 484 ViewportHeight, 485 ViewportXStart, 486 ViewportYStart, 487 ViewportXStartC, 488 
ViewportYStartC, 489 SurfaceWidthY, 490 SurfaceWidthC, 491 SurfaceHeightY, 492 SurfaceHeightC, 493 ODMMode, 494 BytePerPixY, 495 BytePerPixC, 496 Read256BytesBlockHeightY, 497 Read256BytesBlockHeightC, 498 Read256BytesBlockWidthY, 499 Read256BytesBlockWidthC, 500 BlendingAndTiming, 501 HActive, 502 HRatio, 503 DPPPerSurface, 504 505 /* Output */ 506 SwathWidthdoubleDPP, 507 SwathWidthdoubleDPPChroma, 508 SwathWidth, 509 SwathWidthChroma, 510 MaximumSwathHeightY, 511 MaximumSwathHeightC, 512 swath_width_luma_ub, 513 swath_width_chroma_ub); 514 515 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 516 RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; 517 RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; 518 #ifdef __DML_VBA_DEBUG__ 519 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 520 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); 521 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]); 522 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]); 523 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, 524 RoundedUpMaxSwathSizeBytesY[k]); 525 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); 526 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]); 527 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]); 528 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, 529 RoundedUpMaxSwathSizeBytesC[k]); 530 #endif 531 532 if (SourcePixelFormat[k] == dm_420_10) { 533 RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256); 534 RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256); 535 } 536 } 
537 538 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 539 TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); 540 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 541 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) { 542 NoChromaSurfaces = false; 543 } 544 } 545 546 // By default, just set the reserved space to 2 pixel chunks size 547 *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2; 548 549 // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data 550 // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio] 551 // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req 552 *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512); 553 554 if (*CompBufReservedSpaceNeedAdjustment == 1) { 555 *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512; 556 } 557 558 #ifdef __DML_VBA_DEBUG__ 559 dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes); 560 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment); 561 #endif 562 563 *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); 564 565 dml32_CalculateDETBufferSize(DETSizeOverride, 566 UseMALLForPStateChange, 567 ForceSingleDPP, 568 NumberOfActiveSurfaces, 569 *UnboundedRequestEnabled, 570 nomDETInKByte, 571 MaxTotalDETInKByte, 572 ConfigReturnBufferSizeInKByte, 573 MinCompressedBufferSizeInKByte, 574 CompressedBufferSegmentSizeInkByteFinal, 575 SourcePixelFormat, 576 ReadBandwidthLuma, 577 
ReadBandwidthChroma, 578 RoundedUpMaxSwathSizeBytesY, 579 RoundedUpMaxSwathSizeBytesC, 580 DPPPerSurface, 581 582 /* Output */ 583 DETBufferSizeInKByte, // per hubp pipe 584 CompressedBufferSizeInkByte); 585 586 #ifdef __DML_VBA_DEBUG__ 587 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); 588 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 589 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 590 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); 591 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); 592 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 593 #endif 594 595 *ViewportSizeSupport = true; 596 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 597 598 DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == 599 dm_use_mall_pstate_change_phantom_pipe ? 
1024 : DETBufferSizeInKByte[k]); 600 #ifdef __DML_VBA_DEBUG__ 601 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k, 602 DETBufferSizeInKByteForSwathCalculation); 603 #endif 604 605 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= 606 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 607 SwathHeightY[k] = MaximumSwathHeightY[k]; 608 SwathHeightC[k] = MaximumSwathHeightC[k]; 609 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; 610 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; 611 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && 612 RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= 613 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 614 SwathHeightY[k] = MaximumSwathHeightY[k] / 2; 615 SwathHeightC[k] = MaximumSwathHeightC[k]; 616 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; 617 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; 618 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && 619 RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= 620 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 621 SwathHeightY[k] = MaximumSwathHeightY[k]; 622 SwathHeightC[k] = MaximumSwathHeightC[k] / 2; 623 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; 624 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; 625 } else { 626 SwathHeightY[k] = MaximumSwathHeightY[k] / 2; 627 SwathHeightC[k] = MaximumSwathHeightC[k] / 2; 628 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; 629 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; 630 } 631 632 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > 633 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) 634 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 && 635 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { 636 
*ViewportSizeSupport = false; 637 ViewportSizeSupportPerSurface[k] = false; 638 } else { 639 ViewportSizeSupportPerSurface[k] = true; 640 } 641 642 if (SwathHeightC[k] == 0) { 643 #ifdef __DML_VBA_DEBUG__ 644 dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k); 645 #endif 646 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024; 647 DETBufferSizeC[k] = 0; 648 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { 649 #ifdef __DML_VBA_DEBUG__ 650 dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k); 651 #endif 652 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2; 653 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2; 654 } else { 655 #ifdef __DML_VBA_DEBUG__ 656 dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k); 657 #endif 658 DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024); 659 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k]; 660 } 661 662 #ifdef __DML_VBA_DEBUG__ 663 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); 664 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]); 665 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, 666 k, RoundedUpMaxSwathSizeBytesY[k]); 667 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, 668 k, RoundedUpMaxSwathSizeBytesC[k]); 669 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY); 670 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC); 671 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 672 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 673 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]); 674 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k, 675 
ViewportSizeSupportPerSurface[k]); 676 #endif 677 678 } 679 } // CalculateSwathAndDETConfiguration 680 681 void dml32_CalculateSwathWidth( 682 bool ForceSingleDPP, 683 unsigned int NumberOfActiveSurfaces, 684 enum source_format_class SourcePixelFormat[], 685 enum dm_rotation_angle SourceRotation[], 686 bool ViewportStationary[], 687 unsigned int ViewportWidth[], 688 unsigned int ViewportHeight[], 689 unsigned int ViewportXStart[], 690 unsigned int ViewportYStart[], 691 unsigned int ViewportXStartC[], 692 unsigned int ViewportYStartC[], 693 unsigned int SurfaceWidthY[], 694 unsigned int SurfaceWidthC[], 695 unsigned int SurfaceHeightY[], 696 unsigned int SurfaceHeightC[], 697 enum odm_combine_mode ODMMode[], 698 unsigned int BytePerPixY[], 699 unsigned int BytePerPixC[], 700 unsigned int Read256BytesBlockHeightY[], 701 unsigned int Read256BytesBlockHeightC[], 702 unsigned int Read256BytesBlockWidthY[], 703 unsigned int Read256BytesBlockWidthC[], 704 unsigned int BlendingAndTiming[], 705 unsigned int HActive[], 706 double HRatio[], 707 unsigned int DPPPerSurface[], 708 709 /* Output */ 710 double SwathWidthdoubleDPPY[], 711 double SwathWidthdoubleDPPC[], 712 double SwathWidthY[], // per-pipe 713 double SwathWidthC[], // per-pipe 714 unsigned int MaximumSwathHeightY[], 715 unsigned int MaximumSwathHeightC[], 716 unsigned int swath_width_luma_ub[], // per-pipe 717 unsigned int swath_width_chroma_ub[]) // per-pipe 718 { 719 unsigned int k, j; 720 enum odm_combine_mode MainSurfaceODMMode; 721 722 unsigned int surface_width_ub_l; 723 unsigned int surface_height_ub_l; 724 unsigned int surface_width_ub_c = 0; 725 unsigned int surface_height_ub_c = 0; 726 727 #ifdef __DML_VBA_DEBUG__ 728 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 729 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 730 #endif 731 732 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 733 if (!IsVertical(SourceRotation[k])) 734 
SwathWidthdoubleDPPY[k] = ViewportWidth[k]; 735 else 736 SwathWidthdoubleDPPY[k] = ViewportHeight[k]; 737 738 #ifdef __DML_VBA_DEBUG__ 739 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); 740 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); 741 #endif 742 743 MainSurfaceODMMode = ODMMode[k]; 744 for (j = 0; j < NumberOfActiveSurfaces; ++j) { 745 if (BlendingAndTiming[k] == j) 746 MainSurfaceODMMode = ODMMode[j]; 747 } 748 749 if (ForceSingleDPP) { 750 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 751 } else { 752 if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) { 753 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], 754 dml_round(HActive[k] / 4.0 * HRatio[k])); 755 } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) { 756 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], 757 dml_round(HActive[k] / 2.0 * HRatio[k])); 758 } else if (DPPPerSurface[k] == 2) { 759 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2; 760 } else { 761 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 762 } 763 } 764 765 #ifdef __DML_VBA_DEBUG__ 766 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]); 767 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]); 768 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode); 769 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]); 770 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]); 771 #endif 772 773 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 774 SourcePixelFormat[k] == dm_420_12) { 775 SwathWidthC[k] = SwathWidthY[k] / 2; 776 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2; 777 } else { 778 SwathWidthC[k] = SwathWidthY[k]; 779 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k]; 780 } 781 782 if (ForceSingleDPP == true) { 783 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 784 SwathWidthC[k] = SwathWidthdoubleDPPC[k]; 785 } 786 787 surface_width_ub_l 
= dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); 788 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); 789 790 if (!IsVertical(SourceRotation[k])) { 791 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; 792 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; 793 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 794 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 795 dml_floor(ViewportXStart[k] + 796 SwathWidthY[k] + 797 Read256BytesBlockWidthY[k] - 1, 798 Read256BytesBlockWidthY[k]) - 799 dml_floor(ViewportXStart[k], 800 Read256BytesBlockWidthY[k])); 801 } else { 802 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 803 dml_ceil(SwathWidthY[k] - 1, 804 Read256BytesBlockWidthY[k]) + 805 Read256BytesBlockWidthY[k]); 806 } 807 if (BytePerPixC[k] > 0) { 808 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); 809 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 810 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, 811 dml_floor(ViewportXStartC[k] + SwathWidthC[k] + 812 Read256BytesBlockWidthC[k] - 1, 813 Read256BytesBlockWidthC[k]) - 814 dml_floor(ViewportXStartC[k], 815 Read256BytesBlockWidthC[k])); 816 } else { 817 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, 818 dml_ceil(SwathWidthC[k] - 1, 819 Read256BytesBlockWidthC[k]) + 820 Read256BytesBlockWidthC[k]); 821 } 822 } else { 823 swath_width_chroma_ub[k] = 0; 824 } 825 } else { 826 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; 827 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; 828 829 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 830 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] + 831 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, 832 Read256BytesBlockHeightY[k]) - 833 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k])); 834 } else { 835 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1, 836 Read256BytesBlockHeightY[k]) 
+ Read256BytesBlockHeightY[k]); 837 } 838 if (BytePerPixC[k] > 0) { 839 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); 840 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 841 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, 842 dml_floor(ViewportYStartC[k] + SwathWidthC[k] + 843 Read256BytesBlockHeightC[k] - 1, 844 Read256BytesBlockHeightC[k]) - 845 dml_floor(ViewportYStartC[k], 846 Read256BytesBlockHeightC[k])); 847 } else { 848 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, 849 dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + 850 Read256BytesBlockHeightC[k]); 851 } 852 } else { 853 swath_width_chroma_ub[k] = 0; 854 } 855 } 856 857 #ifdef __DML_VBA_DEBUG__ 858 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); 859 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l); 860 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c); 861 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c); 862 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]); 863 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]); 864 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]); 865 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]); 866 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]); 867 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]); 868 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]); 869 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]); 870 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]); 871 
dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]); 872 #endif 873 874 } 875 } // CalculateSwathWidth 876 877 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, 878 unsigned int TotalNumberOfActiveDPP, 879 bool NoChroma, 880 enum output_encoder_class Output, 881 enum dm_swizzle_mode SurfaceTiling, 882 bool CompBufReservedSpaceNeedAdjustment, 883 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) 884 { 885 bool ret_val = false; 886 887 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && 888 TotalNumberOfActiveDPP == 1 && NoChroma); 889 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) 890 ret_val = false; 891 892 if (SurfaceTiling == dm_sw_linear) 893 ret_val = false; 894 895 if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) 896 ret_val = false; 897 898 #ifdef __DML_VBA_DEBUG__ 899 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment); 900 dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); 901 dml_print("DML::%s: ret_val = %d\n", __func__, ret_val); 902 #endif 903 904 return (ret_val); 905 } 906 907 void dml32_CalculateDETBufferSize( 908 unsigned int DETSizeOverride[], 909 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 910 bool ForceSingleDPP, 911 unsigned int NumberOfActiveSurfaces, 912 bool UnboundedRequestEnabled, 913 unsigned int nomDETInKByte, 914 unsigned int MaxTotalDETInKByte, 915 unsigned int ConfigReturnBufferSizeInKByte, 916 unsigned int MinCompressedBufferSizeInKByte, 917 unsigned int CompressedBufferSegmentSizeInkByteFinal, 918 enum source_format_class SourcePixelFormat[], 919 double ReadBandwidthLuma[], 920 double ReadBandwidthChroma[], 921 unsigned int 
RoundedUpMaxSwathSizeBytesY[], 922 unsigned int RoundedUpMaxSwathSizeBytesC[], 923 unsigned int DPPPerSurface[], 924 /* Output */ 925 unsigned int DETBufferSizeInKByte[], 926 unsigned int *CompressedBufferSizeInkByte) 927 { 928 unsigned int DETBufferSizePoolInKByte; 929 unsigned int NextDETBufferPieceInKByte; 930 bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX]; 931 bool NextPotentialSurfaceToAssignDETPieceFound; 932 unsigned int NextSurfaceToAssignDETPiece; 933 double TotalBandwidth; 934 double BandwidthOfSurfacesNotAssignedDETPiece; 935 unsigned int max_minDET; 936 unsigned int minDET; 937 unsigned int minDET_pipe; 938 unsigned int j, k; 939 940 #ifdef __DML_VBA_DEBUG__ 941 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 942 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 943 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 944 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); 945 dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte); 946 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 947 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte); 948 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__, 949 CompressedBufferSegmentSizeInkByteFinal); 950 #endif 951 952 // Note: Will use default det size if that fits 2 swaths 953 if (UnboundedRequestEnabled) { 954 if (DETSizeOverride[0] > 0) { 955 DETBufferSizeInKByte[0] = DETSizeOverride[0]; 956 } else { 957 DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 * 958 ((double) RoundedUpMaxSwathSizeBytesY[0] + 959 (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0)); 960 } 961 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0]; 962 } else { 963 DETBufferSizePoolInKByte = MaxTotalDETInKByte; 964 
for (k = 0; k < NumberOfActiveSurfaces; ++k) { 965 DETBufferSizeInKByte[k] = nomDETInKByte; 966 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 967 SourcePixelFormat[k] == dm_420_12) { 968 max_minDET = nomDETInKByte - 64; 969 } else { 970 max_minDET = nomDETInKByte; 971 } 972 minDET = 128; 973 minDET_pipe = 0; 974 975 // add DET resource until can hold 2 full swaths 976 while (minDET <= max_minDET && minDET_pipe == 0) { 977 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] + 978 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET) 979 minDET_pipe = minDET; 980 minDET = minDET + 64; 981 } 982 983 #ifdef __DML_VBA_DEBUG__ 984 dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET); 985 dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET); 986 dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe); 987 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, 988 RoundedUpMaxSwathSizeBytesY[k]); 989 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, 990 RoundedUpMaxSwathSizeBytesC[k]); 991 #endif 992 993 if (minDET_pipe == 0) { 994 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] + 995 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64)); 996 #ifdef __DML_VBA_DEBUG__ 997 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n", 998 __func__, k, minDET_pipe); 999 #endif 1000 } 1001 1002 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { 1003 DETBufferSizeInKByte[k] = 0; 1004 } else if (DETSizeOverride[k] > 0) { 1005 DETBufferSizeInKByte[k] = DETSizeOverride[k]; 1006 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - 1007 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k]; 1008 } else if ((ForceSingleDPP ? 
1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) { 1009 DETBufferSizeInKByte[k] = minDET_pipe; 1010 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - 1011 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe; 1012 } 1013 1014 #ifdef __DML_VBA_DEBUG__ 1015 dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 1016 dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]); 1017 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 1018 dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte); 1019 #endif 1020 } 1021 1022 TotalBandwidth = 0; 1023 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1024 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) 1025 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 1026 } 1027 #ifdef __DML_VBA_DEBUG__ 1028 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__); 1029 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) 1030 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 1031 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__); 1032 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth); 1033 #endif 1034 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth; 1035 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1036 1037 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { 1038 DETPieceAssignedToThisSurfaceAlready[k] = true; 1039 } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 
1 : DPPPerSurface[k]) * 1040 (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >= 1041 ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) { 1042 DETPieceAssignedToThisSurfaceAlready[k] = true; 1043 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - 1044 ReadBandwidthLuma[k] - ReadBandwidthChroma[k]; 1045 } else { 1046 DETPieceAssignedToThisSurfaceAlready[k] = false; 1047 } 1048 #ifdef __DML_VBA_DEBUG__ 1049 dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k, 1050 DETPieceAssignedToThisSurfaceAlready[k]); 1051 dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, 1052 BandwidthOfSurfacesNotAssignedDETPiece); 1053 #endif 1054 } 1055 1056 for (j = 0; j < NumberOfActiveSurfaces; ++j) { 1057 NextPotentialSurfaceToAssignDETPieceFound = false; 1058 NextSurfaceToAssignDETPiece = 0; 1059 1060 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1061 #ifdef __DML_VBA_DEBUG__ 1062 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k, 1063 ReadBandwidthLuma[k]); 1064 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k, 1065 ReadBandwidthChroma[k]); 1066 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, 1067 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); 1068 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, 1069 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1070 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k, 1071 NextSurfaceToAssignDETPiece); 1072 #endif 1073 if (!DETPieceAssignedToThisSurfaceAlready[k] && 1074 (!NextPotentialSurfaceToAssignDETPieceFound || 1075 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < 1076 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1077 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) { 1078 NextSurfaceToAssignDETPiece = k; 1079 NextPotentialSurfaceToAssignDETPieceFound = 
true; 1080 } 1081 #ifdef __DML_VBA_DEBUG__ 1082 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n", 1083 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); 1084 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n", 1085 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); 1086 #endif 1087 } 1088 1089 if (NextPotentialSurfaceToAssignDETPieceFound) { 1090 // Note: To show the banker's rounding behavior in VBA and also the fact 1091 // that the DET buffer size varies due to precision issue 1092 // 1093 //double tmp1 = ((double) DETBufferSizePoolInKByte * 1094 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1095 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1096 // BandwidthOfSurfacesNotAssignedDETPiece / 1097 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1098 //double tmp2 = dml_round((double) DETBufferSizePoolInKByte * 1099 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1100 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1101 //BandwidthOfSurfacesNotAssignedDETPiece / 1102 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1103 // 1104 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1); 1105 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2); 1106 1107 NextDETBufferPieceInKByte = dml_min( 1108 dml_round((double) DETBufferSizePoolInKByte * 1109 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1110 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1111 BandwidthOfSurfacesNotAssignedDETPiece / 1112 ((ForceSingleDPP ? 1 : 1113 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) * 1114 (ForceSingleDPP ? 1 : 1115 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0, 1116 dml_floor((double) DETBufferSizePoolInKByte, 1117 (ForceSingleDPP ? 1 : 1118 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1119 1120 // Above calculation can assign the entire DET buffer allocation to a single pipe. 
1121 // We should limit the per-pipe DET size to the nominal / max per pipe. 1122 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { 1123 if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] < 1124 nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { 1125 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) - 1126 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]; 1127 } else { 1128 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] 1129 // already has the max per-pipe value 1130 NextDETBufferPieceInKByte = 0; 1131 } 1132 } 1133 1134 #ifdef __DML_VBA_DEBUG__ 1135 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j, 1136 DETBufferSizePoolInKByte); 1137 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, 1138 NextSurfaceToAssignDETPiece); 1139 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j, 1140 NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); 1141 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j, 1142 NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1143 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", 1144 __func__, j, BandwidthOfSurfacesNotAssignedDETPiece); 1145 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j, 1146 NextDETBufferPieceInKByte); 1147 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ", 1148 __func__, j, NextSurfaceToAssignDETPiece, 1149 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); 1150 #endif 1151 1152 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = 1153 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] 1154 + NextDETBufferPieceInKByte 1155 / (ForceSingleDPP ? 
1 : DPPPerSurface[NextSurfaceToAssignDETPiece]); 1156 #ifdef __DML_VBA_DEBUG__ 1157 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); 1158 #endif 1159 1160 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte; 1161 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true; 1162 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - 1163 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1164 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1165 } 1166 } 1167 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte; 1168 } 1169 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; 1170 1171 #ifdef __DML_VBA_DEBUG__ 1172 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__); 1173 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 1174 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) { 1175 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n", 1176 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); 1177 } 1178 #endif 1179 } // CalculateDETBufferSize 1180 1181 void dml32_CalculateODMMode( 1182 unsigned int MaximumPixelsPerLinePerDSCUnit, 1183 unsigned int HActive, 1184 enum output_format_class OutFormat, 1185 enum output_encoder_class Output, 1186 enum odm_combine_policy ODMUse, 1187 double StateDispclk, 1188 double MaxDispclk, 1189 bool DSCEnable, 1190 unsigned int TotalNumberOfActiveDPP, 1191 unsigned int MaxNumDPP, 1192 double PixelClock, 1193 double DISPCLKDPPCLKDSCCLKDownSpreading, 1194 double DISPCLKRampingMargin, 1195 double DISPCLKDPPCLKVCOSpeed, 1196 unsigned int NumberOfDSCSlices, 1197 1198 /* Output */ 1199 bool *TotalAvailablePipesSupport, 1200 unsigned int *NumberOfDPP, 1201 enum odm_combine_mode *ODMMode, 1202 double *RequiredDISPCLKPerSurface) 1203 { 1204 1205 double 
SurfaceRequiredDISPCLKWithoutODMCombine; 1206 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1207 double SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1208 1209 SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled, 1210 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1211 MaxDispclk); 1212 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1, 1213 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1214 MaxDispclk); 1215 SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1, 1216 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1217 MaxDispclk); 1218 *TotalAvailablePipesSupport = true; 1219 *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable 1220 1221 if (ODMUse == dm_odm_combine_policy_none) 1222 *ODMMode = dm_odm_combine_mode_disabled; 1223 1224 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine; 1225 *NumberOfDPP = 0; 1226 1227 // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care?? 
1228 // (ODMUse == "" || ODMUse == "CombineAsNeeded") 1229 1230 if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 || 1231 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk || 1232 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)) 1233 || NumberOfDSCSlices > 8)))) { 1234 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) { 1235 *ODMMode = dm_odm_combine_mode_4to1; 1236 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1237 *NumberOfDPP = 4; 1238 } else { 1239 *TotalAvailablePipesSupport = false; 1240 } 1241 } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 || 1242 (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk && 1243 SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) || 1244 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)) 1245 || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) { 1246 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) { 1247 *ODMMode = dm_odm_combine_mode_2to1; 1248 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1249 *NumberOfDPP = 2; 1250 } else { 1251 *TotalAvailablePipesSupport = false; 1252 } 1253 } else { 1254 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) 1255 *NumberOfDPP = 1; 1256 else 1257 *TotalAvailablePipesSupport = false; 1258 } 1259 if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH && 1260 ODMUse != dm_odm_combine_policy_4to1) { 1261 if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) { 1262 *ODMMode = dm_odm_combine_mode_disabled; 1263 *NumberOfDPP = 0; 1264 *TotalAvailablePipesSupport = false; 1265 } else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 || 1266 *ODMMode == dm_odm_combine_mode_4to1) { 1267 *ODMMode = dm_odm_combine_mode_4to1; 1268 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1269 *NumberOfDPP = 4; 1270 } else { 1271 *ODMMode = dm_odm_combine_mode_2to1; 1272 *RequiredDISPCLKPerSurface = 
SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1273 *NumberOfDPP = 2; 1274 } 1275 } 1276 if (Output == dm_hdmi && OutFormat == dm_420 && 1277 HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) { 1278 *ODMMode = dm_odm_combine_mode_disabled; 1279 *NumberOfDPP = 0; 1280 *TotalAvailablePipesSupport = false; 1281 } 1282 } 1283 1284 double dml32_CalculateRequiredDispclk( 1285 enum odm_combine_mode ODMMode, 1286 double PixelClock, 1287 double DISPCLKDPPCLKDSCCLKDownSpreading, 1288 double DISPCLKRampingMargin, 1289 double DISPCLKDPPCLKVCOSpeed, 1290 double MaxDispclk) 1291 { 1292 double RequiredDispclk = 0.; 1293 double PixelClockAfterODM; 1294 double DISPCLKWithRampingRoundedToDFSGranularity; 1295 double DISPCLKWithoutRampingRoundedToDFSGranularity; 1296 double MaxDispclkRoundedDownToDFSGranularity; 1297 1298 if (ODMMode == dm_odm_combine_mode_4to1) 1299 PixelClockAfterODM = PixelClock / 4; 1300 else if (ODMMode == dm_odm_combine_mode_2to1) 1301 PixelClockAfterODM = PixelClock / 2; 1302 else 1303 PixelClockAfterODM = PixelClock; 1304 1305 1306 DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( 1307 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100) 1308 * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed); 1309 1310 DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( 1311 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed); 1312 1313 MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed); 1314 1315 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) 1316 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity; 1317 else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) 1318 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity; 1319 else 1320 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity; 1321 1322 return RequiredDispclk; 
1323 } 1324 1325 double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed) 1326 { 1327 if (Clock <= 0.0) 1328 return 0.0; 1329 1330 if (round_up) 1331 return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0); 1332 else 1333 return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0); 1334 } 1335 1336 void dml32_CalculateOutputLink( 1337 double PHYCLKPerState, 1338 double PHYCLKD18PerState, 1339 double PHYCLKD32PerState, 1340 double Downspreading, 1341 bool IsMainSurfaceUsingTheIndicatedTiming, 1342 enum output_encoder_class Output, 1343 enum output_format_class OutputFormat, 1344 unsigned int HTotal, 1345 unsigned int HActive, 1346 double PixelClockBackEnd, 1347 double ForcedOutputLinkBPP, 1348 unsigned int DSCInputBitPerComponent, 1349 unsigned int NumberOfDSCSlices, 1350 double AudioSampleRate, 1351 unsigned int AudioSampleLayout, 1352 enum odm_combine_mode ODMModeNoDSC, 1353 enum odm_combine_mode ODMModeDSC, 1354 bool DSCEnable, 1355 unsigned int OutputLinkDPLanes, 1356 enum dm_output_link_dp_rate OutputLinkDPRate, 1357 1358 /* Output */ 1359 bool *RequiresDSC, 1360 double *RequiresFEC, 1361 double *OutBpp, 1362 enum dm_output_type *OutputType, 1363 enum dm_output_rate *OutputRate, 1364 unsigned int *RequiredSlots) 1365 { 1366 bool LinkDSCEnable; 1367 unsigned int dummy; 1368 *RequiresDSC = false; 1369 *RequiresFEC = false; 1370 *OutBpp = 0; 1371 *OutputType = dm_output_type_unknown; 1372 *OutputRate = dm_output_rate_unknown; 1373 1374 if (IsMainSurfaceUsingTheIndicatedTiming) { 1375 if (Output == dm_hdmi) { 1376 *RequiresDSC = false; 1377 *RequiresFEC = false; 1378 *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive, 1379 PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat, 1380 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1381 ODMModeNoDSC, ODMModeDSC, &dummy); 1382 //OutputTypeAndRate = "HDMI"; 1383 *OutputType = dm_output_type_hdmi; 1384 1385 } 
else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
			/*
			 * Initial DSC/FEC state follows DSCEnable. DP2.0 always requires
			 * FEC; plain DP requires it only together with DSC; eDP never
			 * does at this point.
			 */
			*RequiresDSC = DSCEnable;
			LinkDSCEnable = DSCEnable;
			*RequiresFEC = (Output == dm_dp2p0) || (DSCEnable && Output == dm_dp);

			/*
			 * Rate ladder: each step runs only if its rate is selected (or the
			 * rate is "na"), no earlier step produced a BPP, and the PHY clock
			 * reaches the step's threshold. When a step yields no BPP, DSC is
			 * enabled and no BPP is forced, DSC is switched on and the step is
			 * retried; the first two steps of each ladder additionally require
			 * that the PHY clock cannot reach the next step.
			 *
			 * NOTE(review): 10000 / 32 and 13500 / 32 are integer divisions
			 * (312 and 421), not 312.5 / 421.875 - confirm this is intended.
			 */
			if (Output == dm_dp2p0) {
				*OutBpp = 0;

				/* UHBR10 */
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
						PHYCLKD32PerState >= 10000 / 32) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable &&
							ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " UHBR10";
					*OutputType = dm_output_type_dp2p0;
					*OutputRate = dm_output_rate_dp_rate_uhbr10;
				}

				/* UHBR13.5 */
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
						*OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);

					if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable &&
							ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " UHBR13p5";
					*OutputType = dm_output_type_dp2p0;
					*OutputRate = dm_output_rate_dp_rate_uhbr13p5;
				}

				/* UHBR20 */
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
						*OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					if (*OutBpp == 0 && DSCEnable && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " UHBR20";
					*OutputType = dm_output_type_dp2p0;
					*OutputRate = dm_output_rate_dp_rate_uhbr20;
				}
			} else {
				*OutBpp = 0;

				/* HBR */
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
						PHYCLKPerState >= 270) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable &&
							ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						if (Output == dm_dp)
							*RequiresFEC = true;
						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " HBR";
					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
					*OutputRate = dm_output_rate_dp_rate_hbr;
				}

				/* HBR2 */
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
						*OutBpp == 0 && PHYCLKPerState >= 540) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);

					if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable &&
							ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						if (Output == dm_dp)
							*RequiresFEC = true;

						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " HBR2";
					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
					*OutputRate = dm_output_rate_dp_rate_hbr2;
				}

				/* HBR3 */
				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) &&
						*OutBpp == 0 && PHYCLKPerState >= 810) {
					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
							ForcedOutputLinkBPP, LinkDSCEnable, Output,
							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
							AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
							RequiredSlots);

					if (*OutBpp == 0 && DSCEnable && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						if (Output == dm_dp)
							*RequiresFEC = true;

						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
								ForcedOutputLinkBPP, LinkDSCEnable, Output,
								OutputFormat, DSCInputBitPerComponent,
								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " HBR3";
					*OutputType = (Output == dm_dp) ?
dm_output_type_dp : dm_output_type_edp; 1544 *OutputRate = dm_output_rate_dp_rate_hbr3; 1545 } 1546 } 1547 } 1548 } 1549 } 1550 1551 void dml32_CalculateDPPCLK( 1552 unsigned int NumberOfActiveSurfaces, 1553 double DISPCLKDPPCLKDSCCLKDownSpreading, 1554 double DISPCLKDPPCLKVCOSpeed, 1555 double DPPCLKUsingSingleDPP[], 1556 unsigned int DPPPerSurface[], 1557 1558 /* output */ 1559 double *GlobalDPPCLK, 1560 double Dppclk[]) 1561 { 1562 unsigned int k; 1563 *GlobalDPPCLK = 0; 1564 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1565 Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100); 1566 *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]); 1567 } 1568 *GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed); 1569 for (k = 0; k < NumberOfActiveSurfaces; ++k) 1570 Dppclk[k] = *GlobalDPPCLK / 255 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0); 1571 } 1572 1573 double dml32_TruncToValidBPP( 1574 double LinkBitRate, 1575 unsigned int Lanes, 1576 unsigned int HTotal, 1577 unsigned int HActive, 1578 double PixelClock, 1579 double DesiredBPP, 1580 bool DSCEnable, 1581 enum output_encoder_class Output, 1582 enum output_format_class Format, 1583 unsigned int DSCInputBitPerComponent, 1584 unsigned int DSCSlices, 1585 unsigned int AudioRate, 1586 unsigned int AudioLayout, 1587 enum odm_combine_mode ODMModeNoDSC, 1588 enum odm_combine_mode ODMModeDSC, 1589 /* Output */ 1590 unsigned int *RequiredSlots) 1591 { 1592 double MaxLinkBPP; 1593 unsigned int MinDSCBPP; 1594 double MaxDSCBPP; 1595 unsigned int NonDSCBPP0; 1596 unsigned int NonDSCBPP1; 1597 unsigned int NonDSCBPP2; 1598 unsigned int NonDSCBPP3; 1599 1600 if (Format == dm_420) { 1601 NonDSCBPP0 = 12; 1602 NonDSCBPP1 = 15; 1603 NonDSCBPP2 = 18; 1604 MinDSCBPP = 6; 1605 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; 1606 } else if (Format == dm_444) { 1607 NonDSCBPP0 = 18; 1608 NonDSCBPP1 = 24; 1609 NonDSCBPP2 = 30; 1610 NonDSCBPP3 = 36; 
1611 MinDSCBPP = 8; 1612 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; 1613 } else { 1614 if (Output == dm_hdmi) { 1615 NonDSCBPP0 = 24; 1616 NonDSCBPP1 = 24; 1617 NonDSCBPP2 = 24; 1618 } else { 1619 NonDSCBPP0 = 16; 1620 NonDSCBPP1 = 20; 1621 NonDSCBPP2 = 24; 1622 } 1623 if (Format == dm_n422) { 1624 MinDSCBPP = 7; 1625 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; 1626 } else { 1627 MinDSCBPP = 8; 1628 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; 1629 } 1630 } 1631 if (Output == dm_dp2p0) { 1632 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540; 1633 } else if (DSCEnable && Output == dm_dp) { 1634 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); 1635 } else { 1636 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; 1637 } 1638 1639 if (DSCEnable) { 1640 if (ODMModeDSC == dm_odm_combine_mode_4to1) 1641 MaxLinkBPP = dml_min(MaxLinkBPP, 16); 1642 else if (ODMModeDSC == dm_odm_combine_mode_2to1) 1643 MaxLinkBPP = dml_min(MaxLinkBPP, 32); 1644 else if (ODMModeDSC == dm_odm_split_mode_1to2) 1645 MaxLinkBPP = 2 * MaxLinkBPP; 1646 } else { 1647 if (ODMModeNoDSC == dm_odm_combine_mode_4to1) 1648 MaxLinkBPP = dml_min(MaxLinkBPP, 16); 1649 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1) 1650 MaxLinkBPP = dml_min(MaxLinkBPP, 32); 1651 else if (ODMModeNoDSC == dm_odm_split_mode_1to2) 1652 MaxLinkBPP = 2 * MaxLinkBPP; 1653 } 1654 1655 if (DesiredBPP == 0) { 1656 if (DSCEnable) { 1657 if (MaxLinkBPP < MinDSCBPP) 1658 return BPP_INVALID; 1659 else if (MaxLinkBPP >= MaxDSCBPP) 1660 return MaxDSCBPP; 1661 else 1662 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; 1663 } else { 1664 if (MaxLinkBPP >= NonDSCBPP3) 1665 return NonDSCBPP3; 1666 else if (MaxLinkBPP >= NonDSCBPP2) 1667 return NonDSCBPP2; 1668 else if (MaxLinkBPP >= NonDSCBPP1) 1669 return NonDSCBPP1; 1670 else if (MaxLinkBPP >= NonDSCBPP0) 1671 return 16.0; 1672 else 1673 return BPP_INVALID; 1674 } 1675 } else { 1676 if 
(!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || 1677 DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) || 1678 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) 1679 return BPP_INVALID; 1680 else 1681 return DesiredBPP; 1682 } 1683 1684 *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1); 1685 1686 return BPP_INVALID; 1687 } // TruncToValidBPP 1688 1689 double dml32_RequiredDTBCLK( 1690 bool DSCEnable, 1691 double PixelClock, 1692 enum output_format_class OutputFormat, 1693 double OutputBpp, 1694 unsigned int DSCSlices, 1695 unsigned int HTotal, 1696 unsigned int HActive, 1697 unsigned int AudioRate, 1698 unsigned int AudioLayout) 1699 { 1700 double PixelWordRate; 1701 double HCActive; 1702 double HCBlank; 1703 double AverageTribyteRate; 1704 double HActiveTribyteRate; 1705 1706 if (DSCEnable != true) 1707 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); 1708 1709 PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); 1710 HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * 1711 dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); 1712 HCBlank = 64 + 32 * 1713 dml_ceil(AudioRate * (AudioLayout == 1 ? 
1 : 0.25) * HTotal / (PixelClock * 1000), 1); 1714 AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; 1715 HActiveTribyteRate = PixelWordRate * HCActive / HActive; 1716 return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; 1717 } 1718 1719 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, 1720 enum odm_combine_mode ODMMode, 1721 unsigned int DSCInputBitPerComponent, 1722 double OutputBpp, 1723 unsigned int HActive, 1724 unsigned int HTotal, 1725 unsigned int NumberOfDSCSlices, 1726 enum output_format_class OutputFormat, 1727 enum output_encoder_class Output, 1728 double PixelClock, 1729 double PixelClockBackEnd, 1730 double dsc_delay_factor_wa) 1731 { 1732 unsigned int DSCDelayRequirement_val; 1733 1734 if (DSCEnabled == true && OutputBpp != 0) { 1735 if (ODMMode == dm_odm_combine_mode_4to1) { 1736 DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1737 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4, 1738 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); 1739 } else if (ODMMode == dm_odm_combine_mode_2to1) { 1740 DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1741 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2, 1742 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); 1743 } else { 1744 DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1745 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices, 1746 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output); 1747 } 1748 1749 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) * 1750 dml_ceil((double)DSCDelayRequirement_val / HActive, 1); 1751 1752 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd; 1753 1754 } else { 1755 DSCDelayRequirement_val = 0; 1756 } 1757 1758 #ifdef __DML_VBA_DEBUG__ 
1759 dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled); 1760 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); 1761 dml_print("DML::%s: HActive = %d\n", __func__, HActive); 1762 dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat); 1763 dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent); 1764 dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices); 1765 dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val); 1766 #endif 1767 1768 return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1); 1769 } 1770 1771 void dml32_CalculateSurfaceSizeInMall( 1772 unsigned int NumberOfActiveSurfaces, 1773 unsigned int MALLAllocatedForDCN, 1774 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], 1775 bool DCCEnable[], 1776 bool ViewportStationary[], 1777 unsigned int ViewportXStartY[], 1778 unsigned int ViewportYStartY[], 1779 unsigned int ViewportXStartC[], 1780 unsigned int ViewportYStartC[], 1781 unsigned int ViewportWidthY[], 1782 unsigned int ViewportHeightY[], 1783 unsigned int BytesPerPixelY[], 1784 unsigned int ViewportWidthC[], 1785 unsigned int ViewportHeightC[], 1786 unsigned int BytesPerPixelC[], 1787 unsigned int SurfaceWidthY[], 1788 unsigned int SurfaceWidthC[], 1789 unsigned int SurfaceHeightY[], 1790 unsigned int SurfaceHeightC[], 1791 unsigned int Read256BytesBlockWidthY[], 1792 unsigned int Read256BytesBlockWidthC[], 1793 unsigned int Read256BytesBlockHeightY[], 1794 unsigned int Read256BytesBlockHeightC[], 1795 unsigned int ReadBlockWidthY[], 1796 unsigned int ReadBlockWidthC[], 1797 unsigned int ReadBlockHeightY[], 1798 unsigned int ReadBlockHeightC[], 1799 1800 /* Output */ 1801 unsigned int SurfaceSizeInMALL[], 1802 bool *ExceededMALLSize) 1803 { 1804 unsigned int TotalSurfaceSizeInMALL = 0; 1805 unsigned int k; 1806 1807 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1808 if (ViewportStationary[k]) { 1809 
SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]), 1810 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1, 1811 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k], 1812 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k], 1813 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] + 1814 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - 1815 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k]; 1816 1817 if (ReadBlockWidthC[k] > 0) { 1818 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1819 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]), 1820 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 1821 ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - 1822 dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) * 1823 dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]), 1824 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 1825 ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - 1826 dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) * 1827 BytesPerPixelC[k]; 1828 } 1829 if (DCCEnable[k] == true) { 1830 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1831 dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]), 1832 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 * 1833 Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) 1834 - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k])) 1835 * dml_min(dml_ceil(SurfaceHeightY[k], 8 * 1836 Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] + 1837 ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 * 1838 Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 1839 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256; 1840 if (Read256BytesBlockWidthC[k] > 0) { 1841 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1842 dml_min(dml_ceil(SurfaceWidthC[k], 8 * 1843 Read256BytesBlockWidthC[k]), 1844 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8 1845 * 
Read256BytesBlockWidthC[k] - 1, 8 * 1846 Read256BytesBlockWidthC[k]) - 1847 dml_floor(ViewportXStartC[k], 8 * 1848 Read256BytesBlockWidthC[k])) * 1849 dml_min(dml_ceil(SurfaceHeightC[k], 8 * 1850 Read256BytesBlockHeightC[k]), 1851 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 1852 8 * Read256BytesBlockHeightC[k] - 1, 8 * 1853 Read256BytesBlockHeightC[k]) - 1854 dml_floor(ViewportYStartC[k], 8 * 1855 Read256BytesBlockHeightC[k])) * 1856 BytesPerPixelC[k] / 256; 1857 } 1858 } 1859 } else { 1860 SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 1861 ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * 1862 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 1863 ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * 1864 BytesPerPixelY[k]; 1865 if (ReadBlockWidthC[k] > 0) { 1866 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1867 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 1868 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * 1869 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 1870 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * 1871 BytesPerPixelC[k]; 1872 } 1873 if (DCCEnable[k] == true) { 1874 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1875 dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 * 1876 Read256BytesBlockWidthY[k] - 1), 8 * 1877 Read256BytesBlockWidthY[k]) * 1878 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 * 1879 Read256BytesBlockHeightY[k] - 1), 8 * 1880 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256; 1881 1882 if (Read256BytesBlockWidthC[k] > 0) { 1883 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1884 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 * 1885 Read256BytesBlockWidthC[k] - 1), 8 * 1886 Read256BytesBlockWidthC[k]) * 1887 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 * 1888 Read256BytesBlockHeightC[k] - 1), 8 * 1889 Read256BytesBlockHeightC[k]) * 1890 BytesPerPixelC[k] / 256; 1891 } 1892 } 1893 } 1894 } 1895 1896 for (k = 0; k < 
NumberOfActiveSurfaces; ++k) { 1897 if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable) 1898 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; 1899 } 1900 *ExceededMALLSize = (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024); 1901 } // CalculateSurfaceSizeInMall 1902 1903 void dml32_CalculateVMRowAndSwath( 1904 unsigned int NumberOfActiveSurfaces, 1905 DmlPipe myPipe[], 1906 unsigned int SurfaceSizeInMALL[], 1907 unsigned int PTEBufferSizeInRequestsLuma, 1908 unsigned int PTEBufferSizeInRequestsChroma, 1909 unsigned int DCCMetaBufferSizeBytes, 1910 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], 1911 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 1912 unsigned int MALLAllocatedForDCN, 1913 double SwathWidthY[], 1914 double SwathWidthC[], 1915 bool GPUVMEnable, 1916 bool HostVMEnable, 1917 unsigned int HostVMMaxNonCachedPageTableLevels, 1918 unsigned int GPUVMMaxPageTableLevels, 1919 unsigned int GPUVMMinPageSizeKBytes[], 1920 unsigned int HostVMMinPageSize, 1921 1922 /* Output */ 1923 bool PTEBufferSizeNotExceeded[], 1924 bool DCCMetaBufferSizeNotExceeded[], 1925 unsigned int dpte_row_width_luma_ub[], 1926 unsigned int dpte_row_width_chroma_ub[], 1927 unsigned int dpte_row_height_luma[], 1928 unsigned int dpte_row_height_chroma[], 1929 unsigned int dpte_row_height_linear_luma[], // VBA_DELTA 1930 unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA 1931 unsigned int meta_req_width[], 1932 unsigned int meta_req_width_chroma[], 1933 unsigned int meta_req_height[], 1934 unsigned int meta_req_height_chroma[], 1935 unsigned int meta_row_width[], 1936 unsigned int meta_row_width_chroma[], 1937 unsigned int meta_row_height[], 1938 unsigned int meta_row_height_chroma[], 1939 unsigned int vm_group_bytes[], 1940 unsigned int dpte_group_bytes[], 1941 unsigned int PixelPTEReqWidthY[], 1942 unsigned int PixelPTEReqHeightY[], 1943 unsigned int PTERequestSizeY[], 1944 unsigned int 
PixelPTEReqWidthC[], 1945 unsigned int PixelPTEReqHeightC[], 1946 unsigned int PTERequestSizeC[], 1947 unsigned int dpde0_bytes_per_frame_ub_l[], 1948 unsigned int meta_pte_bytes_per_frame_ub_l[], 1949 unsigned int dpde0_bytes_per_frame_ub_c[], 1950 unsigned int meta_pte_bytes_per_frame_ub_c[], 1951 double PrefetchSourceLinesY[], 1952 double PrefetchSourceLinesC[], 1953 double VInitPreFillY[], 1954 double VInitPreFillC[], 1955 unsigned int MaxNumSwathY[], 1956 unsigned int MaxNumSwathC[], 1957 double meta_row_bw[], 1958 double dpte_row_bw[], 1959 double PixelPTEBytesPerRow[], 1960 double PDEAndMetaPTEBytesFrame[], 1961 double MetaRowByte[], 1962 bool use_one_row_for_frame[], 1963 bool use_one_row_for_frame_flip[], 1964 bool UsesMALLForStaticScreen[], 1965 bool PTE_BUFFER_MODE[], 1966 unsigned int BIGK_FRAGMENT_SIZE[]) 1967 { 1968 unsigned int k; 1969 unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; 1970 unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; 1971 unsigned int PDEAndMetaPTEBytesFrameY; 1972 unsigned int PDEAndMetaPTEBytesFrameC; 1973 unsigned int MetaRowByteY[DC__NUM_DPP__MAX]; 1974 unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; 1975 unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; 1976 unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; 1977 unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; 1978 unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; 1979 unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; 1980 unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; 1981 unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; 1982 unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; 1983 bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; 1984 1985 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1986 if (HostVMEnable == true) { 1987 vm_group_bytes[k] = 512; 1988 dpte_group_bytes[k] = 512; 1989 } else if 
(GPUVMEnable == true) { 1990 vm_group_bytes[k] = 2048; 1991 if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation)) 1992 dpte_group_bytes[k] = 512; 1993 else 1994 dpte_group_bytes[k] = 2048; 1995 } else { 1996 vm_group_bytes[k] = 0; 1997 dpte_group_bytes[k] = 0; 1998 } 1999 2000 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 || 2001 myPipe[k].SourcePixelFormat == dm_420_12 || 2002 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) { 2003 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) && 2004 !IsVertical(myPipe[k].SourceRotation)) { 2005 PTEBufferSizeInRequestsForLuma[k] = 2006 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2; 2007 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k]; 2008 } else { 2009 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; 2010 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; 2011 } 2012 2013 PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( 2014 myPipe[k].ViewportStationary, 2015 myPipe[k].DCCEnable, 2016 myPipe[k].DPPPerSurface, 2017 myPipe[k].BlockHeight256BytesC, 2018 myPipe[k].BlockWidth256BytesC, 2019 myPipe[k].SourcePixelFormat, 2020 myPipe[k].SurfaceTiling, 2021 myPipe[k].BytePerPixelC, 2022 myPipe[k].SourceRotation, 2023 SwathWidthC[k], 2024 myPipe[k].ViewportHeightChroma, 2025 myPipe[k].ViewportXStartC, 2026 myPipe[k].ViewportYStartC, 2027 GPUVMEnable, 2028 HostVMEnable, 2029 HostVMMaxNonCachedPageTableLevels, 2030 GPUVMMaxPageTableLevels, 2031 GPUVMMinPageSizeKBytes[k], 2032 HostVMMinPageSize, 2033 PTEBufferSizeInRequestsForChroma[k], 2034 myPipe[k].PitchC, 2035 myPipe[k].DCCMetaPitchC, 2036 myPipe[k].BlockWidthC, 2037 myPipe[k].BlockHeightC, 2038 2039 /* Output */ 2040 &MetaRowByteC[k], 2041 &PixelPTEBytesPerRowC[k], 2042 &dpte_row_width_chroma_ub[k], 2043 &dpte_row_height_chroma[k], 2044 &dpte_row_height_linear_chroma[k], 2045 
&PixelPTEBytesPerRowC_one_row_per_frame[k], 2046 &dpte_row_width_chroma_ub_one_row_per_frame[k], 2047 &dpte_row_height_chroma_one_row_per_frame[k], 2048 &meta_req_width_chroma[k], 2049 &meta_req_height_chroma[k], 2050 &meta_row_width_chroma[k], 2051 &meta_row_height_chroma[k], 2052 &PixelPTEReqWidthC[k], 2053 &PixelPTEReqHeightC[k], 2054 &PTERequestSizeC[k], 2055 &dpde0_bytes_per_frame_ub_c[k], 2056 &meta_pte_bytes_per_frame_ub_c[k]); 2057 2058 PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines( 2059 myPipe[k].VRatioChroma, 2060 myPipe[k].VTapsChroma, 2061 myPipe[k].InterlaceEnable, 2062 myPipe[k].ProgressiveToInterlaceUnitInOPP, 2063 myPipe[k].SwathHeightC, 2064 myPipe[k].SourceRotation, 2065 myPipe[k].ViewportStationary, 2066 SwathWidthC[k], 2067 myPipe[k].ViewportHeightChroma, 2068 myPipe[k].ViewportXStartC, 2069 myPipe[k].ViewportYStartC, 2070 2071 /* Output */ 2072 &VInitPreFillC[k], 2073 &MaxNumSwathC[k]); 2074 } else { 2075 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; 2076 PTEBufferSizeInRequestsForChroma[k] = 0; 2077 PixelPTEBytesPerRowC[k] = 0; 2078 PDEAndMetaPTEBytesFrameC = 0; 2079 MetaRowByteC[k] = 0; 2080 MaxNumSwathC[k] = 0; 2081 PrefetchSourceLinesC[k] = 0; 2082 dpte_row_height_chroma_one_row_per_frame[k] = 0; 2083 dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; 2084 PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; 2085 } 2086 2087 PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( 2088 myPipe[k].ViewportStationary, 2089 myPipe[k].DCCEnable, 2090 myPipe[k].DPPPerSurface, 2091 myPipe[k].BlockHeight256BytesY, 2092 myPipe[k].BlockWidth256BytesY, 2093 myPipe[k].SourcePixelFormat, 2094 myPipe[k].SurfaceTiling, 2095 myPipe[k].BytePerPixelY, 2096 myPipe[k].SourceRotation, 2097 SwathWidthY[k], 2098 myPipe[k].ViewportHeight, 2099 myPipe[k].ViewportXStart, 2100 myPipe[k].ViewportYStart, 2101 GPUVMEnable, 2102 HostVMEnable, 2103 HostVMMaxNonCachedPageTableLevels, 2104 GPUVMMaxPageTableLevels, 
2105 GPUVMMinPageSizeKBytes[k], 2106 HostVMMinPageSize, 2107 PTEBufferSizeInRequestsForLuma[k], 2108 myPipe[k].PitchY, 2109 myPipe[k].DCCMetaPitchY, 2110 myPipe[k].BlockWidthY, 2111 myPipe[k].BlockHeightY, 2112 2113 /* Output */ 2114 &MetaRowByteY[k], 2115 &PixelPTEBytesPerRowY[k], 2116 &dpte_row_width_luma_ub[k], 2117 &dpte_row_height_luma[k], 2118 &dpte_row_height_linear_luma[k], 2119 &PixelPTEBytesPerRowY_one_row_per_frame[k], 2120 &dpte_row_width_luma_ub_one_row_per_frame[k], 2121 &dpte_row_height_luma_one_row_per_frame[k], 2122 &meta_req_width[k], 2123 &meta_req_height[k], 2124 &meta_row_width[k], 2125 &meta_row_height[k], 2126 &PixelPTEReqWidthY[k], 2127 &PixelPTEReqHeightY[k], 2128 &PTERequestSizeY[k], 2129 &dpde0_bytes_per_frame_ub_l[k], 2130 &meta_pte_bytes_per_frame_ub_l[k]); 2131 2132 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines( 2133 myPipe[k].VRatio, 2134 myPipe[k].VTaps, 2135 myPipe[k].InterlaceEnable, 2136 myPipe[k].ProgressiveToInterlaceUnitInOPP, 2137 myPipe[k].SwathHeightY, 2138 myPipe[k].SourceRotation, 2139 myPipe[k].ViewportStationary, 2140 SwathWidthY[k], 2141 myPipe[k].ViewportHeight, 2142 myPipe[k].ViewportXStart, 2143 myPipe[k].ViewportYStart, 2144 2145 /* Output */ 2146 &VInitPreFillY[k], 2147 &MaxNumSwathY[k]); 2148 2149 PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; 2150 MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k]; 2151 2152 if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] && 2153 PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) { 2154 PTEBufferSizeNotExceeded[k] = true; 2155 } else { 2156 PTEBufferSizeNotExceeded[k] = false; 2157 } 2158 2159 one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * 2160 PTEBufferSizeInRequestsForLuma[k] && 2161 PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]); 2162 } 2163 2164 dml32_CalculateMALLUseForStaticScreen( 2165 
NumberOfActiveSurfaces, 2166 MALLAllocatedForDCN, 2167 UseMALLForStaticScreen, // mode 2168 SurfaceSizeInMALL, 2169 one_row_per_frame_fits_in_buffer, 2170 /* Output */ 2171 UsesMALLForStaticScreen); // boolen 2172 2173 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2174 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || 2175 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || 2176 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || 2177 (GPUVMMinPageSizeKBytes[k] > 64); 2178 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12; 2179 } 2180 2181 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2182 #ifdef __DML_VBA_DEBUG__ 2183 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]); 2184 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); 2185 #endif 2186 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || 2187 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || 2188 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || 2189 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation)); 2190 2191 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] && 2192 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame); 2193 2194 if (use_one_row_for_frame[k]) { 2195 dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k]; 2196 dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k]; 2197 PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k]; 2198 dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k]; 2199 dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k]; 2200 PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k]; 2201 PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k]; 2202 } 
2203 2204 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes) 2205 DCCMetaBufferSizeNotExceeded[k] = true; 2206 else 2207 DCCMetaBufferSizeNotExceeded[k] = false; 2208 2209 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k]; 2210 if (use_one_row_for_frame[k]) 2211 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2; 2212 2213 dml32_CalculateRowBandwidth( 2214 GPUVMEnable, 2215 myPipe[k].SourcePixelFormat, 2216 myPipe[k].VRatio, 2217 myPipe[k].VRatioChroma, 2218 myPipe[k].DCCEnable, 2219 myPipe[k].HTotal / myPipe[k].PixelClock, 2220 MetaRowByteY[k], MetaRowByteC[k], 2221 meta_row_height[k], 2222 meta_row_height_chroma[k], 2223 PixelPTEBytesPerRowY[k], 2224 PixelPTEBytesPerRowC[k], 2225 dpte_row_height_luma[k], 2226 dpte_row_height_chroma[k], 2227 2228 /* Output */ 2229 &meta_row_bw[k], 2230 &dpte_row_bw[k]); 2231 #ifdef __DML_VBA_DEBUG__ 2232 dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]); 2233 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n", 2234 __func__, k, use_one_row_for_frame_flip[k]); 2235 dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n", 2236 __func__, k, UseMALLForPStateChange[k]); 2237 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]); 2238 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n", 2239 __func__, k, dpte_row_width_luma_ub[k]); 2240 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]); 2241 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n", 2242 __func__, k, dpte_row_height_chroma[k]); 2243 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n", 2244 __func__, k, dpte_row_width_chroma_ub[k]); 2245 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]); 2246 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]); 2247 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n", 2248 
__func__, k, PTEBufferSizeNotExceeded[k]); 2249 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]); 2250 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]); 2251 #endif 2252 } 2253 } // CalculateVMRowAndSwath 2254 2255 unsigned int dml32_CalculateVMAndRowBytes( 2256 bool ViewportStationary, 2257 bool DCCEnable, 2258 unsigned int NumberOfDPPs, 2259 unsigned int BlockHeight256Bytes, 2260 unsigned int BlockWidth256Bytes, 2261 enum source_format_class SourcePixelFormat, 2262 unsigned int SurfaceTiling, 2263 unsigned int BytePerPixel, 2264 enum dm_rotation_angle SourceRotation, 2265 double SwathWidth, 2266 unsigned int ViewportHeight, 2267 unsigned int ViewportXStart, 2268 unsigned int ViewportYStart, 2269 bool GPUVMEnable, 2270 bool HostVMEnable, 2271 unsigned int HostVMMaxNonCachedPageTableLevels, 2272 unsigned int GPUVMMaxPageTableLevels, 2273 unsigned int GPUVMMinPageSizeKBytes, 2274 unsigned int HostVMMinPageSize, 2275 unsigned int PTEBufferSizeInRequests, 2276 unsigned int Pitch, 2277 unsigned int DCCMetaPitch, 2278 unsigned int MacroTileWidth, 2279 unsigned int MacroTileHeight, 2280 2281 /* Output */ 2282 unsigned int *MetaRowByte, 2283 unsigned int *PixelPTEBytesPerRow, 2284 unsigned int *dpte_row_width_ub, 2285 unsigned int *dpte_row_height, 2286 unsigned int *dpte_row_height_linear, 2287 unsigned int *PixelPTEBytesPerRow_one_row_per_frame, 2288 unsigned int *dpte_row_width_ub_one_row_per_frame, 2289 unsigned int *dpte_row_height_one_row_per_frame, 2290 unsigned int *MetaRequestWidth, 2291 unsigned int *MetaRequestHeight, 2292 unsigned int *meta_row_width, 2293 unsigned int *meta_row_height, 2294 unsigned int *PixelPTEReqWidth, 2295 unsigned int *PixelPTEReqHeight, 2296 unsigned int *PTERequestSize, 2297 unsigned int *DPDE0BytesFrame, 2298 unsigned int *MetaPTEBytesFrame) 2299 { 2300 unsigned int MPDEBytesFrame; 2301 unsigned int DCCMetaSurfaceBytes; 2302 unsigned int ExtraDPDEBytesFrame; 
2303 unsigned int PDEAndMetaPTEBytesFrame; 2304 unsigned int HostVMDynamicLevels = 0; 2305 unsigned int MacroTileSizeBytes; 2306 unsigned int vp_height_meta_ub; 2307 unsigned int vp_height_dpte_ub; 2308 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this 2309 2310 if (GPUVMEnable == true && HostVMEnable == true) { 2311 if (HostVMMinPageSize < 2048) 2312 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 2313 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) 2314 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 2315 else 2316 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 2317 } 2318 2319 *MetaRequestHeight = 8 * BlockHeight256Bytes; 2320 *MetaRequestWidth = 8 * BlockWidth256Bytes; 2321 if (SurfaceTiling == dm_sw_linear) { 2322 *meta_row_height = 32; 2323 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) 2324 - dml_floor(ViewportXStart, *MetaRequestWidth); 2325 } else if (!IsVertical(SourceRotation)) { 2326 *meta_row_height = *MetaRequestHeight; 2327 if (ViewportStationary && NumberOfDPPs == 1) { 2328 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, 2329 *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth); 2330 } else { 2331 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; 2332 } 2333 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; 2334 } else { 2335 *meta_row_height = *MetaRequestWidth; 2336 if (ViewportStationary && NumberOfDPPs == 1) { 2337 *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, 2338 *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight); 2339 } else { 2340 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; 2341 } 2342 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; 2343 } 
2344 2345 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { 2346 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, 2347 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes); 2348 } else if (!IsVertical(SourceRotation)) { 2349 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; 2350 } else { 2351 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; 2352 } 2353 2354 DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0; 2355 2356 if (GPUVMEnable == true) { 2357 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / 2358 (8 * 4.0 * 1024), 1) + 1) * 64; 2359 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1); 2360 } else { 2361 *MetaPTEBytesFrame = 0; 2362 MPDEBytesFrame = 0; 2363 } 2364 2365 if (DCCEnable != true) { 2366 *MetaPTEBytesFrame = 0; 2367 MPDEBytesFrame = 0; 2368 *MetaRowByte = 0; 2369 } 2370 2371 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight; 2372 2373 if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) { 2374 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { 2375 vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight + 2376 MacroTileHeight - 1, MacroTileHeight) - 2377 dml_floor(ViewportYStart, MacroTileHeight); 2378 } else if (!IsVertical(SourceRotation)) { 2379 vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight; 2380 } else { 2381 vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight; 2382 } 2383 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / 2384 (8 * 2097152), 1) + 1); 2385 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2); 2386 } else { 2387 *DPDE0BytesFrame = 0; 2388 ExtraDPDEBytesFrame = 0; 2389 vp_height_dpte_ub = 0; 2390 } 
2391 2392 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; 2393 2394 #ifdef __DML_VBA_DEBUG__ 2395 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); 2396 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 2397 dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear); 2398 dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel); 2399 dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels); 2400 dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes); 2401 dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes); 2402 dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight); 2403 dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth); 2404 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); 2405 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); 2406 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); 2407 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); 2408 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 2409 dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight); 2410 dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth); 2411 dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub); 2412 #endif 2413 2414 if (HostVMEnable == true) 2415 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); 2416 2417 if (SurfaceTiling == dm_sw_linear) { 2418 *PixelPTEReqHeight = 1; 2419 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; 2420 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; 2421 *PTERequestSize = 64; 2422 } else if (GPUVMMinPageSizeKBytes == 4) { 2423 *PixelPTEReqHeight = 16 * 
BlockHeight256Bytes; 2424 *PixelPTEReqWidth = 16 * BlockWidth256Bytes; 2425 *PTERequestSize = 128; 2426 } else { 2427 *PixelPTEReqHeight = MacroTileHeight; 2428 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel); 2429 *PTERequestSize = 64; 2430 } 2431 #ifdef __DML_VBA_DEBUG__ 2432 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); 2433 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame); 2434 dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight); 2435 dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth); 2436 dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear); 2437 dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize); 2438 dml_print("DML::%s: Pitch = %d\n", __func__, Pitch); 2439 #endif 2440 2441 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub; 2442 *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame / 2443 (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) * 2444 (double) *PixelPTEReqWidth; 2445 *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth * 2446 *PTERequestSize; 2447 2448 if (SurfaceTiling == dm_sw_linear) { 2449 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2450 *PixelPTEReqWidth / Pitch), 1)); 2451 #ifdef __DML_VBA_DEBUG__ 2452 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__, 2453 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch); 2454 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__, 2455 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch)); 2456 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__, 2457 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); 2458 dml_print("DML::%s: 
dpte_row_height = %d (4)\n", __func__, 2459 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2460 *PixelPTEReqWidth / Pitch), 1)); 2461 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); 2462 #endif 2463 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1), 2464 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth; 2465 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize; 2466 2467 // VBA_DELTA, VBA doesn't have programming value for pte row height linear. 2468 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2469 PixelPTEReqWidth_linear / Pitch), 1); 2470 if (*dpte_row_height_linear > 128) 2471 *dpte_row_height_linear = 128; 2472 2473 } else if (!IsVertical(SourceRotation)) { 2474 *dpte_row_height = *PixelPTEReqHeight; 2475 2476 if (GPUVMMinPageSizeKBytes > 64) { 2477 *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) / 2478 *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 2479 } else if (ViewportStationary && (NumberOfDPPs == 1)) { 2480 *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth + 2481 *PixelPTEReqWidth - 1, *PixelPTEReqWidth) - 2482 dml_floor(ViewportXStart, *PixelPTEReqWidth); 2483 } else { 2484 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * 2485 *PixelPTEReqWidth; 2486 } 2487 2488 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 2489 } else { 2490 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth); 2491 2492 if (ViewportStationary && (NumberOfDPPs == 1)) { 2493 *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, 2494 *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight); 2495 } else { 2496 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) 2497 * *PixelPTEReqHeight; 2498 } 2499 2500 *PixelPTEBytesPerRow = 
*dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; 2501 } 2502 2503 if (GPUVMEnable != true) 2504 *PixelPTEBytesPerRow = 0; 2505 if (HostVMEnable == true) 2506 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); 2507 2508 #ifdef __DML_VBA_DEBUG__ 2509 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); 2510 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); 2511 dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear); 2512 dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub); 2513 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow); 2514 dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests); 2515 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame); 2516 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n", 2517 __func__, *dpte_row_width_ub_one_row_per_frame); 2518 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n", 2519 __func__, *PixelPTEBytesPerRow_one_row_per_frame); 2520 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", 2521 *MetaPTEBytesFrame); 2522 #endif 2523 2524 return PDEAndMetaPTEBytesFrame; 2525 } // CalculateVMAndRowBytes 2526 2527 double dml32_CalculatePrefetchSourceLines( 2528 double VRatio, 2529 unsigned int VTaps, 2530 bool Interlace, 2531 bool ProgressiveToInterlaceUnitInOPP, 2532 unsigned int SwathHeight, 2533 enum dm_rotation_angle SourceRotation, 2534 bool ViewportStationary, 2535 double SwathWidth, 2536 unsigned int ViewportHeight, 2537 unsigned int ViewportXStart, 2538 unsigned int ViewportYStart, 2539 2540 /* Output */ 2541 double *VInitPreFill, 2542 unsigned int *MaxNumSwath) 2543 { 2544 2545 unsigned int vp_start_rot; 2546 unsigned int sw0_tmp; 2547 unsigned int MaxPartialSwath; 2548 double numLines; 2549 2550 
#ifdef __DML_VBA_DEBUG__ 2551 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); 2552 dml_print("DML::%s: VTaps = %d\n", __func__, VTaps); 2553 dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart); 2554 dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart); 2555 dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary); 2556 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); 2557 #endif 2558 if (ProgressiveToInterlaceUnitInOPP) 2559 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1); 2560 else 2561 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); 2562 2563 if (ViewportStationary) { 2564 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) { 2565 vp_start_rot = SwathHeight - 2566 (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); 2567 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) { 2568 vp_start_rot = ViewportXStart; 2569 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) { 2570 vp_start_rot = SwathHeight - 2571 (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); 2572 } else { 2573 vp_start_rot = ViewportYStart; 2574 } 2575 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); 2576 if (sw0_tmp < *VInitPreFill) 2577 *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1; 2578 else 2579 *MaxNumSwath = 1; 2580 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight); 2581 } else { 2582 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1; 2583 if (*VInitPreFill > 1) 2584 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight); 2585 else 2586 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight); 2587 } 2588 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; 2589 2590 
#ifdef __DML_VBA_DEBUG__ 2591 dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot); 2592 dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill); 2593 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); 2594 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath); 2595 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); 2596 #endif 2597 return numLines; 2598 2599 } // CalculatePrefetchSourceLines 2600 2601 void dml32_CalculateMALLUseForStaticScreen( 2602 unsigned int NumberOfActiveSurfaces, 2603 unsigned int MALLAllocatedForDCNFinal, 2604 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen, 2605 unsigned int SurfaceSizeInMALL[], 2606 bool one_row_per_frame_fits_in_buffer[], 2607 2608 /* output */ 2609 bool UsesMALLForStaticScreen[]) 2610 { 2611 unsigned int k; 2612 unsigned int SurfaceToAddToMALL; 2613 bool CanAddAnotherSurfaceToMALL; 2614 unsigned int TotalSurfaceSizeInMALL; 2615 2616 TotalSurfaceSizeInMALL = 0; 2617 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2618 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable); 2619 if (UsesMALLForStaticScreen[k]) 2620 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; 2621 #ifdef __DML_VBA_DEBUG__ 2622 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); 2623 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL); 2624 #endif 2625 } 2626 2627 SurfaceToAddToMALL = 0; 2628 CanAddAnotherSurfaceToMALL = true; 2629 while (CanAddAnotherSurfaceToMALL) { 2630 CanAddAnotherSurfaceToMALL = false; 2631 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2632 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 && 2633 !UsesMALLForStaticScreen[k] && 2634 UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable && 2635 one_row_per_frame_fits_in_buffer[k] && 
2636 (!CanAddAnotherSurfaceToMALL || 2637 SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { 2638 CanAddAnotherSurfaceToMALL = true; 2639 SurfaceToAddToMALL = k; 2640 #ifdef __DML_VBA_DEBUG__ 2641 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n", 2642 __func__, k, UseMALLForStaticScreen[k]); 2643 #endif 2644 } 2645 } 2646 if (CanAddAnotherSurfaceToMALL) { 2647 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true; 2648 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; 2649 2650 #ifdef __DML_VBA_DEBUG__ 2651 dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL); 2652 dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL); 2653 #endif 2654 2655 } 2656 } 2657 } 2658 2659 void dml32_CalculateRowBandwidth( 2660 bool GPUVMEnable, 2661 enum source_format_class SourcePixelFormat, 2662 double VRatio, 2663 double VRatioChroma, 2664 bool DCCEnable, 2665 double LineTime, 2666 unsigned int MetaRowByteLuma, 2667 unsigned int MetaRowByteChroma, 2668 unsigned int meta_row_height_luma, 2669 unsigned int meta_row_height_chroma, 2670 unsigned int PixelPTEBytesPerRowLuma, 2671 unsigned int PixelPTEBytesPerRowChroma, 2672 unsigned int dpte_row_height_luma, 2673 unsigned int dpte_row_height_chroma, 2674 /* Output */ 2675 double *meta_row_bw, 2676 double *dpte_row_bw) 2677 { 2678 if (DCCEnable != true) { 2679 *meta_row_bw = 0; 2680 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || 2681 SourcePixelFormat == dm_rgbe_alpha) { 2682 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * 2683 MetaRowByteChroma / (meta_row_height_chroma * LineTime); 2684 } else { 2685 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 2686 } 2687 2688 if (GPUVMEnable != true) { 2689 *dpte_row_bw = 0; 2690 } else if (SourcePixelFormat == dm_420_8 || 
SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || 2691 SourcePixelFormat == dm_rgbe_alpha) { 2692 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + 2693 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); 2694 } else { 2695 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 2696 } 2697 } 2698 2699 double dml32_CalculateUrgentLatency( 2700 double UrgentLatencyPixelDataOnly, 2701 double UrgentLatencyPixelMixedWithVMData, 2702 double UrgentLatencyVMDataOnly, 2703 bool DoUrgentLatencyAdjustment, 2704 double UrgentLatencyAdjustmentFabricClockComponent, 2705 double UrgentLatencyAdjustmentFabricClockReference, 2706 double FabricClock) 2707 { 2708 double ret; 2709 2710 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); 2711 if (DoUrgentLatencyAdjustment == true) { 2712 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * 2713 (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); 2714 } 2715 return ret; 2716 } 2717 2718 void dml32_CalculateUrgentBurstFactor( 2719 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, 2720 unsigned int swath_width_luma_ub, 2721 unsigned int swath_width_chroma_ub, 2722 unsigned int SwathHeightY, 2723 unsigned int SwathHeightC, 2724 double LineTime, 2725 double UrgentLatency, 2726 double CursorBufferSize, 2727 unsigned int CursorWidth, 2728 unsigned int CursorBPP, 2729 double VRatio, 2730 double VRatioC, 2731 double BytePerPixelInDETY, 2732 double BytePerPixelInDETC, 2733 unsigned int DETBufferSizeY, 2734 unsigned int DETBufferSizeC, 2735 /* Output */ 2736 double *UrgentBurstFactorCursor, 2737 double *UrgentBurstFactorLuma, 2738 double *UrgentBurstFactorChroma, 2739 bool *NotEnoughUrgentLatencyHiding) 2740 { 2741 double LinesInDETLuma; 2742 double LinesInDETChroma; 2743 unsigned int LinesInCursorBuffer; 2744 double CursorBufferSizeInTime; 2745 double 
DETBufferSizeInTimeLuma; 2746 double DETBufferSizeInTimeChroma; 2747 2748 *NotEnoughUrgentLatencyHiding = 0; 2749 2750 if (CursorWidth > 0) { 2751 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / 2752 (CursorWidth * CursorBPP / 8.0)), 1.0); 2753 if (VRatio > 0) { 2754 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; 2755 if (CursorBufferSizeInTime - UrgentLatency <= 0) { 2756 *NotEnoughUrgentLatencyHiding = 1; 2757 *UrgentBurstFactorCursor = 0; 2758 } else { 2759 *UrgentBurstFactorCursor = CursorBufferSizeInTime / 2760 (CursorBufferSizeInTime - UrgentLatency); 2761 } 2762 } else { 2763 *UrgentBurstFactorCursor = 1; 2764 } 2765 } 2766 2767 LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 : 2768 DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; 2769 2770 if (VRatio > 0) { 2771 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; 2772 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { 2773 *NotEnoughUrgentLatencyHiding = 1; 2774 *UrgentBurstFactorLuma = 0; 2775 } else { 2776 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); 2777 } 2778 } else { 2779 *UrgentBurstFactorLuma = 1; 2780 } 2781 2782 if (BytePerPixelInDETC > 0) { 2783 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 
2784 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC 2785 / swath_width_chroma_ub; 2786 2787 if (VRatio > 0) { 2788 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; 2789 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { 2790 *NotEnoughUrgentLatencyHiding = 1; 2791 *UrgentBurstFactorChroma = 0; 2792 } else { 2793 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma 2794 / (DETBufferSizeInTimeChroma - UrgentLatency); 2795 } 2796 } else { 2797 *UrgentBurstFactorChroma = 1; 2798 } 2799 } 2800 } // CalculateUrgentBurstFactor 2801 2802 void dml32_CalculateDCFCLKDeepSleep( 2803 unsigned int NumberOfActiveSurfaces, 2804 unsigned int BytePerPixelY[], 2805 unsigned int BytePerPixelC[], 2806 double VRatio[], 2807 double VRatioChroma[], 2808 double SwathWidthY[], 2809 double SwathWidthC[], 2810 unsigned int DPPPerSurface[], 2811 double HRatio[], 2812 double HRatioChroma[], 2813 double PixelClock[], 2814 double PSCL_THROUGHPUT[], 2815 double PSCL_THROUGHPUT_CHROMA[], 2816 double Dppclk[], 2817 double ReadBandwidthLuma[], 2818 double ReadBandwidthChroma[], 2819 unsigned int ReturnBusWidth, 2820 2821 /* Output */ 2822 double *DCFClkDeepSleep) 2823 { 2824 unsigned int k; 2825 double DisplayPipeLineDeliveryTimeLuma; 2826 double DisplayPipeLineDeliveryTimeChroma; 2827 double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX]; 2828 double ReadBandwidth = 0.0; 2829 2830 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2831 2832 if (VRatio[k] <= 1) { 2833 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] 2834 / PixelClock[k]; 2835 } else { 2836 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 2837 } 2838 if (BytePerPixelC[k] == 0) { 2839 DisplayPipeLineDeliveryTimeChroma = 0; 2840 } else { 2841 if (VRatioChroma[k] <= 1) { 2842 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * 2843 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 2844 } else { 2845 
DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] 2846 / Dppclk[k]; 2847 } 2848 } 2849 2850 if (BytePerPixelC[k] > 0) { 2851 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * 2852 BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 2853 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 2854 32.0 / DisplayPipeLineDeliveryTimeChroma); 2855 } else { 2856 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 2857 64.0 / DisplayPipeLineDeliveryTimeLuma; 2858 } 2859 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16); 2860 2861 #ifdef __DML_VBA_DEBUG__ 2862 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]); 2863 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); 2864 #endif 2865 } 2866 2867 for (k = 0; k < NumberOfActiveSurfaces; ++k) 2868 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 2869 2870 *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth); 2871 2872 #ifdef __DML_VBA_DEBUG__ 2873 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__); 2874 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); 2875 dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth); 2876 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); 2877 #endif 2878 2879 for (k = 0; k < NumberOfActiveSurfaces; ++k) 2880 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); 2881 #ifdef __DML_VBA_DEBUG__ 2882 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); 2883 #endif 2884 } // CalculateDCFCLKDeepSleep 2885 2886 double dml32_CalculateWriteBackDelay( 2887 enum source_format_class WritebackPixelFormat, 2888 double WritebackHRatio, 2889 double 
WritebackVRatio, 2890 unsigned int WritebackVTaps, 2891 unsigned int WritebackDestinationWidth, 2892 unsigned int WritebackDestinationHeight, 2893 unsigned int WritebackSourceHeight, 2894 unsigned int HTotal) 2895 { 2896 double CalculateWriteBackDelay; 2897 double Line_length; 2898 double Output_lines_last_notclamped; 2899 double WritebackVInit; 2900 2901 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; 2902 Line_length = dml_max((double) WritebackDestinationWidth, 2903 dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); 2904 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - 2905 dml_ceil(((double)WritebackSourceHeight - 2906 (double) WritebackVInit) / (double)WritebackVRatio, 1.0); 2907 if (Output_lines_last_notclamped < 0) { 2908 CalculateWriteBackDelay = 0; 2909 } else { 2910 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + 2911 (HTotal - WritebackDestinationWidth) + 80; 2912 } 2913 return CalculateWriteBackDelay; 2914 } 2915 2916 void dml32_UseMinimumDCFCLK( 2917 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 2918 bool DRRDisplay[], 2919 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 2920 unsigned int MaxInterDCNTileRepeaters, 2921 unsigned int MaxPrefetchMode, 2922 double DRAMClockChangeLatencyFinal, 2923 double FCLKChangeLatency, 2924 double SREnterPlusExitTime, 2925 unsigned int ReturnBusWidth, 2926 unsigned int RoundTripPingLatencyCycles, 2927 unsigned int ReorderingBytes, 2928 unsigned int PixelChunkSizeInKByte, 2929 unsigned int MetaChunkSize, 2930 bool GPUVMEnable, 2931 unsigned int GPUVMMaxPageTableLevels, 2932 bool HostVMEnable, 2933 unsigned int NumberOfActiveSurfaces, 2934 double HostVMMinPageSize, 2935 unsigned int HostVMMaxNonCachedPageTableLevels, 2936 bool DynamicMetadataVMEnabled, 2937 bool ImmediateFlipRequirement, 2938 bool ProgressiveToInterlaceUnitInOPP, 2939 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation, 2940 double 
PercentOfIdealSDPPortBWReceivedAfterUrgLatency, 2941 unsigned int VTotal[], 2942 unsigned int VActive[], 2943 unsigned int DynamicMetadataTransmittedBytes[], 2944 unsigned int DynamicMetadataLinesBeforeActiveRequired[], 2945 bool Interlace[], 2946 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX], 2947 double RequiredDISPCLK[][2], 2948 double UrgLatency[], 2949 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX], 2950 double ProjectedDCFClkDeepSleep[][2], 2951 double MaximumVStartup[][2][DC__NUM_DPP__MAX], 2952 unsigned int TotalNumberOfActiveDPP[][2], 2953 unsigned int TotalNumberOfDCCActiveDPP[][2], 2954 unsigned int dpte_group_bytes[], 2955 double PrefetchLinesY[][2][DC__NUM_DPP__MAX], 2956 double PrefetchLinesC[][2][DC__NUM_DPP__MAX], 2957 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX], 2958 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX], 2959 unsigned int BytePerPixelY[], 2960 unsigned int BytePerPixelC[], 2961 unsigned int HTotal[], 2962 double PixelClock[], 2963 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX], 2964 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX], 2965 double MetaRowBytes[][2][DC__NUM_DPP__MAX], 2966 bool DynamicMetadataEnable[], 2967 double ReadBandwidthLuma[], 2968 double ReadBandwidthChroma[], 2969 double DCFCLKPerState[], 2970 /* Output */ 2971 double DCFCLKState[][2]) 2972 { 2973 unsigned int i, j, k; 2974 unsigned int dummy1; 2975 double dummy2, dummy3; 2976 double NormalEfficiency; 2977 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; 2978 2979 NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0; 2980 for (i = 0; i < DC__VOLTAGE_STATES; ++i) { 2981 for (j = 0; j <= 1; ++j) { 2982 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; 2983 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; 2984 double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX]; 2985 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; 2986 double MinimumTWait = 
0.0; 2987 double DPTEBandwidth; 2988 double DCFCLKRequiredForAverageBandwidth; 2989 unsigned int ExtraLatencyBytes; 2990 double ExtraLatencyCycles; 2991 double DCFCLKRequiredForPeakBandwidth; 2992 unsigned int NoOfDPPState[DC__NUM_DPP__MAX]; 2993 double MinimumTvmPlus2Tr0; 2994 2995 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; 2996 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2997 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 2998 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] 2999 / (15.75 * HTotal[k] / PixelClock[k]); 3000 } 3001 3002 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) 3003 NoOfDPPState[k] = NoOfDPP[i][j][k]; 3004 3005 DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]; 3006 DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth); 3007 3008 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes, 3009 TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte, 3010 TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable, 3011 NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize, 3012 HostVMMaxNonCachedPageTableLevels); 3013 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ 3014 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth; 3015 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3016 double DCFCLKCyclesRequiredInPrefetch; 3017 double PrefetchTime; 3018 3019 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] 3020 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k] 3021 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] 3022 * BytePerPixelC[k]) / NormalEfficiency 3023 / ReturnBusWidth; 3024 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] 3025 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency 3026 / NormalEfficiency / ReturnBusWidth 3027 * (GPUVMMaxPageTableLevels > 2 ? 
1 : 0) 3028 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency 3029 / ReturnBusWidth 3030 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth 3031 + PixelDCFCLKCyclesRequiredInPrefetch[k]; 3032 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) 3033 * HTotal[k] / PixelClock[k]; 3034 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true && 3035 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ? 3036 UrgLatency[i] * GPUVMMaxPageTableLevels * 3037 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; 3038 3039 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode, 3040 UseMALLForPStateChange[k], 3041 SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 3042 DRRDisplay[k], 3043 DRAMClockChangeLatencyFinal, 3044 FCLKChangeLatency, 3045 UrgLatency[i], 3046 SREnterPlusExitTime); 3047 3048 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - 3049 MinimumTWait - UrgLatency[i] * 3050 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : 3051 GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ? 
3052 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - 3053 DynamicMetadataVMExtraLatency[k]; 3054 3055 if (PrefetchTime > 0) { 3056 double ExpectedVRatioPrefetch; 3057 3058 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * 3059 PixelDCFCLKCyclesRequiredInPrefetch[k] / 3060 DCFCLKCyclesRequiredInPrefetch); 3061 DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] * 3062 PixelDCFCLKCyclesRequiredInPrefetch[k] / 3063 PrefetchPixelLinesTime[k] * 3064 dml_max(1.0, ExpectedVRatioPrefetch) * 3065 dml_max(1.0, ExpectedVRatioPrefetch / 4); 3066 if (HostVMEnable == true || ImmediateFlipRequirement == true) { 3067 DCFCLKRequiredForPeakBandwidthPerSurface[k] = 3068 DCFCLKRequiredForPeakBandwidthPerSurface[k] + 3069 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / 3070 NormalEfficiency / ReturnBusWidth; 3071 } 3072 } else { 3073 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; 3074 } 3075 if (DynamicMetadataEnable[k] == true) { 3076 double TSetupPipe; 3077 double TdmbfPipe; 3078 double TdmsksPipe; 3079 double TdmecPipe; 3080 double AllowedTimeForUrgentExtraLatency; 3081 3082 dml32_CalculateVUpdateAndDynamicMetadataParameters( 3083 MaxInterDCNTileRepeaters, 3084 RequiredDPPCLKPerSurface[i][j][k], 3085 RequiredDISPCLK[i][j], 3086 ProjectedDCFClkDeepSleep[i][j], 3087 PixelClock[k], 3088 HTotal[k], 3089 VTotal[k] - VActive[k], 3090 DynamicMetadataTransmittedBytes[k], 3091 DynamicMetadataLinesBeforeActiveRequired[k], 3092 Interlace[k], 3093 ProgressiveToInterlaceUnitInOPP, 3094 3095 /* output */ 3096 &TSetupPipe, 3097 &TdmbfPipe, 3098 &TdmecPipe, 3099 &TdmsksPipe, 3100 &dummy1, 3101 &dummy2, 3102 &dummy3); 3103 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / 3104 PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - 3105 TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; 3106 if (AllowedTimeForUrgentExtraLatency > 0) 3107 DCFCLKRequiredForPeakBandwidthPerSurface[k] = 3108 
dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k], 3109 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); 3110 else 3111 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; 3112 } 3113 } 3114 DCFCLKRequiredForPeakBandwidth = 0; 3115 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) { 3116 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + 3117 DCFCLKRequiredForPeakBandwidthPerSurface[k]; 3118 } 3119 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ? 3120 (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) * 3121 (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0); 3122 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3123 double MaximumTvmPlus2Tr0PlusTsw; 3124 3125 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / 3126 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; 3127 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { 3128 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i]; 3129 } else { 3130 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 3131 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - 3132 MinimumTvmPlus2Tr0 - 3133 PrefetchPixelLinesTime[k] / 4), 3134 (2 * ExtraLatencyCycles + 3135 PixelDCFCLKCyclesRequiredInPrefetch[k]) / 3136 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); 3137 } 3138 } 3139 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * 3140 dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); 3141 } 3142 } 3143 } 3144 3145 unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes, 3146 unsigned int TotalNumberOfActiveDPP, 3147 unsigned int PixelChunkSizeInKByte, 3148 unsigned int TotalNumberOfDCCActiveDPP, 3149 unsigned int MetaChunkSize, 3150 bool GPUVMEnable, 3151 bool HostVMEnable, 3152 unsigned int NumberOfActiveSurfaces, 3153 unsigned int NumberOfDPP[], 3154 unsigned int dpte_group_bytes[], 3155 double 
/*
 * dml32_CalculateVUpdateAndDynamicMetadataParameters - VUpdate/VReady pixel
 * offsets and dynamic metadata timing terms.
 *
 * Outputs:
 *   *VUpdateWidthPix  - ceil((14 / DCFClkDeepSleep + 12 / Dppclk +
 *                       repeater delay) * PixelClock)
 *   *VReadyOffsetPix  - ceil(max(150 / Dppclk, repeater delay +
 *                       20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *   *VUpdateOffsetPix - ceil(HTotal / 4)
 *   *TSetup           - sum of the three pixel counts above / PixelClock
 *   *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   *Tdmec            - HTotal / PixelClock
 *   *Tdmsks           - half of VBlank when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0,
 *                       otherwise that many lines, expressed as
 *                       lines * HTotal / PixelClock; halved again when
 *                       InterlaceEnable is 1 and
 *                       ProgressiveToInterlaceUnitInOPP is false
 *
 * where the repeater delay is
 * MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix  =
		dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix  = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/* The two values below are doubles, so they must be printed with %f. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
!(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) { 3258 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); 3259 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) { 3260 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); 3261 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) { 3262 TWait = dml_max(SREnterPlusExitTime, UrgentLatency); 3263 } else { 3264 TWait = UrgentLatency; 3265 } 3266 3267 #ifdef __DML_VBA_DEBUG__ 3268 dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode); 3269 dml_print("DML::%s: TWait = %f\n", __func__, TWait); 3270 #endif 3271 return TWait; 3272 } // CalculateTWait 3273 3274 // Function: get_return_bw_mbps 3275 // Megabyte per second 3276 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc, 3277 const int VoltageLevel, 3278 const bool HostVMEnable, 3279 const double DCFCLK, 3280 const double FabricClock, 3281 const double DRAMSpeed) 3282 { 3283 double ReturnBW = 0.; 3284 double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK; 3285 double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes; 3286 double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes; 3287 double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, 3288 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, 3289 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe : 3290 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100); 3291 double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, 3292 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, 3293 IdealDRAMBandwidth * (VoltageLevel < 2 ? 
soc->pct_ideal_dram_bw_after_urgent_strobe : 3294 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100); 3295 3296 if (HostVMEnable != true) 3297 ReturnBW = PixelDataOnlyReturnBW; 3298 else 3299 ReturnBW = PixelMixedWithVMDataReturnBW; 3300 3301 #ifdef __DML_VBA_DEBUG__ 3302 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel); 3303 dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable); 3304 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 3305 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); 3306 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); 3307 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth); 3308 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth); 3309 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth); 3310 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW); 3311 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW); 3312 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW); 3313 #endif 3314 return ReturnBW; 3315 } 3316 3317 // Function: get_return_bw_mbps_vm_only 3318 // Megabyte per second 3319 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc, 3320 const int VoltageLevel, 3321 const double DCFCLK, 3322 const double FabricClock, 3323 const double DRAMSpeed) 3324 { 3325 double VMDataOnlyReturnBW = dml_min3( 3326 soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0, 3327 FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes 3328 * soc->pct_ideal_sdp_bw_after_urgent / 100.0, 3329 DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes 3330 * (VoltageLevel < 2 ? 
3331 soc->pct_ideal_dram_bw_after_urgent_strobe : 3332 soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0); 3333 #ifdef __DML_VBA_DEBUG__ 3334 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel); 3335 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 3336 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); 3337 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); 3338 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); 3339 #endif 3340 return VMDataOnlyReturnBW; 3341 } 3342 3343 double dml32_CalculateExtraLatency( 3344 unsigned int RoundTripPingLatencyCycles, 3345 unsigned int ReorderingBytes, 3346 double DCFCLK, 3347 unsigned int TotalNumberOfActiveDPP, 3348 unsigned int PixelChunkSizeInKByte, 3349 unsigned int TotalNumberOfDCCActiveDPP, 3350 unsigned int MetaChunkSize, 3351 double ReturnBW, 3352 bool GPUVMEnable, 3353 bool HostVMEnable, 3354 unsigned int NumberOfActiveSurfaces, 3355 unsigned int NumberOfDPP[], 3356 unsigned int dpte_group_bytes[], 3357 double HostVMInefficiencyFactor, 3358 double HostVMMinPageSize, 3359 unsigned int HostVMMaxNonCachedPageTableLevels) 3360 { 3361 double ExtraLatencyBytes; 3362 double ExtraLatency; 3363 3364 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes( 3365 ReorderingBytes, 3366 TotalNumberOfActiveDPP, 3367 PixelChunkSizeInKByte, 3368 TotalNumberOfDCCActiveDPP, 3369 MetaChunkSize, 3370 GPUVMEnable, 3371 HostVMEnable, 3372 NumberOfActiveSurfaces, 3373 NumberOfDPP, 3374 dpte_group_bytes, 3375 HostVMInefficiencyFactor, 3376 HostVMMinPageSize, 3377 HostVMMaxNonCachedPageTableLevels); 3378 3379 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; 3380 3381 #ifdef __DML_VBA_DEBUG__ 3382 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); 3383 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); 3384 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, 
ExtraLatencyBytes); 3385 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); 3386 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); 3387 #endif 3388 3389 return ExtraLatency; 3390 } // CalculateExtraLatency 3391 3392 bool dml32_CalculatePrefetchSchedule( 3393 struct vba_vars_st *v, 3394 unsigned int k, 3395 double HostVMInefficiencyFactor, 3396 DmlPipe *myPipe, 3397 unsigned int DSCDelay, 3398 unsigned int DPP_RECOUT_WIDTH, 3399 unsigned int VStartup, 3400 unsigned int MaxVStartup, 3401 double UrgentLatency, 3402 double UrgentExtraLatency, 3403 double TCalc, 3404 unsigned int PDEAndMetaPTEBytesFrame, 3405 unsigned int MetaRowByte, 3406 unsigned int PixelPTEBytesPerRow, 3407 double PrefetchSourceLinesY, 3408 unsigned int SwathWidthY, 3409 unsigned int VInitPreFillY, 3410 unsigned int MaxNumSwathY, 3411 double PrefetchSourceLinesC, 3412 unsigned int SwathWidthC, 3413 unsigned int VInitPreFillC, 3414 unsigned int MaxNumSwathC, 3415 unsigned int swath_width_luma_ub, 3416 unsigned int swath_width_chroma_ub, 3417 unsigned int SwathHeightY, 3418 unsigned int SwathHeightC, 3419 double TWait, 3420 double TPreReq, 3421 /* Output */ 3422 double *DSTXAfterScaler, 3423 double *DSTYAfterScaler, 3424 double *DestinationLinesForPrefetch, 3425 double *PrefetchBandwidth, 3426 double *DestinationLinesToRequestVMInVBlank, 3427 double *DestinationLinesToRequestRowInVBlank, 3428 double *VRatioPrefetchY, 3429 double *VRatioPrefetchC, 3430 double *RequiredPrefetchPixDataBWLuma, 3431 double *RequiredPrefetchPixDataBWChroma, 3432 bool *NotEnoughTimeForDynamicMetadata, 3433 double *Tno_bw, 3434 double *prefetch_vmrow_bw, 3435 double *Tdmdl_vm, 3436 double *Tdmdl, 3437 double *TSetup, 3438 unsigned int *VUpdateOffsetPix, 3439 double *VUpdateWidthPix, 3440 double *VReadyOffsetPix) 3441 { 3442 double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater; 3443 bool MyError = false; 3444 unsigned int DPPCycles, DISPCLKCycles; 3445 double 
DSTTotalPixelsAfterScaler; 3446 double LineTime; 3447 double dst_y_prefetch_equ; 3448 double prefetch_bw_oto; 3449 double Tvm_oto; 3450 double Tr0_oto; 3451 double Tvm_oto_lines; 3452 double Tr0_oto_lines; 3453 double dst_y_prefetch_oto; 3454 double TimeForFetchingMetaPTE = 0; 3455 double TimeForFetchingRowInVBlank = 0; 3456 double LinesToRequestPrefetchPixelData = 0; 3457 unsigned int HostVMDynamicLevelsTrips; 3458 double trip_to_mem; 3459 double Tvm_trips; 3460 double Tr0_trips; 3461 double Tvm_trips_rounded; 3462 double Tr0_trips_rounded; 3463 double Lsw_oto; 3464 double Tpre_rounded; 3465 double prefetch_bw_equ; 3466 double Tvm_equ; 3467 double Tr0_equ; 3468 double Tdmbf; 3469 double Tdmec; 3470 double Tdmsks; 3471 double prefetch_sw_bytes; 3472 double bytes_pp; 3473 double dep_bytes; 3474 unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__; 3475 double min_Lsw; 3476 double Tsw_est1 = 0; 3477 double Tsw_est3 = 0; 3478 3479 if (v->GPUVMEnable == true && v->HostVMEnable == true) 3480 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; 3481 else 3482 HostVMDynamicLevelsTrips = 0; 3483 #ifdef __DML_VBA_DEBUG__ 3484 dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable); 3485 dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels); 3486 dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable); 3487 dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n", 3488 __func__, v->HostVMEnable, HostVMInefficiencyFactor); 3489 #endif 3490 dml32_CalculateVUpdateAndDynamicMetadataParameters( 3491 v->MaxInterDCNTileRepeaters, 3492 myPipe->Dppclk, 3493 myPipe->Dispclk, 3494 myPipe->DCFClkDeepSleep, 3495 myPipe->PixelClock, 3496 myPipe->HTotal, 3497 myPipe->VBlank, 3498 v->DynamicMetadataTransmittedBytes[k], 3499 v->DynamicMetadataLinesBeforeActiveRequired[k], 3500 myPipe->InterlaceEnable, 3501 myPipe->ProgressiveToInterlaceUnitInOPP, 3502 TSetup, 3503 3504 /* output */ 3505 &Tdmbf, 3506 
&Tdmec, 3507 &Tdmsks, 3508 VUpdateOffsetPix, 3509 VUpdateWidthPix, 3510 VReadyOffsetPix); 3511 3512 LineTime = myPipe->HTotal / myPipe->PixelClock; 3513 trip_to_mem = UrgentLatency; 3514 Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 3515 3516 if (v->DynamicMetadataVMEnabled == true) 3517 *Tdmdl = TWait + Tvm_trips + trip_to_mem; 3518 else 3519 *Tdmdl = TWait + UrgentExtraLatency; 3520 3521 #ifdef __DML_VBA_ALLOW_DELTA__ 3522 if (v->DynamicMetadataEnable[k] == false) 3523 *Tdmdl = 0.0; 3524 #endif 3525 3526 if (v->DynamicMetadataEnable[k] == true) { 3527 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 3528 *NotEnoughTimeForDynamicMetadata = true; 3529 #ifdef __DML_VBA_DEBUG__ 3530 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 3531 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", 3532 __func__, Tdmbf); 3533 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 3534 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", 3535 __func__, Tdmsks); 3536 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", 3537 __func__, *Tdmdl); 3538 #endif 3539 } else { 3540 *NotEnoughTimeForDynamicMetadata = false; 3541 } 3542 } else { 3543 *NotEnoughTimeForDynamicMetadata = false; 3544 } 3545 3546 *Tdmdl_vm = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true && 3547 v->GPUVMEnable == true ? 
TWait + Tvm_trips : 0); 3548 3549 if (myPipe->ScalerEnabled) 3550 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL; 3551 else 3552 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly; 3553 3554 DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor; 3555 3556 DISPCLKCycles = v->DISPCLKDelaySubtotal; 3557 3558 if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) 3559 return true; 3560 3561 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles * 3562 myPipe->PixelClock / myPipe->Dispclk + DSCDelay; 3563 3564 *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0) 3565 + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH 3566 + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ? 3567 myPipe->HActive / 2 : 0) 3568 + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0); 3569 3570 #ifdef __DML_VBA_DEBUG__ 3571 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 3572 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 3573 dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk); 3574 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 3575 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk); 3576 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 3577 dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode); 3578 dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH); 3579 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); 3580 #endif 3581 3582 if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) 3583 *DSTYAfterScaler = 1; 3584 else 3585 *DSTYAfterScaler = 0; 3586 3587 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 3588 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 
1); 3589 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 3590 #ifdef __DML_VBA_DEBUG__ 3591 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 3592 dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler); 3593 #endif 3594 3595 MyError = false; 3596 3597 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 3598 3599 if (v->GPUVMEnable == true) { 3600 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime; 3601 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; 3602 if (v->GPUVMMaxPageTableLevels >= 3) { 3603 *Tno_bw = UrgentExtraLatency + trip_to_mem * 3604 (double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); 3605 } else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) { 3606 Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) / 3607 4.0 * LineTime; // VBA_ERROR 3608 *Tno_bw = UrgentExtraLatency; 3609 } else { 3610 *Tno_bw = 0; 3611 } 3612 } else if (myPipe->DCCEnable == true) { 3613 Tvm_trips_rounded = LineTime / 4.0; 3614 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; 3615 *Tno_bw = 0; 3616 } else { 3617 Tvm_trips_rounded = LineTime / 4.0; 3618 Tr0_trips_rounded = LineTime / 2.0; 3619 *Tno_bw = 0; 3620 } 3621 Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0); 3622 Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0); 3623 3624 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 3625 || myPipe->SourcePixelFormat == dm_420_12) { 3626 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 3627 } else { 3628 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 3629 } 3630 3631 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY 3632 + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 3633 
prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, 3634 prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); 3635 3636 min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre; 3637 min_Lsw = dml_max(min_Lsw, 1.0); 3638 Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0; 3639 3640 if (v->GPUVMEnable == true) { 3641 Tvm_oto = dml_max3( 3642 Tvm_trips, 3643 *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, 3644 LineTime / 4.0); 3645 } else 3646 Tvm_oto = LineTime / 4.0; 3647 3648 if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { 3649 Tr0_oto = dml_max4( 3650 Tr0_trips, 3651 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, 3652 (LineTime - Tvm_oto)/2.0, 3653 LineTime / 4.0); 3654 #ifdef __DML_VBA_DEBUG__ 3655 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, 3656 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto); 3657 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips); 3658 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto); 3659 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4); 3660 #endif 3661 } else 3662 Tr0_oto = (LineTime - Tvm_oto) / 2.0; 3663 3664 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 3665 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 3666 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 3667 3668 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - 3669 (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); 3670 3671 dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__); 3672 #ifdef __DML_VBA_DEBUG__ 3673 dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); 3674 dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw); 3675 
dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw); 3676 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency); 3677 dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem); 3678 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3679 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3680 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3681 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC); 3682 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 3683 dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub); 3684 dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes); 3685 dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp); 3686 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 3687 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 3688 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 3689 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 3690 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 3691 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 3692 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); 3693 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 3694 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 3695 dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines); 3696 dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines); 3697 dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto); 3698 dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto); 3699 dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ); 3700 #endif 3701 3702 dst_y_prefetch_equ = dml_floor(4.0 * 
(dst_y_prefetch_equ + 0.125), 1) / 4.0; 3703 Tpre_rounded = dst_y_prefetch_equ * LineTime; 3704 #ifdef __DML_VBA_DEBUG__ 3705 dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ); 3706 dml_print("DML::%s: LineTime: %f\n", __func__, LineTime); 3707 dml_print("DML::%s: VStartup: %d\n", __func__, VStartup); 3708 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", 3709 __func__, VStartup * LineTime); 3710 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup); 3711 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc); 3712 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 3713 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 3714 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm); 3715 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); 3716 dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n", 3717 __func__, *DSTYAfterScaler); 3718 #endif 3719 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, 3720 MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 3721 3722 if (prefetch_sw_bytes < dep_bytes) 3723 prefetch_sw_bytes = 2 * dep_bytes; 3724 3725 *PrefetchBandwidth = 0; 3726 *DestinationLinesToRequestVMInVBlank = 0; 3727 *DestinationLinesToRequestRowInVBlank = 0; 3728 *VRatioPrefetchY = 0; 3729 *VRatioPrefetchC = 0; 3730 *RequiredPrefetchPixDataBWLuma = 0; 3731 if (dst_y_prefetch_equ > 1 && 3732 (Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) { 3733 double PrefetchBandwidth1; 3734 double PrefetchBandwidth2; 3735 double PrefetchBandwidth3; 3736 double PrefetchBandwidth4; 3737 3738 if (Tpre_rounded - *Tno_bw > 0) { 
3739 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3740 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3741 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); 3742 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 3743 } else 3744 PrefetchBandwidth1 = 0; 3745 3746 if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw) 3747 && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 3748 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3749 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3750 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 3751 } 3752 3753 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 3754 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / 3755 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 3756 else 3757 PrefetchBandwidth2 = 0; 3758 3759 if (Tpre_rounded - Tvm_trips_rounded > 0) { 3760 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3761 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 3762 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 3763 } else 3764 PrefetchBandwidth3 = 0; 3765 3766 3767 if (VStartup == MaxVStartup && 3768 (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 * 3769 LineTime - Tvm_trips_rounded > 0) { 3770 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3771 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 3772 } 3773 3774 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) { 3775 PrefetchBandwidth4 = prefetch_sw_bytes / 3776 (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 3777 } else { 3778 PrefetchBandwidth4 = 0; 3779 } 3780 3781 #ifdef __DML_VBA_DEBUG__ 3782 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 3783 dml_print("DML::%s: Tno_bw: 
%f\n", __func__, *Tno_bw); 3784 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 3785 dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1); 3786 dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3); 3787 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1); 3788 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2); 3789 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); 3790 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4); 3791 #endif 3792 { 3793 bool Case1OK; 3794 bool Case2OK; 3795 bool Case3OK; 3796 3797 if (PrefetchBandwidth1 > 0) { 3798 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 3799 >= Tvm_trips_rounded 3800 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3801 / PrefetchBandwidth1 >= Tr0_trips_rounded) { 3802 Case1OK = true; 3803 } else { 3804 Case1OK = false; 3805 } 3806 } else { 3807 Case1OK = false; 3808 } 3809 3810 if (PrefetchBandwidth2 > 0) { 3811 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 3812 >= Tvm_trips_rounded 3813 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3814 / PrefetchBandwidth2 < Tr0_trips_rounded) { 3815 Case2OK = true; 3816 } else { 3817 Case2OK = false; 3818 } 3819 } else { 3820 Case2OK = false; 3821 } 3822 3823 if (PrefetchBandwidth3 > 0) { 3824 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < 3825 Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * 3826 HostVMInefficiencyFactor) / PrefetchBandwidth3 >= 3827 Tr0_trips_rounded) { 3828 Case3OK = true; 3829 } else { 3830 Case3OK = false; 3831 } 3832 } else { 3833 Case3OK = false; 3834 } 3835 3836 if (Case1OK) 3837 prefetch_bw_equ = PrefetchBandwidth1; 3838 else if (Case2OK) 3839 prefetch_bw_equ = PrefetchBandwidth2; 3840 else if (Case3OK) 3841 prefetch_bw_equ = PrefetchBandwidth3; 3842 
else 3843 prefetch_bw_equ = PrefetchBandwidth4; 3844 3845 #ifdef __DML_VBA_DEBUG__ 3846 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 3847 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 3848 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 3849 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 3850 #endif 3851 3852 if (prefetch_bw_equ > 0) { 3853 if (v->GPUVMEnable == true) { 3854 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * 3855 HostVMInefficiencyFactor / prefetch_bw_equ, 3856 Tvm_trips, LineTime / 4); 3857 } else { 3858 Tvm_equ = LineTime / 4; 3859 } 3860 3861 if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { 3862 Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * 3863 HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, 3864 (LineTime - Tvm_equ) / 2, LineTime / 4); 3865 } else { 3866 Tr0_equ = (LineTime - Tvm_equ) / 2; 3867 } 3868 } else { 3869 Tvm_equ = 0; 3870 Tr0_equ = 0; 3871 #ifdef __DML_VBA_DEBUG__ 3872 dml_print("DML: prefetch_bw_equ equals 0! 
%s:%d\n", __FILE__, __LINE__); 3873 #endif 3874 } 3875 } 3876 3877 if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 3878 if (dst_y_prefetch_oto * LineTime < TPreReq) { 3879 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 3880 } else { 3881 *DestinationLinesForPrefetch = dst_y_prefetch_oto; 3882 } 3883 TimeForFetchingMetaPTE = Tvm_oto; 3884 TimeForFetchingRowInVBlank = Tr0_oto; 3885 *PrefetchBandwidth = prefetch_bw_oto; 3886 } else { 3887 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 3888 TimeForFetchingMetaPTE = Tvm_equ; 3889 TimeForFetchingRowInVBlank = Tr0_equ; 3890 *PrefetchBandwidth = prefetch_bw_equ; 3891 } 3892 3893 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 3894 3895 *DestinationLinesToRequestRowInVBlank = 3896 dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 3897 3898 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - 3899 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 3900 3901 #ifdef __DML_VBA_DEBUG__ 3902 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 3903 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 3904 __func__, *DestinationLinesToRequestVMInVBlank); 3905 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 3906 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 3907 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", 3908 __func__, *DestinationLinesToRequestRowInVBlank); 3909 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3910 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 3911 #endif 3912 3913 if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) { 3914 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 3915 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 
1.0); 3916 #ifdef __DML_VBA_DEBUG__ 3917 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 3918 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); 3919 dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY); 3920 #endif 3921 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 3922 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 3923 *VRatioPrefetchY = 3924 dml_max((double) PrefetchSourceLinesY / 3925 LinesToRequestPrefetchPixelData, 3926 (double) MaxNumSwathY * SwathHeightY / 3927 (LinesToRequestPrefetchPixelData - 3928 (VInitPreFillY - 3.0) / 2.0)); 3929 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 3930 } else { 3931 MyError = true; 3932 *VRatioPrefetchY = 0; 3933 } 3934 #ifdef __DML_VBA_DEBUG__ 3935 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 3936 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3937 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); 3938 #endif 3939 } 3940 3941 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 3942 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3943 3944 #ifdef __DML_VBA_DEBUG__ 3945 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 3946 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); 3947 dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC); 3948 #endif 3949 if ((SwathHeightC > 4)) { 3950 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 3951 *VRatioPrefetchC = 3952 dml_max(*VRatioPrefetchC, 3953 (double) MaxNumSwathC * SwathHeightC / 3954 (LinesToRequestPrefetchPixelData - 3955 (VInitPreFillC - 3.0) / 2.0)); 3956 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3957 } else { 3958 MyError = true; 3959 *VRatioPrefetchC = 0; 3960 } 3961 #ifdef __DML_VBA_DEBUG__ 3962 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 3963 
dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 3964 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); 3965 #endif 3966 } 3967 3968 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY 3969 / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub 3970 / LineTime; 3971 3972 #ifdef __DML_VBA_DEBUG__ 3973 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3974 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3975 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 3976 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", 3977 __func__, *RequiredPrefetchPixDataBWLuma); 3978 #endif 3979 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / 3980 LinesToRequestPrefetchPixelData 3981 * myPipe->BytePerPixelC 3982 * swath_width_chroma_ub / LineTime; 3983 } else { 3984 MyError = true; 3985 #ifdef __DML_VBA_DEBUG__ 3986 dml_print("DML:%s: MyErr set. 
LinesToRequestPrefetchPixelData: %f, should be > 0\n", 3987 __func__, LinesToRequestPrefetchPixelData); 3988 #endif 3989 *VRatioPrefetchY = 0; 3990 *VRatioPrefetchC = 0; 3991 *RequiredPrefetchPixDataBWLuma = 0; 3992 *RequiredPrefetchPixDataBWChroma = 0; 3993 } 3994 #ifdef __DML_VBA_DEBUG__ 3995 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 3996 (double)LinesToRequestPrefetchPixelData * LineTime + 3997 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 3998 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 3999 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", 4000 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); 4001 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 4002 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - 4003 TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler + 4004 ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); 4005 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", 4006 PixelPTEBytesPerRow); 4007 #endif 4008 } else { 4009 MyError = true; 4010 #ifdef __DML_VBA_DEBUG__ 4011 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", 4012 __func__, dst_y_prefetch_equ); 4013 #endif 4014 } 4015 4016 { 4017 double prefetch_vm_bw; 4018 double prefetch_row_bw; 4019 4020 if (PDEAndMetaPTEBytesFrame == 0) { 4021 prefetch_vm_bw = 0; 4022 } else if (*DestinationLinesToRequestVMInVBlank > 0) { 4023 #ifdef __DML_VBA_DEBUG__ 4024 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 4025 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 4026 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 4027 __func__, 
*DestinationLinesToRequestVMInVBlank); 4028 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 4029 #endif 4030 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / 4031 (*DestinationLinesToRequestVMInVBlank * LineTime); 4032 #ifdef __DML_VBA_DEBUG__ 4033 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 4034 #endif 4035 } else { 4036 prefetch_vm_bw = 0; 4037 MyError = true; 4038 #ifdef __DML_VBA_DEBUG__ 4039 dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", 4040 __func__, *DestinationLinesToRequestVMInVBlank); 4041 #endif 4042 } 4043 4044 if (MetaRowByte + PixelPTEBytesPerRow == 0) { 4045 prefetch_row_bw = 0; 4046 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 4047 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / 4048 (*DestinationLinesToRequestRowInVBlank * LineTime); 4049 4050 #ifdef __DML_VBA_DEBUG__ 4051 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 4052 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 4053 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", 4054 __func__, *DestinationLinesToRequestRowInVBlank); 4055 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 4056 #endif 4057 } else { 4058 prefetch_row_bw = 0; 4059 MyError = true; 4060 #ifdef __DML_VBA_DEBUG__ 4061 dml_print("DML::%s: MyErr set. 
DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", 4062 __func__, *DestinationLinesToRequestRowInVBlank); 4063 #endif 4064 } 4065 4066 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 4067 } 4068 4069 if (MyError) { 4070 *PrefetchBandwidth = 0; 4071 TimeForFetchingMetaPTE = 0; 4072 TimeForFetchingRowInVBlank = 0; 4073 *DestinationLinesToRequestVMInVBlank = 0; 4074 *DestinationLinesToRequestRowInVBlank = 0; 4075 *DestinationLinesForPrefetch = 0; 4076 LinesToRequestPrefetchPixelData = 0; 4077 *VRatioPrefetchY = 0; 4078 *VRatioPrefetchC = 0; 4079 *RequiredPrefetchPixDataBWLuma = 0; 4080 *RequiredPrefetchPixDataBWChroma = 0; 4081 } 4082 4083 return MyError; 4084 } // CalculatePrefetchSchedule 4085 4086 void dml32_CalculateFlipSchedule( 4087 double HostVMInefficiencyFactor, 4088 double UrgentExtraLatency, 4089 double UrgentLatency, 4090 unsigned int GPUVMMaxPageTableLevels, 4091 bool HostVMEnable, 4092 unsigned int HostVMMaxNonCachedPageTableLevels, 4093 bool GPUVMEnable, 4094 double HostVMMinPageSize, 4095 double PDEAndMetaPTEBytesPerFrame, 4096 double MetaRowBytes, 4097 double DPTEBytesPerRow, 4098 double BandwidthAvailableForImmediateFlip, 4099 unsigned int TotImmediateFlipBytes, 4100 enum source_format_class SourcePixelFormat, 4101 double LineTime, 4102 double VRatio, 4103 double VRatioChroma, 4104 double Tno_bw, 4105 bool DCCEnable, 4106 unsigned int dpte_row_height, 4107 unsigned int meta_row_height, 4108 unsigned int dpte_row_height_chroma, 4109 unsigned int meta_row_height_chroma, 4110 bool use_one_row_for_frame_flip, 4111 4112 /* Output */ 4113 double *DestinationLinesToRequestVMInImmediateFlip, 4114 double *DestinationLinesToRequestRowInImmediateFlip, 4115 double *final_flip_bw, 4116 bool *ImmediateFlipSupportedForPipe) 4117 { 4118 double min_row_time = 0.0; 4119 unsigned int HostVMDynamicLevelsTrips; 4120 double TimeForFetchingMetaPTEImmediateFlip; 4121 double TimeForFetchingRowInVBlankImmediateFlip; 4122 double ImmediateFlipBW; 4123 
4124 if (GPUVMEnable == true && HostVMEnable == true) 4125 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 4126 else 4127 HostVMDynamicLevelsTrips = 0; 4128 4129 #ifdef __DML_VBA_DEBUG__ 4130 dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes); 4131 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); 4132 #endif 4133 4134 if (TotImmediateFlipBytes > 0) { 4135 if (use_one_row_for_frame_flip) { 4136 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) * 4137 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 4138 } else { 4139 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * 4140 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 4141 } 4142 if (GPUVMEnable == true) { 4143 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * 4144 HostVMInefficiencyFactor / ImmediateFlipBW, 4145 UrgentExtraLatency + UrgentLatency * 4146 (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), 4147 LineTime / 4.0); 4148 } else { 4149 TimeForFetchingMetaPTEImmediateFlip = 0; 4150 } 4151 if ((GPUVMEnable == true || DCCEnable == true)) { 4152 TimeForFetchingRowInVBlankImmediateFlip = dml_max3( 4153 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, 4154 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0); 4155 } else { 4156 TimeForFetchingRowInVBlankImmediateFlip = 0; 4157 } 4158 4159 *DestinationLinesToRequestVMInImmediateFlip = 4160 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0; 4161 *DestinationLinesToRequestRowInImmediateFlip = 4162 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0; 4163 4164 if (GPUVMEnable == true) { 4165 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / 4166 (*DestinationLinesToRequestVMInImmediateFlip * LineTime), 
4167 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / 4168 (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); 4169 } else if ((GPUVMEnable == true || DCCEnable == true)) { 4170 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / 4171 (*DestinationLinesToRequestRowInImmediateFlip * LineTime); 4172 } else { 4173 *final_flip_bw = 0; 4174 } 4175 } else { 4176 TimeForFetchingMetaPTEImmediateFlip = 0; 4177 TimeForFetchingRowInVBlankImmediateFlip = 0; 4178 *DestinationLinesToRequestVMInImmediateFlip = 0; 4179 *DestinationLinesToRequestRowInImmediateFlip = 0; 4180 *final_flip_bw = 0; 4181 } 4182 4183 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) { 4184 if (GPUVMEnable == true && DCCEnable != true) { 4185 min_row_time = dml_min(dpte_row_height * 4186 LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma); 4187 } else if (GPUVMEnable != true && DCCEnable == true) { 4188 min_row_time = dml_min(meta_row_height * 4189 LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); 4190 } else { 4191 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * 4192 LineTime / VRatio, dpte_row_height_chroma * LineTime / 4193 VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma); 4194 } 4195 } else { 4196 if (GPUVMEnable == true && DCCEnable != true) { 4197 min_row_time = dpte_row_height * LineTime / VRatio; 4198 } else if (GPUVMEnable != true && DCCEnable == true) { 4199 min_row_time = meta_row_height * LineTime / VRatio; 4200 } else { 4201 min_row_time = 4202 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); 4203 } 4204 } 4205 4206 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 4207 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip 4208 > min_row_time) { 4209 *ImmediateFlipSupportedForPipe = 
false; 4210 } else { 4211 *ImmediateFlipSupportedForPipe = true; 4212 } 4213 4214 #ifdef __DML_VBA_DEBUG__ 4215 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 4216 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); 4217 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", 4218 __func__, *DestinationLinesToRequestVMInImmediateFlip); 4219 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", 4220 __func__, *DestinationLinesToRequestRowInImmediateFlip); 4221 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); 4222 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", 4223 __func__, TimeForFetchingRowInVBlankImmediateFlip); 4224 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); 4225 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe); 4226 #endif 4227 } // CalculateFlipSchedule 4228 4229 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( 4230 struct vba_vars_st *v, 4231 unsigned int PrefetchMode, 4232 double DCFCLK, 4233 double ReturnBW, 4234 SOCParametersList mmSOCParameters, 4235 double SOCCLK, 4236 double DCFClkDeepSleep, 4237 unsigned int DETBufferSizeY[], 4238 unsigned int DETBufferSizeC[], 4239 unsigned int SwathHeightY[], 4240 unsigned int SwathHeightC[], 4241 double SwathWidthY[], 4242 double SwathWidthC[], 4243 unsigned int DPPPerSurface[], 4244 double BytePerPixelDETY[], 4245 double BytePerPixelDETC[], 4246 double DSTXAfterScaler[], 4247 double DSTYAfterScaler[], 4248 bool UnboundedRequestEnabled, 4249 unsigned int CompressedBufferSizeInkByte, 4250 4251 /* Output */ 4252 enum clock_change_support *DRAMClockChangeSupport, 4253 double MaxActiveDRAMClockChangeLatencySupported[], 4254 unsigned int SubViewportLinesNeededInMALL[], 4255 enum dm_fclock_change_support *FCLKChangeSupport, 4256 double *MinActiveFCLKChangeLatencySupported, 4257 bool 
*USRRetrainingSupport, 4258 double ActiveDRAMClockChangeLatencyMargin[]) 4259 { 4260 unsigned int i, j, k; 4261 unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0; 4262 unsigned int DRAMClockChangeSupportNumber = 0; 4263 unsigned int LastSurfaceWithoutMargin; 4264 unsigned int DRAMClockChangeMethod = 0; 4265 bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; 4266 double MinActiveFCLKChangeMargin = 0.; 4267 double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; 4268 double ActiveClockChangeLatencyHidingY; 4269 double ActiveClockChangeLatencyHidingC; 4270 double ActiveClockChangeLatencyHiding; 4271 double EffectiveDETBufferSizeY; 4272 double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; 4273 double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; 4274 double TotalPixelBW = 0.0; 4275 bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; 4276 double EffectiveLBLatencyHidingY; 4277 double EffectiveLBLatencyHidingC; 4278 double LinesInDETY[DC__NUM_DPP__MAX]; 4279 double LinesInDETC[DC__NUM_DPP__MAX]; 4280 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 4281 unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; 4282 double FullDETBufferingTimeY; 4283 double FullDETBufferingTimeC; 4284 double WritebackDRAMClockChangeLatencyMargin; 4285 double WritebackFCLKChangeLatencyMargin; 4286 double WritebackLatencyHiding; 4287 bool SameTimingForFCLKChange; 4288 4289 unsigned int TotalActiveWriteback = 0; 4290 unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; 4291 unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; 4292 4293 v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; 4294 v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency 4295 + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency; 4296 v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark; 4297 
v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark; 4298 v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency 4299 + 10 / DCFClkDeepSleep; 4300 v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency 4301 + 10 / DCFClkDeepSleep; 4302 v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency 4303 + 10 / DCFClkDeepSleep; 4304 v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time 4305 + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep; 4306 4307 #ifdef __DML_VBA_DEBUG__ 4308 dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency); 4309 dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency); 4310 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency); 4311 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark); 4312 dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark); 4313 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark); 4314 dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark); 4315 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark); 4316 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark); 4317 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark); 4318 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", 4319 __func__, v->Watermark.Z8StutterEnterPlusExitWatermark); 4320 #endif 4321 4322 4323 TotalActiveWriteback = 0; 4324 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4325 if (v->WritebackEnable[k] == true) 4326 
TotalActiveWriteback = TotalActiveWriteback + 1; 4327 } 4328 4329 if (TotalActiveWriteback <= 1) { 4330 v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; 4331 } else { 4332 v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency 4333 + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 4334 } 4335 if (v->USRRetrainingRequiredFinal) 4336 v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark 4337 + mmSOCParameters.USRRetrainingLatency; 4338 4339 if (TotalActiveWriteback <= 1) { 4340 v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency 4341 + mmSOCParameters.WritebackLatency; 4342 v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency 4343 + mmSOCParameters.WritebackLatency; 4344 } else { 4345 v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency 4346 + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 4347 v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency 4348 + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK; 4349 } 4350 4351 if (v->USRRetrainingRequiredFinal) 4352 v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark 4353 + mmSOCParameters.USRRetrainingLatency; 4354 4355 if (v->USRRetrainingRequiredFinal) 4356 v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark 4357 + mmSOCParameters.USRRetrainingLatency; 4358 4359 #ifdef __DML_VBA_DEBUG__ 4360 dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", 4361 __func__, v->Watermark.WritebackDRAMClockChangeWatermark); 4362 dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark); 4363 dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark); 4364 dml_print("DML::%s: v->USRRetrainingRequiredFinal = 
%d\n", __func__, v->USRRetrainingRequiredFinal); 4365 dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency); 4366 #endif 4367 4368 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4369 TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + 4370 SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]); 4371 } 4372 4373 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4374 4375 LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); 4376 LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); 4377 4378 4379 #ifdef __DML_VBA_DEBUG__ 4380 dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines); 4381 dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal); 4382 dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]); 4383 dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]); 4384 dml_print("DML::%s: k=%d, v->vtaps = %d\n", __func__, k, v->vtaps[k]); 4385 #endif 4386 4387 EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); 4388 EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); 4389 EffectiveDETBufferSizeY = DETBufferSizeY[k]; 4390 4391 if (UnboundedRequestEnabled) { 4392 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY 4393 + CompressedBufferSizeInkByte * 1024 4394 * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k]) 4395 / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; 4396 } 4397 4398 
LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 4399 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); 4400 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; 4401 4402 ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY 4403 - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k]; 4404 4405 if (v->NumberOfActiveSurfaces > 1) { 4406 ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY 4407 - (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k] 4408 / v->PixelClock[k] / v->VRatio[k]; 4409 } 4410 4411 if (BytePerPixelDETC[k] > 0) { 4412 LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 4413 LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]); 4414 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) 4415 / v->VRatioChroma[k]; 4416 ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC 4417 - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] 4418 / v->PixelClock[k]; 4419 if (v->NumberOfActiveSurfaces > 1) { 4420 ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC 4421 - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k] 4422 / v->PixelClock[k] / v->VRatioChroma[k]; 4423 } 4424 ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY, 4425 ActiveClockChangeLatencyHidingC); 4426 } else { 4427 ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY; 4428 } 4429 4430 ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark 4431 - v->Watermark.DRAMClockChangeWatermark; 4432 ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark 4433 - 
v->Watermark.FCLKChangeWatermark; 4434 USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark; 4435 4436 if (v->WritebackEnable[k]) { 4437 WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024 4438 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 4439 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); 4440 if (v->WritebackPixelFormat[k] == dm_444_64) 4441 WritebackLatencyHiding = WritebackLatencyHiding / 2; 4442 4443 WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding 4444 - v->Watermark.WritebackDRAMClockChangeWatermark; 4445 4446 WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding 4447 - v->Watermark.WritebackFCLKChangeWatermark; 4448 4449 ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k], 4450 WritebackFCLKChangeLatencyMargin); 4451 ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k], 4452 WritebackDRAMClockChangeLatencyMargin); 4453 } 4454 MaxActiveDRAMClockChangeLatencySupported[k] = 4455 (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? 
4456 0 : 4457 (ActiveDRAMClockChangeLatencyMargin[k] 4458 + mmSOCParameters.DRAMClockChangeLatency); 4459 } 4460 4461 for (i = 0; i < v->NumberOfActiveSurfaces; ++i) { 4462 for (j = 0; j < v->NumberOfActiveSurfaces; ++j) { 4463 if (i == j || 4464 (v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) || 4465 (v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) || 4466 (v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) || 4467 (v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] && 4468 v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] && 4469 v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && 4470 (v->DRRDisplay[i] || v->DRRDisplay[j]))) { 4471 SynchronizedSurfaces[i][j] = true; 4472 } else { 4473 SynchronizedSurfaces[i][j] = false; 4474 } 4475 } 4476 } 4477 4478 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4479 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4480 (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin || 4481 ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) { 4482 FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; 4483 MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k]; 4484 SurfaceWithMinActiveFCLKChangeMargin = k; 4485 } 4486 } 4487 4488 *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; 4489 4490 SameTimingForFCLKChange = true; 4491 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4492 if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) { 4493 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4494 (SameTimingForFCLKChange || 4495 ActiveFCLKChangeLatencyMargin[k] < 4496 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { 4497 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k]; 4498 } 4499 SameTimingForFCLKChange = false; 4500 } 4501 } 4502 
4503 if (MinActiveFCLKChangeMargin > 0) { 4504 *FCLKChangeSupport = dm_fclock_change_vactive; 4505 } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && 4506 (PrefetchMode <= 1)) { 4507 *FCLKChangeSupport = dm_fclock_change_vblank; 4508 } else { 4509 *FCLKChangeSupport = dm_fclock_change_unsupported; 4510 } 4511 4512 *USRRetrainingSupport = true; 4513 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4514 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4515 (USRRetrainingLatencyMargin[k] < 0)) { 4516 *USRRetrainingSupport = false; 4517 } 4518 } 4519 4520 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4521 if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame && 4522 v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport && 4523 v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && 4524 ActiveDRAMClockChangeLatencyMargin[k] < 0) { 4525 if (PrefetchMode > 0) { 4526 DRAMClockChangeSupportNumber = 2; 4527 } else if (DRAMClockChangeSupportNumber == 0) { 4528 DRAMClockChangeSupportNumber = 1; 4529 LastSurfaceWithoutMargin = k; 4530 } else if (DRAMClockChangeSupportNumber == 1 && 4531 !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) { 4532 DRAMClockChangeSupportNumber = 2; 4533 } 4534 } 4535 } 4536 4537 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4538 if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) 4539 DRAMClockChangeMethod = 1; 4540 else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) 4541 DRAMClockChangeMethod = 2; 4542 } 4543 4544 if (DRAMClockChangeMethod == 0) { 4545 if (DRAMClockChangeSupportNumber == 0) 4546 *DRAMClockChangeSupport = dm_dram_clock_change_vactive; 4547 else if (DRAMClockChangeSupportNumber == 1) 4548 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 4549 else 4550 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4551 } else if 
(DRAMClockChangeMethod == 1) { 4552 if (DRAMClockChangeSupportNumber == 0) 4553 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame; 4554 else if (DRAMClockChangeSupportNumber == 1) 4555 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame; 4556 else 4557 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4558 } else { 4559 if (DRAMClockChangeSupportNumber == 0) 4560 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp; 4561 else if (DRAMClockChangeSupportNumber == 1) 4562 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp; 4563 else 4564 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4565 } 4566 4567 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4568 unsigned int dst_y_pstate; 4569 unsigned int src_y_pstate_l; 4570 unsigned int src_y_pstate_c; 4571 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c; 4572 4573 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1); 4574 src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]); 4575 src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k]; 4576 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k]; 4577 4578 #ifdef __DML_VBA_DEBUG__ 4579 dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 4580 dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 4581 dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); 4582 dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); 4583 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]); 4584 dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate); 4585 dml_print("DML::%s: k=%d, src_y_pstate_l 
= %d\n", __func__, k, src_y_pstate_l); 4586 dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l); 4587 dml_print("DML::%s: k=%d, v->meta_row_height = %d\n", __func__, k, v->meta_row_height[k]); 4588 dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l); 4589 #endif 4590 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l; 4591 4592 if (BytePerPixelDETC[k] > 0) { 4593 src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]); 4594 src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k]; 4595 sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k]; 4596 SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c); 4597 4598 #ifdef __DML_VBA_DEBUG__ 4599 dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c); 4600 dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c); 4601 dml_print("DML::%s: k=%d, v->meta_row_height_chroma = %d\n", __func__, k, v->meta_row_height_chroma[k]); 4602 dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c); 4603 #endif 4604 } 4605 } 4606 #ifdef __DML_VBA_DEBUG__ 4607 dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport); 4608 dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport); 4609 dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n", 4610 __func__, *MinActiveFCLKChangeLatencySupported); 4611 dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport); 4612 #endif 4613 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport 4614 4615 double dml32_CalculateWriteBackDISPCLK( 4616 enum source_format_class WritebackPixelFormat, 4617 double PixelClock, 4618 double WritebackHRatio, 4619 double WritebackVRatio, 4620 unsigned int WritebackHTaps, 4621 unsigned int WritebackVTaps, 4622 unsigned int 
WritebackSourceWidth, 4623 unsigned int WritebackDestinationWidth, 4624 unsigned int HTotal, 4625 unsigned int WritebackLineBufferSize, 4626 double DISPCLKDPPCLKVCOSpeed) 4627 { 4628 double DISPCLK_H, DISPCLK_V, DISPCLK_HB; 4629 4630 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; 4631 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; 4632 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * 4633 WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; 4634 return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed); 4635 } 4636 4637 void dml32_CalculateMinAndMaxPrefetchMode( 4638 enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal, 4639 unsigned int *MinPrefetchMode, 4640 unsigned int *MaxPrefetchMode) 4641 { 4642 if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) { 4643 *MinPrefetchMode = 3; 4644 *MaxPrefetchMode = 3; 4645 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) { 4646 *MinPrefetchMode = 2; 4647 *MaxPrefetchMode = 2; 4648 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) { 4649 *MinPrefetchMode = 1; 4650 *MaxPrefetchMode = 1; 4651 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) { 4652 *MinPrefetchMode = 0; 4653 *MaxPrefetchMode = 0; 4654 } else { 4655 *MinPrefetchMode = 0; 4656 *MaxPrefetchMode = 3; 4657 } 4658 } // CalculateMinAndMaxPrefetchMode 4659 4660 void dml32_CalculatePixelDeliveryTimes( 4661 unsigned int NumberOfActiveSurfaces, 4662 double VRatio[], 4663 double VRatioChroma[], 4664 double VRatioPrefetchY[], 4665 double VRatioPrefetchC[], 4666 unsigned int swath_width_luma_ub[], 4667 unsigned int swath_width_chroma_ub[], 4668 unsigned int DPPPerSurface[], 4669 double HRatio[], 4670 double HRatioChroma[], 4671 
double PixelClock[], 4672 double PSCL_THROUGHPUT[], 4673 double PSCL_THROUGHPUT_CHROMA[], 4674 double Dppclk[], 4675 unsigned int BytePerPixelC[], 4676 enum dm_rotation_angle SourceRotation[], 4677 unsigned int NumberOfCursors[], 4678 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 4679 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 4680 unsigned int BlockWidth256BytesY[], 4681 unsigned int BlockHeight256BytesY[], 4682 unsigned int BlockWidth256BytesC[], 4683 unsigned int BlockHeight256BytesC[], 4684 4685 /* Output */ 4686 double DisplayPipeLineDeliveryTimeLuma[], 4687 double DisplayPipeLineDeliveryTimeChroma[], 4688 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 4689 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 4690 double DisplayPipeRequestDeliveryTimeLuma[], 4691 double DisplayPipeRequestDeliveryTimeChroma[], 4692 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 4693 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 4694 double CursorRequestDeliveryTime[], 4695 double CursorRequestDeliveryTimePrefetch[]) 4696 { 4697 double req_per_swath_ub; 4698 unsigned int k; 4699 4700 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4701 4702 #ifdef __DML_VBA_DEBUG__ 4703 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); 4704 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); 4705 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); 4706 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); 4707 dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); 4708 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); 4709 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); 4710 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); 4711 dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 4712 
dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]); 4713 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]); 4714 #endif 4715 4716 if (VRatio[k] <= 1) { 4717 DisplayPipeLineDeliveryTimeLuma[k] = 4718 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; 4719 } else { 4720 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 4721 } 4722 4723 if (BytePerPixelC[k] == 0) { 4724 DisplayPipeLineDeliveryTimeChroma[k] = 0; 4725 } else { 4726 if (VRatioChroma[k] <= 1) { 4727 DisplayPipeLineDeliveryTimeChroma[k] = 4728 swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 4729 } else { 4730 DisplayPipeLineDeliveryTimeChroma[k] = 4731 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; 4732 } 4733 } 4734 4735 if (VRatioPrefetchY[k] <= 1) { 4736 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = 4737 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; 4738 } else { 4739 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = 4740 swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 4741 } 4742 4743 if (BytePerPixelC[k] == 0) { 4744 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; 4745 } else { 4746 if (VRatioPrefetchC[k] <= 1) { 4747 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * 4748 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 4749 } else { 4750 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 4751 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; 4752 } 4753 } 4754 #ifdef __DML_VBA_DEBUG__ 4755 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", 4756 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); 4757 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", 4758 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); 4759 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", 4760 __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); 
4761 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", 4762 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); 4763 #endif 4764 } 4765 4766 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4767 if (!IsVertical(SourceRotation[k])) 4768 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; 4769 else 4770 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; 4771 #ifdef __DML_VBA_DEBUG__ 4772 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub); 4773 #endif 4774 4775 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; 4776 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = 4777 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; 4778 if (BytePerPixelC[k] == 0) { 4779 DisplayPipeRequestDeliveryTimeChroma[k] = 0; 4780 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; 4781 } else { 4782 if (!IsVertical(SourceRotation[k])) 4783 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; 4784 else 4785 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; 4786 #ifdef __DML_VBA_DEBUG__ 4787 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub); 4788 #endif 4789 DisplayPipeRequestDeliveryTimeChroma[k] = 4790 DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; 4791 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 4792 DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; 4793 } 4794 #ifdef __DML_VBA_DEBUG__ 4795 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", 4796 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); 4797 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", 4798 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); 4799 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", 4800 __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); 4801 
dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", 4802 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); 4803 #endif 4804 } 4805 4806 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4807 unsigned int cursor_req_per_width; 4808 4809 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] / 4810 256.0 / 8.0, 1.0); 4811 if (NumberOfCursors[k] > 0) { 4812 if (VRatio[k] <= 1) { 4813 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] / 4814 HRatio[k] / PixelClock[k] / cursor_req_per_width; 4815 } else { 4816 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] / 4817 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; 4818 } 4819 if (VRatioPrefetchY[k] <= 1) { 4820 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] / 4821 HRatio[k] / PixelClock[k] / cursor_req_per_width; 4822 } else { 4823 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] / 4824 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; 4825 } 4826 } else { 4827 CursorRequestDeliveryTime[k] = 0; 4828 CursorRequestDeliveryTimePrefetch[k] = 0; 4829 } 4830 #ifdef __DML_VBA_DEBUG__ 4831 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", 4832 __func__, k, NumberOfCursors[k]); 4833 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", 4834 __func__, k, CursorRequestDeliveryTime[k]); 4835 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", 4836 __func__, k, CursorRequestDeliveryTimePrefetch[k]); 4837 #endif 4838 } 4839 } // CalculatePixelDeliveryTimes 4840 4841 void dml32_CalculateMetaAndPTETimes( 4842 bool use_one_row_for_frame[], 4843 unsigned int NumberOfActiveSurfaces, 4844 bool GPUVMEnable, 4845 unsigned int MetaChunkSize, 4846 unsigned int MinMetaChunkSizeBytes, 4847 unsigned int HTotal[], 4848 double VRatio[], 4849 double VRatioChroma[], 4850 double DestinationLinesToRequestRowInVBlank[], 4851 double DestinationLinesToRequestRowInImmediateFlip[], 4852 bool 
DCCEnable[], 4853 double PixelClock[], 4854 unsigned int BytePerPixelY[], 4855 unsigned int BytePerPixelC[], 4856 enum dm_rotation_angle SourceRotation[], 4857 unsigned int dpte_row_height[], 4858 unsigned int dpte_row_height_chroma[], 4859 unsigned int meta_row_width[], 4860 unsigned int meta_row_width_chroma[], 4861 unsigned int meta_row_height[], 4862 unsigned int meta_row_height_chroma[], 4863 unsigned int meta_req_width[], 4864 unsigned int meta_req_width_chroma[], 4865 unsigned int meta_req_height[], 4866 unsigned int meta_req_height_chroma[], 4867 unsigned int dpte_group_bytes[], 4868 unsigned int PTERequestSizeY[], 4869 unsigned int PTERequestSizeC[], 4870 unsigned int PixelPTEReqWidthY[], 4871 unsigned int PixelPTEReqHeightY[], 4872 unsigned int PixelPTEReqWidthC[], 4873 unsigned int PixelPTEReqHeightC[], 4874 unsigned int dpte_row_width_luma_ub[], 4875 unsigned int dpte_row_width_chroma_ub[], 4876 4877 /* Output */ 4878 double DST_Y_PER_PTE_ROW_NOM_L[], 4879 double DST_Y_PER_PTE_ROW_NOM_C[], 4880 double DST_Y_PER_META_ROW_NOM_L[], 4881 double DST_Y_PER_META_ROW_NOM_C[], 4882 double TimePerMetaChunkNominal[], 4883 double TimePerChromaMetaChunkNominal[], 4884 double TimePerMetaChunkVBlank[], 4885 double TimePerChromaMetaChunkVBlank[], 4886 double TimePerMetaChunkFlip[], 4887 double TimePerChromaMetaChunkFlip[], 4888 double time_per_pte_group_nom_luma[], 4889 double time_per_pte_group_vblank_luma[], 4890 double time_per_pte_group_flip_luma[], 4891 double time_per_pte_group_nom_chroma[], 4892 double time_per_pte_group_vblank_chroma[], 4893 double time_per_pte_group_flip_chroma[]) 4894 { 4895 unsigned int meta_chunk_width; 4896 unsigned int min_meta_chunk_width; 4897 unsigned int meta_chunk_per_row_int; 4898 unsigned int meta_row_remainder; 4899 unsigned int meta_chunk_threshold; 4900 unsigned int meta_chunks_per_row_ub; 4901 unsigned int meta_chunk_width_chroma; 4902 unsigned int min_meta_chunk_width_chroma; 4903 unsigned int meta_chunk_per_row_int_chroma; 
4904 unsigned int meta_row_remainder_chroma; 4905 unsigned int meta_chunk_threshold_chroma; 4906 unsigned int meta_chunks_per_row_ub_chroma; 4907 unsigned int dpte_group_width_luma; 4908 unsigned int dpte_groups_per_row_luma_ub; 4909 unsigned int dpte_group_width_chroma; 4910 unsigned int dpte_groups_per_row_chroma_ub; 4911 unsigned int k; 4912 4913 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4914 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; 4915 if (BytePerPixelC[k] == 0) 4916 DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 4917 else 4918 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; 4919 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; 4920 if (BytePerPixelC[k] == 0) 4921 DST_Y_PER_META_ROW_NOM_C[k] = 0; 4922 else 4923 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; 4924 } 4925 4926 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4927 if (DCCEnable[k] == true) { 4928 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; 4929 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; 4930 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; 4931 meta_row_remainder = meta_row_width[k] % meta_chunk_width; 4932 if (!IsVertical(SourceRotation[k])) 4933 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; 4934 else 4935 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; 4936 4937 if (meta_row_remainder <= meta_chunk_threshold) 4938 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 4939 else 4940 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 4941 4942 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * 4943 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 4944 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * 4945 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 4946 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 4947 
HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 4948 if (BytePerPixelC[k] == 0) { 4949 TimePerChromaMetaChunkNominal[k] = 0; 4950 TimePerChromaMetaChunkVBlank[k] = 0; 4951 TimePerChromaMetaChunkFlip[k] = 0; 4952 } else { 4953 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / 4954 meta_row_height_chroma[k]; 4955 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / 4956 meta_row_height_chroma[k]; 4957 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / 4958 meta_chunk_width_chroma; 4959 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; 4960 if (!IsVertical(SourceRotation[k])) { 4961 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - 4962 meta_req_width_chroma[k]; 4963 } else { 4964 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - 4965 meta_req_height_chroma[k]; 4966 } 4967 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) 4968 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; 4969 else 4970 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; 4971 4972 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * 4973 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 4974 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * 4975 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 4976 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 4977 HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 4978 } 4979 } else { 4980 TimePerMetaChunkNominal[k] = 0; 4981 TimePerMetaChunkVBlank[k] = 0; 4982 TimePerMetaChunkFlip[k] = 0; 4983 TimePerChromaMetaChunkNominal[k] = 0; 4984 TimePerChromaMetaChunkVBlank[k] = 0; 4985 TimePerChromaMetaChunkFlip[k] = 0; 4986 } 4987 } 4988 4989 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4990 if (GPUVMEnable == true) { 4991 if (!IsVertical(SourceRotation[k])) { 4992 
dpte_group_width_luma = (double) dpte_group_bytes[k] / 4993 (double) PTERequestSizeY[k] * PixelPTEReqWidthY[k]; 4994 } else { 4995 dpte_group_width_luma = (double) dpte_group_bytes[k] / 4996 (double) PTERequestSizeY[k] * PixelPTEReqHeightY[k]; 4997 } 4998 4999 if (use_one_row_for_frame[k]) { 5000 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] / 5001 (double) dpte_group_width_luma / 2.0, 1.0); 5002 } else { 5003 dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] / 5004 (double) dpte_group_width_luma, 1.0); 5005 } 5006 #ifdef __DML_VBA_DEBUG__ 5007 dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n", 5008 __func__, k, use_one_row_for_frame[k]); 5009 dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n", 5010 __func__, k, dpte_group_bytes[k]); 5011 dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n", 5012 __func__, k, PTERequestSizeY[k]); 5013 dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n", 5014 __func__, k, PixelPTEReqWidthY[k]); 5015 dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n", 5016 __func__, k, PixelPTEReqHeightY[k]); 5017 dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n", 5018 __func__, k, dpte_row_width_luma_ub[k]); 5019 dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n", 5020 __func__, k, dpte_group_width_luma); 5021 dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n", 5022 __func__, k, dpte_groups_per_row_luma_ub); 5023 #endif 5024 5025 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * 5026 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5027 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * 5028 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5029 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 5030 HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 5031 if (BytePerPixelC[k] == 0) { 5032 time_per_pte_group_nom_chroma[k] = 0; 5033 time_per_pte_group_vblank_chroma[k] = 0; 
5034 time_per_pte_group_flip_chroma[k] = 0; 5035 } else { 5036 if (!IsVertical(SourceRotation[k])) { 5037 dpte_group_width_chroma = (double) dpte_group_bytes[k] / 5038 (double) PTERequestSizeC[k] * PixelPTEReqWidthC[k]; 5039 } else { 5040 dpte_group_width_chroma = (double) dpte_group_bytes[k] / 5041 (double) PTERequestSizeC[k] * PixelPTEReqHeightC[k]; 5042 } 5043 5044 if (use_one_row_for_frame[k]) { 5045 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] / 5046 (double) dpte_group_width_chroma / 2.0, 1.0); 5047 } else { 5048 dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] / 5049 (double) dpte_group_width_chroma, 1.0); 5050 } 5051 #ifdef __DML_VBA_DEBUG__ 5052 dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n", 5053 __func__, k, dpte_row_width_chroma_ub[k]); 5054 dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n", 5055 __func__, k, dpte_group_width_chroma); 5056 dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n", 5057 __func__, k, dpte_groups_per_row_chroma_ub); 5058 #endif 5059 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * 5060 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 5061 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * 5062 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 5063 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * 5064 HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 5065 } 5066 } else { 5067 time_per_pte_group_nom_luma[k] = 0; 5068 time_per_pte_group_vblank_luma[k] = 0; 5069 time_per_pte_group_flip_luma[k] = 0; 5070 time_per_pte_group_nom_chroma[k] = 0; 5071 time_per_pte_group_vblank_chroma[k] = 0; 5072 time_per_pte_group_flip_chroma[k] = 0; 5073 } 5074 #ifdef __DML_VBA_DEBUG__ 5075 dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n", 5076 __func__, k, DestinationLinesToRequestRowInVBlank[k]); 5077 dml_print("DML::%s: k=%0d, 
DestinationLinesToRequestRowInImmediateFlip = %f\n", 5078 __func__, k, DestinationLinesToRequestRowInImmediateFlip[k]); 5079 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n", 5080 __func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]); 5081 dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n", 5082 __func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]); 5083 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n", 5084 __func__, k, DST_Y_PER_META_ROW_NOM_L[k]); 5085 dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n", 5086 __func__, k, DST_Y_PER_META_ROW_NOM_C[k]); 5087 dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n", 5088 __func__, k, TimePerMetaChunkNominal[k]); 5089 dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n", 5090 __func__, k, TimePerMetaChunkVBlank[k]); 5091 dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n", 5092 __func__, k, TimePerMetaChunkFlip[k]); 5093 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n", 5094 __func__, k, TimePerChromaMetaChunkNominal[k]); 5095 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n", 5096 __func__, k, TimePerChromaMetaChunkVBlank[k]); 5097 dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n", 5098 __func__, k, TimePerChromaMetaChunkFlip[k]); 5099 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n", 5100 __func__, k, time_per_pte_group_nom_luma[k]); 5101 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n", 5102 __func__, k, time_per_pte_group_vblank_luma[k]); 5103 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n", 5104 __func__, k, time_per_pte_group_flip_luma[k]); 5105 dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n", 5106 __func__, k, time_per_pte_group_nom_chroma[k]); 5107 dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n", 5108 __func__, k, time_per_pte_group_vblank_chroma[k]); 5109 dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n", 5110 __func__, k, 
time_per_pte_group_flip_chroma[k]); 5111 #endif 5112 } 5113 } // CalculateMetaAndPTETimes 5114 5115 void dml32_CalculateVMGroupAndRequestTimes( 5116 unsigned int NumberOfActiveSurfaces, 5117 bool GPUVMEnable, 5118 unsigned int GPUVMMaxPageTableLevels, 5119 unsigned int HTotal[], 5120 unsigned int BytePerPixelC[], 5121 double DestinationLinesToRequestVMInVBlank[], 5122 double DestinationLinesToRequestVMInImmediateFlip[], 5123 bool DCCEnable[], 5124 double PixelClock[], 5125 unsigned int dpte_row_width_luma_ub[], 5126 unsigned int dpte_row_width_chroma_ub[], 5127 unsigned int vm_group_bytes[], 5128 unsigned int dpde0_bytes_per_frame_ub_l[], 5129 unsigned int dpde0_bytes_per_frame_ub_c[], 5130 unsigned int meta_pte_bytes_per_frame_ub_l[], 5131 unsigned int meta_pte_bytes_per_frame_ub_c[], 5132 5133 /* Output */ 5134 double TimePerVMGroupVBlank[], 5135 double TimePerVMGroupFlip[], 5136 double TimePerVMRequestVBlank[], 5137 double TimePerVMRequestFlip[]) 5138 { 5139 unsigned int k; 5140 unsigned int num_group_per_lower_vm_stage; 5141 unsigned int num_req_per_lower_vm_stage; 5142 5143 #ifdef __DML_VBA_DEBUG__ 5144 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 5145 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 5146 #endif 5147 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5148 5149 #ifdef __DML_VBA_DEBUG__ 5150 dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]); 5151 dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]); 5152 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n", 5153 __func__, k, dpde0_bytes_per_frame_ub_l[k]); 5154 dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n", 5155 __func__, k, dpde0_bytes_per_frame_ub_c[k]); 5156 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n", 5157 __func__, k, meta_pte_bytes_per_frame_ub_l[k]); 5158 dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n", 5159 __func__, k, 
meta_pte_bytes_per_frame_ub_c[k]); 5160 #endif 5161 5162 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { 5163 if (DCCEnable[k] == false) { 5164 if (BytePerPixelC[k] > 0) { 5165 num_group_per_lower_vm_stage = dml_ceil( 5166 (double) (dpde0_bytes_per_frame_ub_l[k]) / 5167 (double) (vm_group_bytes[k]), 1.0) + 5168 dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / 5169 (double) (vm_group_bytes[k]), 1.0); 5170 } else { 5171 num_group_per_lower_vm_stage = dml_ceil( 5172 (double) (dpde0_bytes_per_frame_ub_l[k]) / 5173 (double) (vm_group_bytes[k]), 1.0); 5174 } 5175 } else { 5176 if (GPUVMMaxPageTableLevels == 1) { 5177 if (BytePerPixelC[k] > 0) { 5178 num_group_per_lower_vm_stage = dml_ceil( 5179 (double) (meta_pte_bytes_per_frame_ub_l[k]) / 5180 (double) (vm_group_bytes[k]), 1.0) + 5181 dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / 5182 (double) (vm_group_bytes[k]), 1.0); 5183 } else { 5184 num_group_per_lower_vm_stage = dml_ceil( 5185 (double) (meta_pte_bytes_per_frame_ub_l[k]) / 5186 (double) (vm_group_bytes[k]), 1.0); 5187 } 5188 } else { 5189 if (BytePerPixelC[k] > 0) { 5190 num_group_per_lower_vm_stage = 2 + dml_ceil( 5191 (double) (dpde0_bytes_per_frame_ub_l[k]) / 5192 (double) (vm_group_bytes[k]), 1) + 5193 dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / 5194 (double) (vm_group_bytes[k]), 1) + 5195 dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / 5196 (double) (vm_group_bytes[k]), 1) + 5197 dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / 5198 (double) (vm_group_bytes[k]), 1); 5199 } else { 5200 num_group_per_lower_vm_stage = 1 + dml_ceil( 5201 (double) (dpde0_bytes_per_frame_ub_l[k]) / 5202 (double) (vm_group_bytes[k]), 1) + dml_ceil( 5203 (double) (meta_pte_bytes_per_frame_ub_l[k]) / 5204 (double) (vm_group_bytes[k]), 1); 5205 } 5206 } 5207 } 5208 5209 if (DCCEnable[k] == false) { 5210 if (BytePerPixelC[k] > 0) { 5211 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + 5212 
dpde0_bytes_per_frame_ub_c[k] / 64; 5213 } else { 5214 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64; 5215 } 5216 } else { 5217 if (GPUVMMaxPageTableLevels == 1) { 5218 if (BytePerPixelC[k] > 0) { 5219 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + 5220 meta_pte_bytes_per_frame_ub_c[k] / 64; 5221 } else { 5222 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64; 5223 } 5224 } else { 5225 if (BytePerPixelC[k] > 0) { 5226 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 5227 64 + dpde0_bytes_per_frame_ub_c[k] / 64 + 5228 meta_pte_bytes_per_frame_ub_l[k] / 64 + 5229 meta_pte_bytes_per_frame_ub_c[k] / 64; 5230 } else { 5231 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 5232 64 + meta_pte_bytes_per_frame_ub_l[k] / 64; 5233 } 5234 } 5235 } 5236 5237 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * 5238 HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 5239 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * 5240 HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 5241 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * 5242 HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 5243 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * 5244 HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 5245 5246 if (GPUVMMaxPageTableLevels > 2) { 5247 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; 5248 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; 5249 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; 5250 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; 5251 } 5252 5253 } else { 5254 TimePerVMGroupVBlank[k] = 0; 5255 TimePerVMGroupFlip[k] = 0; 5256 TimePerVMRequestVBlank[k] = 0; 5257 TimePerVMRequestFlip[k] = 0; 5258 } 5259 5260 #ifdef __DML_VBA_DEBUG__ 5261 dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]); 5262 dml_print("DML::%s: 
k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]); 5263 dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]); 5264 dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]); 5265 #endif 5266 } 5267 } // CalculateVMGroupAndRequestTimes 5268 5269 void dml32_CalculateDCCConfiguration( 5270 bool DCCEnabled, 5271 bool DCCProgrammingAssumesScanDirectionUnknown, 5272 enum source_format_class SourcePixelFormat, 5273 unsigned int SurfaceWidthLuma, 5274 unsigned int SurfaceWidthChroma, 5275 unsigned int SurfaceHeightLuma, 5276 unsigned int SurfaceHeightChroma, 5277 unsigned int nomDETInKByte, 5278 unsigned int RequestHeight256ByteLuma, 5279 unsigned int RequestHeight256ByteChroma, 5280 enum dm_swizzle_mode TilingFormat, 5281 unsigned int BytePerPixelY, 5282 unsigned int BytePerPixelC, 5283 double BytePerPixelDETY, 5284 double BytePerPixelDETC, 5285 enum dm_rotation_angle SourceRotation, 5286 /* Output */ 5287 unsigned int *MaxUncompressedBlockLuma, 5288 unsigned int *MaxUncompressedBlockChroma, 5289 unsigned int *MaxCompressedBlockLuma, 5290 unsigned int *MaxCompressedBlockChroma, 5291 unsigned int *IndependentBlockLuma, 5292 unsigned int *IndependentBlockChroma) 5293 { 5294 typedef enum { 5295 REQ_256Bytes, 5296 REQ_128BytesNonContiguous, 5297 REQ_128BytesContiguous, 5298 REQ_NA 5299 } RequestType; 5300 5301 RequestType RequestLuma; 5302 RequestType RequestChroma; 5303 5304 unsigned int segment_order_horz_contiguous_luma; 5305 unsigned int segment_order_horz_contiguous_chroma; 5306 unsigned int segment_order_vert_contiguous_luma; 5307 unsigned int segment_order_vert_contiguous_chroma; 5308 unsigned int req128_horz_wc_l; 5309 unsigned int req128_horz_wc_c; 5310 unsigned int req128_vert_wc_l; 5311 unsigned int req128_vert_wc_c; 5312 unsigned int MAS_vp_horz_limit; 5313 unsigned int MAS_vp_vert_limit; 5314 unsigned int max_vp_horz_width; 5315 unsigned int max_vp_vert_height; 
5316 unsigned int eff_surf_width_l; 5317 unsigned int eff_surf_width_c; 5318 unsigned int eff_surf_height_l; 5319 unsigned int eff_surf_height_c; 5320 unsigned int full_swath_bytes_horz_wc_l; 5321 unsigned int full_swath_bytes_horz_wc_c; 5322 unsigned int full_swath_bytes_vert_wc_l; 5323 unsigned int full_swath_bytes_vert_wc_c; 5324 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024; 5325 5326 unsigned int yuv420; 5327 unsigned int horz_div_l; 5328 unsigned int horz_div_c; 5329 unsigned int vert_div_l; 5330 unsigned int vert_div_c; 5331 5332 unsigned int swath_buf_size; 5333 double detile_buf_vp_horz_limit; 5334 double detile_buf_vp_vert_limit; 5335 5336 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || 5337 SourcePixelFormat == dm_420_12) ? 1 : 0); 5338 horz_div_l = 1; 5339 horz_div_c = 1; 5340 vert_div_l = 1; 5341 vert_div_c = 1; 5342 5343 if (BytePerPixelY == 1) 5344 vert_div_l = 0; 5345 if (BytePerPixelC == 1) 5346 vert_div_c = 0; 5347 5348 if (BytePerPixelC == 0) { 5349 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256; 5350 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * 5351 BytePerPixelY / (1 + horz_div_l)); 5352 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / 5353 (1 + vert_div_l)); 5354 } else { 5355 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256; 5356 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * 5357 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma * 5358 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); 5359 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / 5360 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / 5361 (1 + vert_div_c) / (1 + yuv420)); 5362 } 5363 5364 if (SourcePixelFormat == dm_420_10) { 5365 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; 5366 detile_buf_vp_vert_limit = 1.5 * 
detile_buf_vp_vert_limit; 5367 } 5368 5369 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); 5370 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); 5371 5372 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144; 5373 MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144); 5374 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); 5375 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); 5376 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); 5377 eff_surf_width_c = eff_surf_width_l / (1 + yuv420); 5378 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); 5379 eff_surf_height_c = eff_surf_height_l / (1 + yuv420); 5380 5381 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; 5382 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; 5383 if (BytePerPixelC > 0) { 5384 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; 5385 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; 5386 } else { 5387 full_swath_bytes_horz_wc_c = 0; 5388 full_swath_bytes_vert_wc_c = 0; 5389 } 5390 5391 if (SourcePixelFormat == dm_420_10) { 5392 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0); 5393 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0); 5394 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0); 5395 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0); 5396 } 5397 5398 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5399 req128_horz_wc_l = 0; 5400 req128_horz_wc_c = 0; 5401 } 
else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + 5402 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5403 req128_horz_wc_l = 0; 5404 req128_horz_wc_c = 1; 5405 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * 5406 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5407 req128_horz_wc_l = 1; 5408 req128_horz_wc_c = 0; 5409 } else { 5410 req128_horz_wc_l = 1; 5411 req128_horz_wc_c = 1; 5412 } 5413 5414 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5415 req128_vert_wc_l = 0; 5416 req128_vert_wc_c = 0; 5417 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * 5418 full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5419 req128_vert_wc_l = 0; 5420 req128_vert_wc_c = 1; 5421 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && 5422 full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5423 req128_vert_wc_l = 1; 5424 req128_vert_wc_c = 0; 5425 } else { 5426 req128_vert_wc_l = 1; 5427 req128_vert_wc_c = 1; 5428 } 5429 5430 if (BytePerPixelY == 2) { 5431 segment_order_horz_contiguous_luma = 0; 5432 segment_order_vert_contiguous_luma = 1; 5433 } else { 5434 segment_order_horz_contiguous_luma = 1; 5435 segment_order_vert_contiguous_luma = 0; 5436 } 5437 5438 if (BytePerPixelC == 2) { 5439 segment_order_horz_contiguous_chroma = 0; 5440 segment_order_vert_contiguous_chroma = 1; 5441 } else { 5442 segment_order_horz_contiguous_chroma = 1; 5443 segment_order_vert_contiguous_chroma = 0; 5444 } 5445 #ifdef __DML_VBA_DEBUG__ 5446 dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled); 5447 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 5448 dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC); 5449 dml_print("DML::%s: req128_horz_wc_l = %d\n", 
__func__, req128_horz_wc_l); 5450 dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c); 5451 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l); 5452 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c); 5453 dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma); 5454 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n", 5455 __func__, segment_order_horz_contiguous_chroma); 5456 #endif 5457 5458 if (DCCProgrammingAssumesScanDirectionUnknown == true) { 5459 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) 5460 RequestLuma = REQ_256Bytes; 5461 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || 5462 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) 5463 RequestLuma = REQ_128BytesNonContiguous; 5464 else 5465 RequestLuma = REQ_128BytesContiguous; 5466 5467 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) 5468 RequestChroma = REQ_256Bytes; 5469 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || 5470 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) 5471 RequestChroma = REQ_128BytesNonContiguous; 5472 else 5473 RequestChroma = REQ_128BytesContiguous; 5474 5475 } else if (!IsVertical(SourceRotation)) { 5476 if (req128_horz_wc_l == 0) 5477 RequestLuma = REQ_256Bytes; 5478 else if (segment_order_horz_contiguous_luma == 0) 5479 RequestLuma = REQ_128BytesNonContiguous; 5480 else 5481 RequestLuma = REQ_128BytesContiguous; 5482 5483 if (req128_horz_wc_c == 0) 5484 RequestChroma = REQ_256Bytes; 5485 else if (segment_order_horz_contiguous_chroma == 0) 5486 RequestChroma = REQ_128BytesNonContiguous; 5487 else 5488 RequestChroma = REQ_128BytesContiguous; 5489 5490 } else { 5491 if (req128_vert_wc_l == 0) 5492 RequestLuma = REQ_256Bytes; 5493 else if (segment_order_vert_contiguous_luma == 0) 5494 RequestLuma = 
REQ_128BytesNonContiguous; 5495 else 5496 RequestLuma = REQ_128BytesContiguous; 5497 5498 if (req128_vert_wc_c == 0) 5499 RequestChroma = REQ_256Bytes; 5500 else if (segment_order_vert_contiguous_chroma == 0) 5501 RequestChroma = REQ_128BytesNonContiguous; 5502 else 5503 RequestChroma = REQ_128BytesContiguous; 5504 } 5505 5506 if (RequestLuma == REQ_256Bytes) { 5507 *MaxUncompressedBlockLuma = 256; 5508 *MaxCompressedBlockLuma = 256; 5509 *IndependentBlockLuma = 0; 5510 } else if (RequestLuma == REQ_128BytesContiguous) { 5511 *MaxUncompressedBlockLuma = 256; 5512 *MaxCompressedBlockLuma = 128; 5513 *IndependentBlockLuma = 128; 5514 } else { 5515 *MaxUncompressedBlockLuma = 256; 5516 *MaxCompressedBlockLuma = 64; 5517 *IndependentBlockLuma = 64; 5518 } 5519 5520 if (RequestChroma == REQ_256Bytes) { 5521 *MaxUncompressedBlockChroma = 256; 5522 *MaxCompressedBlockChroma = 256; 5523 *IndependentBlockChroma = 0; 5524 } else if (RequestChroma == REQ_128BytesContiguous) { 5525 *MaxUncompressedBlockChroma = 256; 5526 *MaxCompressedBlockChroma = 128; 5527 *IndependentBlockChroma = 128; 5528 } else { 5529 *MaxUncompressedBlockChroma = 256; 5530 *MaxCompressedBlockChroma = 64; 5531 *IndependentBlockChroma = 64; 5532 } 5533 5534 if (DCCEnabled != true || BytePerPixelC == 0) { 5535 *MaxUncompressedBlockChroma = 0; 5536 *MaxCompressedBlockChroma = 0; 5537 *IndependentBlockChroma = 0; 5538 } 5539 5540 if (DCCEnabled != true) { 5541 *MaxUncompressedBlockLuma = 0; 5542 *MaxCompressedBlockLuma = 0; 5543 *IndependentBlockLuma = 0; 5544 } 5545 5546 #ifdef __DML_VBA_DEBUG__ 5547 dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma); 5548 dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma); 5549 dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma); 5550 dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma); 5551 dml_print("DML::%s: 
MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma); 5552 dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma); 5553 #endif 5554 5555 } // CalculateDCCConfiguration 5556 5557 void dml32_CalculateStutterEfficiency( 5558 unsigned int CompressedBufferSizeInkByte, 5559 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 5560 bool UnboundedRequestEnabled, 5561 unsigned int MetaFIFOSizeInKEntries, 5562 unsigned int ZeroSizeBufferEntries, 5563 unsigned int PixelChunkSizeInKByte, 5564 unsigned int NumberOfActiveSurfaces, 5565 unsigned int ROBBufferSizeInKByte, 5566 double TotalDataReadBandwidth, 5567 double DCFCLK, 5568 double ReturnBW, 5569 unsigned int CompbufReservedSpace64B, 5570 unsigned int CompbufReservedSpaceZs, 5571 double SRExitTime, 5572 double SRExitZ8Time, 5573 bool SynchronizeTimingsFinal, 5574 unsigned int BlendingAndTiming[], 5575 double StutterEnterPlusExitWatermark, 5576 double Z8StutterEnterPlusExitWatermark, 5577 bool ProgressiveToInterlaceUnitInOPP, 5578 bool Interlace[], 5579 double MinTTUVBlank[], 5580 unsigned int DPPPerSurface[], 5581 unsigned int DETBufferSizeY[], 5582 unsigned int BytePerPixelY[], 5583 double BytePerPixelDETY[], 5584 double SwathWidthY[], 5585 unsigned int SwathHeightY[], 5586 unsigned int SwathHeightC[], 5587 double NetDCCRateLuma[], 5588 double NetDCCRateChroma[], 5589 double DCCFractionOfZeroSizeRequestsLuma[], 5590 double DCCFractionOfZeroSizeRequestsChroma[], 5591 unsigned int HTotal[], 5592 unsigned int VTotal[], 5593 double PixelClock[], 5594 double VRatio[], 5595 enum dm_rotation_angle SourceRotation[], 5596 unsigned int BlockHeight256BytesY[], 5597 unsigned int BlockWidth256BytesY[], 5598 unsigned int BlockHeight256BytesC[], 5599 unsigned int BlockWidth256BytesC[], 5600 unsigned int DCCYMaxUncompressedBlock[], 5601 unsigned int DCCCMaxUncompressedBlock[], 5602 unsigned int VActive[], 5603 bool DCCEnable[], 5604 bool WritebackEnable[], 5605 double 
ReadBandwidthSurfaceLuma[], 5606 double ReadBandwidthSurfaceChroma[], 5607 double meta_row_bw[], 5608 double dpte_row_bw[], 5609 5610 /* Output */ 5611 double *StutterEfficiencyNotIncludingVBlank, 5612 double *StutterEfficiency, 5613 unsigned int *NumberOfStutterBurstsPerFrame, 5614 double *Z8StutterEfficiencyNotIncludingVBlank, 5615 double *Z8StutterEfficiency, 5616 unsigned int *Z8NumberOfStutterBurstsPerFrame, 5617 double *StutterPeriod, 5618 bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE) 5619 { 5620 5621 bool FoundCriticalSurface = false; 5622 unsigned int SwathSizeCriticalSurface = 0; 5623 unsigned int LastChunkOfSwathSize; 5624 unsigned int MissingPartOfLastSwathOfDETSize; 5625 double LastZ8StutterPeriod = 0.0; 5626 double LastStutterPeriod = 0.0; 5627 unsigned int TotalNumberOfActiveOTG = 0; 5628 double doublePixelClock; 5629 unsigned int doubleHTotal; 5630 unsigned int doubleVTotal; 5631 bool SameTiming = true; 5632 double DETBufferingTimeY; 5633 double SwathWidthYCriticalSurface = 0.0; 5634 double SwathHeightYCriticalSurface = 0.0; 5635 double VActiveTimeCriticalSurface = 0.0; 5636 double FrameTimeCriticalSurface = 0.0; 5637 unsigned int BytePerPixelYCriticalSurface = 0; 5638 double LinesToFinishSwathTransferStutterCriticalSurface = 0.0; 5639 unsigned int DETBufferSizeYCriticalSurface = 0; 5640 double MinTTUVBlankCriticalSurface = 0.0; 5641 unsigned int BlockWidth256BytesYCriticalSurface = 0; 5642 bool doublePlaneCriticalSurface = 0; 5643 bool doublePipeCriticalSurface = 0; 5644 double TotalCompressedReadBandwidth; 5645 double TotalRowReadBandwidth; 5646 double AverageDCCCompressionRate; 5647 double EffectiveCompressedBufferSize; 5648 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; 5649 double StutterBurstTime; 5650 unsigned int TotalActiveWriteback; 5651 double LinesInDETY; 5652 double LinesInDETYRoundedDownToSwath; 5653 double MaximumEffectiveCompressionLuma; 5654 double MaximumEffectiveCompressionChroma; 5655 double 
TotalZeroSizeRequestReadBandwidth; 5656 double TotalZeroSizeCompressedReadBandwidth; 5657 double AverageDCCZeroSizeFraction; 5658 double AverageZeroSizeCompressionRate; 5659 unsigned int k; 5660 5661 TotalZeroSizeRequestReadBandwidth = 0; 5662 TotalZeroSizeCompressedReadBandwidth = 0; 5663 TotalRowReadBandwidth = 0; 5664 TotalCompressedReadBandwidth = 0; 5665 5666 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5667 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5668 if (DCCEnable[k] == true) { 5669 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k]) 5670 || (!IsVertical(SourceRotation[k]) 5671 && BlockHeight256BytesY[k] > SwathHeightY[k]) 5672 || DCCYMaxUncompressedBlock[k] < 256) { 5673 MaximumEffectiveCompressionLuma = 2; 5674 } else { 5675 MaximumEffectiveCompressionLuma = 4; 5676 } 5677 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 5678 + ReadBandwidthSurfaceLuma[k] 5679 / dml_min(NetDCCRateLuma[k], 5680 MaximumEffectiveCompressionLuma); 5681 #ifdef __DML_VBA_DEBUG__ 5682 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", 5683 __func__, k, ReadBandwidthSurfaceLuma[k]); 5684 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n", 5685 __func__, k, NetDCCRateLuma[k]); 5686 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n", 5687 __func__, k, MaximumEffectiveCompressionLuma); 5688 #endif 5689 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth 5690 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; 5691 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 5692 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] 5693 / MaximumEffectiveCompressionLuma; 5694 5695 if (ReadBandwidthSurfaceChroma[k] > 0) { 5696 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k]) 5697 || (!IsVertical(SourceRotation[k]) 5698 && BlockHeight256BytesC[k] > SwathHeightC[k]) 5699 || 
DCCCMaxUncompressedBlock[k] < 256) { 5700 MaximumEffectiveCompressionChroma = 2; 5701 } else { 5702 MaximumEffectiveCompressionChroma = 4; 5703 } 5704 TotalCompressedReadBandwidth = 5705 TotalCompressedReadBandwidth 5706 + ReadBandwidthSurfaceChroma[k] 5707 / dml_min(NetDCCRateChroma[k], 5708 MaximumEffectiveCompressionChroma); 5709 #ifdef __DML_VBA_DEBUG__ 5710 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n", 5711 __func__, k, ReadBandwidthSurfaceChroma[k]); 5712 dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n", 5713 __func__, k, NetDCCRateChroma[k]); 5714 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n", 5715 __func__, k, MaximumEffectiveCompressionChroma); 5716 #endif 5717 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth 5718 + ReadBandwidthSurfaceChroma[k] 5719 * DCCFractionOfZeroSizeRequestsChroma[k]; 5720 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 5721 + ReadBandwidthSurfaceChroma[k] 5722 * DCCFractionOfZeroSizeRequestsChroma[k] 5723 / MaximumEffectiveCompressionChroma; 5724 } 5725 } else { 5726 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 5727 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k]; 5728 } 5729 TotalRowReadBandwidth = TotalRowReadBandwidth 5730 + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]); 5731 } 5732 } 5733 5734 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; 5735 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; 5736 5737 #ifdef __DML_VBA_DEBUG__ 5738 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); 5739 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); 5740 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); 5741 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", 5742 __func__, 
TotalZeroSizeCompressedReadBandwidth); 5743 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); 5744 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); 5745 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 5746 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); 5747 dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B); 5748 dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs); 5749 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); 5750 #endif 5751 if (AverageDCCZeroSizeFraction == 1) { 5752 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth 5753 / TotalZeroSizeCompressedReadBandwidth; 5754 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64 5755 * AverageZeroSizeCompressionRate 5756 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 5757 * AverageZeroSizeCompressionRate; 5758 } else if (AverageDCCZeroSizeFraction > 0) { 5759 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth 5760 / TotalZeroSizeCompressedReadBandwidth; 5761 EffectiveCompressedBufferSize = dml_min( 5762 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 5763 (double) MetaFIFOSizeInKEntries * 1024 * 64 5764 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate 5765 + 1 / AverageDCCCompressionRate)) 5766 + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) 5767 * AverageDCCCompressionRate, 5768 ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 5769 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 5770 5771 #ifdef __DML_VBA_DEBUG__ 5772 dml_print("DML::%s: min 1 = %f\n", __func__, 5773 CompressedBufferSizeInkByte * 1024 * 
AverageDCCCompressionRate); 5774 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 / 5775 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / 5776 AverageDCCCompressionRate)); 5777 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 - 5778 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate); 5779 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 / 5780 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 5781 #endif 5782 } else { 5783 EffectiveCompressedBufferSize = dml_min( 5784 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 5785 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) 5786 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) 5787 * AverageDCCCompressionRate; 5788 5789 #ifdef __DML_VBA_DEBUG__ 5790 dml_print("DML::%s: min 1 = %f\n", __func__, 5791 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 5792 dml_print("DML::%s: min 2 = %f\n", __func__, 5793 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); 5794 #endif 5795 } 5796 5797 #ifdef __DML_VBA_DEBUG__ 5798 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); 5799 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); 5800 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 5801 #endif 5802 5803 *StutterPeriod = 0; 5804 5805 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5806 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5807 LinesInDETY = ((double) DETBufferSizeY[k] 5808 + (UnboundedRequestEnabled == true ? 
EffectiveCompressedBufferSize : 0) 5809 * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth) 5810 / BytePerPixelDETY[k] / SwathWidthY[k]; 5811 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); 5812 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k]) 5813 / VRatio[k]; 5814 #ifdef __DML_VBA_DEBUG__ 5815 dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 5816 dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 5817 dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); 5818 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", 5819 __func__, k, ReadBandwidthSurfaceLuma[k]); 5820 dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); 5821 dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY); 5822 dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n", 5823 __func__, k, LinesInDETYRoundedDownToSwath); 5824 dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]); 5825 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); 5826 dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]); 5827 dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); 5828 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); 5829 #endif 5830 5831 if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) { 5832 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; 5833 5834 FoundCriticalSurface = true; 5835 *StutterPeriod = DETBufferingTimeY; 5836 FrameTimeCriticalSurface = ( 5837 isInterlaceTiming ? 5838 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k]) 5839 * (double) HTotal[k] / PixelClock[k]; 5840 VActiveTimeCriticalSurface = ( 5841 isInterlaceTiming ? 
5842 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k]) 5843 * (double) HTotal[k] / PixelClock[k]; 5844 BytePerPixelYCriticalSurface = BytePerPixelY[k]; 5845 SwathWidthYCriticalSurface = SwathWidthY[k]; 5846 SwathHeightYCriticalSurface = SwathHeightY[k]; 5847 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k]; 5848 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k] 5849 - (LinesInDETY - LinesInDETYRoundedDownToSwath); 5850 DETBufferSizeYCriticalSurface = DETBufferSizeY[k]; 5851 MinTTUVBlankCriticalSurface = MinTTUVBlank[k]; 5852 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0); 5853 doublePipeCriticalSurface = (DPPPerSurface[k] == 1); 5854 5855 #ifdef __DML_VBA_DEBUG__ 5856 dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n", 5857 __func__, k, FoundCriticalSurface); 5858 dml_print("DML::%s: k=%0d, StutterPeriod = %f\n", 5859 __func__, k, *StutterPeriod); 5860 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n", 5861 __func__, k, MinTTUVBlankCriticalSurface); 5862 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n", 5863 __func__, k, FrameTimeCriticalSurface); 5864 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n", 5865 __func__, k, VActiveTimeCriticalSurface); 5866 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n", 5867 __func__, k, BytePerPixelYCriticalSurface); 5868 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n", 5869 __func__, k, SwathWidthYCriticalSurface); 5870 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n", 5871 __func__, k, SwathHeightYCriticalSurface); 5872 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n", 5873 __func__, k, BlockWidth256BytesYCriticalSurface); 5874 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n", 5875 __func__, k, doublePlaneCriticalSurface); 5876 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n", 5877 __func__, k, doublePipeCriticalSurface); 5878 dml_print("DML::%s: 
k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n", 5879 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface); 5880 #endif 5881 } 5882 } 5883 } 5884 5885 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, 5886 EffectiveCompressedBufferSize); 5887 #ifdef __DML_VBA_DEBUG__ 5888 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); 5889 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 5890 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", 5891 __func__, *StutterPeriod * TotalDataReadBandwidth); 5892 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 5893 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, 5894 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); 5895 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 5896 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); 5897 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); 5898 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 5899 #endif 5900 5901 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate 5902 / ReturnBW 5903 + (*StutterPeriod * TotalDataReadBandwidth 5904 - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) 5905 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; 5906 #ifdef __DML_VBA_DEBUG__ 5907 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 5908 AverageDCCCompressionRate / ReturnBW); 5909 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", 5910 __func__, (*StutterPeriod * TotalDataReadBandwidth)); 5911 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * 
TotalDataReadBandwidth - 5912 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); 5913 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); 5914 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 5915 #endif 5916 StutterBurstTime = dml_max(StutterBurstTime, 5917 LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface 5918 * SwathWidthYCriticalSurface / ReturnBW); 5919 5920 #ifdef __DML_VBA_DEBUG__ 5921 dml_print("DML::%s: Time to finish residue swath=%f\n", 5922 __func__, 5923 LinesToFinishSwathTransferStutterCriticalSurface * 5924 BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW); 5925 #endif 5926 5927 TotalActiveWriteback = 0; 5928 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5929 if (WritebackEnable[k]) 5930 TotalActiveWriteback = TotalActiveWriteback + 1; 5931 } 5932 5933 if (TotalActiveWriteback == 0) { 5934 #ifdef __DML_VBA_DEBUG__ 5935 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 5936 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); 5937 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); 5938 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 5939 #endif 5940 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 5941 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; 5942 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 5943 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; 5944 *NumberOfStutterBurstsPerFrame = ( 5945 *StutterEfficiencyNotIncludingVBlank > 0 ? 5946 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); 5947 *Z8NumberOfStutterBurstsPerFrame = ( 5948 *Z8StutterEfficiencyNotIncludingVBlank > 0 ? 
5949 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); 5950 } else { 5951 *StutterEfficiencyNotIncludingVBlank = 0.; 5952 *Z8StutterEfficiencyNotIncludingVBlank = 0.; 5953 *NumberOfStutterBurstsPerFrame = 0; 5954 *Z8NumberOfStutterBurstsPerFrame = 0; 5955 } 5956 #ifdef __DML_VBA_DEBUG__ 5957 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface); 5958 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", 5959 __func__, *StutterEfficiencyNotIncludingVBlank); 5960 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", 5961 __func__, *Z8StutterEfficiencyNotIncludingVBlank); 5962 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); 5963 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 5964 #endif 5965 5966 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5967 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5968 if (BlendingAndTiming[k] == k) { 5969 if (TotalNumberOfActiveOTG == 0) { 5970 doublePixelClock = PixelClock[k]; 5971 doubleHTotal = HTotal[k]; 5972 doubleVTotal = VTotal[k]; 5973 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k] 5974 || doubleVTotal != VTotal[k]) { 5975 SameTiming = false; 5976 } 5977 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 5978 } 5979 } 5980 } 5981 5982 if (*StutterEfficiencyNotIncludingVBlank > 0) { 5983 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 5984 5985 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming 5986 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) { 5987 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime 5988 + StutterBurstTime * VActiveTimeCriticalSurface 5989 / *StutterPeriod) / FrameTimeCriticalSurface) * 100; 5990 } else { 5991 *StutterEfficiency = 
*StutterEfficiencyNotIncludingVBlank; 5992 } 5993 } else { 5994 *StutterEfficiency = 0; 5995 } 5996 5997 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { 5998 LastZ8StutterPeriod = VActiveTimeCriticalSurface 5999 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6000 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod + 6001 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) { 6002 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime 6003 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100; 6004 } else { 6005 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; 6006 } 6007 } else { 6008 *Z8StutterEfficiency = 0.; 6009 } 6010 6011 #ifdef __DML_VBA_DEBUG__ 6012 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); 6013 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); 6014 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6015 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6016 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); 6017 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); 6018 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", 6019 __func__, *StutterEfficiencyNotIncludingVBlank); 6020 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6021 #endif 6022 6023 SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface 6024 * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface); 6025 LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024); 6026 MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) 6027 - DETBufferSizeYCriticalSurface; 6028 6029 
*DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1) 6030 && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0) 6031 && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) 6032 && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize)); 6033 6034 #ifdef __DML_VBA_DEBUG__ 6035 dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface); 6036 dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize); 6037 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize); 6038 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); 6039 #endif 6040 } // CalculateStutterEfficiency 6041 6042 void dml32_CalculateMaxDETAndMinCompressedBufferSize( 6043 unsigned int ConfigReturnBufferSizeInKByte, 6044 unsigned int ROBBufferSizeInKByte, 6045 unsigned int MaxNumDPP, 6046 bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size 6047 unsigned int nomDETInKByteOverrideValue, // VBA_DELTA 6048 6049 /* Output */ 6050 unsigned int *MaxTotalDETInKByte, 6051 unsigned int *nomDETInKByte, 6052 unsigned int *MinCompressedBufferSizeInKByte) 6053 { 6054 bool det_buff_size_override_en = nomDETInKByteOverrideEnable; 6055 unsigned int det_buff_size_override_val = nomDETInKByteOverrideValue; 6056 6057 *MaxTotalDETInKByte = dml_ceil(((double)ConfigReturnBufferSizeInKByte + 6058 (double) ROBBufferSizeInKByte) * 4.0 / 5.0, 64); 6059 *nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64); 6060 *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; 6061 6062 #ifdef __DML_VBA_DEBUG__ 6063 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte); 6064 dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, 
ROBBufferSizeInKByte); 6065 dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP); 6066 dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte); 6067 dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte); 6068 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte); 6069 #endif 6070 6071 if (det_buff_size_override_en) { 6072 *nomDETInKByte = det_buff_size_override_val; 6073 #ifdef __DML_VBA_DEBUG__ 6074 dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte); 6075 #endif 6076 } 6077 } // CalculateMaxDETAndMinCompressedBufferSize 6078 6079 bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces, 6080 double ReturnBW, 6081 bool NotUrgentLatencyHiding[], 6082 double ReadBandwidthLuma[], 6083 double ReadBandwidthChroma[], 6084 double cursor_bw[], 6085 double meta_row_bandwidth[], 6086 double dpte_row_bandwidth[], 6087 unsigned int NumberOfDPP[], 6088 double UrgentBurstFactorLuma[], 6089 double UrgentBurstFactorChroma[], 6090 double UrgentBurstFactorCursor[]) 6091 { 6092 unsigned int k; 6093 bool NotEnoughUrgentLatencyHiding = false; 6094 bool CalculateVActiveBandwithSupport_val = false; 6095 double VActiveBandwith = 0; 6096 6097 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6098 if (NotUrgentLatencyHiding[k]) { 6099 NotEnoughUrgentLatencyHiding = true; 6100 } 6101 } 6102 6103 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6104 VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k]; 6105 } 6106 6107 CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding; 6108 6109 #ifdef __DML_VBA_DEBUG__ 6110 dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, 
NotEnoughUrgentLatencyHiding); 6111 dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith); 6112 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 6113 dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, CalculateVActiveBandwithSupport_val); 6114 #endif 6115 return CalculateVActiveBandwithSupport_val; 6116 } 6117 6118 void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces, 6119 double ReturnBW, 6120 bool NotUrgentLatencyHiding[], 6121 double ReadBandwidthLuma[], 6122 double ReadBandwidthChroma[], 6123 double PrefetchBandwidthLuma[], 6124 double PrefetchBandwidthChroma[], 6125 double cursor_bw[], 6126 double meta_row_bandwidth[], 6127 double dpte_row_bandwidth[], 6128 double cursor_bw_pre[], 6129 double prefetch_vmrow_bw[], 6130 unsigned int NumberOfDPP[], 6131 double UrgentBurstFactorLuma[], 6132 double UrgentBurstFactorChroma[], 6133 double UrgentBurstFactorCursor[], 6134 double UrgentBurstFactorLumaPre[], 6135 double UrgentBurstFactorChromaPre[], 6136 double UrgentBurstFactorCursorPre[], 6137 6138 /* output */ 6139 double *PrefetchBandwidth, 6140 double *FractionOfUrgentBandwidth, 6141 bool *PrefetchBandwidthSupport) 6142 { 6143 unsigned int k; 6144 bool NotEnoughUrgentLatencyHiding = false; 6145 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6146 if (NotUrgentLatencyHiding[k]) { 6147 NotEnoughUrgentLatencyHiding = true; 6148 } 6149 } 6150 6151 *PrefetchBandwidth = 0; 6152 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6153 *PrefetchBandwidth = *PrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6154 ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]), 6155 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * 
/*
 * Compute the bandwidth left over from ReturnBW once every surface's demand
 * has been subtracted.
 *
 * Each surface's demand is the larger of:
 *   - its burst-factor-scaled luma/chroma/cursor read bandwidth
 *   - NumberOfDPP * its burst-factor-scaled luma/chroma prefetch bandwidth
 *     plus the burst-factor-scaled cursor_bw_pre
 *
 * Returns ReturnBW minus the sum of those demands; nothing in this function
 * clamps the result, so it can be negative.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	double remaining_bw = ReturnBW;
	unsigned int i;

	for (i = 0; i < NumberOfActiveSurfaces; ++i) {
		double active_bw = ReadBandwidthLuma[i] * UrgentBurstFactorLuma[i]
				+ ReadBandwidthChroma[i] * UrgentBurstFactorChroma[i]
				+ cursor_bw[i] * UrgentBurstFactorCursor[i];
		double prefetch_bw = NumberOfDPP[i] * (PrefetchBandwidthLuma[i] * UrgentBurstFactorLumaPre[i]
				+ PrefetchBandwidthChroma[i] * UrgentBurstFactorChromaPre[i])
				+ cursor_bw_pre[i] * UrgentBurstFactorCursorPre[i];

		remaining_bw -= dml_max(active_bw, prefetch_bw);
	}

	return remaining_bw;
}
prefetch_vmrow_bw[], 6202 unsigned int NumberOfDPP[], 6203 double UrgentBurstFactorLuma[], 6204 double UrgentBurstFactorChroma[], 6205 double UrgentBurstFactorCursor[], 6206 double UrgentBurstFactorLumaPre[], 6207 double UrgentBurstFactorChromaPre[], 6208 double UrgentBurstFactorCursorPre[], 6209 6210 /* output */ 6211 double *TotalBandwidth, 6212 double *FractionOfUrgentBandwidth, 6213 bool *ImmediateFlipBandwidthSupport) 6214 { 6215 unsigned int k; 6216 *TotalBandwidth = 0; 6217 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6218 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) { 6219 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6220 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6221 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6222 } else { 6223 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6224 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6225 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6226 } 6227 } 6228 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW); 6229 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW; 6230 } 6231 6232 bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces, 6233 double ReturnBW, 6234 double UrgentLatency, 6235 unsigned int SwathHeightY[], 6236 unsigned int SwathHeightC[], 6237 unsigned int 
SwathWidthY[], 6238 unsigned int SwathWidthC[], 6239 double BytePerPixelInDETY[], 6240 double BytePerPixelInDETC[], 6241 unsigned int DETBufferSizeY[], 6242 unsigned int DETBufferSizeC[], 6243 unsigned int NumOfDPP[], 6244 unsigned int HTotal[], 6245 double PixelClock[], 6246 double VRatioY[], 6247 double VRatioC[], 6248 enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX]) 6249 { 6250 int k; 6251 double SwathSizeAllSurfaces = 0; 6252 double SwathSizeAllSurfacesInFetchTimeUs; 6253 double DETSwathLatencyHidingUs; 6254 double DETSwathLatencyHidingYUs; 6255 double DETSwathLatencyHidingCUs; 6256 double SwathSizePerSurfaceY[DC__NUM_DPP__MAX]; 6257 double SwathSizePerSurfaceC[DC__NUM_DPP__MAX]; 6258 bool NotEnoughDETSwathFillLatencyHiding = false; 6259 6260 /* calculate sum of single swath size for all pipes in bytes*/ 6261 for (k = 0; k < NumberOfActiveSurfaces; k++) { 6262 SwathSizePerSurfaceY[k] += SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k]; 6263 6264 if (SwathHeightC[k] != 0) 6265 SwathSizePerSurfaceC[k] += SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k]; 6266 else 6267 SwathSizePerSurfaceC[k] = 0; 6268 6269 SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k]; 6270 } 6271 6272 SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency; 6273 6274 /* ensure all DET - 1 swath can hide a fetch for all surfaces */ 6275 for (k = 0; k < NumberOfActiveSurfaces; k++) { 6276 double LineTime = HTotal[k] / PixelClock[k]; 6277 6278 /* only care if surface is not phantom */ 6279 if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 6280 DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime; 6281 6282 if (SwathHeightC[k] != 0) { 6283 DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - 
SwathHeightC[k]) / VRatioC[k] * LineTime; 6284 6285 DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs); 6286 } else { 6287 DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs; 6288 } 6289 6290 /* DET must be able to hide time to fetch 1 swath for each surface */ 6291 if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) { 6292 NotEnoughDETSwathFillLatencyHiding = true; 6293 break; 6294 } 6295 } 6296 } 6297 6298 return NotEnoughDETSwathFillLatencyHiding; 6299 } 6300