1 /* 2 * Copyright 2022 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22 * Authors: AMD 23 * 24 */ 25 #include "display_mode_vba_util_32.h" 26 #include "../dml_inline_defs.h" 27 #include "display_mode_vba_32.h" 28 #include "../display_mode_lib.h" 29 30 #define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096 31 32 unsigned int dml32_dscceComputeDelay( 33 unsigned int bpc, 34 double BPP, 35 unsigned int sliceWidth, 36 unsigned int numSlices, 37 enum output_format_class pixelFormat, 38 enum output_encoder_class Output) 39 { 40 // valid bpc = source bits per component in the set of {8, 10, 12} 41 // valid bpp = increments of 1/16 of a bit 42 // min = 6/7/8 in N420/N422/444, respectively 43 // max = such that compression is 1:1 44 //valid sliceWidth = number of pixels per slice line, 45 // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 46 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 47 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 48 49 // fixed value 50 unsigned int rcModelSize = 8192; 51 52 // N422/N420 operate at 2 pixels per clock 53 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L, 54 Delay, pixels; 55 56 if (pixelFormat == dm_420) 57 pixelsPerClock = 2; 58 else if (pixelFormat == dm_n422) 59 pixelsPerClock = 2; 60 // #all other modes operate at 1 pixel per clock 61 else 62 pixelsPerClock = 1; 63 64 //initial transmit delay as per PPS 65 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); 66 67 //compute ssm delay 68 if (bpc == 8) 69 D = 81; 70 else if (bpc == 10) 71 D = 89; 72 else 73 D = 113; 74 75 //divide by pixel per cycle to compute slice width as seen by DSC 76 w = sliceWidth / pixelsPerClock; 77 78 //422 mode has an additional cycle of delay 79 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) 80 s = 0; 81 else 82 s = 1; 83 84 //main calculation for the dscce 85 ix = initalXmitDelay + 45; 86 wx = (w + 2) / 3; 87 p = 
3 * wx - w; 88 l0 = ix / w; 89 a = ix + p * l0; 90 ax = (a + 2) / 3 + D + 6 + 1; 91 L = (ax + wx - 1) / wx; 92 if ((ix % w) == 0 && p != 0) 93 lstall = 1; 94 else 95 lstall = 0; 96 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; 97 98 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 99 pixels = Delay * 3 * pixelsPerClock; 100 101 #ifdef __DML_VBA_DEBUG__ 102 dml_print("DML::%s: bpc: %d\n", __func__, bpc); 103 dml_print("DML::%s: BPP: %f\n", __func__, BPP); 104 dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth); 105 dml_print("DML::%s: numSlices: %d\n", __func__, numSlices); 106 dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat); 107 dml_print("DML::%s: Output: %d\n", __func__, Output); 108 dml_print("DML::%s: pixels: %d\n", __func__, pixels); 109 #endif 110 111 return pixels; 112 } 113 114 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) 115 { 116 unsigned int Delay = 0; 117 118 if (pixelFormat == dm_420) { 119 // sfr 120 Delay = Delay + 2; 121 // dsccif 122 Delay = Delay + 0; 123 // dscc - input deserializer 124 Delay = Delay + 3; 125 // dscc gets pixels every other cycle 126 Delay = Delay + 2; 127 // dscc - input cdc fifo 128 Delay = Delay + 12; 129 // dscc gets pixels every other cycle 130 Delay = Delay + 13; 131 // dscc - cdc uncertainty 132 Delay = Delay + 2; 133 // dscc - output cdc fifo 134 Delay = Delay + 7; 135 // dscc gets pixels every other cycle 136 Delay = Delay + 3; 137 // dscc - cdc uncertainty 138 Delay = Delay + 2; 139 // dscc - output serializer 140 Delay = Delay + 1; 141 // sft 142 Delay = Delay + 1; 143 } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) { 144 // sfr 145 Delay = Delay + 2; 146 // dsccif 147 Delay = Delay + 1; 148 // dscc - input deserializer 149 Delay = Delay + 5; 150 // dscc - input cdc fifo 151 Delay = Delay + 25; 152 // dscc - cdc uncertainty 153 Delay = Delay + 2; 154 // dscc - output cdc 
fifo 155 Delay = Delay + 10; 156 // dscc - cdc uncertainty 157 Delay = Delay + 2; 158 // dscc - output serializer 159 Delay = Delay + 1; 160 // sft 161 Delay = Delay + 1; 162 } else { 163 // sfr 164 Delay = Delay + 2; 165 // dsccif 166 Delay = Delay + 0; 167 // dscc - input deserializer 168 Delay = Delay + 3; 169 // dscc - input cdc fifo 170 Delay = Delay + 12; 171 // dscc - cdc uncertainty 172 Delay = Delay + 2; 173 // dscc - output cdc fifo 174 Delay = Delay + 7; 175 // dscc - output serializer 176 Delay = Delay + 1; 177 // dscc - cdc uncertainty 178 Delay = Delay + 2; 179 // sft 180 Delay = Delay + 1; 181 } 182 183 return Delay; 184 } 185 186 187 bool IsVertical(enum dm_rotation_angle Scan) 188 { 189 bool is_vert = false; 190 191 if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m) 192 is_vert = true; 193 else 194 is_vert = false; 195 return is_vert; 196 } 197 198 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput( 199 double HRatio, 200 double HRatioChroma, 201 double VRatio, 202 double VRatioChroma, 203 double MaxDCHUBToPSCLThroughput, 204 double MaxPSCLToLBThroughput, 205 double PixelClock, 206 enum source_format_class SourcePixelFormat, 207 unsigned int HTaps, 208 unsigned int HTapsChroma, 209 unsigned int VTaps, 210 unsigned int VTapsChroma, 211 212 /* output */ 213 double *PSCL_THROUGHPUT, 214 double *PSCL_THROUGHPUT_CHROMA, 215 double *DPPCLKUsingSingleDPP) 216 { 217 double DPPCLKUsingSingleDPPLuma; 218 double DPPCLKUsingSingleDPPChroma; 219 220 if (HRatio > 1) { 221 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / 222 dml_ceil((double) HTaps / 6.0, 1.0)); 223 } else { 224 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); 225 } 226 227 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / 228 *PSCL_THROUGHPUT, 1); 229 230 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * 
PixelClock) 231 DPPCLKUsingSingleDPPLuma = 2 * PixelClock; 232 233 if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 && 234 SourcePixelFormat != dm_rgbe_alpha)) { 235 *PSCL_THROUGHPUT_CHROMA = 0; 236 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma; 237 } else { 238 if (HRatioChroma > 1) { 239 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * 240 HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0)); 241 } else { 242 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); 243 } 244 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma), 245 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); 246 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock) 247 DPPCLKUsingSingleDPPChroma = 2 * PixelClock; 248 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma); 249 } 250 } 251 252 void dml32_CalculateBytePerPixelAndBlockSizes( 253 enum source_format_class SourcePixelFormat, 254 enum dm_swizzle_mode SurfaceTiling, 255 256 /* Output */ 257 unsigned int *BytePerPixelY, 258 unsigned int *BytePerPixelC, 259 double *BytePerPixelDETY, 260 double *BytePerPixelDETC, 261 unsigned int *BlockHeight256BytesY, 262 unsigned int *BlockHeight256BytesC, 263 unsigned int *BlockWidth256BytesY, 264 unsigned int *BlockWidth256BytesC, 265 unsigned int *MacroTileHeightY, 266 unsigned int *MacroTileHeightC, 267 unsigned int *MacroTileWidthY, 268 unsigned int *MacroTileWidthC) 269 { 270 if (SourcePixelFormat == dm_444_64) { 271 *BytePerPixelDETY = 8; 272 *BytePerPixelDETC = 0; 273 *BytePerPixelY = 8; 274 *BytePerPixelC = 0; 275 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) { 276 *BytePerPixelDETY = 4; 277 *BytePerPixelDETC = 0; 278 *BytePerPixelY = 4; 279 *BytePerPixelC = 0; 280 } else if (SourcePixelFormat == dm_444_16) { 281 
*BytePerPixelDETY = 2; 282 *BytePerPixelDETC = 0; 283 *BytePerPixelY = 2; 284 *BytePerPixelC = 0; 285 } else if (SourcePixelFormat == dm_444_8) { 286 *BytePerPixelDETY = 1; 287 *BytePerPixelDETC = 0; 288 *BytePerPixelY = 1; 289 *BytePerPixelC = 0; 290 } else if (SourcePixelFormat == dm_rgbe_alpha) { 291 *BytePerPixelDETY = 4; 292 *BytePerPixelDETC = 1; 293 *BytePerPixelY = 4; 294 *BytePerPixelC = 1; 295 } else if (SourcePixelFormat == dm_420_8) { 296 *BytePerPixelDETY = 1; 297 *BytePerPixelDETC = 2; 298 *BytePerPixelY = 1; 299 *BytePerPixelC = 2; 300 } else if (SourcePixelFormat == dm_420_12) { 301 *BytePerPixelDETY = 2; 302 *BytePerPixelDETC = 4; 303 *BytePerPixelY = 2; 304 *BytePerPixelC = 4; 305 } else { 306 *BytePerPixelDETY = 4.0 / 3; 307 *BytePerPixelDETC = 8.0 / 3; 308 *BytePerPixelY = 2; 309 *BytePerPixelC = 4; 310 } 311 #ifdef __DML_VBA_DEBUG__ 312 dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat); 313 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); 314 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); 315 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY); 316 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC); 317 #endif 318 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 319 || SourcePixelFormat == dm_444_16 320 || SourcePixelFormat == dm_444_8 321 || SourcePixelFormat == dm_mono_16 322 || SourcePixelFormat == dm_mono_8 323 || SourcePixelFormat == dm_rgbe)) { 324 if (SurfaceTiling == dm_sw_linear) 325 *BlockHeight256BytesY = 1; 326 else if (SourcePixelFormat == dm_444_64) 327 *BlockHeight256BytesY = 4; 328 else if (SourcePixelFormat == dm_444_8) 329 *BlockHeight256BytesY = 16; 330 else 331 *BlockHeight256BytesY = 8; 332 333 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 334 *BlockHeight256BytesC = 0; 335 *BlockWidth256BytesC = 0; 336 } else { 337 if (SurfaceTiling == dm_sw_linear) { 338 
*BlockHeight256BytesY = 1; 339 *BlockHeight256BytesC = 1; 340 } else if (SourcePixelFormat == dm_rgbe_alpha) { 341 *BlockHeight256BytesY = 8; 342 *BlockHeight256BytesC = 16; 343 } else if (SourcePixelFormat == dm_420_8) { 344 *BlockHeight256BytesY = 16; 345 *BlockHeight256BytesC = 8; 346 } else { 347 *BlockHeight256BytesY = 8; 348 *BlockHeight256BytesC = 8; 349 } 350 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 351 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; 352 } 353 #ifdef __DML_VBA_DEBUG__ 354 dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY); 355 dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY); 356 dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC); 357 dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC); 358 #endif 359 360 if (SurfaceTiling == dm_sw_linear) { 361 *MacroTileHeightY = *BlockHeight256BytesY; 362 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY; 363 *MacroTileHeightC = *BlockHeight256BytesC; 364 if (*MacroTileHeightC == 0) 365 *MacroTileWidthC = 0; 366 else 367 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC; 368 } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t || 369 SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) { 370 *MacroTileHeightY = 16 * *BlockHeight256BytesY; 371 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY; 372 *MacroTileHeightC = 16 * *BlockHeight256BytesC; 373 if (*MacroTileHeightC == 0) 374 *MacroTileWidthC = 0; 375 else 376 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC; 377 } else { 378 *MacroTileHeightY = 32 * *BlockHeight256BytesY; 379 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY; 380 *MacroTileHeightC = 32 * *BlockHeight256BytesC; 381 if (*MacroTileHeightC == 0) 382 *MacroTileWidthC = 0; 383 else 384 *MacroTileWidthC = 65536 * 4 / 
*BytePerPixelC / *MacroTileHeightC; 385 } 386 387 #ifdef __DML_VBA_DEBUG__ 388 dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY); 389 dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY); 390 dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC); 391 dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC); 392 #endif 393 } // CalculateBytePerPixelAndBlockSizes 394 395 void dml32_CalculateSwathAndDETConfiguration( 396 unsigned int DETSizeOverride[], 397 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 398 unsigned int ConfigReturnBufferSizeInKByte, 399 unsigned int MaxTotalDETInKByte, 400 unsigned int MinCompressedBufferSizeInKByte, 401 double ForceSingleDPP, 402 unsigned int NumberOfActiveSurfaces, 403 unsigned int nomDETInKByte, 404 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 405 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, 406 unsigned int PixelChunkSizeKBytes, 407 unsigned int ROBSizeKBytes, 408 unsigned int CompressedBufferSegmentSizeInkByteFinal, 409 enum output_encoder_class Output[], 410 double ReadBandwidthLuma[], 411 double ReadBandwidthChroma[], 412 double MaximumSwathWidthLuma[], 413 double MaximumSwathWidthChroma[], 414 enum dm_rotation_angle SourceRotation[], 415 bool ViewportStationary[], 416 enum source_format_class SourcePixelFormat[], 417 enum dm_swizzle_mode SurfaceTiling[], 418 unsigned int ViewportWidth[], 419 unsigned int ViewportHeight[], 420 unsigned int ViewportXStart[], 421 unsigned int ViewportYStart[], 422 unsigned int ViewportXStartC[], 423 unsigned int ViewportYStartC[], 424 unsigned int SurfaceWidthY[], 425 unsigned int SurfaceWidthC[], 426 unsigned int SurfaceHeightY[], 427 unsigned int SurfaceHeightC[], 428 unsigned int Read256BytesBlockHeightY[], 429 unsigned int Read256BytesBlockHeightC[], 430 unsigned int Read256BytesBlockWidthY[], 431 unsigned int Read256BytesBlockWidthC[], 432 enum 
odm_combine_mode ODMMode[], 433 unsigned int BlendingAndTiming[], 434 unsigned int BytePerPixY[], 435 unsigned int BytePerPixC[], 436 double BytePerPixDETY[], 437 double BytePerPixDETC[], 438 unsigned int HActive[], 439 double HRatio[], 440 double HRatioChroma[], 441 unsigned int DPPPerSurface[], 442 443 /* Output */ 444 unsigned int swath_width_luma_ub[], 445 unsigned int swath_width_chroma_ub[], 446 double SwathWidth[], 447 double SwathWidthChroma[], 448 unsigned int SwathHeightY[], 449 unsigned int SwathHeightC[], 450 unsigned int DETBufferSizeInKByte[], 451 unsigned int DETBufferSizeY[], 452 unsigned int DETBufferSizeC[], 453 bool *UnboundedRequestEnabled, 454 unsigned int *CompressedBufferSizeInkByte, 455 unsigned int *CompBufReservedSpaceKBytes, 456 bool *CompBufReservedSpaceNeedAdjustment, 457 bool ViewportSizeSupportPerSurface[], 458 bool *ViewportSizeSupport) 459 { 460 unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 461 unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 462 unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX]; 463 unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX]; 464 unsigned int RoundedUpSwathSizeBytesY; 465 unsigned int RoundedUpSwathSizeBytesC; 466 double SwathWidthdoubleDPP[DC__NUM_DPP__MAX]; 467 double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX]; 468 unsigned int k; 469 unsigned int TotalActiveDPP = 0; 470 bool NoChromaSurfaces = true; 471 unsigned int DETBufferSizeInKByteForSwathCalculation; 472 473 #ifdef __DML_VBA_DEBUG__ 474 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 475 dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes); 476 dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes); 477 #endif 478 dml32_CalculateSwathWidth(ForceSingleDPP, 479 NumberOfActiveSurfaces, 480 SourcePixelFormat, 481 SourceRotation, 482 ViewportStationary, 483 ViewportWidth, 484 ViewportHeight, 485 ViewportXStart, 486 ViewportYStart, 487 ViewportXStartC, 488 
ViewportYStartC, 489 SurfaceWidthY, 490 SurfaceWidthC, 491 SurfaceHeightY, 492 SurfaceHeightC, 493 ODMMode, 494 BytePerPixY, 495 BytePerPixC, 496 Read256BytesBlockHeightY, 497 Read256BytesBlockHeightC, 498 Read256BytesBlockWidthY, 499 Read256BytesBlockWidthC, 500 BlendingAndTiming, 501 HActive, 502 HRatio, 503 DPPPerSurface, 504 505 /* Output */ 506 SwathWidthdoubleDPP, 507 SwathWidthdoubleDPPChroma, 508 SwathWidth, 509 SwathWidthChroma, 510 MaximumSwathHeightY, 511 MaximumSwathHeightC, 512 swath_width_luma_ub, 513 swath_width_chroma_ub); 514 515 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 516 RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; 517 RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; 518 #ifdef __DML_VBA_DEBUG__ 519 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 520 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); 521 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]); 522 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]); 523 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, 524 RoundedUpMaxSwathSizeBytesY[k]); 525 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); 526 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]); 527 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]); 528 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, 529 RoundedUpMaxSwathSizeBytesC[k]); 530 #endif 531 532 if (SourcePixelFormat[k] == dm_420_10) { 533 RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256); 534 RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256); 535 } 536 } 
537 538 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 539 TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]); 540 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 541 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) { 542 NoChromaSurfaces = false; 543 } 544 } 545 546 // By default, just set the reserved space to 2 pixel chunks size 547 *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2; 548 549 // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data 550 // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio] 551 // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req 552 *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512); 553 554 if (*CompBufReservedSpaceNeedAdjustment == 1) { 555 *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512; 556 } 557 558 #ifdef __DML_VBA_DEBUG__ 559 dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes); 560 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment); 561 #endif 562 563 *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); 564 565 dml32_CalculateDETBufferSize(DETSizeOverride, 566 UseMALLForPStateChange, 567 ForceSingleDPP, 568 NumberOfActiveSurfaces, 569 *UnboundedRequestEnabled, 570 nomDETInKByte, 571 MaxTotalDETInKByte, 572 ConfigReturnBufferSizeInKByte, 573 MinCompressedBufferSizeInKByte, 574 CompressedBufferSegmentSizeInkByteFinal, 575 SourcePixelFormat, 576 ReadBandwidthLuma, 577 
ReadBandwidthChroma, 578 RoundedUpMaxSwathSizeBytesY, 579 RoundedUpMaxSwathSizeBytesC, 580 DPPPerSurface, 581 582 /* Output */ 583 DETBufferSizeInKByte, // per hubp pipe 584 CompressedBufferSizeInkByte); 585 586 #ifdef __DML_VBA_DEBUG__ 587 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); 588 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 589 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 590 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); 591 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); 592 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 593 #endif 594 595 *ViewportSizeSupport = true; 596 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 597 598 DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == 599 dm_use_mall_pstate_change_phantom_pipe ? 
1024 : DETBufferSizeInKByte[k]); 600 #ifdef __DML_VBA_DEBUG__ 601 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k, 602 DETBufferSizeInKByteForSwathCalculation); 603 #endif 604 605 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <= 606 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 607 SwathHeightY[k] = MaximumSwathHeightY[k]; 608 SwathHeightC[k] = MaximumSwathHeightC[k]; 609 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; 610 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; 611 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] && 612 RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <= 613 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 614 SwathHeightY[k] = MaximumSwathHeightY[k] / 2; 615 SwathHeightC[k] = MaximumSwathHeightC[k]; 616 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; 617 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k]; 618 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] && 619 RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <= 620 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 621 SwathHeightY[k] = MaximumSwathHeightY[k]; 622 SwathHeightC[k] = MaximumSwathHeightC[k] / 2; 623 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k]; 624 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; 625 } else { 626 SwathHeightY[k] = MaximumSwathHeightY[k] / 2; 627 SwathHeightC[k] = MaximumSwathHeightC[k] / 2; 628 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2; 629 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2; 630 } 631 632 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 > 633 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) 634 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 && 635 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { 636 
*ViewportSizeSupport = false; 637 ViewportSizeSupportPerSurface[k] = false; 638 } else { 639 ViewportSizeSupportPerSurface[k] = true; 640 } 641 642 if (SwathHeightC[k] == 0) { 643 #ifdef __DML_VBA_DEBUG__ 644 dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k); 645 #endif 646 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024; 647 DETBufferSizeC[k] = 0; 648 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { 649 #ifdef __DML_VBA_DEBUG__ 650 dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k); 651 #endif 652 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2; 653 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2; 654 } else { 655 #ifdef __DML_VBA_DEBUG__ 656 dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k); 657 #endif 658 DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024); 659 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k]; 660 } 661 662 #ifdef __DML_VBA_DEBUG__ 663 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); 664 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]); 665 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, 666 k, RoundedUpMaxSwathSizeBytesY[k]); 667 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, 668 k, RoundedUpMaxSwathSizeBytesC[k]); 669 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY); 670 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC); 671 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 672 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 673 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]); 674 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k, 675 
ViewportSizeSupportPerSurface[k]); 676 #endif 677 678 } 679 } // CalculateSwathAndDETConfiguration 680 681 void dml32_CalculateSwathWidth( 682 bool ForceSingleDPP, 683 unsigned int NumberOfActiveSurfaces, 684 enum source_format_class SourcePixelFormat[], 685 enum dm_rotation_angle SourceRotation[], 686 bool ViewportStationary[], 687 unsigned int ViewportWidth[], 688 unsigned int ViewportHeight[], 689 unsigned int ViewportXStart[], 690 unsigned int ViewportYStart[], 691 unsigned int ViewportXStartC[], 692 unsigned int ViewportYStartC[], 693 unsigned int SurfaceWidthY[], 694 unsigned int SurfaceWidthC[], 695 unsigned int SurfaceHeightY[], 696 unsigned int SurfaceHeightC[], 697 enum odm_combine_mode ODMMode[], 698 unsigned int BytePerPixY[], 699 unsigned int BytePerPixC[], 700 unsigned int Read256BytesBlockHeightY[], 701 unsigned int Read256BytesBlockHeightC[], 702 unsigned int Read256BytesBlockWidthY[], 703 unsigned int Read256BytesBlockWidthC[], 704 unsigned int BlendingAndTiming[], 705 unsigned int HActive[], 706 double HRatio[], 707 unsigned int DPPPerSurface[], 708 709 /* Output */ 710 double SwathWidthdoubleDPPY[], 711 double SwathWidthdoubleDPPC[], 712 double SwathWidthY[], // per-pipe 713 double SwathWidthC[], // per-pipe 714 unsigned int MaximumSwathHeightY[], 715 unsigned int MaximumSwathHeightC[], 716 unsigned int swath_width_luma_ub[], // per-pipe 717 unsigned int swath_width_chroma_ub[]) // per-pipe 718 { 719 unsigned int k, j; 720 enum odm_combine_mode MainSurfaceODMMode; 721 722 unsigned int surface_width_ub_l; 723 unsigned int surface_height_ub_l; 724 unsigned int surface_width_ub_c = 0; 725 unsigned int surface_height_ub_c = 0; 726 727 #ifdef __DML_VBA_DEBUG__ 728 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 729 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 730 #endif 731 732 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 733 if (!IsVertical(SourceRotation[k])) 734 
SwathWidthdoubleDPPY[k] = ViewportWidth[k]; 735 else 736 SwathWidthdoubleDPPY[k] = ViewportHeight[k]; 737 738 #ifdef __DML_VBA_DEBUG__ 739 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); 740 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); 741 #endif 742 743 MainSurfaceODMMode = ODMMode[k]; 744 for (j = 0; j < NumberOfActiveSurfaces; ++j) { 745 if (BlendingAndTiming[k] == j) 746 MainSurfaceODMMode = ODMMode[j]; 747 } 748 749 if (ForceSingleDPP) { 750 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 751 } else { 752 if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) { 753 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], 754 dml_round(HActive[k] / 4.0 * HRatio[k])); 755 } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) { 756 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], 757 dml_round(HActive[k] / 2.0 * HRatio[k])); 758 } else if (DPPPerSurface[k] == 2) { 759 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2; 760 } else { 761 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 762 } 763 } 764 765 #ifdef __DML_VBA_DEBUG__ 766 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]); 767 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]); 768 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode); 769 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]); 770 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]); 771 #endif 772 773 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 774 SourcePixelFormat[k] == dm_420_12) { 775 SwathWidthC[k] = SwathWidthY[k] / 2; 776 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2; 777 } else { 778 SwathWidthC[k] = SwathWidthY[k]; 779 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k]; 780 } 781 782 if (ForceSingleDPP == true) { 783 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 784 SwathWidthC[k] = SwathWidthdoubleDPPC[k]; 785 } 786 787 surface_width_ub_l 
= dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); 788 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); 789 790 if (!IsVertical(SourceRotation[k])) { 791 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; 792 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; 793 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 794 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 795 dml_floor(ViewportXStart[k] + 796 SwathWidthY[k] + 797 Read256BytesBlockWidthY[k] - 1, 798 Read256BytesBlockWidthY[k]) - 799 dml_floor(ViewportXStart[k], 800 Read256BytesBlockWidthY[k])); 801 } else { 802 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 803 dml_ceil(SwathWidthY[k] - 1, 804 Read256BytesBlockWidthY[k]) + 805 Read256BytesBlockWidthY[k]); 806 } 807 if (BytePerPixC[k] > 0) { 808 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); 809 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 810 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, 811 dml_floor(ViewportXStartC[k] + SwathWidthC[k] + 812 Read256BytesBlockWidthC[k] - 1, 813 Read256BytesBlockWidthC[k]) - 814 dml_floor(ViewportXStartC[k], 815 Read256BytesBlockWidthC[k])); 816 } else { 817 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, 818 dml_ceil(SwathWidthC[k] - 1, 819 Read256BytesBlockWidthC[k]) + 820 Read256BytesBlockWidthC[k]); 821 } 822 } else { 823 swath_width_chroma_ub[k] = 0; 824 } 825 } else { 826 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; 827 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; 828 829 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 830 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] + 831 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, 832 Read256BytesBlockHeightY[k]) - 833 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k])); 834 } else { 835 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1, 836 Read256BytesBlockHeightY[k]) 
+ Read256BytesBlockHeightY[k]); 837 } 838 if (BytePerPixC[k] > 0) { 839 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); 840 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 841 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, 842 dml_floor(ViewportYStartC[k] + SwathWidthC[k] + 843 Read256BytesBlockHeightC[k] - 1, 844 Read256BytesBlockHeightC[k]) - 845 dml_floor(ViewportYStartC[k], 846 Read256BytesBlockHeightC[k])); 847 } else { 848 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, 849 dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + 850 Read256BytesBlockHeightC[k]); 851 } 852 } else { 853 swath_width_chroma_ub[k] = 0; 854 } 855 } 856 857 #ifdef __DML_VBA_DEBUG__ 858 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); 859 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l); 860 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c); 861 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c); 862 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]); 863 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]); 864 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]); 865 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]); 866 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]); 867 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]); 868 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]); 869 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]); 870 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]); 871 
dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]); 872 #endif 873 874 } 875 } // CalculateSwathWidth 876 877 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, 878 unsigned int TotalNumberOfActiveDPP, 879 bool NoChroma, 880 enum output_encoder_class Output, 881 enum dm_swizzle_mode SurfaceTiling, 882 bool CompBufReservedSpaceNeedAdjustment, 883 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) 884 { 885 bool ret_val = false; 886 887 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && 888 TotalNumberOfActiveDPP == 1 && NoChroma); 889 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) 890 ret_val = false; 891 892 if (SurfaceTiling == dm_sw_linear) 893 ret_val = false; 894 895 if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) 896 ret_val = false; 897 898 #ifdef __DML_VBA_DEBUG__ 899 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment); 900 dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); 901 dml_print("DML::%s: ret_val = %d\n", __func__, ret_val); 902 #endif 903 904 return (ret_val); 905 } 906 907 void dml32_CalculateDETBufferSize( 908 unsigned int DETSizeOverride[], 909 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 910 bool ForceSingleDPP, 911 unsigned int NumberOfActiveSurfaces, 912 bool UnboundedRequestEnabled, 913 unsigned int nomDETInKByte, 914 unsigned int MaxTotalDETInKByte, 915 unsigned int ConfigReturnBufferSizeInKByte, 916 unsigned int MinCompressedBufferSizeInKByte, 917 unsigned int CompressedBufferSegmentSizeInkByteFinal, 918 enum source_format_class SourcePixelFormat[], 919 double ReadBandwidthLuma[], 920 double ReadBandwidthChroma[], 921 unsigned int 
RoundedUpMaxSwathSizeBytesY[], 922 unsigned int RoundedUpMaxSwathSizeBytesC[], 923 unsigned int DPPPerSurface[], 924 /* Output */ 925 unsigned int DETBufferSizeInKByte[], 926 unsigned int *CompressedBufferSizeInkByte) 927 { 928 unsigned int DETBufferSizePoolInKByte; 929 unsigned int NextDETBufferPieceInKByte; 930 bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX]; 931 bool NextPotentialSurfaceToAssignDETPieceFound; 932 unsigned int NextSurfaceToAssignDETPiece; 933 double TotalBandwidth; 934 double BandwidthOfSurfacesNotAssignedDETPiece; 935 unsigned int max_minDET; 936 unsigned int minDET; 937 unsigned int minDET_pipe; 938 unsigned int j, k; 939 940 #ifdef __DML_VBA_DEBUG__ 941 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 942 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 943 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 944 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); 945 dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte); 946 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 947 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte); 948 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__, 949 CompressedBufferSegmentSizeInkByteFinal); 950 #endif 951 952 // Note: Will use default det size if that fits 2 swaths 953 if (UnboundedRequestEnabled) { 954 if (DETSizeOverride[0] > 0) { 955 DETBufferSizeInKByte[0] = DETSizeOverride[0]; 956 } else { 957 DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 * 958 ((double) RoundedUpMaxSwathSizeBytesY[0] + 959 (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0)); 960 } 961 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0]; 962 } else { 963 DETBufferSizePoolInKByte = MaxTotalDETInKByte; 964 
for (k = 0; k < NumberOfActiveSurfaces; ++k) { 965 DETBufferSizeInKByte[k] = nomDETInKByte; 966 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 967 SourcePixelFormat[k] == dm_420_12) { 968 max_minDET = nomDETInKByte - 64; 969 } else { 970 max_minDET = nomDETInKByte; 971 } 972 minDET = 128; 973 minDET_pipe = 0; 974 975 // add DET resource until can hold 2 full swaths 976 while (minDET <= max_minDET && minDET_pipe == 0) { 977 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] + 978 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET) 979 minDET_pipe = minDET; 980 minDET = minDET + 64; 981 } 982 983 #ifdef __DML_VBA_DEBUG__ 984 dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET); 985 dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET); 986 dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe); 987 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, 988 RoundedUpMaxSwathSizeBytesY[k]); 989 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, 990 RoundedUpMaxSwathSizeBytesC[k]); 991 #endif 992 993 if (minDET_pipe == 0) { 994 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] + 995 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64)); 996 #ifdef __DML_VBA_DEBUG__ 997 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n", 998 __func__, k, minDET_pipe); 999 #endif 1000 } 1001 1002 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { 1003 DETBufferSizeInKByte[k] = 0; 1004 } else if (DETSizeOverride[k] > 0) { 1005 DETBufferSizeInKByte[k] = DETSizeOverride[k]; 1006 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - 1007 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k]; 1008 } else if ((ForceSingleDPP ? 
1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) { 1009 DETBufferSizeInKByte[k] = minDET_pipe; 1010 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - 1011 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe; 1012 } 1013 1014 #ifdef __DML_VBA_DEBUG__ 1015 dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 1016 dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]); 1017 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 1018 dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte); 1019 #endif 1020 } 1021 1022 TotalBandwidth = 0; 1023 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1024 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) 1025 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 1026 } 1027 #ifdef __DML_VBA_DEBUG__ 1028 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__); 1029 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) 1030 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 1031 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__); 1032 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth); 1033 #endif 1034 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth; 1035 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1036 1037 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { 1038 DETPieceAssignedToThisSurfaceAlready[k] = true; 1039 } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 
1 : DPPPerSurface[k]) * 1040 (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >= 1041 ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) { 1042 DETPieceAssignedToThisSurfaceAlready[k] = true; 1043 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - 1044 ReadBandwidthLuma[k] - ReadBandwidthChroma[k]; 1045 } else { 1046 DETPieceAssignedToThisSurfaceAlready[k] = false; 1047 } 1048 #ifdef __DML_VBA_DEBUG__ 1049 dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k, 1050 DETPieceAssignedToThisSurfaceAlready[k]); 1051 dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, 1052 BandwidthOfSurfacesNotAssignedDETPiece); 1053 #endif 1054 } 1055 1056 for (j = 0; j < NumberOfActiveSurfaces; ++j) { 1057 NextPotentialSurfaceToAssignDETPieceFound = false; 1058 NextSurfaceToAssignDETPiece = 0; 1059 1060 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1061 #ifdef __DML_VBA_DEBUG__ 1062 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k, 1063 ReadBandwidthLuma[k]); 1064 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k, 1065 ReadBandwidthChroma[k]); 1066 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, 1067 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); 1068 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, 1069 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1070 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k, 1071 NextSurfaceToAssignDETPiece); 1072 #endif 1073 if (!DETPieceAssignedToThisSurfaceAlready[k] && 1074 (!NextPotentialSurfaceToAssignDETPieceFound || 1075 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < 1076 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1077 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) { 1078 NextSurfaceToAssignDETPiece = k; 1079 NextPotentialSurfaceToAssignDETPieceFound = 
true; 1080 } 1081 #ifdef __DML_VBA_DEBUG__ 1082 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n", 1083 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); 1084 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n", 1085 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); 1086 #endif 1087 } 1088 1089 if (NextPotentialSurfaceToAssignDETPieceFound) { 1090 // Note: To show the banker's rounding behavior in VBA and also the fact 1091 // that the DET buffer size varies due to precision issue 1092 // 1093 //double tmp1 = ((double) DETBufferSizePoolInKByte * 1094 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1095 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1096 // BandwidthOfSurfacesNotAssignedDETPiece / 1097 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1098 //double tmp2 = dml_round((double) DETBufferSizePoolInKByte * 1099 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1100 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1101 //BandwidthOfSurfacesNotAssignedDETPiece / 1102 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1103 // 1104 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1); 1105 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2); 1106 1107 NextDETBufferPieceInKByte = dml_min( 1108 dml_round((double) DETBufferSizePoolInKByte * 1109 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1110 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1111 BandwidthOfSurfacesNotAssignedDETPiece / 1112 ((ForceSingleDPP ? 1 : 1113 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) * 1114 (ForceSingleDPP ? 1 : 1115 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0, 1116 dml_floor((double) DETBufferSizePoolInKByte, 1117 (ForceSingleDPP ? 1 : 1118 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1119 1120 // Above calculation can assign the entire DET buffer allocation to a single pipe. 
1121 // We should limit the per-pipe DET size to the nominal / max per pipe. 1122 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { 1123 if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] < 1124 nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { 1125 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) - 1126 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]; 1127 } else { 1128 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] 1129 // already has the max per-pipe value 1130 NextDETBufferPieceInKByte = 0; 1131 } 1132 } 1133 1134 #ifdef __DML_VBA_DEBUG__ 1135 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j, 1136 DETBufferSizePoolInKByte); 1137 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, 1138 NextSurfaceToAssignDETPiece); 1139 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j, 1140 NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); 1141 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j, 1142 NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1143 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", 1144 __func__, j, BandwidthOfSurfacesNotAssignedDETPiece); 1145 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j, 1146 NextDETBufferPieceInKByte); 1147 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ", 1148 __func__, j, NextSurfaceToAssignDETPiece, 1149 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); 1150 #endif 1151 1152 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = 1153 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] 1154 + NextDETBufferPieceInKByte 1155 / (ForceSingleDPP ? 
1 : DPPPerSurface[NextSurfaceToAssignDETPiece]); 1156 #ifdef __DML_VBA_DEBUG__ 1157 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); 1158 #endif 1159 1160 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte; 1161 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true; 1162 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - 1163 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1164 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1165 } 1166 } 1167 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte; 1168 } 1169 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; 1170 1171 #ifdef __DML_VBA_DEBUG__ 1172 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__); 1173 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 1174 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) { 1175 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n", 1176 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); 1177 } 1178 #endif 1179 } // CalculateDETBufferSize 1180 1181 void dml32_CalculateODMMode( 1182 unsigned int MaximumPixelsPerLinePerDSCUnit, 1183 unsigned int HActive, 1184 enum output_format_class OutFormat, 1185 enum output_encoder_class Output, 1186 enum odm_combine_policy ODMUse, 1187 double StateDispclk, 1188 double MaxDispclk, 1189 bool DSCEnable, 1190 unsigned int TotalNumberOfActiveDPP, 1191 unsigned int MaxNumDPP, 1192 double PixelClock, 1193 double DISPCLKDPPCLKDSCCLKDownSpreading, 1194 double DISPCLKRampingMargin, 1195 double DISPCLKDPPCLKVCOSpeed, 1196 unsigned int NumberOfDSCSlices, 1197 1198 /* Output */ 1199 bool *TotalAvailablePipesSupport, 1200 unsigned int *NumberOfDPP, 1201 enum odm_combine_mode *ODMMode, 1202 double *RequiredDISPCLKPerSurface) 1203 { 1204 1205 double 
SurfaceRequiredDISPCLKWithoutODMCombine; 1206 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1207 double SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1208 1209 SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled, 1210 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1211 MaxDispclk); 1212 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1, 1213 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1214 MaxDispclk); 1215 SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1, 1216 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1217 MaxDispclk); 1218 *TotalAvailablePipesSupport = true; 1219 *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable 1220 1221 if (ODMUse == dm_odm_combine_policy_none) 1222 *ODMMode = dm_odm_combine_mode_disabled; 1223 1224 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine; 1225 *NumberOfDPP = 0; 1226 1227 // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care?? 
1228 // (ODMUse == "" || ODMUse == "CombineAsNeeded") 1229 1230 if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 || 1231 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk || 1232 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)) 1233 || NumberOfDSCSlices > 8)))) { 1234 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) { 1235 *ODMMode = dm_odm_combine_mode_4to1; 1236 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1237 *NumberOfDPP = 4; 1238 } else { 1239 *TotalAvailablePipesSupport = false; 1240 } 1241 } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 || 1242 (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk && 1243 SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) || 1244 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)) 1245 || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) { 1246 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) { 1247 *ODMMode = dm_odm_combine_mode_2to1; 1248 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1249 *NumberOfDPP = 2; 1250 } else { 1251 *TotalAvailablePipesSupport = false; 1252 } 1253 } else { 1254 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) 1255 *NumberOfDPP = 1; 1256 else 1257 *TotalAvailablePipesSupport = false; 1258 } 1259 if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH && 1260 ODMUse != dm_odm_combine_policy_4to1) { 1261 if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) { 1262 *ODMMode = dm_odm_combine_mode_disabled; 1263 *NumberOfDPP = 0; 1264 *TotalAvailablePipesSupport = false; 1265 } else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 || 1266 *ODMMode == dm_odm_combine_mode_4to1) { 1267 *ODMMode = dm_odm_combine_mode_4to1; 1268 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1269 *NumberOfDPP = 4; 1270 } else { 1271 *ODMMode = dm_odm_combine_mode_2to1; 1272 *RequiredDISPCLKPerSurface = 
SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1273 *NumberOfDPP = 2; 1274 } 1275 } 1276 if (Output == dm_hdmi && OutFormat == dm_420 && 1277 HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) { 1278 *ODMMode = dm_odm_combine_mode_disabled; 1279 *NumberOfDPP = 0; 1280 *TotalAvailablePipesSupport = false; 1281 } 1282 } 1283 1284 double dml32_CalculateRequiredDispclk( 1285 enum odm_combine_mode ODMMode, 1286 double PixelClock, 1287 double DISPCLKDPPCLKDSCCLKDownSpreading, 1288 double DISPCLKRampingMargin, 1289 double DISPCLKDPPCLKVCOSpeed, 1290 double MaxDispclk) 1291 { 1292 double RequiredDispclk = 0.; 1293 double PixelClockAfterODM; 1294 double DISPCLKWithRampingRoundedToDFSGranularity; 1295 double DISPCLKWithoutRampingRoundedToDFSGranularity; 1296 double MaxDispclkRoundedDownToDFSGranularity; 1297 1298 if (ODMMode == dm_odm_combine_mode_4to1) 1299 PixelClockAfterODM = PixelClock / 4; 1300 else if (ODMMode == dm_odm_combine_mode_2to1) 1301 PixelClockAfterODM = PixelClock / 2; 1302 else 1303 PixelClockAfterODM = PixelClock; 1304 1305 1306 DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( 1307 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100) 1308 * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed); 1309 1310 DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( 1311 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed); 1312 1313 MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed); 1314 1315 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) 1316 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity; 1317 else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) 1318 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity; 1319 else 1320 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity; 1321 1322 return RequiredDispclk; 
/*
 * dml32_RoundToDFSGranularity - snap a clock to VCOSpeed * 4 / N, N integer.
 *
 * The divider N is VCOSpeed * 4 / Clock rounded down (round_up == true, which
 * yields a clock >= Clock) or rounded up (round_up == false, which yields a
 * clock <= Clock).
 *
 * Returns 0.0 for a non-positive Clock.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double divider;

	if (Clock <= 0.0)
		return 0.0;

	divider = VCOSpeed * 4.0 / Clock;

	/* A smaller divider gives a larger clock, hence floor for round-up. */
	divider = round_up ? dml_floor(divider, 1.0) : dml_ceil(divider, 1.0);

	return VCOSpeed * 4.0 / divider;
}
else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) { 1386 if (DSCEnable == true) { 1387 *RequiresDSC = true; 1388 LinkDSCEnable = true; 1389 if (Output == dm_dp || Output == dm_dp2p0) 1390 *RequiresFEC = true; 1391 else 1392 *RequiresFEC = false; 1393 } else { 1394 *RequiresDSC = false; 1395 LinkDSCEnable = false; 1396 if (Output == dm_dp2p0) 1397 *RequiresFEC = true; 1398 else 1399 *RequiresFEC = false; 1400 } 1401 if (Output == dm_dp2p0) { 1402 *OutBpp = 0; 1403 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) && 1404 PHYCLKD32PerState >= 10000 / 32) { 1405 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000, 1406 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1407 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1408 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1409 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1410 if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true && 1411 ForcedOutputLinkBPP == 0) { 1412 *RequiresDSC = true; 1413 LinkDSCEnable = true; 1414 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000, 1415 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1416 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1417 OutputFormat, DSCInputBitPerComponent, 1418 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1419 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1420 } 1421 //OutputTypeAndRate = Output & " UHBR10"; 1422 *OutputType = dm_output_type_dp2p0; 1423 *OutputRate = dm_output_rate_dp_rate_uhbr10; 1424 } 1425 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) && 1426 *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) { 1427 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500, 1428 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1429 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1430 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1431 
AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1432 1433 if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true && 1434 ForcedOutputLinkBPP == 0) { 1435 *RequiresDSC = true; 1436 LinkDSCEnable = true; 1437 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500, 1438 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1439 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1440 OutputFormat, DSCInputBitPerComponent, 1441 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1442 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1443 } 1444 //OutputTypeAndRate = Output & " UHBR13p5"; 1445 *OutputType = dm_output_type_dp2p0; 1446 *OutputRate = dm_output_rate_dp_rate_uhbr13p5; 1447 } 1448 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) && 1449 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) { 1450 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000, 1451 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1452 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1453 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1454 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1455 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) { 1456 *RequiresDSC = true; 1457 LinkDSCEnable = true; 1458 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000, 1459 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1460 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1461 OutputFormat, DSCInputBitPerComponent, 1462 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1463 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1464 } 1465 //OutputTypeAndRate = Output & " UHBR20"; 1466 *OutputType = dm_output_type_dp2p0; 1467 *OutputRate = dm_output_rate_dp_rate_uhbr20; 1468 } 1469 } else { 1470 *OutBpp = 0; 1471 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) && 1472 PHYCLKPerState >= 270) { 1473 *OutBpp = dml32_TruncToValidBPP((1 - 
Downspreading / 100) * 2700, 1474 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1475 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1476 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1477 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1478 if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true && 1479 ForcedOutputLinkBPP == 0) { 1480 *RequiresDSC = true; 1481 LinkDSCEnable = true; 1482 if (Output == dm_dp) 1483 *RequiresFEC = true; 1484 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700, 1485 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1486 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1487 OutputFormat, DSCInputBitPerComponent, 1488 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1489 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1490 } 1491 //OutputTypeAndRate = Output & " HBR"; 1492 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp; 1493 *OutputRate = dm_output_rate_dp_rate_hbr; 1494 } 1495 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) && 1496 *OutBpp == 0 && PHYCLKPerState >= 540) { 1497 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400, 1498 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1499 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1500 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1501 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1502 1503 if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true && 1504 ForcedOutputLinkBPP == 0) { 1505 *RequiresDSC = true; 1506 LinkDSCEnable = true; 1507 if (Output == dm_dp) 1508 *RequiresFEC = true; 1509 1510 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400, 1511 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1512 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1513 OutputFormat, DSCInputBitPerComponent, 1514 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1515 ODMModeNoDSC, ODMModeDSC, 
RequiredSlots); 1516 } 1517 //OutputTypeAndRate = Output & " HBR2"; 1518 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp; 1519 *OutputRate = dm_output_rate_dp_rate_hbr2; 1520 } 1521 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) { 1522 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100, 1523 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1524 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1525 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, 1526 AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, 1527 RequiredSlots); 1528 1529 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) { 1530 *RequiresDSC = true; 1531 LinkDSCEnable = true; 1532 if (Output == dm_dp) 1533 *RequiresFEC = true; 1534 1535 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100, 1536 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1537 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1538 OutputFormat, DSCInputBitPerComponent, 1539 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1540 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1541 } 1542 //OutputTypeAndRate = Output & " HBR3"; 1543 *OutputType = (Output == dm_dp) ? 
/*
 * dml32_CalculateDPPCLK - derive the global and per-surface DPP clocks.
 *
 * Each surface first gets DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] scaled
 * by the down-spreading percentage. *GlobalDPPCLK is the maximum of those
 * values rounded up with dml32_RoundToDFSGranularity(). Each Dppclk[k] is
 * then rounded up to a multiple of *GlobalDPPCLK / 255.
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	double highest_dppclk = 0;
	double global_dppclk;
	unsigned int surface;

	for (surface = 0; surface < NumberOfActiveSurfaces; ++surface) {
		Dppclk[surface] = DPPCLKUsingSingleDPP[surface] / DPPPerSurface[surface] *
				(1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100);
		highest_dppclk = dml_max(highest_dppclk, Dppclk[surface]);
	}

	global_dppclk = dml32_RoundToDFSGranularity(highest_dppclk, 1, DISPCLKDPPCLKVCOSpeed);
	*GlobalDPPCLK = global_dppclk;

	/* Quantise every surface clock to 1/255 steps of the global clock. */
	for (surface = 0; surface < NumberOfActiveSurfaces; ++surface)
		Dppclk[surface] = global_dppclk / 255 *
				dml_ceil(Dppclk[surface] * 255.0 / global_dppclk, 1.0);
}
1611 MinDSCBPP = 8; 1612 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; 1613 } else { 1614 if (Output == dm_hdmi) { 1615 NonDSCBPP0 = 24; 1616 NonDSCBPP1 = 24; 1617 NonDSCBPP2 = 24; 1618 } else { 1619 NonDSCBPP0 = 16; 1620 NonDSCBPP1 = 20; 1621 NonDSCBPP2 = 24; 1622 } 1623 if (Format == dm_n422) { 1624 MinDSCBPP = 7; 1625 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; 1626 } else { 1627 MinDSCBPP = 8; 1628 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; 1629 } 1630 } 1631 if (Output == dm_dp2p0) { 1632 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540; 1633 } else if (DSCEnable && Output == dm_dp) { 1634 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); 1635 } else { 1636 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; 1637 } 1638 1639 if (DSCEnable) { 1640 if (ODMModeDSC == dm_odm_combine_mode_4to1) 1641 MaxLinkBPP = dml_min(MaxLinkBPP, 16); 1642 else if (ODMModeDSC == dm_odm_combine_mode_2to1) 1643 MaxLinkBPP = dml_min(MaxLinkBPP, 32); 1644 else if (ODMModeDSC == dm_odm_split_mode_1to2) 1645 MaxLinkBPP = 2 * MaxLinkBPP; 1646 } else { 1647 if (ODMModeNoDSC == dm_odm_combine_mode_4to1) 1648 MaxLinkBPP = dml_min(MaxLinkBPP, 16); 1649 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1) 1650 MaxLinkBPP = dml_min(MaxLinkBPP, 32); 1651 else if (ODMModeNoDSC == dm_odm_split_mode_1to2) 1652 MaxLinkBPP = 2 * MaxLinkBPP; 1653 } 1654 1655 if (DesiredBPP == 0) { 1656 if (DSCEnable) { 1657 if (MaxLinkBPP < MinDSCBPP) 1658 return BPP_INVALID; 1659 else if (MaxLinkBPP >= MaxDSCBPP) 1660 return MaxDSCBPP; 1661 else 1662 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; 1663 } else { 1664 if (MaxLinkBPP >= NonDSCBPP3) 1665 return NonDSCBPP3; 1666 else if (MaxLinkBPP >= NonDSCBPP2) 1667 return NonDSCBPP2; 1668 else if (MaxLinkBPP >= NonDSCBPP1) 1669 return NonDSCBPP1; 1670 else if (MaxLinkBPP >= NonDSCBPP0) 1671 return 16.0; 1672 else 1673 return BPP_INVALID; 1674 } 1675 } else { 1676 if 
(!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || 1677 DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) || 1678 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) 1679 return BPP_INVALID; 1680 else 1681 return DesiredBPP; 1682 } 1683 1684 *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1); 1685 1686 return BPP_INVALID; 1687 } // TruncToValidBPP 1688 1689 double dml32_RequiredDTBCLK( 1690 bool DSCEnable, 1691 double PixelClock, 1692 enum output_format_class OutputFormat, 1693 double OutputBpp, 1694 unsigned int DSCSlices, 1695 unsigned int HTotal, 1696 unsigned int HActive, 1697 unsigned int AudioRate, 1698 unsigned int AudioLayout) 1699 { 1700 double PixelWordRate; 1701 double HCActive; 1702 double HCBlank; 1703 double AverageTribyteRate; 1704 double HActiveTribyteRate; 1705 1706 if (DSCEnable != true) 1707 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); 1708 1709 PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); 1710 HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * 1711 dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); 1712 HCBlank = 64 + 32 * 1713 dml_ceil(AudioRate * (AudioLayout == 1 ? 
1 : 0.25) * HTotal / (PixelClock * 1000), 1); 1714 AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; 1715 HActiveTribyteRate = PixelWordRate * HCActive / HActive; 1716 return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; 1717 } 1718 1719 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, 1720 enum odm_combine_mode ODMMode, 1721 unsigned int DSCInputBitPerComponent, 1722 double OutputBpp, 1723 unsigned int HActive, 1724 unsigned int HTotal, 1725 unsigned int NumberOfDSCSlices, 1726 enum output_format_class OutputFormat, 1727 enum output_encoder_class Output, 1728 double PixelClock, 1729 double PixelClockBackEnd, 1730 double dsc_delay_factor_wa) 1731 { 1732 unsigned int DSCDelayRequirement_val; 1733 1734 if (DSCEnabled == true && OutputBpp != 0) { 1735 if (ODMMode == dm_odm_combine_mode_4to1) { 1736 DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1737 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4, 1738 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); 1739 } else if (ODMMode == dm_odm_combine_mode_2to1) { 1740 DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1741 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2, 1742 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); 1743 } else { 1744 DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1745 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices, 1746 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output); 1747 } 1748 1749 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) * 1750 dml_ceil((double)DSCDelayRequirement_val / HActive, 1); 1751 1752 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd; 1753 1754 } else { 1755 DSCDelayRequirement_val = 0; 1756 } 1757 1758 #ifdef __DML_VBA_DEBUG__ 
1759 dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled); 1760 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); 1761 dml_print("DML::%s: HActive = %d\n", __func__, HActive); 1762 dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat); 1763 dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent); 1764 dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices); 1765 dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val); 1766 #endif 1767 1768 return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1); 1769 } 1770 1771 void dml32_CalculateSurfaceSizeInMall( 1772 unsigned int NumberOfActiveSurfaces, 1773 unsigned int MALLAllocatedForDCN, 1774 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], 1775 bool DCCEnable[], 1776 bool ViewportStationary[], 1777 unsigned int ViewportXStartY[], 1778 unsigned int ViewportYStartY[], 1779 unsigned int ViewportXStartC[], 1780 unsigned int ViewportYStartC[], 1781 unsigned int ViewportWidthY[], 1782 unsigned int ViewportHeightY[], 1783 unsigned int BytesPerPixelY[], 1784 unsigned int ViewportWidthC[], 1785 unsigned int ViewportHeightC[], 1786 unsigned int BytesPerPixelC[], 1787 unsigned int SurfaceWidthY[], 1788 unsigned int SurfaceWidthC[], 1789 unsigned int SurfaceHeightY[], 1790 unsigned int SurfaceHeightC[], 1791 unsigned int Read256BytesBlockWidthY[], 1792 unsigned int Read256BytesBlockWidthC[], 1793 unsigned int Read256BytesBlockHeightY[], 1794 unsigned int Read256BytesBlockHeightC[], 1795 unsigned int ReadBlockWidthY[], 1796 unsigned int ReadBlockWidthC[], 1797 unsigned int ReadBlockHeightY[], 1798 unsigned int ReadBlockHeightC[], 1799 unsigned int DCCMetaPitchY[], 1800 unsigned int DCCMetaPitchC[], 1801 1802 /* Output */ 1803 unsigned int SurfaceSizeInMALL[], 1804 bool *ExceededMALLSize) 1805 { 1806 unsigned int TotalSurfaceSizeInMALL = 0; 1807 unsigned int k; 1808 1809 for (k = 0; k < 
NumberOfActiveSurfaces; ++k) { 1810 if (ViewportStationary[k]) { 1811 SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]), 1812 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1, 1813 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k], 1814 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k], 1815 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] + 1816 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - 1817 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k]; 1818 1819 if (ReadBlockWidthC[k] > 0) { 1820 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1821 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]), 1822 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 1823 ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - 1824 dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) * 1825 dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]), 1826 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 1827 ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - 1828 dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) * 1829 BytesPerPixelC[k]; 1830 } 1831 if (DCCEnable[k] == true) { 1832 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1833 dml_min(dml_ceil(DCCMetaPitchY[k], 8 * Read256BytesBlockWidthY[k]), 1834 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 * 1835 Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) 1836 - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k])) 1837 * dml_min(dml_ceil(SurfaceHeightY[k], 8 * 1838 Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] + 1839 ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 * 1840 Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 1841 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256; 1842 if (Read256BytesBlockWidthC[k] > 0) { 1843 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1844 dml_min(dml_ceil(DCCMetaPitchC[k], 8 * 1845 Read256BytesBlockWidthC[k]), 1846 
dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8 1847 * Read256BytesBlockWidthC[k] - 1, 8 * 1848 Read256BytesBlockWidthC[k]) - 1849 dml_floor(ViewportXStartC[k], 8 * 1850 Read256BytesBlockWidthC[k])) * 1851 dml_min(dml_ceil(SurfaceHeightC[k], 8 * 1852 Read256BytesBlockHeightC[k]), 1853 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 1854 8 * Read256BytesBlockHeightC[k] - 1, 8 * 1855 Read256BytesBlockHeightC[k]) - 1856 dml_floor(ViewportYStartC[k], 8 * 1857 Read256BytesBlockHeightC[k])) * 1858 BytesPerPixelC[k] / 256; 1859 } 1860 } 1861 } else { 1862 SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 1863 ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * 1864 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 1865 ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * 1866 BytesPerPixelY[k]; 1867 if (ReadBlockWidthC[k] > 0) { 1868 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1869 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 1870 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * 1871 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 1872 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * 1873 BytesPerPixelC[k]; 1874 } 1875 if (DCCEnable[k] == true) { 1876 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1877 dml_ceil(dml_min(DCCMetaPitchY[k], ViewportWidthY[k] + 8 * 1878 Read256BytesBlockWidthY[k] - 1), 8 * 1879 Read256BytesBlockWidthY[k]) * 1880 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 * 1881 Read256BytesBlockHeightY[k] - 1), 8 * 1882 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256; 1883 1884 if (Read256BytesBlockWidthC[k] > 0) { 1885 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1886 dml_ceil(dml_min(DCCMetaPitchC[k], ViewportWidthC[k] + 8 * 1887 Read256BytesBlockWidthC[k] - 1), 8 * 1888 Read256BytesBlockWidthC[k]) * 1889 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 * 1890 Read256BytesBlockHeightC[k] - 1), 8 * 1891 Read256BytesBlockHeightC[k]) * 1892 BytesPerPixelC[k] / 256; 1893 } 
1894 } 1895 } 1896 } 1897 1898 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1899 if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable) 1900 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; 1901 } 1902 *ExceededMALLSize = (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024); 1903 } // CalculateSurfaceSizeInMall 1904 1905 void dml32_CalculateVMRowAndSwath( 1906 unsigned int NumberOfActiveSurfaces, 1907 DmlPipe myPipe[], 1908 unsigned int SurfaceSizeInMALL[], 1909 unsigned int PTEBufferSizeInRequestsLuma, 1910 unsigned int PTEBufferSizeInRequestsChroma, 1911 unsigned int DCCMetaBufferSizeBytes, 1912 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], 1913 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 1914 unsigned int MALLAllocatedForDCN, 1915 double SwathWidthY[], 1916 double SwathWidthC[], 1917 bool GPUVMEnable, 1918 bool HostVMEnable, 1919 unsigned int HostVMMaxNonCachedPageTableLevels, 1920 unsigned int GPUVMMaxPageTableLevels, 1921 unsigned int GPUVMMinPageSizeKBytes[], 1922 unsigned int HostVMMinPageSize, 1923 1924 /* Output */ 1925 bool PTEBufferSizeNotExceeded[], 1926 bool DCCMetaBufferSizeNotExceeded[], 1927 unsigned int dpte_row_width_luma_ub[], 1928 unsigned int dpte_row_width_chroma_ub[], 1929 unsigned int dpte_row_height_luma[], 1930 unsigned int dpte_row_height_chroma[], 1931 unsigned int dpte_row_height_linear_luma[], // VBA_DELTA 1932 unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA 1933 unsigned int meta_req_width[], 1934 unsigned int meta_req_width_chroma[], 1935 unsigned int meta_req_height[], 1936 unsigned int meta_req_height_chroma[], 1937 unsigned int meta_row_width[], 1938 unsigned int meta_row_width_chroma[], 1939 unsigned int meta_row_height[], 1940 unsigned int meta_row_height_chroma[], 1941 unsigned int vm_group_bytes[], 1942 unsigned int dpte_group_bytes[], 1943 unsigned int PixelPTEReqWidthY[], 1944 unsigned int PixelPTEReqHeightY[], 1945 unsigned int 
PTERequestSizeY[], 1946 unsigned int PixelPTEReqWidthC[], 1947 unsigned int PixelPTEReqHeightC[], 1948 unsigned int PTERequestSizeC[], 1949 unsigned int dpde0_bytes_per_frame_ub_l[], 1950 unsigned int meta_pte_bytes_per_frame_ub_l[], 1951 unsigned int dpde0_bytes_per_frame_ub_c[], 1952 unsigned int meta_pte_bytes_per_frame_ub_c[], 1953 double PrefetchSourceLinesY[], 1954 double PrefetchSourceLinesC[], 1955 double VInitPreFillY[], 1956 double VInitPreFillC[], 1957 unsigned int MaxNumSwathY[], 1958 unsigned int MaxNumSwathC[], 1959 double meta_row_bw[], 1960 double dpte_row_bw[], 1961 double PixelPTEBytesPerRow[], 1962 double PDEAndMetaPTEBytesFrame[], 1963 double MetaRowByte[], 1964 bool use_one_row_for_frame[], 1965 bool use_one_row_for_frame_flip[], 1966 bool UsesMALLForStaticScreen[], 1967 bool PTE_BUFFER_MODE[], 1968 unsigned int BIGK_FRAGMENT_SIZE[]) 1969 { 1970 unsigned int k; 1971 unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX]; 1972 unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX]; 1973 unsigned int PDEAndMetaPTEBytesFrameY; 1974 unsigned int PDEAndMetaPTEBytesFrameC; 1975 unsigned int MetaRowByteY[DC__NUM_DPP__MAX]; 1976 unsigned int MetaRowByteC[DC__NUM_DPP__MAX]; 1977 unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX]; 1978 unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX]; 1979 unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX]; 1980 unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX]; 1981 unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; 1982 unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX]; 1983 unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX]; 1984 unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX]; 1985 bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX]; 1986 1987 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1988 if (HostVMEnable == true) { 1989 vm_group_bytes[k] = 512; 1990 
dpte_group_bytes[k] = 512; 1991 } else if (GPUVMEnable == true) { 1992 vm_group_bytes[k] = 2048; 1993 if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation)) 1994 dpte_group_bytes[k] = 512; 1995 else 1996 dpte_group_bytes[k] = 2048; 1997 } else { 1998 vm_group_bytes[k] = 0; 1999 dpte_group_bytes[k] = 0; 2000 } 2001 2002 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 || 2003 myPipe[k].SourcePixelFormat == dm_420_12 || 2004 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) { 2005 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) && 2006 !IsVertical(myPipe[k].SourceRotation)) { 2007 PTEBufferSizeInRequestsForLuma[k] = 2008 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2; 2009 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k]; 2010 } else { 2011 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; 2012 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; 2013 } 2014 2015 PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( 2016 myPipe[k].ViewportStationary, 2017 myPipe[k].DCCEnable, 2018 myPipe[k].DPPPerSurface, 2019 myPipe[k].BlockHeight256BytesC, 2020 myPipe[k].BlockWidth256BytesC, 2021 myPipe[k].SourcePixelFormat, 2022 myPipe[k].SurfaceTiling, 2023 myPipe[k].BytePerPixelC, 2024 myPipe[k].SourceRotation, 2025 SwathWidthC[k], 2026 myPipe[k].ViewportHeightChroma, 2027 myPipe[k].ViewportXStartC, 2028 myPipe[k].ViewportYStartC, 2029 GPUVMEnable, 2030 HostVMEnable, 2031 HostVMMaxNonCachedPageTableLevels, 2032 GPUVMMaxPageTableLevels, 2033 GPUVMMinPageSizeKBytes[k], 2034 HostVMMinPageSize, 2035 PTEBufferSizeInRequestsForChroma[k], 2036 myPipe[k].PitchC, 2037 myPipe[k].DCCMetaPitchC, 2038 myPipe[k].BlockWidthC, 2039 myPipe[k].BlockHeightC, 2040 2041 /* Output */ 2042 &MetaRowByteC[k], 2043 &PixelPTEBytesPerRowC[k], 2044 &dpte_row_width_chroma_ub[k], 2045 &dpte_row_height_chroma[k], 2046 
&dpte_row_height_linear_chroma[k], 2047 &PixelPTEBytesPerRowC_one_row_per_frame[k], 2048 &dpte_row_width_chroma_ub_one_row_per_frame[k], 2049 &dpte_row_height_chroma_one_row_per_frame[k], 2050 &meta_req_width_chroma[k], 2051 &meta_req_height_chroma[k], 2052 &meta_row_width_chroma[k], 2053 &meta_row_height_chroma[k], 2054 &PixelPTEReqWidthC[k], 2055 &PixelPTEReqHeightC[k], 2056 &PTERequestSizeC[k], 2057 &dpde0_bytes_per_frame_ub_c[k], 2058 &meta_pte_bytes_per_frame_ub_c[k]); 2059 2060 PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines( 2061 myPipe[k].VRatioChroma, 2062 myPipe[k].VTapsChroma, 2063 myPipe[k].InterlaceEnable, 2064 myPipe[k].ProgressiveToInterlaceUnitInOPP, 2065 myPipe[k].SwathHeightC, 2066 myPipe[k].SourceRotation, 2067 myPipe[k].ViewportStationary, 2068 SwathWidthC[k], 2069 myPipe[k].ViewportHeightChroma, 2070 myPipe[k].ViewportXStartC, 2071 myPipe[k].ViewportYStartC, 2072 2073 /* Output */ 2074 &VInitPreFillC[k], 2075 &MaxNumSwathC[k]); 2076 } else { 2077 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; 2078 PTEBufferSizeInRequestsForChroma[k] = 0; 2079 PixelPTEBytesPerRowC[k] = 0; 2080 PDEAndMetaPTEBytesFrameC = 0; 2081 MetaRowByteC[k] = 0; 2082 MaxNumSwathC[k] = 0; 2083 PrefetchSourceLinesC[k] = 0; 2084 dpte_row_height_chroma_one_row_per_frame[k] = 0; 2085 dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; 2086 PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; 2087 } 2088 2089 PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( 2090 myPipe[k].ViewportStationary, 2091 myPipe[k].DCCEnable, 2092 myPipe[k].DPPPerSurface, 2093 myPipe[k].BlockHeight256BytesY, 2094 myPipe[k].BlockWidth256BytesY, 2095 myPipe[k].SourcePixelFormat, 2096 myPipe[k].SurfaceTiling, 2097 myPipe[k].BytePerPixelY, 2098 myPipe[k].SourceRotation, 2099 SwathWidthY[k], 2100 myPipe[k].ViewportHeight, 2101 myPipe[k].ViewportXStart, 2102 myPipe[k].ViewportYStart, 2103 GPUVMEnable, 2104 HostVMEnable, 2105 
HostVMMaxNonCachedPageTableLevels, 2106 GPUVMMaxPageTableLevels, 2107 GPUVMMinPageSizeKBytes[k], 2108 HostVMMinPageSize, 2109 PTEBufferSizeInRequestsForLuma[k], 2110 myPipe[k].PitchY, 2111 myPipe[k].DCCMetaPitchY, 2112 myPipe[k].BlockWidthY, 2113 myPipe[k].BlockHeightY, 2114 2115 /* Output */ 2116 &MetaRowByteY[k], 2117 &PixelPTEBytesPerRowY[k], 2118 &dpte_row_width_luma_ub[k], 2119 &dpte_row_height_luma[k], 2120 &dpte_row_height_linear_luma[k], 2121 &PixelPTEBytesPerRowY_one_row_per_frame[k], 2122 &dpte_row_width_luma_ub_one_row_per_frame[k], 2123 &dpte_row_height_luma_one_row_per_frame[k], 2124 &meta_req_width[k], 2125 &meta_req_height[k], 2126 &meta_row_width[k], 2127 &meta_row_height[k], 2128 &PixelPTEReqWidthY[k], 2129 &PixelPTEReqHeightY[k], 2130 &PTERequestSizeY[k], 2131 &dpde0_bytes_per_frame_ub_l[k], 2132 &meta_pte_bytes_per_frame_ub_l[k]); 2133 2134 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines( 2135 myPipe[k].VRatio, 2136 myPipe[k].VTaps, 2137 myPipe[k].InterlaceEnable, 2138 myPipe[k].ProgressiveToInterlaceUnitInOPP, 2139 myPipe[k].SwathHeightY, 2140 myPipe[k].SourceRotation, 2141 myPipe[k].ViewportStationary, 2142 SwathWidthY[k], 2143 myPipe[k].ViewportHeight, 2144 myPipe[k].ViewportXStart, 2145 myPipe[k].ViewportYStart, 2146 2147 /* Output */ 2148 &VInitPreFillY[k], 2149 &MaxNumSwathY[k]); 2150 2151 PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; 2152 MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k]; 2153 2154 if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] && 2155 PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) { 2156 PTEBufferSizeNotExceeded[k] = true; 2157 } else { 2158 PTEBufferSizeNotExceeded[k] = false; 2159 } 2160 2161 one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * 2162 PTEBufferSizeInRequestsForLuma[k] && 2163 PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]); 2164 } 
2165 2166 dml32_CalculateMALLUseForStaticScreen( 2167 NumberOfActiveSurfaces, 2168 MALLAllocatedForDCN, 2169 UseMALLForStaticScreen, // mode 2170 SurfaceSizeInMALL, 2171 one_row_per_frame_fits_in_buffer, 2172 /* Output */ 2173 UsesMALLForStaticScreen); // boolen 2174 2175 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2176 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || 2177 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || 2178 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || 2179 (GPUVMMinPageSizeKBytes[k] > 64); 2180 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12; 2181 } 2182 2183 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2184 #ifdef __DML_VBA_DEBUG__ 2185 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]); 2186 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); 2187 #endif 2188 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || 2189 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || 2190 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || 2191 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation)); 2192 2193 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] && 2194 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame); 2195 2196 if (use_one_row_for_frame[k]) { 2197 dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k]; 2198 dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k]; 2199 PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k]; 2200 dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k]; 2201 dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k]; 2202 PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k]; 2203 
PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k]; 2204 } 2205 2206 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes) 2207 DCCMetaBufferSizeNotExceeded[k] = true; 2208 else 2209 DCCMetaBufferSizeNotExceeded[k] = false; 2210 2211 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k]; 2212 if (use_one_row_for_frame[k]) 2213 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2; 2214 2215 dml32_CalculateRowBandwidth( 2216 GPUVMEnable, 2217 myPipe[k].SourcePixelFormat, 2218 myPipe[k].VRatio, 2219 myPipe[k].VRatioChroma, 2220 myPipe[k].DCCEnable, 2221 myPipe[k].HTotal / myPipe[k].PixelClock, 2222 MetaRowByteY[k], MetaRowByteC[k], 2223 meta_row_height[k], 2224 meta_row_height_chroma[k], 2225 PixelPTEBytesPerRowY[k], 2226 PixelPTEBytesPerRowC[k], 2227 dpte_row_height_luma[k], 2228 dpte_row_height_chroma[k], 2229 2230 /* Output */ 2231 &meta_row_bw[k], 2232 &dpte_row_bw[k]); 2233 #ifdef __DML_VBA_DEBUG__ 2234 dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]); 2235 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n", 2236 __func__, k, use_one_row_for_frame_flip[k]); 2237 dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n", 2238 __func__, k, UseMALLForPStateChange[k]); 2239 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]); 2240 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n", 2241 __func__, k, dpte_row_width_luma_ub[k]); 2242 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]); 2243 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n", 2244 __func__, k, dpte_row_height_chroma[k]); 2245 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n", 2246 __func__, k, dpte_row_width_chroma_ub[k]); 2247 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]); 2248 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]); 
2249 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n", 2250 __func__, k, PTEBufferSizeNotExceeded[k]); 2251 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]); 2252 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]); 2253 #endif 2254 } 2255 } // CalculateVMRowAndSwath 2256 2257 unsigned int dml32_CalculateVMAndRowBytes( 2258 bool ViewportStationary, 2259 bool DCCEnable, 2260 unsigned int NumberOfDPPs, 2261 unsigned int BlockHeight256Bytes, 2262 unsigned int BlockWidth256Bytes, 2263 enum source_format_class SourcePixelFormat, 2264 unsigned int SurfaceTiling, 2265 unsigned int BytePerPixel, 2266 enum dm_rotation_angle SourceRotation, 2267 double SwathWidth, 2268 unsigned int ViewportHeight, 2269 unsigned int ViewportXStart, 2270 unsigned int ViewportYStart, 2271 bool GPUVMEnable, 2272 bool HostVMEnable, 2273 unsigned int HostVMMaxNonCachedPageTableLevels, 2274 unsigned int GPUVMMaxPageTableLevels, 2275 unsigned int GPUVMMinPageSizeKBytes, 2276 unsigned int HostVMMinPageSize, 2277 unsigned int PTEBufferSizeInRequests, 2278 unsigned int Pitch, 2279 unsigned int DCCMetaPitch, 2280 unsigned int MacroTileWidth, 2281 unsigned int MacroTileHeight, 2282 2283 /* Output */ 2284 unsigned int *MetaRowByte, 2285 unsigned int *PixelPTEBytesPerRow, 2286 unsigned int *dpte_row_width_ub, 2287 unsigned int *dpte_row_height, 2288 unsigned int *dpte_row_height_linear, 2289 unsigned int *PixelPTEBytesPerRow_one_row_per_frame, 2290 unsigned int *dpte_row_width_ub_one_row_per_frame, 2291 unsigned int *dpte_row_height_one_row_per_frame, 2292 unsigned int *MetaRequestWidth, 2293 unsigned int *MetaRequestHeight, 2294 unsigned int *meta_row_width, 2295 unsigned int *meta_row_height, 2296 unsigned int *PixelPTEReqWidth, 2297 unsigned int *PixelPTEReqHeight, 2298 unsigned int *PTERequestSize, 2299 unsigned int *DPDE0BytesFrame, 2300 unsigned int *MetaPTEBytesFrame) 2301 { 2302 unsigned int MPDEBytesFrame; 2303 
unsigned int DCCMetaSurfaceBytes; 2304 unsigned int ExtraDPDEBytesFrame; 2305 unsigned int PDEAndMetaPTEBytesFrame; 2306 unsigned int HostVMDynamicLevels = 0; 2307 unsigned int MacroTileSizeBytes; 2308 unsigned int vp_height_meta_ub; 2309 unsigned int vp_height_dpte_ub; 2310 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this 2311 2312 if (GPUVMEnable == true && HostVMEnable == true) { 2313 if (HostVMMinPageSize < 2048) 2314 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 2315 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) 2316 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 2317 else 2318 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 2319 } 2320 2321 *MetaRequestHeight = 8 * BlockHeight256Bytes; 2322 *MetaRequestWidth = 8 * BlockWidth256Bytes; 2323 if (SurfaceTiling == dm_sw_linear) { 2324 *meta_row_height = 32; 2325 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) 2326 - dml_floor(ViewportXStart, *MetaRequestWidth); 2327 } else if (!IsVertical(SourceRotation)) { 2328 *meta_row_height = *MetaRequestHeight; 2329 if (ViewportStationary && NumberOfDPPs == 1) { 2330 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, 2331 *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth); 2332 } else { 2333 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; 2334 } 2335 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; 2336 } else { 2337 *meta_row_height = *MetaRequestWidth; 2338 if (ViewportStationary && NumberOfDPPs == 1) { 2339 *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, 2340 *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight); 2341 } else { 2342 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; 2343 } 2344 
*MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; 2345 } 2346 2347 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { 2348 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, 2349 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes); 2350 } else if (!IsVertical(SourceRotation)) { 2351 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; 2352 } else { 2353 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; 2354 } 2355 2356 DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0; 2357 2358 if (GPUVMEnable == true) { 2359 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / 2360 (8 * 4.0 * 1024), 1) + 1) * 64; 2361 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1); 2362 } else { 2363 *MetaPTEBytesFrame = 0; 2364 MPDEBytesFrame = 0; 2365 } 2366 2367 if (DCCEnable != true) { 2368 *MetaPTEBytesFrame = 0; 2369 MPDEBytesFrame = 0; 2370 *MetaRowByte = 0; 2371 } 2372 2373 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight; 2374 2375 if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) { 2376 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { 2377 vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight + 2378 MacroTileHeight - 1, MacroTileHeight) - 2379 dml_floor(ViewportYStart, MacroTileHeight); 2380 } else if (!IsVertical(SourceRotation)) { 2381 vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight; 2382 } else { 2383 vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight; 2384 } 2385 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / 2386 (8 * 2097152), 1) + 1); 2387 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2); 2388 } else { 2389 
*DPDE0BytesFrame = 0; 2390 ExtraDPDEBytesFrame = 0; 2391 vp_height_dpte_ub = 0; 2392 } 2393 2394 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; 2395 2396 #ifdef __DML_VBA_DEBUG__ 2397 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); 2398 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 2399 dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear); 2400 dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel); 2401 dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels); 2402 dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes); 2403 dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes); 2404 dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight); 2405 dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth); 2406 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); 2407 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); 2408 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); 2409 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); 2410 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 2411 dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight); 2412 dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth); 2413 dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub); 2414 #endif 2415 2416 if (HostVMEnable == true) 2417 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); 2418 2419 if (SurfaceTiling == dm_sw_linear) { 2420 *PixelPTEReqHeight = 1; 2421 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; 2422 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; 2423 *PTERequestSize = 64; 2424 } 
else if (GPUVMMinPageSizeKBytes == 4) { 2425 *PixelPTEReqHeight = 16 * BlockHeight256Bytes; 2426 *PixelPTEReqWidth = 16 * BlockWidth256Bytes; 2427 *PTERequestSize = 128; 2428 } else { 2429 *PixelPTEReqHeight = MacroTileHeight; 2430 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel); 2431 *PTERequestSize = 64; 2432 } 2433 #ifdef __DML_VBA_DEBUG__ 2434 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); 2435 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame); 2436 dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight); 2437 dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth); 2438 dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear); 2439 dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize); 2440 dml_print("DML::%s: Pitch = %d\n", __func__, Pitch); 2441 #endif 2442 2443 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub; 2444 *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame / 2445 (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) * 2446 (double) *PixelPTEReqWidth; 2447 *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth * 2448 *PTERequestSize; 2449 2450 if (SurfaceTiling == dm_sw_linear) { 2451 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2452 *PixelPTEReqWidth / Pitch), 1)); 2453 #ifdef __DML_VBA_DEBUG__ 2454 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__, 2455 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch); 2456 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__, 2457 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch)); 2458 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__, 2459 
dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); 2460 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__, 2461 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2462 *PixelPTEReqWidth / Pitch), 1)); 2463 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); 2464 #endif 2465 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1), 2466 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth; 2467 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize; 2468 2469 // VBA_DELTA, VBA doesn't have programming value for pte row height linear. 2470 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2471 PixelPTEReqWidth_linear / Pitch), 1); 2472 if (*dpte_row_height_linear > 128) 2473 *dpte_row_height_linear = 128; 2474 2475 } else if (!IsVertical(SourceRotation)) { 2476 *dpte_row_height = *PixelPTEReqHeight; 2477 2478 if (GPUVMMinPageSizeKBytes > 64) { 2479 *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) / 2480 *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 2481 } else if (ViewportStationary && (NumberOfDPPs == 1)) { 2482 *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth + 2483 *PixelPTEReqWidth - 1, *PixelPTEReqWidth) - 2484 dml_floor(ViewportXStart, *PixelPTEReqWidth); 2485 } else { 2486 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * 2487 *PixelPTEReqWidth; 2488 } 2489 2490 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 2491 } else { 2492 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth); 2493 2494 if (ViewportStationary && (NumberOfDPPs == 1)) { 2495 *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, 2496 *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight); 2497 } else { 2498 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / 
*PixelPTEReqHeight, 1) + 1) 2499 * *PixelPTEReqHeight; 2500 } 2501 2502 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; 2503 } 2504 2505 if (GPUVMEnable != true) 2506 *PixelPTEBytesPerRow = 0; 2507 if (HostVMEnable == true) 2508 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); 2509 2510 #ifdef __DML_VBA_DEBUG__ 2511 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); 2512 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); 2513 dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear); 2514 dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub); 2515 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow); 2516 dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests); 2517 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame); 2518 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n", 2519 __func__, *dpte_row_width_ub_one_row_per_frame); 2520 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n", 2521 __func__, *PixelPTEBytesPerRow_one_row_per_frame); 2522 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", 2523 *MetaPTEBytesFrame); 2524 #endif 2525 2526 return PDEAndMetaPTEBytesFrame; 2527 } // CalculateVMAndRowBytes 2528 2529 double dml32_CalculatePrefetchSourceLines( 2530 double VRatio, 2531 unsigned int VTaps, 2532 bool Interlace, 2533 bool ProgressiveToInterlaceUnitInOPP, 2534 unsigned int SwathHeight, 2535 enum dm_rotation_angle SourceRotation, 2536 bool ViewportStationary, 2537 double SwathWidth, 2538 unsigned int ViewportHeight, 2539 unsigned int ViewportXStart, 2540 unsigned int ViewportYStart, 2541 2542 /* Output */ 2543 double *VInitPreFill, 2544 unsigned int *MaxNumSwath) 2545 { 2546 2547 unsigned int 
vp_start_rot; 2548 unsigned int sw0_tmp; 2549 unsigned int MaxPartialSwath; 2550 double numLines; 2551 2552 #ifdef __DML_VBA_DEBUG__ 2553 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); 2554 dml_print("DML::%s: VTaps = %d\n", __func__, VTaps); 2555 dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart); 2556 dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart); 2557 dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary); 2558 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); 2559 #endif 2560 if (ProgressiveToInterlaceUnitInOPP) 2561 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1); 2562 else 2563 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); 2564 2565 if (ViewportStationary) { 2566 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) { 2567 vp_start_rot = SwathHeight - 2568 (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); 2569 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) { 2570 vp_start_rot = ViewportXStart; 2571 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) { 2572 vp_start_rot = SwathHeight - 2573 (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); 2574 } else { 2575 vp_start_rot = ViewportYStart; 2576 } 2577 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); 2578 if (sw0_tmp < *VInitPreFill) 2579 *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1; 2580 else 2581 *MaxNumSwath = 1; 2582 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight); 2583 } else { 2584 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1; 2585 if (*VInitPreFill > 1) 2586 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight); 2587 else 2588 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + 
SwathHeight - 2) % SwathHeight); 2589 } 2590 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; 2591 2592 #ifdef __DML_VBA_DEBUG__ 2593 dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot); 2594 dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill); 2595 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); 2596 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath); 2597 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); 2598 #endif 2599 return numLines; 2600 2601 } // CalculatePrefetchSourceLines 2602 2603 void dml32_CalculateMALLUseForStaticScreen( 2604 unsigned int NumberOfActiveSurfaces, 2605 unsigned int MALLAllocatedForDCNFinal, 2606 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen, 2607 unsigned int SurfaceSizeInMALL[], 2608 bool one_row_per_frame_fits_in_buffer[], 2609 2610 /* output */ 2611 bool UsesMALLForStaticScreen[]) 2612 { 2613 unsigned int k; 2614 unsigned int SurfaceToAddToMALL; 2615 bool CanAddAnotherSurfaceToMALL; 2616 unsigned int TotalSurfaceSizeInMALL; 2617 2618 TotalSurfaceSizeInMALL = 0; 2619 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2620 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable); 2621 if (UsesMALLForStaticScreen[k]) 2622 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; 2623 #ifdef __DML_VBA_DEBUG__ 2624 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); 2625 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL); 2626 #endif 2627 } 2628 2629 SurfaceToAddToMALL = 0; 2630 CanAddAnotherSurfaceToMALL = true; 2631 while (CanAddAnotherSurfaceToMALL) { 2632 CanAddAnotherSurfaceToMALL = false; 2633 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2634 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 && 2635 !UsesMALLForStaticScreen[k] && 2636 
UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable && 2637 one_row_per_frame_fits_in_buffer[k] && 2638 (!CanAddAnotherSurfaceToMALL || 2639 SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { 2640 CanAddAnotherSurfaceToMALL = true; 2641 SurfaceToAddToMALL = k; 2642 #ifdef __DML_VBA_DEBUG__ 2643 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n", 2644 __func__, k, UseMALLForStaticScreen[k]); 2645 #endif 2646 } 2647 } 2648 if (CanAddAnotherSurfaceToMALL) { 2649 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true; 2650 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; 2651 2652 #ifdef __DML_VBA_DEBUG__ 2653 dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL); 2654 dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL); 2655 #endif 2656 2657 } 2658 } 2659 } 2660 2661 void dml32_CalculateRowBandwidth( 2662 bool GPUVMEnable, 2663 enum source_format_class SourcePixelFormat, 2664 double VRatio, 2665 double VRatioChroma, 2666 bool DCCEnable, 2667 double LineTime, 2668 unsigned int MetaRowByteLuma, 2669 unsigned int MetaRowByteChroma, 2670 unsigned int meta_row_height_luma, 2671 unsigned int meta_row_height_chroma, 2672 unsigned int PixelPTEBytesPerRowLuma, 2673 unsigned int PixelPTEBytesPerRowChroma, 2674 unsigned int dpte_row_height_luma, 2675 unsigned int dpte_row_height_chroma, 2676 /* Output */ 2677 double *meta_row_bw, 2678 double *dpte_row_bw) 2679 { 2680 if (DCCEnable != true) { 2681 *meta_row_bw = 0; 2682 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || 2683 SourcePixelFormat == dm_rgbe_alpha) { 2684 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * 2685 MetaRowByteChroma / (meta_row_height_chroma * LineTime); 2686 } else { 2687 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 2688 } 2689 
2690 if (GPUVMEnable != true) { 2691 *dpte_row_bw = 0; 2692 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || 2693 SourcePixelFormat == dm_rgbe_alpha) { 2694 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + 2695 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); 2696 } else { 2697 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 2698 } 2699 } 2700 2701 double dml32_CalculateUrgentLatency( 2702 double UrgentLatencyPixelDataOnly, 2703 double UrgentLatencyPixelMixedWithVMData, 2704 double UrgentLatencyVMDataOnly, 2705 bool DoUrgentLatencyAdjustment, 2706 double UrgentLatencyAdjustmentFabricClockComponent, 2707 double UrgentLatencyAdjustmentFabricClockReference, 2708 double FabricClock) 2709 { 2710 double ret; 2711 2712 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); 2713 if (DoUrgentLatencyAdjustment == true) { 2714 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * 2715 (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); 2716 } 2717 return ret; 2718 } 2719 2720 void dml32_CalculateUrgentBurstFactor( 2721 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, 2722 unsigned int swath_width_luma_ub, 2723 unsigned int swath_width_chroma_ub, 2724 unsigned int SwathHeightY, 2725 unsigned int SwathHeightC, 2726 double LineTime, 2727 double UrgentLatency, 2728 double CursorBufferSize, 2729 unsigned int CursorWidth, 2730 unsigned int CursorBPP, 2731 double VRatio, 2732 double VRatioC, 2733 double BytePerPixelInDETY, 2734 double BytePerPixelInDETC, 2735 unsigned int DETBufferSizeY, 2736 unsigned int DETBufferSizeC, 2737 /* Output */ 2738 double *UrgentBurstFactorCursor, 2739 double *UrgentBurstFactorLuma, 2740 double *UrgentBurstFactorChroma, 2741 bool *NotEnoughUrgentLatencyHiding) 2742 { 2743 double LinesInDETLuma; 2744 double 
LinesInDETChroma; 2745 unsigned int LinesInCursorBuffer; 2746 double CursorBufferSizeInTime; 2747 double DETBufferSizeInTimeLuma; 2748 double DETBufferSizeInTimeChroma; 2749 2750 *NotEnoughUrgentLatencyHiding = 0; 2751 2752 if (CursorWidth > 0) { 2753 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / 2754 (CursorWidth * CursorBPP / 8.0)), 1.0); 2755 if (VRatio > 0) { 2756 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; 2757 if (CursorBufferSizeInTime - UrgentLatency <= 0) { 2758 *NotEnoughUrgentLatencyHiding = 1; 2759 *UrgentBurstFactorCursor = 0; 2760 } else { 2761 *UrgentBurstFactorCursor = CursorBufferSizeInTime / 2762 (CursorBufferSizeInTime - UrgentLatency); 2763 } 2764 } else { 2765 *UrgentBurstFactorCursor = 1; 2766 } 2767 } 2768 2769 LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 : 2770 DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; 2771 2772 if (VRatio > 0) { 2773 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; 2774 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { 2775 *NotEnoughUrgentLatencyHiding = 1; 2776 *UrgentBurstFactorLuma = 0; 2777 } else { 2778 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); 2779 } 2780 } else { 2781 *UrgentBurstFactorLuma = 1; 2782 } 2783 2784 if (BytePerPixelInDETC > 0) { 2785 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 
2786 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC 2787 / swath_width_chroma_ub; 2788 2789 if (VRatio > 0) { 2790 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; 2791 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { 2792 *NotEnoughUrgentLatencyHiding = 1; 2793 *UrgentBurstFactorChroma = 0; 2794 } else { 2795 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma 2796 / (DETBufferSizeInTimeChroma - UrgentLatency); 2797 } 2798 } else { 2799 *UrgentBurstFactorChroma = 1; 2800 } 2801 } 2802 } // CalculateUrgentBurstFactor 2803 2804 void dml32_CalculateDCFCLKDeepSleep( 2805 unsigned int NumberOfActiveSurfaces, 2806 unsigned int BytePerPixelY[], 2807 unsigned int BytePerPixelC[], 2808 double VRatio[], 2809 double VRatioChroma[], 2810 double SwathWidthY[], 2811 double SwathWidthC[], 2812 unsigned int DPPPerSurface[], 2813 double HRatio[], 2814 double HRatioChroma[], 2815 double PixelClock[], 2816 double PSCL_THROUGHPUT[], 2817 double PSCL_THROUGHPUT_CHROMA[], 2818 double Dppclk[], 2819 double ReadBandwidthLuma[], 2820 double ReadBandwidthChroma[], 2821 unsigned int ReturnBusWidth, 2822 2823 /* Output */ 2824 double *DCFClkDeepSleep) 2825 { 2826 unsigned int k; 2827 double DisplayPipeLineDeliveryTimeLuma; 2828 double DisplayPipeLineDeliveryTimeChroma; 2829 double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX]; 2830 double ReadBandwidth = 0.0; 2831 2832 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2833 2834 if (VRatio[k] <= 1) { 2835 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] 2836 / PixelClock[k]; 2837 } else { 2838 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 2839 } 2840 if (BytePerPixelC[k] == 0) { 2841 DisplayPipeLineDeliveryTimeChroma = 0; 2842 } else { 2843 if (VRatioChroma[k] <= 1) { 2844 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * 2845 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 2846 } else { 2847 
DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] 2848 / Dppclk[k]; 2849 } 2850 } 2851 2852 if (BytePerPixelC[k] > 0) { 2853 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * 2854 BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 2855 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 2856 32.0 / DisplayPipeLineDeliveryTimeChroma); 2857 } else { 2858 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 2859 64.0 / DisplayPipeLineDeliveryTimeLuma; 2860 } 2861 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16); 2862 2863 #ifdef __DML_VBA_DEBUG__ 2864 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]); 2865 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); 2866 #endif 2867 } 2868 2869 for (k = 0; k < NumberOfActiveSurfaces; ++k) 2870 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 2871 2872 *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth); 2873 2874 #ifdef __DML_VBA_DEBUG__ 2875 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__); 2876 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); 2877 dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth); 2878 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); 2879 #endif 2880 2881 for (k = 0; k < NumberOfActiveSurfaces; ++k) 2882 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); 2883 #ifdef __DML_VBA_DEBUG__ 2884 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); 2885 #endif 2886 } // CalculateDCFCLKDeepSleep 2887 2888 double dml32_CalculateWriteBackDelay( 2889 enum source_format_class WritebackPixelFormat, 2890 double WritebackHRatio, 2891 double 
WritebackVRatio, 2892 unsigned int WritebackVTaps, 2893 unsigned int WritebackDestinationWidth, 2894 unsigned int WritebackDestinationHeight, 2895 unsigned int WritebackSourceHeight, 2896 unsigned int HTotal) 2897 { 2898 double CalculateWriteBackDelay; 2899 double Line_length; 2900 double Output_lines_last_notclamped; 2901 double WritebackVInit; 2902 2903 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; 2904 Line_length = dml_max((double) WritebackDestinationWidth, 2905 dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); 2906 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - 2907 dml_ceil(((double)WritebackSourceHeight - 2908 (double) WritebackVInit) / (double)WritebackVRatio, 1.0); 2909 if (Output_lines_last_notclamped < 0) { 2910 CalculateWriteBackDelay = 0; 2911 } else { 2912 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + 2913 (HTotal - WritebackDestinationWidth) + 80; 2914 } 2915 return CalculateWriteBackDelay; 2916 } 2917 2918 void dml32_UseMinimumDCFCLK( 2919 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 2920 bool DRRDisplay[], 2921 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 2922 unsigned int MaxInterDCNTileRepeaters, 2923 unsigned int MaxPrefetchMode, 2924 double DRAMClockChangeLatencyFinal, 2925 double FCLKChangeLatency, 2926 double SREnterPlusExitTime, 2927 unsigned int ReturnBusWidth, 2928 unsigned int RoundTripPingLatencyCycles, 2929 unsigned int ReorderingBytes, 2930 unsigned int PixelChunkSizeInKByte, 2931 unsigned int MetaChunkSize, 2932 bool GPUVMEnable, 2933 unsigned int GPUVMMaxPageTableLevels, 2934 bool HostVMEnable, 2935 unsigned int NumberOfActiveSurfaces, 2936 double HostVMMinPageSize, 2937 unsigned int HostVMMaxNonCachedPageTableLevels, 2938 bool DynamicMetadataVMEnabled, 2939 bool ImmediateFlipRequirement, 2940 bool ProgressiveToInterlaceUnitInOPP, 2941 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation, 2942 double 
PercentOfIdealSDPPortBWReceivedAfterUrgLatency, 2943 unsigned int VTotal[], 2944 unsigned int VActive[], 2945 unsigned int DynamicMetadataTransmittedBytes[], 2946 unsigned int DynamicMetadataLinesBeforeActiveRequired[], 2947 bool Interlace[], 2948 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX], 2949 double RequiredDISPCLK[][2], 2950 double UrgLatency[], 2951 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX], 2952 double ProjectedDCFClkDeepSleep[][2], 2953 double MaximumVStartup[][2][DC__NUM_DPP__MAX], 2954 unsigned int TotalNumberOfActiveDPP[][2], 2955 unsigned int TotalNumberOfDCCActiveDPP[][2], 2956 unsigned int dpte_group_bytes[], 2957 double PrefetchLinesY[][2][DC__NUM_DPP__MAX], 2958 double PrefetchLinesC[][2][DC__NUM_DPP__MAX], 2959 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX], 2960 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX], 2961 unsigned int BytePerPixelY[], 2962 unsigned int BytePerPixelC[], 2963 unsigned int HTotal[], 2964 double PixelClock[], 2965 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX], 2966 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX], 2967 double MetaRowBytes[][2][DC__NUM_DPP__MAX], 2968 bool DynamicMetadataEnable[], 2969 double ReadBandwidthLuma[], 2970 double ReadBandwidthChroma[], 2971 double DCFCLKPerState[], 2972 /* Output */ 2973 double DCFCLKState[][2]) 2974 { 2975 unsigned int i, j, k; 2976 unsigned int dummy1; 2977 double dummy2, dummy3; 2978 double NormalEfficiency; 2979 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; 2980 2981 NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0; 2982 for (i = 0; i < DC__VOLTAGE_STATES; ++i) { 2983 for (j = 0; j <= 1; ++j) { 2984 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; 2985 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; 2986 double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX]; 2987 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; 2988 double MinimumTWait = 
0.0; 2989 double DPTEBandwidth; 2990 double DCFCLKRequiredForAverageBandwidth; 2991 unsigned int ExtraLatencyBytes; 2992 double ExtraLatencyCycles; 2993 double DCFCLKRequiredForPeakBandwidth; 2994 unsigned int NoOfDPPState[DC__NUM_DPP__MAX]; 2995 double MinimumTvmPlus2Tr0; 2996 2997 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; 2998 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2999 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 3000 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] 3001 / (15.75 * HTotal[k] / PixelClock[k]); 3002 } 3003 3004 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) 3005 NoOfDPPState[k] = NoOfDPP[i][j][k]; 3006 3007 DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]; 3008 DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth); 3009 3010 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes, 3011 TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte, 3012 TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable, 3013 NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize, 3014 HostVMMaxNonCachedPageTableLevels); 3015 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ 3016 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth; 3017 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3018 double DCFCLKCyclesRequiredInPrefetch; 3019 double PrefetchTime; 3020 3021 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] 3022 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k] 3023 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] 3024 * BytePerPixelC[k]) / NormalEfficiency 3025 / ReturnBusWidth; 3026 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] 3027 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency 3028 / NormalEfficiency / ReturnBusWidth 3029 * (GPUVMMaxPageTableLevels > 2 ? 
1 : 0) 3030 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency 3031 / ReturnBusWidth 3032 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth 3033 + PixelDCFCLKCyclesRequiredInPrefetch[k]; 3034 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) 3035 * HTotal[k] / PixelClock[k]; 3036 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true && 3037 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ? 3038 UrgLatency[i] * GPUVMMaxPageTableLevels * 3039 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; 3040 3041 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode, 3042 UseMALLForPStateChange[k], 3043 SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 3044 DRRDisplay[k], 3045 DRAMClockChangeLatencyFinal, 3046 FCLKChangeLatency, 3047 UrgLatency[i], 3048 SREnterPlusExitTime); 3049 3050 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - 3051 MinimumTWait - UrgLatency[i] * 3052 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : 3053 GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ? 
3054 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - 3055 DynamicMetadataVMExtraLatency[k]; 3056 3057 if (PrefetchTime > 0) { 3058 double ExpectedVRatioPrefetch; 3059 3060 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * 3061 PixelDCFCLKCyclesRequiredInPrefetch[k] / 3062 DCFCLKCyclesRequiredInPrefetch); 3063 DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] * 3064 PixelDCFCLKCyclesRequiredInPrefetch[k] / 3065 PrefetchPixelLinesTime[k] * 3066 dml_max(1.0, ExpectedVRatioPrefetch) * 3067 dml_max(1.0, ExpectedVRatioPrefetch / 4); 3068 if (HostVMEnable == true || ImmediateFlipRequirement == true) { 3069 DCFCLKRequiredForPeakBandwidthPerSurface[k] = 3070 DCFCLKRequiredForPeakBandwidthPerSurface[k] + 3071 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / 3072 NormalEfficiency / ReturnBusWidth; 3073 } 3074 } else { 3075 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; 3076 } 3077 if (DynamicMetadataEnable[k] == true) { 3078 double TSetupPipe; 3079 double TdmbfPipe; 3080 double TdmsksPipe; 3081 double TdmecPipe; 3082 double AllowedTimeForUrgentExtraLatency; 3083 3084 dml32_CalculateVUpdateAndDynamicMetadataParameters( 3085 MaxInterDCNTileRepeaters, 3086 RequiredDPPCLKPerSurface[i][j][k], 3087 RequiredDISPCLK[i][j], 3088 ProjectedDCFClkDeepSleep[i][j], 3089 PixelClock[k], 3090 HTotal[k], 3091 VTotal[k] - VActive[k], 3092 DynamicMetadataTransmittedBytes[k], 3093 DynamicMetadataLinesBeforeActiveRequired[k], 3094 Interlace[k], 3095 ProgressiveToInterlaceUnitInOPP, 3096 3097 /* output */ 3098 &TSetupPipe, 3099 &TdmbfPipe, 3100 &TdmecPipe, 3101 &TdmsksPipe, 3102 &dummy1, 3103 &dummy2, 3104 &dummy3); 3105 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / 3106 PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - 3107 TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; 3108 if (AllowedTimeForUrgentExtraLatency > 0) 3109 DCFCLKRequiredForPeakBandwidthPerSurface[k] = 3110 
dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k], 3111 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); 3112 else 3113 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; 3114 } 3115 } 3116 DCFCLKRequiredForPeakBandwidth = 0; 3117 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) { 3118 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + 3119 DCFCLKRequiredForPeakBandwidthPerSurface[k]; 3120 } 3121 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ? 3122 (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) * 3123 (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0); 3124 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3125 double MaximumTvmPlus2Tr0PlusTsw; 3126 3127 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / 3128 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; 3129 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { 3130 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i]; 3131 } else { 3132 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 3133 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - 3134 MinimumTvmPlus2Tr0 - 3135 PrefetchPixelLinesTime[k] / 4), 3136 (2 * ExtraLatencyCycles + 3137 PixelDCFCLKCyclesRequiredInPrefetch[k]) / 3138 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); 3139 } 3140 } 3141 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * 3142 dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); 3143 } 3144 } 3145 }

/*
 * dml32_CalculateExtraLatencyBytes - byte count used for the extra-latency term.
 *
 * The result is
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 * and, only when GPUVMEnable is set, for every surface k in
 * [0, NumberOfActiveSurfaces):
 *   + NumberOfDPP[k] * dpte_group_bytes[k]
 *     * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor
 *
 * HostVMDynamicLevels is 0 unless both GPUVMEnable and HostVMEnable are set;
 * in that case it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2
 * (clamped at 0) depending on which HostVMMinPageSize band applies.
 *
 * NumberOfDPP and dpte_group_bytes are read for indices
 * 0..NumberOfActiveSurfaces-1 and are not modified.
 *
 * The sum is accumulated as a double and converted to unsigned int on return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int k;
	double ret;
	unsigned int HostVMDynamicLevels;

	if (GPUVMEnable && HostVMEnable) {
		/*
		 * The second band needs no explicit ">= 2048" test: it is only
		 * reached when the first comparison failed.
		 */
		if (HostVMMinPageSize < 2048)
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	} else {
		HostVMDynamicLevels = 0;
	}

	ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (GPUVMEnable) {
		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
			ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] *
					(1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
		}
	}

	/* Conversion from the double accumulator to the integer return type. */
	return (unsigned int) ret;
}

/*
 * dml32_CalculateVUpdateAndDynamicMetadataParameters - derive the VUpdate /
 * VReady pixel offsets and the dynamic-metadata timing terms.
 *
 * Outputs (all written unconditionally):
 *   *VUpdateWidthPix  = dml_ceil((14 / DCFClkDeepSleep + 12 / Dppclk
 *                                 + TotalRepeaterDelayTime) * PixelClock, 1)
 *   *VReadyOffsetPix  = dml_ceil(max(150 / Dppclk,
 *                                    TotalRepeaterDelayTime
 *                                    + 20 / DCFClkDeepSleep + 10 / Dppclk)
 *                                * PixelClock, 1)
 *   *VUpdateOffsetPix = dml_ceil(HTotal / 4, 1)
 *   *TSetup           = (sum of the three values above) / PixelClock
 *   *Tdmbf            = DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   *Tdmec            = HTotal / PixelClock
 *   *Tdmsks           = VBlank * HTotal / PixelClock / 2 when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0,
 *                       otherwise that many lines * HTotal / PixelClock;
 *                       halved again when InterlaceEnable == 1 and
 *                       ProgressiveToInterlaceUnitInOPP is false.
 *
 * where TotalRepeaterDelayTime =
 *   MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 *
 * The clocks are used as divisors without any zero check here.
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/*
	 * VUpdateWidthPix and VReadyOffsetPix point to double, so they must be
	 * printed with %f; passing a double to %d is a format/argument mismatch.
	 */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}

/*
 * dml32_CalculateTWait - select the wait time for the given prefetch mode.
 *
 * The first matching rule wins:
 *   1. PrefetchMode == 0, UseMALLForPStateChange is none of full_frame /
 *      sub_viewport / phantom_pipe, and not (synchronized DRR && DRRDisplay):
 *        max(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime,
 *            UrgentLatency)
 *   2. PrefetchMode <= 1 and not phantom_pipe:
 *        max(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime,
 *            UrgentLatency)
 *   3. PrefetchMode <= 2 and not phantom_pipe:
 *        max(SREnterPlusExitTime, UrgentLatency)
 *   4. otherwise: UrgentLatency
 *
 * Returns the selected value.
 */
double dml32_CalculateTWait(
		unsigned int PrefetchMode,
		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
		bool DRRDisplay,
		double DRAMClockChangeLatency,
		double FCLKChangeLatency,
		double UrgentLatency,
		double SREnterPlusExitTime)
{
	double TWait = 0.0;

	if (PrefetchMode == 0 &&
			UseMALLForPStateChange != dm_use_mall_pstate_change_full_frame &&
			UseMALLForPStateChange != dm_use_mall_pstate_change_sub_viewport &&
			UseMALLForPStateChange != dm_use_mall_pstate_change_phantom_pipe &&
			!(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
		TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
	} else if (PrefetchMode <= 1 && UseMALLForPStateChange != dm_use_mall_pstate_change_phantom_pipe) {
		TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
	} else if (PrefetchMode <= 2 && UseMALLForPStateChange != dm_use_mall_pstate_change_phantom_pipe) {
		TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
	} else {
		TWait = UrgentLatency;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
	dml_print("DML::%s: TWait = %f\n", __func__, TWait);
#endif
	return TWait;
} // CalculateTWait

/*
 * Function: get_return_bw_mbps
 * Megabyte per second
 *
 * Returns the smallest of three derated ideal bandwidths:
 *   SDP port: soc->return_bus_width_bytes * DCFCLK
 *             * pct_ideal_sdp_bw_after_urgent / 100
 *   fabric:   FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
 *             * pct_ideal_fabric_bw_after_urgent / 100
 *   DRAM:     DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
 *             * pct / 100, where pct is
 *             pct_ideal_dram_bw_after_urgent_strobe when VoltageLevel < 2 and
 *             pct_ideal_dram_sdp_bw_after_urgent_pixel_only otherwise.
 *
 * HostVMEnable selects between the "pixel only" and the "pixel mixed with VM"
 * candidate. As written both candidates are built from exactly the same
 * terms, so the returned value does not depend on HostVMEnable.
 * NOTE(review): a different DRAM percentage may have been intended for the
 * mixed case - confirm against the SoC bounding box definition before
 * changing anything.
 */
double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
		const int VoltageLevel,
		const bool HostVMEnable,
		const double DCFCLK,
		const double FabricClock,
		const double DRAMSpeed)
{
	double ReturnBW = 0.;
	double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
	double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
	double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
	double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
	double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);

	if (HostVMEnable != true)
		ReturnBW = PixelDataOnlyReturnBW;
	else
		ReturnBW = PixelMixedWithVMDataReturnBW;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
	dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
	dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
	dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
	dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth);
	dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth);
	dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
	dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
	dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
	dml_print("DML::%s: ReturnBW                  = %f MBps\n", __func__, ReturnBW);
#endif
	return ReturnBW;
}

/*
 * Function: get_return_bw_mbps_vm_only
 * Megabyte per second
 *
 * Returns the smallest of:
 *   soc->return_bus_width_bytes * DCFCLK
 *     * pct_ideal_sdp_bw_after_urgent / 100
 *   FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
 *     * pct_ideal_sdp_bw_after_urgent / 100
 *   DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes * pct / 100,
 *     where pct is pct_ideal_dram_bw_after_urgent_strobe when
 *     VoltageLevel < 2 and pct_ideal_dram_sdp_bw_after_urgent_vm_only
 *     otherwise.
 *
 * NOTE(review): the fabric term is scaled by the SDP percentage here, whereas
 * dml32_get_return_bw_mbps scales it by pct_ideal_fabric_bw_after_urgent -
 * confirm whether that difference is intentional.
 */
double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
		const int VoltageLevel,
		const double DCFCLK,
		const double FabricClock,
		const double DRAMSpeed)
{
	double VMDataOnlyReturnBW = dml_min3(
			soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
			FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
					* soc->pct_ideal_sdp_bw_after_urgent / 100.0,
			DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
					* (VoltageLevel < 2 ?
							soc->pct_ideal_dram_bw_after_urgent_strobe :
							soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
	dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
	dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
	dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
#endif
	return VMDataOnlyReturnBW;
}

/*
 * dml32_CalculateExtraLatency - extra latency as a time value.
 *
 * Forwards the byte-related arguments to dml32_CalculateExtraLatencyBytes()
 * and returns
 *   (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK
 *   + ExtraLatencyBytes / ReturnBW
 *
 * DCFCLK and ReturnBW are used as divisors without any zero check here.
 */
double dml32_CalculateExtraLatency(
		unsigned int RoundTripPingLatencyCycles,
		unsigned int ReorderingBytes,
		double DCFCLK,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		double ReturnBW,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	double ExtraLatencyBytes;
	double ExtraLatency;

	ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
			ReorderingBytes,
			TotalNumberOfActiveDPP,
			PixelChunkSizeInKByte,
			TotalNumberOfDCCActiveDPP,
			MetaChunkSize,
			GPUVMEnable,
			HostVMEnable,
			NumberOfActiveSurfaces,
			NumberOfDPP,
			dpte_group_bytes,
			HostVMInefficiencyFactor,
			HostVMMinPageSize,
			HostVMMaxNonCachedPageTableLevels);

	ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
	dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
	dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
	dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
	dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
#endif

	return ExtraLatency;
} // CalculateExtraLatency

3394 bool dml32_CalculatePrefetchSchedule( 3395 struct vba_vars_st *v, 3396 unsigned int k, 3397 double HostVMInefficiencyFactor, 3398 DmlPipe *myPipe, 3399 unsigned int DSCDelay, 3400 unsigned int DPP_RECOUT_WIDTH, 3401 unsigned int VStartup, 3402 unsigned int MaxVStartup, 3403 double UrgentLatency, 3404 double UrgentExtraLatency, 3405 double TCalc, 3406 unsigned int PDEAndMetaPTEBytesFrame, 3407 unsigned int MetaRowByte, 3408 unsigned int PixelPTEBytesPerRow, 3409 double PrefetchSourceLinesY, 3410 unsigned int SwathWidthY, 3411 unsigned int VInitPreFillY, 3412 unsigned int MaxNumSwathY, 3413 double PrefetchSourceLinesC, 3414 unsigned int SwathWidthC, 3415 unsigned int VInitPreFillC, 3416 unsigned int MaxNumSwathC, 3417 unsigned int swath_width_luma_ub, 3418 unsigned int swath_width_chroma_ub, 3419 unsigned int SwathHeightY, 3420 unsigned int SwathHeightC, 3421 double TWait, 3422 double TPreReq, 3423 /* Output */ 3424 double *DSTXAfterScaler, 3425 double *DSTYAfterScaler, 3426 double *DestinationLinesForPrefetch, 3427 double *PrefetchBandwidth, 3428 double *DestinationLinesToRequestVMInVBlank, 3429 double *DestinationLinesToRequestRowInVBlank, 3430 double *VRatioPrefetchY, 3431 double *VRatioPrefetchC, 3432 double *RequiredPrefetchPixDataBWLuma, 3433 double *RequiredPrefetchPixDataBWChroma, 3434 bool *NotEnoughTimeForDynamicMetadata, 3435 double *Tno_bw, 3436 double *prefetch_vmrow_bw, 3437 double *Tdmdl_vm, 3438 double *Tdmdl, 3439 double *TSetup, 3440 unsigned int *VUpdateOffsetPix, 3441 double *VUpdateWidthPix, 3442 double *VReadyOffsetPix) 3443 { 3444 double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater; 3445 bool MyError = false; 3446 unsigned int DPPCycles, DISPCLKCycles; 3447 double
DSTTotalPixelsAfterScaler; 3448 double LineTime; 3449 double dst_y_prefetch_equ; 3450 double prefetch_bw_oto; 3451 double Tvm_oto; 3452 double Tr0_oto; 3453 double Tvm_oto_lines; 3454 double Tr0_oto_lines; 3455 double dst_y_prefetch_oto; 3456 double TimeForFetchingMetaPTE = 0; 3457 double TimeForFetchingRowInVBlank = 0; 3458 double LinesToRequestPrefetchPixelData = 0; 3459 unsigned int HostVMDynamicLevelsTrips; 3460 double trip_to_mem; 3461 double Tvm_trips; 3462 double Tr0_trips; 3463 double Tvm_trips_rounded; 3464 double Tr0_trips_rounded; 3465 double Lsw_oto; 3466 double Tpre_rounded; 3467 double prefetch_bw_equ; 3468 double Tvm_equ; 3469 double Tr0_equ; 3470 double Tdmbf; 3471 double Tdmec; 3472 double Tdmsks; 3473 double prefetch_sw_bytes; 3474 double bytes_pp; 3475 double dep_bytes; 3476 unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__; 3477 double min_Lsw; 3478 double Tsw_est1 = 0; 3479 double Tsw_est3 = 0; 3480 3481 if (v->GPUVMEnable == true && v->HostVMEnable == true) 3482 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; 3483 else 3484 HostVMDynamicLevelsTrips = 0; 3485 #ifdef __DML_VBA_DEBUG__ 3486 dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable); 3487 dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels); 3488 dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable); 3489 dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n", 3490 __func__, v->HostVMEnable, HostVMInefficiencyFactor); 3491 #endif 3492 dml32_CalculateVUpdateAndDynamicMetadataParameters( 3493 v->MaxInterDCNTileRepeaters, 3494 myPipe->Dppclk, 3495 myPipe->Dispclk, 3496 myPipe->DCFClkDeepSleep, 3497 myPipe->PixelClock, 3498 myPipe->HTotal, 3499 myPipe->VBlank, 3500 v->DynamicMetadataTransmittedBytes[k], 3501 v->DynamicMetadataLinesBeforeActiveRequired[k], 3502 myPipe->InterlaceEnable, 3503 myPipe->ProgressiveToInterlaceUnitInOPP, 3504 TSetup, 3505 3506 /* output */ 3507 &Tdmbf, 3508 
&Tdmec, 3509 &Tdmsks, 3510 VUpdateOffsetPix, 3511 VUpdateWidthPix, 3512 VReadyOffsetPix); 3513 3514 LineTime = myPipe->HTotal / myPipe->PixelClock; 3515 trip_to_mem = UrgentLatency; 3516 Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 3517 3518 if (v->DynamicMetadataVMEnabled == true) 3519 *Tdmdl = TWait + Tvm_trips + trip_to_mem; 3520 else 3521 *Tdmdl = TWait + UrgentExtraLatency; 3522 3523 #ifdef __DML_VBA_ALLOW_DELTA__ 3524 if (v->DynamicMetadataEnable[k] == false) 3525 *Tdmdl = 0.0; 3526 #endif 3527 3528 if (v->DynamicMetadataEnable[k] == true) { 3529 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 3530 *NotEnoughTimeForDynamicMetadata = true; 3531 #ifdef __DML_VBA_DEBUG__ 3532 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 3533 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", 3534 __func__, Tdmbf); 3535 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 3536 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", 3537 __func__, Tdmsks); 3538 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", 3539 __func__, *Tdmdl); 3540 #endif 3541 } else { 3542 *NotEnoughTimeForDynamicMetadata = false; 3543 } 3544 } else { 3545 *NotEnoughTimeForDynamicMetadata = false; 3546 } 3547 3548 *Tdmdl_vm = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true && 3549 v->GPUVMEnable == true ? 
TWait + Tvm_trips : 0); 3550 3551 if (myPipe->ScalerEnabled) 3552 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL; 3553 else 3554 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly; 3555 3556 DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor; 3557 3558 DISPCLKCycles = v->DISPCLKDelaySubtotal; 3559 3560 if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) 3561 return true; 3562 3563 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles * 3564 myPipe->PixelClock / myPipe->Dispclk + DSCDelay; 3565 3566 *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0) 3567 + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH 3568 + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ? 3569 myPipe->HActive / 2 : 0) 3570 + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0); 3571 3572 #ifdef __DML_VBA_DEBUG__ 3573 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 3574 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 3575 dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk); 3576 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 3577 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk); 3578 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 3579 dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode); 3580 dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH); 3581 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); 3582 #endif 3583 3584 if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) 3585 *DSTYAfterScaler = 1; 3586 else 3587 *DSTYAfterScaler = 0; 3588 3589 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 3590 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 
1); 3591 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 3592 #ifdef __DML_VBA_DEBUG__ 3593 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 3594 dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler); 3595 #endif 3596 3597 MyError = false; 3598 3599 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 3600 3601 if (v->GPUVMEnable == true) { 3602 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime; 3603 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; 3604 if (v->GPUVMMaxPageTableLevels >= 3) { 3605 *Tno_bw = UrgentExtraLatency + trip_to_mem * 3606 (double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1); 3607 } else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) { 3608 Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) / 3609 4.0 * LineTime; // VBA_ERROR 3610 *Tno_bw = UrgentExtraLatency; 3611 } else { 3612 *Tno_bw = 0; 3613 } 3614 } else if (myPipe->DCCEnable == true) { 3615 Tvm_trips_rounded = LineTime / 4.0; 3616 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime; 3617 *Tno_bw = 0; 3618 } else { 3619 Tvm_trips_rounded = LineTime / 4.0; 3620 Tr0_trips_rounded = LineTime / 2.0; 3621 *Tno_bw = 0; 3622 } 3623 Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0); 3624 Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0); 3625 3626 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 3627 || myPipe->SourcePixelFormat == dm_420_12) { 3628 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 3629 } else { 3630 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 3631 } 3632 3633 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY 3634 + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 3635 
prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, 3636 prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); 3637 3638 min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre; 3639 min_Lsw = dml_max(min_Lsw, 1.0); 3640 Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0; 3641 3642 if (v->GPUVMEnable == true) { 3643 Tvm_oto = dml_max3( 3644 Tvm_trips, 3645 *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, 3646 LineTime / 4.0); 3647 } else 3648 Tvm_oto = LineTime / 4.0; 3649 3650 if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { 3651 Tr0_oto = dml_max4( 3652 Tr0_trips, 3653 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, 3654 (LineTime - Tvm_oto)/2.0, 3655 LineTime / 4.0); 3656 #ifdef __DML_VBA_DEBUG__ 3657 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, 3658 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto); 3659 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips); 3660 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto); 3661 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4); 3662 #endif 3663 } else 3664 Tr0_oto = (LineTime - Tvm_oto) / 2.0; 3665 3666 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 3667 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 3668 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 3669 3670 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - 3671 (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); 3672 3673 dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__); 3674 #ifdef __DML_VBA_DEBUG__ 3675 dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); 3676 dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw); 3677 
dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw); 3678 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency); 3679 dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem); 3680 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3681 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3682 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3683 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC); 3684 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 3685 dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub); 3686 dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes); 3687 dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp); 3688 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 3689 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 3690 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 3691 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 3692 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 3693 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 3694 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); 3695 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 3696 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 3697 dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines); 3698 dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines); 3699 dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto); 3700 dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto); 3701 dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ); 3702 #endif 3703 3704 dst_y_prefetch_equ = dml_floor(4.0 * 
(dst_y_prefetch_equ + 0.125), 1) / 4.0; 3705 Tpre_rounded = dst_y_prefetch_equ * LineTime; 3706 #ifdef __DML_VBA_DEBUG__ 3707 dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ); 3708 dml_print("DML::%s: LineTime: %f\n", __func__, LineTime); 3709 dml_print("DML::%s: VStartup: %d\n", __func__, VStartup); 3710 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", 3711 __func__, VStartup * LineTime); 3712 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup); 3713 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc); 3714 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 3715 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 3716 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm); 3717 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); 3718 dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n", 3719 __func__, *DSTYAfterScaler); 3720 #endif 3721 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, 3722 MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 3723 3724 if (prefetch_sw_bytes < dep_bytes) 3725 prefetch_sw_bytes = 2 * dep_bytes; 3726 3727 *PrefetchBandwidth = 0; 3728 *DestinationLinesToRequestVMInVBlank = 0; 3729 *DestinationLinesToRequestRowInVBlank = 0; 3730 *VRatioPrefetchY = 0; 3731 *VRatioPrefetchC = 0; 3732 *RequiredPrefetchPixDataBWLuma = 0; 3733 if (dst_y_prefetch_equ > 1 && 3734 (Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) { 3735 double PrefetchBandwidth1; 3736 double PrefetchBandwidth2; 3737 double PrefetchBandwidth3; 3738 double PrefetchBandwidth4; 3739 3740 if (Tpre_rounded - *Tno_bw > 0) { 
3741 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3742 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3743 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); 3744 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 3745 } else 3746 PrefetchBandwidth1 = 0; 3747 3748 if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw) 3749 && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 3750 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3751 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3752 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 3753 } 3754 3755 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 3756 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / 3757 (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 3758 else 3759 PrefetchBandwidth2 = 0; 3760 3761 if (Tpre_rounded - Tvm_trips_rounded > 0) { 3762 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3763 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 3764 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 3765 } else 3766 PrefetchBandwidth3 = 0; 3767 3768 3769 if (VStartup == MaxVStartup && 3770 (Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 * 3771 LineTime - Tvm_trips_rounded > 0) { 3772 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3773 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 3774 } 3775 3776 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) { 3777 PrefetchBandwidth4 = prefetch_sw_bytes / 3778 (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 3779 } else { 3780 PrefetchBandwidth4 = 0; 3781 } 3782 3783 #ifdef __DML_VBA_DEBUG__ 3784 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 3785 dml_print("DML::%s: Tno_bw: 
%f\n", __func__, *Tno_bw); 3786 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 3787 dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1); 3788 dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3); 3789 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1); 3790 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2); 3791 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); 3792 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4); 3793 #endif 3794 { 3795 bool Case1OK; 3796 bool Case2OK; 3797 bool Case3OK; 3798 3799 if (PrefetchBandwidth1 > 0) { 3800 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 3801 >= Tvm_trips_rounded 3802 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3803 / PrefetchBandwidth1 >= Tr0_trips_rounded) { 3804 Case1OK = true; 3805 } else { 3806 Case1OK = false; 3807 } 3808 } else { 3809 Case1OK = false; 3810 } 3811 3812 if (PrefetchBandwidth2 > 0) { 3813 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 3814 >= Tvm_trips_rounded 3815 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3816 / PrefetchBandwidth2 < Tr0_trips_rounded) { 3817 Case2OK = true; 3818 } else { 3819 Case2OK = false; 3820 } 3821 } else { 3822 Case2OK = false; 3823 } 3824 3825 if (PrefetchBandwidth3 > 0) { 3826 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < 3827 Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * 3828 HostVMInefficiencyFactor) / PrefetchBandwidth3 >= 3829 Tr0_trips_rounded) { 3830 Case3OK = true; 3831 } else { 3832 Case3OK = false; 3833 } 3834 } else { 3835 Case3OK = false; 3836 } 3837 3838 if (Case1OK) 3839 prefetch_bw_equ = PrefetchBandwidth1; 3840 else if (Case2OK) 3841 prefetch_bw_equ = PrefetchBandwidth2; 3842 else if (Case3OK) 3843 prefetch_bw_equ = PrefetchBandwidth3; 3844 
else 3845 prefetch_bw_equ = PrefetchBandwidth4; 3846 3847 #ifdef __DML_VBA_DEBUG__ 3848 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 3849 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 3850 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 3851 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 3852 #endif 3853 3854 if (prefetch_bw_equ > 0) { 3855 if (v->GPUVMEnable == true) { 3856 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * 3857 HostVMInefficiencyFactor / prefetch_bw_equ, 3858 Tvm_trips, LineTime / 4); 3859 } else { 3860 Tvm_equ = LineTime / 4; 3861 } 3862 3863 if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) { 3864 Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * 3865 HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, 3866 (LineTime - Tvm_equ) / 2, LineTime / 4); 3867 } else { 3868 Tr0_equ = (LineTime - Tvm_equ) / 2; 3869 } 3870 } else { 3871 Tvm_equ = 0; 3872 Tr0_equ = 0; 3873 #ifdef __DML_VBA_DEBUG__ 3874 dml_print("DML: prefetch_bw_equ equals 0! 
%s:%d\n", __FILE__, __LINE__); 3875 #endif 3876 } 3877 } 3878 3879 if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 3880 if (dst_y_prefetch_oto * LineTime < TPreReq) { 3881 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 3882 } else { 3883 *DestinationLinesForPrefetch = dst_y_prefetch_oto; 3884 } 3885 TimeForFetchingMetaPTE = Tvm_oto; 3886 TimeForFetchingRowInVBlank = Tr0_oto; 3887 *PrefetchBandwidth = prefetch_bw_oto; 3888 } else { 3889 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 3890 TimeForFetchingMetaPTE = Tvm_equ; 3891 TimeForFetchingRowInVBlank = Tr0_equ; 3892 *PrefetchBandwidth = prefetch_bw_equ; 3893 } 3894 3895 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 3896 3897 *DestinationLinesToRequestRowInVBlank = 3898 dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 3899 3900 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - 3901 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 3902 3903 #ifdef __DML_VBA_DEBUG__ 3904 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 3905 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 3906 __func__, *DestinationLinesToRequestVMInVBlank); 3907 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 3908 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 3909 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", 3910 __func__, *DestinationLinesToRequestRowInVBlank); 3911 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3912 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 3913 #endif 3914 3915 if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) { 3916 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 3917 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 
1.0); 3918 #ifdef __DML_VBA_DEBUG__ 3919 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 3920 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); 3921 dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY); 3922 #endif 3923 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 3924 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 3925 *VRatioPrefetchY = 3926 dml_max((double) PrefetchSourceLinesY / 3927 LinesToRequestPrefetchPixelData, 3928 (double) MaxNumSwathY * SwathHeightY / 3929 (LinesToRequestPrefetchPixelData - 3930 (VInitPreFillY - 3.0) / 2.0)); 3931 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 3932 } else { 3933 MyError = true; 3934 *VRatioPrefetchY = 0; 3935 } 3936 #ifdef __DML_VBA_DEBUG__ 3937 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 3938 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3939 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); 3940 #endif 3941 } 3942 3943 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 3944 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3945 3946 #ifdef __DML_VBA_DEBUG__ 3947 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 3948 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); 3949 dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC); 3950 #endif 3951 if ((SwathHeightC > 4)) { 3952 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 3953 *VRatioPrefetchC = 3954 dml_max(*VRatioPrefetchC, 3955 (double) MaxNumSwathC * SwathHeightC / 3956 (LinesToRequestPrefetchPixelData - 3957 (VInitPreFillC - 3.0) / 2.0)); 3958 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3959 } else { 3960 MyError = true; 3961 *VRatioPrefetchC = 0; 3962 } 3963 #ifdef __DML_VBA_DEBUG__ 3964 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 3965 
dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 3966 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); 3967 #endif 3968 } 3969 3970 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY 3971 / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub 3972 / LineTime; 3973 3974 #ifdef __DML_VBA_DEBUG__ 3975 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3976 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3977 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 3978 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", 3979 __func__, *RequiredPrefetchPixDataBWLuma); 3980 #endif 3981 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / 3982 LinesToRequestPrefetchPixelData 3983 * myPipe->BytePerPixelC 3984 * swath_width_chroma_ub / LineTime; 3985 } else { 3986 MyError = true; 3987 #ifdef __DML_VBA_DEBUG__ 3988 dml_print("DML:%s: MyErr set. 
LinesToRequestPrefetchPixelData: %f, should be > 0\n", 3989 __func__, LinesToRequestPrefetchPixelData); 3990 #endif 3991 *VRatioPrefetchY = 0; 3992 *VRatioPrefetchC = 0; 3993 *RequiredPrefetchPixDataBWLuma = 0; 3994 *RequiredPrefetchPixDataBWChroma = 0; 3995 } 3996 #ifdef __DML_VBA_DEBUG__ 3997 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 3998 (double)LinesToRequestPrefetchPixelData * LineTime + 3999 2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 4000 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 4001 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", 4002 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); 4003 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 4004 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - 4005 TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler + 4006 ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); 4007 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", 4008 PixelPTEBytesPerRow); 4009 #endif 4010 } else { 4011 MyError = true; 4012 #ifdef __DML_VBA_DEBUG__ 4013 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", 4014 __func__, dst_y_prefetch_equ); 4015 #endif 4016 } 4017 4018 { 4019 double prefetch_vm_bw; 4020 double prefetch_row_bw; 4021 4022 if (PDEAndMetaPTEBytesFrame == 0) { 4023 prefetch_vm_bw = 0; 4024 } else if (*DestinationLinesToRequestVMInVBlank > 0) { 4025 #ifdef __DML_VBA_DEBUG__ 4026 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 4027 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 4028 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 4029 __func__, 
*DestinationLinesToRequestVMInVBlank); 4030 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 4031 #endif 4032 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / 4033 (*DestinationLinesToRequestVMInVBlank * LineTime); 4034 #ifdef __DML_VBA_DEBUG__ 4035 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 4036 #endif 4037 } else { 4038 prefetch_vm_bw = 0; 4039 MyError = true; 4040 #ifdef __DML_VBA_DEBUG__ 4041 dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", 4042 __func__, *DestinationLinesToRequestVMInVBlank); 4043 #endif 4044 } 4045 4046 if (MetaRowByte + PixelPTEBytesPerRow == 0) { 4047 prefetch_row_bw = 0; 4048 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 4049 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / 4050 (*DestinationLinesToRequestRowInVBlank * LineTime); 4051 4052 #ifdef __DML_VBA_DEBUG__ 4053 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 4054 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 4055 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", 4056 __func__, *DestinationLinesToRequestRowInVBlank); 4057 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 4058 #endif 4059 } else { 4060 prefetch_row_bw = 0; 4061 MyError = true; 4062 #ifdef __DML_VBA_DEBUG__ 4063 dml_print("DML::%s: MyErr set. 
DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", 4064 __func__, *DestinationLinesToRequestRowInVBlank); 4065 #endif 4066 } 4067 4068 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 4069 } 4070 4071 if (MyError) { 4072 *PrefetchBandwidth = 0; 4073 TimeForFetchingMetaPTE = 0; 4074 TimeForFetchingRowInVBlank = 0; 4075 *DestinationLinesToRequestVMInVBlank = 0; 4076 *DestinationLinesToRequestRowInVBlank = 0; 4077 *DestinationLinesForPrefetch = 0; 4078 LinesToRequestPrefetchPixelData = 0; 4079 *VRatioPrefetchY = 0; 4080 *VRatioPrefetchC = 0; 4081 *RequiredPrefetchPixDataBWLuma = 0; 4082 *RequiredPrefetchPixDataBWChroma = 0; 4083 } 4084 4085 return MyError; 4086 } // CalculatePrefetchSchedule 4087 4088 void dml32_CalculateFlipSchedule( 4089 double HostVMInefficiencyFactor, 4090 double UrgentExtraLatency, 4091 double UrgentLatency, 4092 unsigned int GPUVMMaxPageTableLevels, 4093 bool HostVMEnable, 4094 unsigned int HostVMMaxNonCachedPageTableLevels, 4095 bool GPUVMEnable, 4096 double HostVMMinPageSize, 4097 double PDEAndMetaPTEBytesPerFrame, 4098 double MetaRowBytes, 4099 double DPTEBytesPerRow, 4100 double BandwidthAvailableForImmediateFlip, 4101 unsigned int TotImmediateFlipBytes, 4102 enum source_format_class SourcePixelFormat, 4103 double LineTime, 4104 double VRatio, 4105 double VRatioChroma, 4106 double Tno_bw, 4107 bool DCCEnable, 4108 unsigned int dpte_row_height, 4109 unsigned int meta_row_height, 4110 unsigned int dpte_row_height_chroma, 4111 unsigned int meta_row_height_chroma, 4112 bool use_one_row_for_frame_flip, 4113 4114 /* Output */ 4115 double *DestinationLinesToRequestVMInImmediateFlip, 4116 double *DestinationLinesToRequestRowInImmediateFlip, 4117 double *final_flip_bw, 4118 bool *ImmediateFlipSupportedForPipe) 4119 { 4120 double min_row_time = 0.0; 4121 unsigned int HostVMDynamicLevelsTrips; 4122 double TimeForFetchingMetaPTEImmediateFlip; 4123 double TimeForFetchingRowInVBlankImmediateFlip; 4124 double ImmediateFlipBW; 4125 
4126 if (GPUVMEnable == true && HostVMEnable == true) 4127 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 4128 else 4129 HostVMDynamicLevelsTrips = 0; 4130 4131 #ifdef __DML_VBA_DEBUG__ 4132 dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes); 4133 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); 4134 #endif 4135 4136 if (TotImmediateFlipBytes > 0) { 4137 if (use_one_row_for_frame_flip) { 4138 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) * 4139 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 4140 } else { 4141 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * 4142 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 4143 } 4144 if (GPUVMEnable == true) { 4145 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * 4146 HostVMInefficiencyFactor / ImmediateFlipBW, 4147 UrgentExtraLatency + UrgentLatency * 4148 (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), 4149 LineTime / 4.0); 4150 } else { 4151 TimeForFetchingMetaPTEImmediateFlip = 0; 4152 } 4153 if ((GPUVMEnable == true || DCCEnable == true)) { 4154 TimeForFetchingRowInVBlankImmediateFlip = dml_max3( 4155 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, 4156 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0); 4157 } else { 4158 TimeForFetchingRowInVBlankImmediateFlip = 0; 4159 } 4160 4161 *DestinationLinesToRequestVMInImmediateFlip = 4162 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0; 4163 *DestinationLinesToRequestRowInImmediateFlip = 4164 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0; 4165 4166 if (GPUVMEnable == true) { 4167 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / 4168 (*DestinationLinesToRequestVMInImmediateFlip * LineTime), 
4169 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / 4170 (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); 4171 } else if ((GPUVMEnable == true || DCCEnable == true)) { 4172 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / 4173 (*DestinationLinesToRequestRowInImmediateFlip * LineTime); 4174 } else { 4175 *final_flip_bw = 0; 4176 } 4177 } else { 4178 TimeForFetchingMetaPTEImmediateFlip = 0; 4179 TimeForFetchingRowInVBlankImmediateFlip = 0; 4180 *DestinationLinesToRequestVMInImmediateFlip = 0; 4181 *DestinationLinesToRequestRowInImmediateFlip = 0; 4182 *final_flip_bw = 0; 4183 } 4184 4185 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) { 4186 if (GPUVMEnable == true && DCCEnable != true) { 4187 min_row_time = dml_min(dpte_row_height * 4188 LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma); 4189 } else if (GPUVMEnable != true && DCCEnable == true) { 4190 min_row_time = dml_min(meta_row_height * 4191 LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); 4192 } else { 4193 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * 4194 LineTime / VRatio, dpte_row_height_chroma * LineTime / 4195 VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma); 4196 } 4197 } else { 4198 if (GPUVMEnable == true && DCCEnable != true) { 4199 min_row_time = dpte_row_height * LineTime / VRatio; 4200 } else if (GPUVMEnable != true && DCCEnable == true) { 4201 min_row_time = meta_row_height * LineTime / VRatio; 4202 } else { 4203 min_row_time = 4204 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); 4205 } 4206 } 4207 4208 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 4209 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip 4210 > min_row_time) { 4211 *ImmediateFlipSupportedForPipe = 
false; 4212 } else { 4213 *ImmediateFlipSupportedForPipe = true; 4214 } 4215 4216 #ifdef __DML_VBA_DEBUG__ 4217 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 4218 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); 4219 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", 4220 __func__, *DestinationLinesToRequestVMInImmediateFlip); 4221 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", 4222 __func__, *DestinationLinesToRequestRowInImmediateFlip); 4223 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); 4224 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", 4225 __func__, TimeForFetchingRowInVBlankImmediateFlip); 4226 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); 4227 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe); 4228 #endif 4229 } // CalculateFlipSchedule 4230 4231 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( 4232 struct vba_vars_st *v, 4233 unsigned int PrefetchMode, 4234 double DCFCLK, 4235 double ReturnBW, 4236 SOCParametersList mmSOCParameters, 4237 double SOCCLK, 4238 double DCFClkDeepSleep, 4239 unsigned int DETBufferSizeY[], 4240 unsigned int DETBufferSizeC[], 4241 unsigned int SwathHeightY[], 4242 unsigned int SwathHeightC[], 4243 double SwathWidthY[], 4244 double SwathWidthC[], 4245 unsigned int DPPPerSurface[], 4246 double BytePerPixelDETY[], 4247 double BytePerPixelDETC[], 4248 double DSTXAfterScaler[], 4249 double DSTYAfterScaler[], 4250 bool UnboundedRequestEnabled, 4251 unsigned int CompressedBufferSizeInkByte, 4252 4253 /* Output */ 4254 enum clock_change_support *DRAMClockChangeSupport, 4255 double MaxActiveDRAMClockChangeLatencySupported[], 4256 unsigned int SubViewportLinesNeededInMALL[], 4257 enum dm_fclock_change_support *FCLKChangeSupport, 4258 double *MinActiveFCLKChangeLatencySupported, 4259 bool 
*USRRetrainingSupport, 4260 double ActiveDRAMClockChangeLatencyMargin[]) 4261 { 4262 unsigned int i, j, k; 4263 unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0; 4264 unsigned int DRAMClockChangeSupportNumber = 0; 4265 unsigned int LastSurfaceWithoutMargin; 4266 unsigned int DRAMClockChangeMethod = 0; 4267 bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; 4268 double MinActiveFCLKChangeMargin = 0.; 4269 double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; 4270 double ActiveClockChangeLatencyHidingY; 4271 double ActiveClockChangeLatencyHidingC; 4272 double ActiveClockChangeLatencyHiding; 4273 double EffectiveDETBufferSizeY; 4274 double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX]; 4275 double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX]; 4276 double TotalPixelBW = 0.0; 4277 bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX]; 4278 double EffectiveLBLatencyHidingY; 4279 double EffectiveLBLatencyHidingC; 4280 double LinesInDETY[DC__NUM_DPP__MAX]; 4281 double LinesInDETC[DC__NUM_DPP__MAX]; 4282 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 4283 unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX]; 4284 double FullDETBufferingTimeY; 4285 double FullDETBufferingTimeC; 4286 double WritebackDRAMClockChangeLatencyMargin; 4287 double WritebackFCLKChangeLatencyMargin; 4288 double WritebackLatencyHiding; 4289 bool SameTimingForFCLKChange; 4290 4291 unsigned int TotalActiveWriteback = 0; 4292 unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX]; 4293 unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX]; 4294 4295 v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; 4296 v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency 4297 + mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency; 4298 v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark; 4299 
v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark; 4300 v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency 4301 + 10 / DCFClkDeepSleep; 4302 v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency 4303 + 10 / DCFClkDeepSleep; 4304 v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency 4305 + 10 / DCFClkDeepSleep; 4306 v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time 4307 + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep; 4308 4309 #ifdef __DML_VBA_DEBUG__ 4310 dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency); 4311 dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency); 4312 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency); 4313 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark); 4314 dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark); 4315 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark); 4316 dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark); 4317 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark); 4318 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark); 4319 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark); 4320 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", 4321 __func__, v->Watermark.Z8StutterEnterPlusExitWatermark); 4322 #endif 4323 4324 4325 TotalActiveWriteback = 0; 4326 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4327 if (v->WritebackEnable[k] == true) 4328 
TotalActiveWriteback = TotalActiveWriteback + 1; 4329 } 4330 4331 if (TotalActiveWriteback <= 1) { 4332 v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; 4333 } else { 4334 v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency 4335 + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 4336 } 4337 if (v->USRRetrainingRequiredFinal) 4338 v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark 4339 + mmSOCParameters.USRRetrainingLatency; 4340 4341 if (TotalActiveWriteback <= 1) { 4342 v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency 4343 + mmSOCParameters.WritebackLatency; 4344 v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency 4345 + mmSOCParameters.WritebackLatency; 4346 } else { 4347 v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency 4348 + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 4349 v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency 4350 + mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK; 4351 } 4352 4353 if (v->USRRetrainingRequiredFinal) 4354 v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark 4355 + mmSOCParameters.USRRetrainingLatency; 4356 4357 if (v->USRRetrainingRequiredFinal) 4358 v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark 4359 + mmSOCParameters.USRRetrainingLatency; 4360 4361 #ifdef __DML_VBA_DEBUG__ 4362 dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", 4363 __func__, v->Watermark.WritebackDRAMClockChangeWatermark); 4364 dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark); 4365 dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark); 4366 dml_print("DML::%s: v->USRRetrainingRequiredFinal = 
%d\n", __func__, v->USRRetrainingRequiredFinal); 4367 dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency); 4368 #endif 4369 4370 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4371 TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + 4372 SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]); 4373 } 4374 4375 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4376 4377 LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); 4378 LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); 4379 4380 4381 #ifdef __DML_VBA_DEBUG__ 4382 dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines); 4383 dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal); 4384 dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]); 4385 dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]); 4386 dml_print("DML::%s: k=%d, v->vtaps = %d\n", __func__, k, v->vtaps[k]); 4387 #endif 4388 4389 EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); 4390 EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); 4391 EffectiveDETBufferSizeY = DETBufferSizeY[k]; 4392 4393 if (UnboundedRequestEnabled) { 4394 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY 4395 + CompressedBufferSizeInkByte * 1024 4396 * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k]) 4397 / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; 4398 } 4399 4400 
LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 4401 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); 4402 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; 4403 4404 ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY 4405 - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k]; 4406 4407 if (v->NumberOfActiveSurfaces > 1) { 4408 ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY 4409 - (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k] 4410 / v->PixelClock[k] / v->VRatio[k]; 4411 } 4412 4413 if (BytePerPixelDETC[k] > 0) { 4414 LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 4415 LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]); 4416 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) 4417 / v->VRatioChroma[k]; 4418 ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC 4419 - (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] 4420 / v->PixelClock[k]; 4421 if (v->NumberOfActiveSurfaces > 1) { 4422 ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC 4423 - (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k] 4424 / v->PixelClock[k] / v->VRatioChroma[k]; 4425 } 4426 ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY, 4427 ActiveClockChangeLatencyHidingC); 4428 } else { 4429 ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY; 4430 } 4431 4432 ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark 4433 - v->Watermark.DRAMClockChangeWatermark; 4434 ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark 4435 - 
v->Watermark.FCLKChangeWatermark; 4436 USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark; 4437 4438 if (v->WritebackEnable[k]) { 4439 WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024 4440 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 4441 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); 4442 if (v->WritebackPixelFormat[k] == dm_444_64) 4443 WritebackLatencyHiding = WritebackLatencyHiding / 2; 4444 4445 WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding 4446 - v->Watermark.WritebackDRAMClockChangeWatermark; 4447 4448 WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding 4449 - v->Watermark.WritebackFCLKChangeWatermark; 4450 4451 ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k], 4452 WritebackFCLKChangeLatencyMargin); 4453 ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k], 4454 WritebackDRAMClockChangeLatencyMargin); 4455 } 4456 MaxActiveDRAMClockChangeLatencySupported[k] = 4457 (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? 
4458 0 : 4459 (ActiveDRAMClockChangeLatencyMargin[k] 4460 + mmSOCParameters.DRAMClockChangeLatency); 4461 } 4462 4463 for (i = 0; i < v->NumberOfActiveSurfaces; ++i) { 4464 for (j = 0; j < v->NumberOfActiveSurfaces; ++j) { 4465 if (i == j || 4466 (v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) || 4467 (v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) || 4468 (v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) || 4469 (v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] && 4470 v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] && 4471 v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal && 4472 (v->DRRDisplay[i] || v->DRRDisplay[j]))) { 4473 SynchronizedSurfaces[i][j] = true; 4474 } else { 4475 SynchronizedSurfaces[i][j] = false; 4476 } 4477 } 4478 } 4479 4480 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4481 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4482 (!FoundFirstSurfaceWithMinActiveFCLKChangeMargin || 4483 ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) { 4484 FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; 4485 MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k]; 4486 SurfaceWithMinActiveFCLKChangeMargin = k; 4487 } 4488 } 4489 4490 *MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; 4491 4492 SameTimingForFCLKChange = true; 4493 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4494 if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) { 4495 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4496 (SameTimingForFCLKChange || 4497 ActiveFCLKChangeLatencyMargin[k] < 4498 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { 4499 SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k]; 4500 } 4501 SameTimingForFCLKChange = false; 4502 } 4503 } 4504 
4505 if (MinActiveFCLKChangeMargin > 0) { 4506 *FCLKChangeSupport = dm_fclock_change_vactive; 4507 } else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && 4508 (PrefetchMode <= 1)) { 4509 *FCLKChangeSupport = dm_fclock_change_vblank; 4510 } else { 4511 *FCLKChangeSupport = dm_fclock_change_unsupported; 4512 } 4513 4514 *USRRetrainingSupport = true; 4515 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4516 if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4517 (USRRetrainingLatencyMargin[k] < 0)) { 4518 *USRRetrainingSupport = false; 4519 } 4520 } 4521 4522 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4523 if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame && 4524 v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport && 4525 v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && 4526 ActiveDRAMClockChangeLatencyMargin[k] < 0) { 4527 if (PrefetchMode > 0) { 4528 DRAMClockChangeSupportNumber = 2; 4529 } else if (DRAMClockChangeSupportNumber == 0) { 4530 DRAMClockChangeSupportNumber = 1; 4531 LastSurfaceWithoutMargin = k; 4532 } else if (DRAMClockChangeSupportNumber == 1 && 4533 !SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) { 4534 DRAMClockChangeSupportNumber = 2; 4535 } 4536 } 4537 } 4538 4539 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4540 if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) 4541 DRAMClockChangeMethod = 1; 4542 else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) 4543 DRAMClockChangeMethod = 2; 4544 } 4545 4546 if (DRAMClockChangeMethod == 0) { 4547 if (DRAMClockChangeSupportNumber == 0) 4548 *DRAMClockChangeSupport = dm_dram_clock_change_vactive; 4549 else if (DRAMClockChangeSupportNumber == 1) 4550 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 4551 else 4552 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4553 } else if 
(DRAMClockChangeMethod == 1) { 4554 if (DRAMClockChangeSupportNumber == 0) 4555 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame; 4556 else if (DRAMClockChangeSupportNumber == 1) 4557 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame; 4558 else 4559 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4560 } else { 4561 if (DRAMClockChangeSupportNumber == 0) 4562 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp; 4563 else if (DRAMClockChangeSupportNumber == 1) 4564 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp; 4565 else 4566 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4567 } 4568 4569 for (k = 0; k < v->NumberOfActiveSurfaces; ++k) { 4570 unsigned int dst_y_pstate; 4571 unsigned int src_y_pstate_l; 4572 unsigned int src_y_pstate_c; 4573 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c; 4574 4575 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1); 4576 src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]); 4577 src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k]; 4578 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k]; 4579 4580 #ifdef __DML_VBA_DEBUG__ 4581 dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 4582 dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 4583 dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); 4584 dml_print("DML::%s: k=%d, SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); 4585 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]); 4586 dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate); 4587 dml_print("DML::%s: k=%d, src_y_pstate_l 
= %d\n", __func__, k, src_y_pstate_l); 4588 dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l); 4589 dml_print("DML::%s: k=%d, v->meta_row_height = %d\n", __func__, k, v->meta_row_height[k]); 4590 dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l); 4591 #endif 4592 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l; 4593 4594 if (BytePerPixelDETC[k] > 0) { 4595 src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]); 4596 src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k]; 4597 sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k]; 4598 SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c); 4599 4600 #ifdef __DML_VBA_DEBUG__ 4601 dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c); 4602 dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c); 4603 dml_print("DML::%s: k=%d, v->meta_row_height_chroma = %d\n", __func__, k, v->meta_row_height_chroma[k]); 4604 dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c); 4605 #endif 4606 } 4607 } 4608 #ifdef __DML_VBA_DEBUG__ 4609 dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport); 4610 dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport); 4611 dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n", 4612 __func__, *MinActiveFCLKChangeLatencySupported); 4613 dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport); 4614 #endif 4615 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport

/*
 * Evaluate three candidate clock values from the writeback configuration
 * (tap counts, source/destination widths, HTotal and line-buffer size),
 * take the largest with dml_max3() and return it after it has been passed
 * through dml32_RoundToDFSGranularity().
 *
 * WritebackPixelFormat and WritebackVRatio are part of the interface but
 * are not read by this function.
 */
double dml32_CalculateWriteBackDISPCLK(
		enum source_format_class WritebackPixelFormat,
		double PixelClock,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackHTaps,
		unsigned int WritebackVTaps,
		unsigned int WritebackSourceWidth,
		unsigned int WritebackDestinationWidth,
		unsigned int HTotal,
		unsigned int WritebackLineBufferSize,
		double DISPCLKDPPCLKVCOSpeed)
{
	/* Candidate driven by the horizontal tap count and horizontal ratio. */
	const double horz_candidate = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
	/* Candidate driven by the vertical tap count, destination width and HTotal. */
	const double vert_candidate = PixelClock *
			(WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
	/* Candidate that additionally accounts for the writeback line buffer size. */
	const double buffer_candidate = PixelClock * WritebackVTaps *
			(WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) /
			6.0 / WritebackSourceWidth;
	const double largest = dml_max3(horz_candidate, vert_candidate, buffer_candidate);

	return dml32_RoundToDFSGranularity(largest, 1, DISPCLKDPPCLKVCOSpeed);
}

/*
 * Translate the requested prefetch-support setting into the inclusive range
 * [*MinPrefetchMode, *MaxPrefetchMode]. The four explicitly handled settings
 * pin both bounds to a single mode number (3, 2, 1 or 0); any other setting
 * yields the full range 0..3.
 */
void dml32_CalculateMinAndMaxPrefetchMode(
		enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,
		unsigned int *MinPrefetchMode,
		unsigned int *MaxPrefetchMode)
{
	unsigned int lowest;
	unsigned int highest;

	switch (AllowForPStateChangeOrStutterInVBlankFinal) {
	case dm_prefetch_support_none:
		lowest = highest = 3;
		break;
	case dm_prefetch_support_stutter:
		lowest = highest = 2;
		break;
	case dm_prefetch_support_fclk_and_stutter:
		lowest = highest = 1;
		break;
	case dm_prefetch_support_uclk_fclk_and_stutter:
		lowest = highest = 0;
		break;
	default:
		lowest = 0;
		highest = 3;
		break;
	}

	*MinPrefetchMode = lowest;
	*MaxPrefetchMode = highest;
} // CalculateMinAndMaxPrefetchMode

/*
 * Fill the per-surface delivery-time output arrays in three passes over the
 * active surfaces:
 *   1. line delivery time for luma and chroma, nominal and prefetch;
 *   2. request delivery time, i.e. the line delivery time divided by the
 *      number of requests per swath (swath width / 256-byte block width, or
 *      block height when IsVertical() reports a vertical scan);
 *   3. cursor request delivery time, nominal and prefetch, using cursor 0 of
 *      each surface.
 *
 * Chroma outputs are set to 0 when BytePerPixelC[k] is 0, and cursor outputs
 * are set to 0 when NumberOfCursors[k] is 0.
 */
void dml32_CalculatePixelDeliveryTimes(
		unsigned int NumberOfActiveSurfaces,
		double VRatio[],
		double VRatioChroma[],
		double VRatioPrefetchY[],
		double VRatioPrefetchC[],
		unsigned int swath_width_luma_ub[],
		unsigned int swath_width_chroma_ub[],
		unsigned int DPPPerSurface[],
		double HRatio[],
		double HRatioChroma[],
		double PixelClock[],
		double PSCL_THROUGHPUT[],
		double PSCL_THROUGHPUT_CHROMA[],
		double Dppclk[],
		unsigned int BytePerPixelC[],
		enum dm_rotation_angle SourceRotation[],
		unsigned int NumberOfCursors[],
		unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
		unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
		unsigned int BlockWidth256BytesY[],
		unsigned int BlockHeight256BytesY[],
		unsigned int BlockWidth256BytesC[],
		unsigned int BlockHeight256BytesC[],

		/* Output */
		double DisplayPipeLineDeliveryTimeLuma[],
		double DisplayPipeLineDeliveryTimeChroma[],
		double DisplayPipeLineDeliveryTimeLumaPrefetch[],
		double DisplayPipeLineDeliveryTimeChromaPrefetch[],
		double DisplayPipeRequestDeliveryTimeLuma[],
		double DisplayPipeRequestDeliveryTimeChroma[],
		double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
		double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
		double CursorRequestDeliveryTime[],
		double CursorRequestDeliveryTimePrefetch[])
{
	unsigned int k;

	/* Pass 1: per-line delivery times. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
		dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
		dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
		dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
		dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
		dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
		dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
		dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
		dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
#endif

		/*
		 * A ratio of at most 1 uses the pixel-clock based formula;
		 * otherwise the scaler-throughput / DPPCLK formula is used.
		 */
		DisplayPipeLineDeliveryTimeLuma[k] = (VRatio[k] <= 1) ?
				swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k] :
				swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];

		if (BytePerPixelC[k] != 0) {
			DisplayPipeLineDeliveryTimeChroma[k] = (VRatioChroma[k] <= 1) ?
					swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k] :
					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
		} else {
			DisplayPipeLineDeliveryTimeChroma[k] = 0;
		}

		DisplayPipeLineDeliveryTimeLumaPrefetch[k] = (VRatioPrefetchY[k] <= 1) ?
				swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k] :
				swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];

		if (BytePerPixelC[k] != 0) {
			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = (VRatioPrefetchC[k] <= 1) ?
					swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k] :
					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
		} else {
			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
#endif
	}

	/* Pass 2: per-request delivery times. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		unsigned int block_extent;
		double requests_per_swath;

		block_extent = IsVertical(SourceRotation[k]) ?
				BlockHeight256BytesY[k] : BlockWidth256BytesY[k];
		/* Integer division first, then widened to double. */
		requests_per_swath = swath_width_luma_ub[k] / block_extent;
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, requests_per_swath);
#endif

		DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / requests_per_swath;
		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
				DisplayPipeLineDeliveryTimeLumaPrefetch[k] / requests_per_swath;

		if (BytePerPixelC[k] != 0) {
			block_extent = IsVertical(SourceRotation[k]) ?
					BlockHeight256BytesC[k] : BlockWidth256BytesC[k];
			requests_per_swath = swath_width_chroma_ub[k] / block_extent;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, requests_per_swath);
#endif
			DisplayPipeRequestDeliveryTimeChroma[k] =
					DisplayPipeLineDeliveryTimeChroma[k] / requests_per_swath;
			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
					DisplayPipeLineDeliveryTimeChromaPrefetch[k] / requests_per_swath;
		} else {
			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
#endif
	}

	/* Pass 3: cursor request delivery times (only cursor index 0 is read). */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		unsigned int requests_per_cursor_line;

		requests_per_cursor_line = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
				256.0 / 8.0, 1.0);
		if (NumberOfCursors[k] == 0) {
			CursorRequestDeliveryTime[k] = 0;
			CursorRequestDeliveryTimePrefetch[k] = 0;
		} else {
			CursorRequestDeliveryTime[k] = (VRatio[k] <= 1) ?
					(double) CursorWidth[k][0] /
					HRatio[k] / PixelClock[k] / requests_per_cursor_line :
					(double) CursorWidth[k][0] /
					PSCL_THROUGHPUT[k] / Dppclk[k] / requests_per_cursor_line;
			CursorRequestDeliveryTimePrefetch[k] = (VRatioPrefetchY[k] <= 1) ?
					(double) CursorWidth[k][0] /
					HRatio[k] / PixelClock[k] / requests_per_cursor_line :
					(double) CursorWidth[k][0] /
					PSCL_THROUGHPUT[k] / Dppclk[k] / requests_per_cursor_line;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
				__func__, k, NumberOfCursors[k]);
		dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
				__func__, k, CursorRequestDeliveryTime[k]);
		dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
				__func__, k, CursorRequestDeliveryTimePrefetch[k]);
#endif
	}
} // CalculatePixelDeliveryTimes

/*
 * Fill the per-surface meta and PTE timing output arrays in three passes over
 * the active surfaces:
 *   1. DST_Y_PER_{PTE,META}_ROW_NOM_{L,C}: row height divided by the vertical
 *      ratio (chroma entries are 0 when BytePerPixelC[k] is 0);
 *   2. TimePer[Chroma]MetaChunk{Nominal,VBlank,Flip}: time budget divided by
 *      an upper bound on meta chunks per row; all six are 0 when DCC is
 *      disabled for the surface;
 *   3. time_per_pte_group_{nom,vblank,flip}_{luma,chroma}: time budget
 *      divided by an upper bound on PTE groups per row; all six are 0 when
 *      GPUVMEnable is false.
 */
void dml32_CalculateMetaAndPTETimes(
		bool use_one_row_for_frame[],
		unsigned int NumberOfActiveSurfaces,
		bool GPUVMEnable,
		unsigned int MetaChunkSize,
		unsigned int MinMetaChunkSizeBytes,
		unsigned int HTotal[],
		double VRatio[],
		double VRatioChroma[],
		double DestinationLinesToRequestRowInVBlank[],
		double DestinationLinesToRequestRowInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		unsigned int BytePerPixelY[],
		unsigned int BytePerPixelC[],
		enum dm_rotation_angle SourceRotation[],
		unsigned int dpte_row_height[],
		unsigned int dpte_row_height_chroma[],
		unsigned int meta_row_width[],
		unsigned int meta_row_width_chroma[],
		unsigned int meta_row_height[],
		unsigned int meta_row_height_chroma[],
		unsigned int meta_req_width[],
		unsigned int meta_req_width_chroma[],
		unsigned int meta_req_height[],
		unsigned int meta_req_height_chroma[],
		unsigned int dpte_group_bytes[],
		unsigned int PTERequestSizeY[],
		unsigned int PTERequestSizeC[],
		unsigned int PixelPTEReqWidthY[],
		unsigned int PixelPTEReqHeightY[],
		unsigned int PixelPTEReqWidthC[],
		unsigned int PixelPTEReqHeightC[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],

		/* Output */
		double DST_Y_PER_PTE_ROW_NOM_L[],
		double DST_Y_PER_PTE_ROW_NOM_C[],
		double DST_Y_PER_META_ROW_NOM_L[],
		double DST_Y_PER_META_ROW_NOM_C[],
		double TimePerMetaChunkNominal[],
		double TimePerChromaMetaChunkNominal[],
		double TimePerMetaChunkVBlank[],
		double TimePerChromaMetaChunkVBlank[],
		double TimePerMetaChunkFlip[],
		double TimePerChromaMetaChunkFlip[],
		double time_per_pte_group_nom_luma[],
		double time_per_pte_group_vblank_luma[],
		double time_per_pte_group_flip_luma[],
		double time_per_pte_group_nom_chroma[],
		double time_per_pte_group_vblank_chroma[],
		double time_per_pte_group_flip_chroma[])
{
	unsigned int k;

	/* Pass 1: destination lines per PTE row and per meta row. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
		DST_Y_PER_PTE_ROW_NOM_C[k] = (BytePerPixelC[k] == 0) ?
				0 : dpte_row_height_chroma[k] / VRatioChroma[k];
		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
		DST_Y_PER_META_ROW_NOM_C[k] = (BytePerPixelC[k] == 0) ?
				0 : meta_row_height_chroma[k] / VRatioChroma[k];
	}

	/* Pass 2: time per meta chunk. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		unsigned int chunk_width;
		unsigned int min_chunk_width;
		unsigned int whole_chunks;
		unsigned int leftover;
		unsigned int req_extent;
		unsigned int threshold;
		unsigned int chunks_per_row_ub;

		if (!DCCEnable[k]) {
			TimePerMetaChunkNominal[k] = 0;
			TimePerMetaChunkVBlank[k] = 0;
			TimePerMetaChunkFlip[k] = 0;
			TimePerChromaMetaChunkNominal[k] = 0;
			TimePerChromaMetaChunkVBlank[k] = 0;
			TimePerChromaMetaChunkFlip[k] = 0;
			continue;
		}

		/* Luma plane; all of this is unsigned integer arithmetic. */
		chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
		min_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
		whole_chunks = meta_row_width[k] / chunk_width;
		leftover = meta_row_width[k] % chunk_width;
		req_extent = IsVertical(SourceRotation[k]) ? meta_req_height[k] : meta_req_width[k];
		threshold = 2 * min_chunk_width - req_extent;
		/* One extra chunk when the remainder fits the threshold, else two. */
		chunks_per_row_ub = whole_chunks + ((leftover <= threshold) ? 1 : 2);

		TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
				HTotal[k] / PixelClock[k] / chunks_per_row_ub;
		TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
				HTotal[k] / PixelClock[k] / chunks_per_row_ub;
		TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
				HTotal[k] / PixelClock[k] / chunks_per_row_ub;

		if (BytePerPixelC[k] == 0) {
			TimePerChromaMetaChunkNominal[k] = 0;
			TimePerChromaMetaChunkVBlank[k] = 0;
			TimePerChromaMetaChunkFlip[k] = 0;
			continue;
		}

		/* Chroma plane; same steps with the chroma inputs. */
		chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
				meta_row_height_chroma[k];
		min_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
				meta_row_height_chroma[k];
		/* Divided in double and truncated on assignment. */
		whole_chunks = (double) meta_row_width_chroma[k] /
				chunk_width;
		leftover = meta_row_width_chroma[k] % chunk_width;
		req_extent = IsVertical(SourceRotation[k]) ?
				meta_req_height_chroma[k] : meta_req_width_chroma[k];
		threshold = 2 * min_chunk_width - req_extent;
		chunks_per_row_ub = whole_chunks + ((leftover <= threshold) ? 1 : 2);

		TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
				HTotal[k] / PixelClock[k] / chunks_per_row_ub;
		TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
				HTotal[k] / PixelClock[k] / chunks_per_row_ub;
		TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
				HTotal[k] / PixelClock[k] / chunks_per_row_ub;
	}

	/* Pass 3: time per PTE group. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		unsigned int pte_req_extent;
		unsigned int group_width;
		unsigned int groups_per_row_ub;

		if (!GPUVMEnable) {
			time_per_pte_group_nom_luma[k] = 0;
			time_per_pte_group_vblank_luma[k] = 0;
			time_per_pte_group_flip_luma[k] = 0;
			time_per_pte_group_nom_chroma[k] = 0;
			time_per_pte_group_vblank_chroma[k] = 0;
			time_per_pte_group_flip_chroma[k] = 0;
		} else {
			/* Luma plane */
			pte_req_extent = IsVertical(SourceRotation[k]) ?
					PixelPTEReqHeightY[k] : PixelPTEReqWidthY[k];
			group_width = (double) dpte_group_bytes[k] /
					(double) PTERequestSizeY[k] * pte_req_extent;

			/* The group count is halved before rounding up when use_one_row_for_frame[k] is set. */
			groups_per_row_ub = dml_ceil(use_one_row_for_frame[k] ?
					(double) dpte_row_width_luma_ub[k] / (double) group_width / 2.0 :
					(double) dpte_row_width_luma_ub[k] / (double) group_width, 1.0);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n",
					__func__, k, use_one_row_for_frame[k]);
			dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n",
					__func__, k, dpte_group_bytes[k]);
			dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n",
					__func__, k, PTERequestSizeY[k]);
			dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n",
					__func__, k, PixelPTEReqWidthY[k]);
			dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n",
					__func__, k, PixelPTEReqHeightY[k]);
			dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n",
					__func__, k, dpte_row_width_luma_ub[k]);
			dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n",
					__func__, k, group_width);
			dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n",
					__func__, k, groups_per_row_ub);
#endif

			time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
					HTotal[k] / PixelClock[k] / groups_per_row_ub;
			time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
					HTotal[k] / PixelClock[k] / groups_per_row_ub;
			time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / groups_per_row_ub;

			if (BytePerPixelC[k] != 0) {
				/* Chroma plane */
				pte_req_extent = IsVertical(SourceRotation[k]) ?
						PixelPTEReqHeightC[k] : PixelPTEReqWidthC[k];
				group_width = (double) dpte_group_bytes[k] /
						(double) PTERequestSizeC[k] * pte_req_extent;

				groups_per_row_ub = dml_ceil(use_one_row_for_frame[k] ?
						(double) dpte_row_width_chroma_ub[k] / (double) group_width / 2.0 :
						(double) dpte_row_width_chroma_ub[k] / (double) group_width, 1.0);
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n",
						__func__, k, dpte_row_width_chroma_ub[k]);
				dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n",
						__func__, k, group_width);
				dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n",
						__func__, k, groups_per_row_ub);
#endif
				time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
						HTotal[k] / PixelClock[k] / groups_per_row_ub;
				time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
						HTotal[k] / PixelClock[k] / groups_per_row_ub;
				time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
						HTotal[k] / PixelClock[k] / groups_per_row_ub;
			} else {
				time_per_pte_group_nom_chroma[k] = 0;
				time_per_pte_group_vblank_chroma[k] = 0;
				time_per_pte_group_flip_chroma[k] = 0;
			}
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n",
				__func__, k, DestinationLinesToRequestRowInVBlank[k]);
		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n",
				__func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n",
				__func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n",
				__func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n",
				__func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n",
				__func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n",
				__func__, k, TimePerMetaChunkNominal[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n",
				__func__, k, TimePerMetaChunkVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n",
				__func__, k, TimePerMetaChunkFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n",
				__func__, k, TimePerChromaMetaChunkNominal[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n",
				__func__, k, TimePerChromaMetaChunkVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n",
				__func__, k, TimePerChromaMetaChunkFlip[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n",
				__func__, k, time_per_pte_group_nom_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n",
				__func__, k, time_per_pte_group_vblank_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n",
				__func__, k, time_per_pte_group_flip_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n",
				__func__, k, time_per_pte_group_nom_chroma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
				__func__, k, time_per_pte_group_vblank_chroma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n",
				__func__, k, time_per_pte_group_flip_chroma[k]);
#endif
	}
} // CalculateMetaAndPTETimes

/*
 * For every active surface compute the time per VM group and per VM request
 * in the vblank and immediate-flip cases.
 *
 * The four outputs are 0 unless GPUVMEnable is set and either DCC is enabled
 * for the surface or GPUVMMaxPageTableLevels is greater than 1. Otherwise the
 * group count is built from ceil(bytes / vm_group_bytes) terms and the
 * request count from bytes / 64 terms, using the dpde0 and/or meta-PTE byte
 * counts selected by DCCEnable[k], GPUVMMaxPageTableLevels and whether the
 * surface has a chroma plane. When GPUVMMaxPageTableLevels is greater than 2
 * all four resulting times are halved.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are part of the
 * interface but are not read by this function.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActiveSurfaces,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		unsigned int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],
		unsigned int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		unsigned int meta_pte_bytes_per_frame_ub_l[],
		unsigned int meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		unsigned int group_count;
		unsigned int request_count;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (!(GPUVMEnable && (DCCEnable[k] || GPUVMMaxPageTableLevels > 1))) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		} else {
			if (!DCCEnable[k]) {
				/* No DCC: only the dpde0 bytes contribute. */
				if (BytePerPixelC[k] > 0) {
					group_count = dml_ceil(
							(double) (dpde0_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1.0) +
							dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					request_count = dpde0_bytes_per_frame_ub_l[k] / 64 +
							dpde0_bytes_per_frame_ub_c[k] / 64;
				} else {
					group_count = dml_ceil(
							(double) (dpde0_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					request_count = dpde0_bytes_per_frame_ub_l[k] / 64;
				}
			} else if (GPUVMMaxPageTableLevels == 1) {
				/* DCC with a single page-table level: only the meta-PTE bytes contribute. */
				if (BytePerPixelC[k] > 0) {
					group_count = dml_ceil(
							(double) (meta_pte_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1.0) +
							dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					request_count = meta_pte_bytes_per_frame_ub_l[k] / 64 +
							meta_pte_bytes_per_frame_ub_c[k] / 64;
				} else {
					group_count = dml_ceil(
							(double) (meta_pte_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					request_count = meta_pte_bytes_per_frame_ub_l[k] / 64;
				}
			} else {
				/*
				 * DCC with other page-table level counts: dpde0 and
				 * meta-PTE bytes both contribute, plus a constant 2
				 * (with chroma) or 1 (without) in the group count.
				 */
				if (BytePerPixelC[k] > 0) {
					group_count = 2 + dml_ceil(
							(double) (dpde0_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1) +
							dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1) +
							dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1) +
							dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1);
					request_count = dpde0_bytes_per_frame_ub_l[k] /
							64 + dpde0_bytes_per_frame_ub_c[k] / 64 +
							meta_pte_bytes_per_frame_ub_l[k] / 64 +
							meta_pte_bytes_per_frame_ub_c[k] / 64;
				} else {
					group_count = 1 + dml_ceil(
							(double) (dpde0_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1) + dml_ceil(
							(double) (meta_pte_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1);
					request_count = dpde0_bytes_per_frame_ub_l[k] /
							64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
				}
			}

			TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / group_count;
			TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / group_count;
			TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / request_count;
			TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / request_count;

			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] /= 2;
				TimePerVMGroupFlip[k] /= 2;
				TimePerVMRequestVBlank[k] /= 2;
				TimePerVMRequestFlip[k] /= 2;
			}
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes

5271 void dml32_CalculateDCCConfiguration( 5272 bool DCCEnabled, 5273 bool DCCProgrammingAssumesScanDirectionUnknown, 5274 enum source_format_class SourcePixelFormat, 5275 unsigned int SurfaceWidthLuma, 5276 unsigned int SurfaceWidthChroma, 5277 unsigned int SurfaceHeightLuma, 5278 unsigned int SurfaceHeightChroma, 5279 unsigned int nomDETInKByte, 5280 unsigned int RequestHeight256ByteLuma, 5281 unsigned int RequestHeight256ByteChroma, 5282 enum dm_swizzle_mode TilingFormat, 5283 unsigned int BytePerPixelY, 5284 unsigned int BytePerPixelC, 5285 double BytePerPixelDETY, 5286 double BytePerPixelDETC, 5287 enum dm_rotation_angle SourceRotation, 5288 /* Output */ 5289 unsigned int *MaxUncompressedBlockLuma, 5290 unsigned int *MaxUncompressedBlockChroma, 5291 unsigned int *MaxCompressedBlockLuma, 5292 unsigned int *MaxCompressedBlockChroma, 5293 unsigned int *IndependentBlockLuma, 5294 unsigned int *IndependentBlockChroma) 5295 { 5296 typedef enum { 5297 REQ_256Bytes, 5298 REQ_128BytesNonContiguous, 5299 REQ_128BytesContiguous, 5300 REQ_NA 5301 } RequestType; 5302 5303 RequestType RequestLuma; 5304 RequestType RequestChroma; 5305 5306 unsigned int segment_order_horz_contiguous_luma; 5307 unsigned int segment_order_horz_contiguous_chroma; 5308 unsigned int segment_order_vert_contiguous_luma; 5309 unsigned int segment_order_vert_contiguous_chroma; 5310 unsigned int req128_horz_wc_l; 5311 unsigned int req128_horz_wc_c; 5312 unsigned int req128_vert_wc_l; 5313 unsigned int req128_vert_wc_c; 5314 unsigned int MAS_vp_horz_limit; 5315 unsigned int MAS_vp_vert_limit; 5316 unsigned int max_vp_horz_width; 5317 unsigned int max_vp_vert_height;
5318 unsigned int eff_surf_width_l; 5319 unsigned int eff_surf_width_c; 5320 unsigned int eff_surf_height_l; 5321 unsigned int eff_surf_height_c; 5322 unsigned int full_swath_bytes_horz_wc_l; 5323 unsigned int full_swath_bytes_horz_wc_c; 5324 unsigned int full_swath_bytes_vert_wc_l; 5325 unsigned int full_swath_bytes_vert_wc_c; 5326 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024; 5327 5328 unsigned int yuv420; 5329 unsigned int horz_div_l; 5330 unsigned int horz_div_c; 5331 unsigned int vert_div_l; 5332 unsigned int vert_div_c; 5333 5334 unsigned int swath_buf_size; 5335 double detile_buf_vp_horz_limit; 5336 double detile_buf_vp_vert_limit; 5337 5338 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || 5339 SourcePixelFormat == dm_420_12) ? 1 : 0); 5340 horz_div_l = 1; 5341 horz_div_c = 1; 5342 vert_div_l = 1; 5343 vert_div_c = 1; 5344 5345 if (BytePerPixelY == 1) 5346 vert_div_l = 0; 5347 if (BytePerPixelC == 1) 5348 vert_div_c = 0; 5349 5350 if (BytePerPixelC == 0) { 5351 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256; 5352 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * 5353 BytePerPixelY / (1 + horz_div_l)); 5354 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / 5355 (1 + vert_div_l)); 5356 } else { 5357 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256; 5358 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * 5359 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma * 5360 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); 5361 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / 5362 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / 5363 (1 + vert_div_c) / (1 + yuv420)); 5364 } 5365 5366 if (SourcePixelFormat == dm_420_10) { 5367 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; 5368 detile_buf_vp_vert_limit = 1.5 * 
detile_buf_vp_vert_limit; 5369 } 5370 5371 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); 5372 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); 5373 5374 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144; 5375 MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144); 5376 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); 5377 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); 5378 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); 5379 eff_surf_width_c = eff_surf_width_l / (1 + yuv420); 5380 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); 5381 eff_surf_height_c = eff_surf_height_l / (1 + yuv420); 5382 5383 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; 5384 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; 5385 if (BytePerPixelC > 0) { 5386 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; 5387 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; 5388 } else { 5389 full_swath_bytes_horz_wc_c = 0; 5390 full_swath_bytes_vert_wc_c = 0; 5391 } 5392 5393 if (SourcePixelFormat == dm_420_10) { 5394 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0); 5395 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0); 5396 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0); 5397 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0); 5398 } 5399 5400 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5401 req128_horz_wc_l = 0; 5402 req128_horz_wc_c = 0; 5403 } 
else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + 5404 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5405 req128_horz_wc_l = 0; 5406 req128_horz_wc_c = 1; 5407 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * 5408 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5409 req128_horz_wc_l = 1; 5410 req128_horz_wc_c = 0; 5411 } else { 5412 req128_horz_wc_l = 1; 5413 req128_horz_wc_c = 1; 5414 } 5415 5416 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5417 req128_vert_wc_l = 0; 5418 req128_vert_wc_c = 0; 5419 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * 5420 full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5421 req128_vert_wc_l = 0; 5422 req128_vert_wc_c = 1; 5423 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && 5424 full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5425 req128_vert_wc_l = 1; 5426 req128_vert_wc_c = 0; 5427 } else { 5428 req128_vert_wc_l = 1; 5429 req128_vert_wc_c = 1; 5430 } 5431 5432 if (BytePerPixelY == 2) { 5433 segment_order_horz_contiguous_luma = 0; 5434 segment_order_vert_contiguous_luma = 1; 5435 } else { 5436 segment_order_horz_contiguous_luma = 1; 5437 segment_order_vert_contiguous_luma = 0; 5438 } 5439 5440 if (BytePerPixelC == 2) { 5441 segment_order_horz_contiguous_chroma = 0; 5442 segment_order_vert_contiguous_chroma = 1; 5443 } else { 5444 segment_order_horz_contiguous_chroma = 1; 5445 segment_order_vert_contiguous_chroma = 0; 5446 } 5447 #ifdef __DML_VBA_DEBUG__ 5448 dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled); 5449 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 5450 dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC); 5451 dml_print("DML::%s: req128_horz_wc_l = %d\n", 
__func__, req128_horz_wc_l); 5452 dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c); 5453 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l); 5454 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c); 5455 dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma); 5456 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n", 5457 __func__, segment_order_horz_contiguous_chroma); 5458 #endif 5459 5460 if (DCCProgrammingAssumesScanDirectionUnknown == true) { 5461 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) 5462 RequestLuma = REQ_256Bytes; 5463 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || 5464 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) 5465 RequestLuma = REQ_128BytesNonContiguous; 5466 else 5467 RequestLuma = REQ_128BytesContiguous; 5468 5469 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) 5470 RequestChroma = REQ_256Bytes; 5471 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || 5472 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) 5473 RequestChroma = REQ_128BytesNonContiguous; 5474 else 5475 RequestChroma = REQ_128BytesContiguous; 5476 5477 } else if (!IsVertical(SourceRotation)) { 5478 if (req128_horz_wc_l == 0) 5479 RequestLuma = REQ_256Bytes; 5480 else if (segment_order_horz_contiguous_luma == 0) 5481 RequestLuma = REQ_128BytesNonContiguous; 5482 else 5483 RequestLuma = REQ_128BytesContiguous; 5484 5485 if (req128_horz_wc_c == 0) 5486 RequestChroma = REQ_256Bytes; 5487 else if (segment_order_horz_contiguous_chroma == 0) 5488 RequestChroma = REQ_128BytesNonContiguous; 5489 else 5490 RequestChroma = REQ_128BytesContiguous; 5491 5492 } else { 5493 if (req128_vert_wc_l == 0) 5494 RequestLuma = REQ_256Bytes; 5495 else if (segment_order_vert_contiguous_luma == 0) 5496 RequestLuma = 
REQ_128BytesNonContiguous; 5497 else 5498 RequestLuma = REQ_128BytesContiguous; 5499 5500 if (req128_vert_wc_c == 0) 5501 RequestChroma = REQ_256Bytes; 5502 else if (segment_order_vert_contiguous_chroma == 0) 5503 RequestChroma = REQ_128BytesNonContiguous; 5504 else 5505 RequestChroma = REQ_128BytesContiguous; 5506 } 5507 5508 if (RequestLuma == REQ_256Bytes) { 5509 *MaxUncompressedBlockLuma = 256; 5510 *MaxCompressedBlockLuma = 256; 5511 *IndependentBlockLuma = 0; 5512 } else if (RequestLuma == REQ_128BytesContiguous) { 5513 *MaxUncompressedBlockLuma = 256; 5514 *MaxCompressedBlockLuma = 128; 5515 *IndependentBlockLuma = 128; 5516 } else { 5517 *MaxUncompressedBlockLuma = 256; 5518 *MaxCompressedBlockLuma = 64; 5519 *IndependentBlockLuma = 64; 5520 } 5521 5522 if (RequestChroma == REQ_256Bytes) { 5523 *MaxUncompressedBlockChroma = 256; 5524 *MaxCompressedBlockChroma = 256; 5525 *IndependentBlockChroma = 0; 5526 } else if (RequestChroma == REQ_128BytesContiguous) { 5527 *MaxUncompressedBlockChroma = 256; 5528 *MaxCompressedBlockChroma = 128; 5529 *IndependentBlockChroma = 128; 5530 } else { 5531 *MaxUncompressedBlockChroma = 256; 5532 *MaxCompressedBlockChroma = 64; 5533 *IndependentBlockChroma = 64; 5534 } 5535 5536 if (DCCEnabled != true || BytePerPixelC == 0) { 5537 *MaxUncompressedBlockChroma = 0; 5538 *MaxCompressedBlockChroma = 0; 5539 *IndependentBlockChroma = 0; 5540 } 5541 5542 if (DCCEnabled != true) { 5543 *MaxUncompressedBlockLuma = 0; 5544 *MaxCompressedBlockLuma = 0; 5545 *IndependentBlockLuma = 0; 5546 } 5547 5548 #ifdef __DML_VBA_DEBUG__ 5549 dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma); 5550 dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma); 5551 dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma); 5552 dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma); 5553 dml_print("DML::%s: 
MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma); 5554 dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma); 5555 #endif 5556 5557 } // CalculateDCCConfiguration 5558 5559 void dml32_CalculateStutterEfficiency( 5560 unsigned int CompressedBufferSizeInkByte, 5561 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 5562 bool UnboundedRequestEnabled, 5563 unsigned int MetaFIFOSizeInKEntries, 5564 unsigned int ZeroSizeBufferEntries, 5565 unsigned int PixelChunkSizeInKByte, 5566 unsigned int NumberOfActiveSurfaces, 5567 unsigned int ROBBufferSizeInKByte, 5568 double TotalDataReadBandwidth, 5569 double DCFCLK, 5570 double ReturnBW, 5571 unsigned int CompbufReservedSpace64B, 5572 unsigned int CompbufReservedSpaceZs, 5573 double SRExitTime, 5574 double SRExitZ8Time, 5575 bool SynchronizeTimingsFinal, 5576 unsigned int BlendingAndTiming[], 5577 double StutterEnterPlusExitWatermark, 5578 double Z8StutterEnterPlusExitWatermark, 5579 bool ProgressiveToInterlaceUnitInOPP, 5580 bool Interlace[], 5581 double MinTTUVBlank[], 5582 unsigned int DPPPerSurface[], 5583 unsigned int DETBufferSizeY[], 5584 unsigned int BytePerPixelY[], 5585 double BytePerPixelDETY[], 5586 double SwathWidthY[], 5587 unsigned int SwathHeightY[], 5588 unsigned int SwathHeightC[], 5589 double NetDCCRateLuma[], 5590 double NetDCCRateChroma[], 5591 double DCCFractionOfZeroSizeRequestsLuma[], 5592 double DCCFractionOfZeroSizeRequestsChroma[], 5593 unsigned int HTotal[], 5594 unsigned int VTotal[], 5595 double PixelClock[], 5596 double VRatio[], 5597 enum dm_rotation_angle SourceRotation[], 5598 unsigned int BlockHeight256BytesY[], 5599 unsigned int BlockWidth256BytesY[], 5600 unsigned int BlockHeight256BytesC[], 5601 unsigned int BlockWidth256BytesC[], 5602 unsigned int DCCYMaxUncompressedBlock[], 5603 unsigned int DCCCMaxUncompressedBlock[], 5604 unsigned int VActive[], 5605 bool DCCEnable[], 5606 bool WritebackEnable[], 5607 double 
ReadBandwidthSurfaceLuma[], 5608 double ReadBandwidthSurfaceChroma[], 5609 double meta_row_bw[], 5610 double dpte_row_bw[], 5611 5612 /* Output */ 5613 double *StutterEfficiencyNotIncludingVBlank, 5614 double *StutterEfficiency, 5615 unsigned int *NumberOfStutterBurstsPerFrame, 5616 double *Z8StutterEfficiencyNotIncludingVBlank, 5617 double *Z8StutterEfficiency, 5618 unsigned int *Z8NumberOfStutterBurstsPerFrame, 5619 double *StutterPeriod, 5620 bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE) 5621 { 5622 5623 bool FoundCriticalSurface = false; 5624 unsigned int SwathSizeCriticalSurface = 0; 5625 unsigned int LastChunkOfSwathSize; 5626 unsigned int MissingPartOfLastSwathOfDETSize; 5627 double LastZ8StutterPeriod = 0.0; 5628 double LastStutterPeriod = 0.0; 5629 unsigned int TotalNumberOfActiveOTG = 0; 5630 double doublePixelClock; 5631 unsigned int doubleHTotal; 5632 unsigned int doubleVTotal; 5633 bool SameTiming = true; 5634 double DETBufferingTimeY; 5635 double SwathWidthYCriticalSurface = 0.0; 5636 double SwathHeightYCriticalSurface = 0.0; 5637 double VActiveTimeCriticalSurface = 0.0; 5638 double FrameTimeCriticalSurface = 0.0; 5639 unsigned int BytePerPixelYCriticalSurface = 0; 5640 double LinesToFinishSwathTransferStutterCriticalSurface = 0.0; 5641 unsigned int DETBufferSizeYCriticalSurface = 0; 5642 double MinTTUVBlankCriticalSurface = 0.0; 5643 unsigned int BlockWidth256BytesYCriticalSurface = 0; 5644 bool doublePlaneCriticalSurface = 0; 5645 bool doublePipeCriticalSurface = 0; 5646 double TotalCompressedReadBandwidth; 5647 double TotalRowReadBandwidth; 5648 double AverageDCCCompressionRate; 5649 double EffectiveCompressedBufferSize; 5650 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; 5651 double StutterBurstTime; 5652 unsigned int TotalActiveWriteback; 5653 double LinesInDETY; 5654 double LinesInDETYRoundedDownToSwath; 5655 double MaximumEffectiveCompressionLuma; 5656 double MaximumEffectiveCompressionChroma; 5657 double 
TotalZeroSizeRequestReadBandwidth; 5658 double TotalZeroSizeCompressedReadBandwidth; 5659 double AverageDCCZeroSizeFraction; 5660 double AverageZeroSizeCompressionRate; 5661 unsigned int k; 5662 5663 TotalZeroSizeRequestReadBandwidth = 0; 5664 TotalZeroSizeCompressedReadBandwidth = 0; 5665 TotalRowReadBandwidth = 0; 5666 TotalCompressedReadBandwidth = 0; 5667 5668 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5669 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5670 if (DCCEnable[k] == true) { 5671 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k]) 5672 || (!IsVertical(SourceRotation[k]) 5673 && BlockHeight256BytesY[k] > SwathHeightY[k]) 5674 || DCCYMaxUncompressedBlock[k] < 256) { 5675 MaximumEffectiveCompressionLuma = 2; 5676 } else { 5677 MaximumEffectiveCompressionLuma = 4; 5678 } 5679 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 5680 + ReadBandwidthSurfaceLuma[k] 5681 / dml_min(NetDCCRateLuma[k], 5682 MaximumEffectiveCompressionLuma); 5683 #ifdef __DML_VBA_DEBUG__ 5684 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", 5685 __func__, k, ReadBandwidthSurfaceLuma[k]); 5686 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n", 5687 __func__, k, NetDCCRateLuma[k]); 5688 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n", 5689 __func__, k, MaximumEffectiveCompressionLuma); 5690 #endif 5691 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth 5692 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; 5693 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 5694 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] 5695 / MaximumEffectiveCompressionLuma; 5696 5697 if (ReadBandwidthSurfaceChroma[k] > 0) { 5698 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k]) 5699 || (!IsVertical(SourceRotation[k]) 5700 && BlockHeight256BytesC[k] > SwathHeightC[k]) 5701 || 
DCCCMaxUncompressedBlock[k] < 256) { 5702 MaximumEffectiveCompressionChroma = 2; 5703 } else { 5704 MaximumEffectiveCompressionChroma = 4; 5705 } 5706 TotalCompressedReadBandwidth = 5707 TotalCompressedReadBandwidth 5708 + ReadBandwidthSurfaceChroma[k] 5709 / dml_min(NetDCCRateChroma[k], 5710 MaximumEffectiveCompressionChroma); 5711 #ifdef __DML_VBA_DEBUG__ 5712 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n", 5713 __func__, k, ReadBandwidthSurfaceChroma[k]); 5714 dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n", 5715 __func__, k, NetDCCRateChroma[k]); 5716 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n", 5717 __func__, k, MaximumEffectiveCompressionChroma); 5718 #endif 5719 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth 5720 + ReadBandwidthSurfaceChroma[k] 5721 * DCCFractionOfZeroSizeRequestsChroma[k]; 5722 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 5723 + ReadBandwidthSurfaceChroma[k] 5724 * DCCFractionOfZeroSizeRequestsChroma[k] 5725 / MaximumEffectiveCompressionChroma; 5726 } 5727 } else { 5728 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 5729 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k]; 5730 } 5731 TotalRowReadBandwidth = TotalRowReadBandwidth 5732 + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]); 5733 } 5734 } 5735 5736 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; 5737 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; 5738 5739 #ifdef __DML_VBA_DEBUG__ 5740 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); 5741 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); 5742 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); 5743 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", 5744 __func__, 
TotalZeroSizeCompressedReadBandwidth); 5745 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); 5746 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); 5747 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 5748 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); 5749 dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B); 5750 dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs); 5751 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); 5752 #endif 5753 if (AverageDCCZeroSizeFraction == 1) { 5754 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth 5755 / TotalZeroSizeCompressedReadBandwidth; 5756 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64 5757 * AverageZeroSizeCompressionRate 5758 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 5759 * AverageZeroSizeCompressionRate; 5760 } else if (AverageDCCZeroSizeFraction > 0) { 5761 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth 5762 / TotalZeroSizeCompressedReadBandwidth; 5763 EffectiveCompressedBufferSize = dml_min( 5764 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 5765 (double) MetaFIFOSizeInKEntries * 1024 * 64 5766 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate 5767 + 1 / AverageDCCCompressionRate)) 5768 + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) 5769 * AverageDCCCompressionRate, 5770 ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 5771 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 5772 5773 #ifdef __DML_VBA_DEBUG__ 5774 dml_print("DML::%s: min 1 = %f\n", __func__, 5775 CompressedBufferSizeInkByte * 1024 * 
AverageDCCCompressionRate); 5776 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 / 5777 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / 5778 AverageDCCCompressionRate)); 5779 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 - 5780 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate); 5781 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 / 5782 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 5783 #endif 5784 } else { 5785 EffectiveCompressedBufferSize = dml_min( 5786 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 5787 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) 5788 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) 5789 * AverageDCCCompressionRate; 5790 5791 #ifdef __DML_VBA_DEBUG__ 5792 dml_print("DML::%s: min 1 = %f\n", __func__, 5793 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 5794 dml_print("DML::%s: min 2 = %f\n", __func__, 5795 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); 5796 #endif 5797 } 5798 5799 #ifdef __DML_VBA_DEBUG__ 5800 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); 5801 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); 5802 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 5803 #endif 5804 5805 *StutterPeriod = 0; 5806 5807 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5808 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5809 LinesInDETY = ((double) DETBufferSizeY[k] 5810 + (UnboundedRequestEnabled == true ? 
EffectiveCompressedBufferSize : 0) 5811 * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth) 5812 / BytePerPixelDETY[k] / SwathWidthY[k]; 5813 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); 5814 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k]) 5815 / VRatio[k]; 5816 #ifdef __DML_VBA_DEBUG__ 5817 dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 5818 dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 5819 dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); 5820 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", 5821 __func__, k, ReadBandwidthSurfaceLuma[k]); 5822 dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); 5823 dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY); 5824 dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n", 5825 __func__, k, LinesInDETYRoundedDownToSwath); 5826 dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]); 5827 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); 5828 dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]); 5829 dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); 5830 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); 5831 #endif 5832 5833 if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) { 5834 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; 5835 5836 FoundCriticalSurface = true; 5837 *StutterPeriod = DETBufferingTimeY; 5838 FrameTimeCriticalSurface = ( 5839 isInterlaceTiming ? 5840 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k]) 5841 * (double) HTotal[k] / PixelClock[k]; 5842 VActiveTimeCriticalSurface = ( 5843 isInterlaceTiming ? 
5844 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k]) 5845 * (double) HTotal[k] / PixelClock[k]; 5846 BytePerPixelYCriticalSurface = BytePerPixelY[k]; 5847 SwathWidthYCriticalSurface = SwathWidthY[k]; 5848 SwathHeightYCriticalSurface = SwathHeightY[k]; 5849 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k]; 5850 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k] 5851 - (LinesInDETY - LinesInDETYRoundedDownToSwath); 5852 DETBufferSizeYCriticalSurface = DETBufferSizeY[k]; 5853 MinTTUVBlankCriticalSurface = MinTTUVBlank[k]; 5854 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0); 5855 doublePipeCriticalSurface = (DPPPerSurface[k] == 1); 5856 5857 #ifdef __DML_VBA_DEBUG__ 5858 dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n", 5859 __func__, k, FoundCriticalSurface); 5860 dml_print("DML::%s: k=%0d, StutterPeriod = %f\n", 5861 __func__, k, *StutterPeriod); 5862 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n", 5863 __func__, k, MinTTUVBlankCriticalSurface); 5864 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n", 5865 __func__, k, FrameTimeCriticalSurface); 5866 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n", 5867 __func__, k, VActiveTimeCriticalSurface); 5868 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n", 5869 __func__, k, BytePerPixelYCriticalSurface); 5870 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n", 5871 __func__, k, SwathWidthYCriticalSurface); 5872 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n", 5873 __func__, k, SwathHeightYCriticalSurface); 5874 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n", 5875 __func__, k, BlockWidth256BytesYCriticalSurface); 5876 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n", 5877 __func__, k, doublePlaneCriticalSurface); 5878 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n", 5879 __func__, k, doublePipeCriticalSurface); 5880 dml_print("DML::%s: 
k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n", 5881 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface); 5882 #endif 5883 } 5884 } 5885 } 5886 5887 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, 5888 EffectiveCompressedBufferSize); 5889 #ifdef __DML_VBA_DEBUG__ 5890 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); 5891 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 5892 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", 5893 __func__, *StutterPeriod * TotalDataReadBandwidth); 5894 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 5895 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, 5896 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); 5897 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 5898 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); 5899 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); 5900 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 5901 #endif 5902 5903 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate 5904 / ReturnBW 5905 + (*StutterPeriod * TotalDataReadBandwidth 5906 - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) 5907 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; 5908 #ifdef __DML_VBA_DEBUG__ 5909 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 5910 AverageDCCCompressionRate / ReturnBW); 5911 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", 5912 __func__, (*StutterPeriod * TotalDataReadBandwidth)); 5913 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * 
TotalDataReadBandwidth - 5914 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); 5915 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); 5916 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 5917 #endif 5918 StutterBurstTime = dml_max(StutterBurstTime, 5919 LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface 5920 * SwathWidthYCriticalSurface / ReturnBW); 5921 5922 #ifdef __DML_VBA_DEBUG__ 5923 dml_print("DML::%s: Time to finish residue swath=%f\n", 5924 __func__, 5925 LinesToFinishSwathTransferStutterCriticalSurface * 5926 BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW); 5927 #endif 5928 5929 TotalActiveWriteback = 0; 5930 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5931 if (WritebackEnable[k]) 5932 TotalActiveWriteback = TotalActiveWriteback + 1; 5933 } 5934 5935 if (TotalActiveWriteback == 0) { 5936 #ifdef __DML_VBA_DEBUG__ 5937 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 5938 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); 5939 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); 5940 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 5941 #endif 5942 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 5943 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; 5944 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 5945 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; 5946 *NumberOfStutterBurstsPerFrame = ( 5947 *StutterEfficiencyNotIncludingVBlank > 0 ? 5948 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); 5949 *Z8NumberOfStutterBurstsPerFrame = ( 5950 *Z8StutterEfficiencyNotIncludingVBlank > 0 ? 
5951 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); 5952 } else { 5953 *StutterEfficiencyNotIncludingVBlank = 0.; 5954 *Z8StutterEfficiencyNotIncludingVBlank = 0.; 5955 *NumberOfStutterBurstsPerFrame = 0; 5956 *Z8NumberOfStutterBurstsPerFrame = 0; 5957 } 5958 #ifdef __DML_VBA_DEBUG__ 5959 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface); 5960 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", 5961 __func__, *StutterEfficiencyNotIncludingVBlank); 5962 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", 5963 __func__, *Z8StutterEfficiencyNotIncludingVBlank); 5964 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); 5965 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 5966 #endif 5967 5968 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5969 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5970 if (BlendingAndTiming[k] == k) { 5971 if (TotalNumberOfActiveOTG == 0) { 5972 doublePixelClock = PixelClock[k]; 5973 doubleHTotal = HTotal[k]; 5974 doubleVTotal = VTotal[k]; 5975 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k] 5976 || doubleVTotal != VTotal[k]) { 5977 SameTiming = false; 5978 } 5979 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 5980 } 5981 } 5982 } 5983 5984 if (*StutterEfficiencyNotIncludingVBlank > 0) { 5985 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 5986 5987 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming 5988 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) { 5989 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime 5990 + StutterBurstTime * VActiveTimeCriticalSurface 5991 / *StutterPeriod) / FrameTimeCriticalSurface) * 100; 5992 } else { 5993 *StutterEfficiency = 
*StutterEfficiencyNotIncludingVBlank; 5994 } 5995 } else { 5996 *StutterEfficiency = 0; 5997 } 5998 5999 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { 6000 LastZ8StutterPeriod = VActiveTimeCriticalSurface 6001 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6002 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod + 6003 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) { 6004 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime 6005 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100; 6006 } else { 6007 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; 6008 } 6009 } else { 6010 *Z8StutterEfficiency = 0.; 6011 } 6012 6013 #ifdef __DML_VBA_DEBUG__ 6014 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); 6015 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); 6016 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6017 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6018 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); 6019 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); 6020 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", 6021 __func__, *StutterEfficiencyNotIncludingVBlank); 6022 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6023 #endif 6024 6025 SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface 6026 * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface); 6027 LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024); 6028 MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) 6029 - DETBufferSizeYCriticalSurface; 6030 6031 
*DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1) 6032 && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0) 6033 && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) 6034 && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize)); 6035 6036 #ifdef __DML_VBA_DEBUG__ 6037 dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface); 6038 dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize); 6039 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize); 6040 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); 6041 #endif 6042 } // CalculateStutterEfficiency 6043 6044 void dml32_CalculateMaxDETAndMinCompressedBufferSize( 6045 unsigned int ConfigReturnBufferSizeInKByte, 6046 unsigned int ROBBufferSizeInKByte, 6047 unsigned int MaxNumDPP, 6048 bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size 6049 unsigned int nomDETInKByteOverrideValue, // VBA_DELTA 6050 6051 /* Output */ 6052 unsigned int *MaxTotalDETInKByte, 6053 unsigned int *nomDETInKByte, 6054 unsigned int *MinCompressedBufferSizeInKByte) 6055 { 6056 bool det_buff_size_override_en = nomDETInKByteOverrideEnable; 6057 unsigned int det_buff_size_override_val = nomDETInKByteOverrideValue; 6058 6059 *MaxTotalDETInKByte = dml_ceil(((double)ConfigReturnBufferSizeInKByte + 6060 (double) ROBBufferSizeInKByte) * 4.0 / 5.0, 64); 6061 *nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64); 6062 *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; 6063 6064 #ifdef __DML_VBA_DEBUG__ 6065 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte); 6066 dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, 
ROBBufferSizeInKByte); 6067 dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP); 6068 dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte); 6069 dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte); 6070 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte); 6071 #endif 6072 6073 if (det_buff_size_override_en) { 6074 *nomDETInKByte = det_buff_size_override_val; 6075 #ifdef __DML_VBA_DEBUG__ 6076 dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte); 6077 #endif 6078 } 6079 } // CalculateMaxDETAndMinCompressedBufferSize 6080 6081 bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces, 6082 double ReturnBW, 6083 bool NotUrgentLatencyHiding[], 6084 double ReadBandwidthLuma[], 6085 double ReadBandwidthChroma[], 6086 double cursor_bw[], 6087 double meta_row_bandwidth[], 6088 double dpte_row_bandwidth[], 6089 unsigned int NumberOfDPP[], 6090 double UrgentBurstFactorLuma[], 6091 double UrgentBurstFactorChroma[], 6092 double UrgentBurstFactorCursor[]) 6093 { 6094 unsigned int k; 6095 bool NotEnoughUrgentLatencyHiding = false; 6096 bool CalculateVActiveBandwithSupport_val = false; 6097 double VActiveBandwith = 0; 6098 6099 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6100 if (NotUrgentLatencyHiding[k]) { 6101 NotEnoughUrgentLatencyHiding = true; 6102 } 6103 } 6104 6105 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6106 VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k]; 6107 } 6108 6109 CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding; 6110 6111 #ifdef __DML_VBA_DEBUG__ 6112 dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, 
NotEnoughUrgentLatencyHiding); 6113 dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith); 6114 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 6115 dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, CalculateVActiveBandwithSupport_val); 6116 #endif 6117 return CalculateVActiveBandwithSupport_val; 6118 } 6119 6120 void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces, 6121 double ReturnBW, 6122 bool NotUrgentLatencyHiding[], 6123 double ReadBandwidthLuma[], 6124 double ReadBandwidthChroma[], 6125 double PrefetchBandwidthLuma[], 6126 double PrefetchBandwidthChroma[], 6127 double cursor_bw[], 6128 double meta_row_bandwidth[], 6129 double dpte_row_bandwidth[], 6130 double cursor_bw_pre[], 6131 double prefetch_vmrow_bw[], 6132 unsigned int NumberOfDPP[], 6133 double UrgentBurstFactorLuma[], 6134 double UrgentBurstFactorChroma[], 6135 double UrgentBurstFactorCursor[], 6136 double UrgentBurstFactorLumaPre[], 6137 double UrgentBurstFactorChromaPre[], 6138 double UrgentBurstFactorCursorPre[], 6139 6140 /* output */ 6141 double *PrefetchBandwidth, 6142 double *FractionOfUrgentBandwidth, 6143 bool *PrefetchBandwidthSupport) 6144 { 6145 unsigned int k; 6146 bool NotEnoughUrgentLatencyHiding = false; 6147 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6148 if (NotUrgentLatencyHiding[k]) { 6149 NotEnoughUrgentLatencyHiding = true; 6150 } 6151 } 6152 6153 *PrefetchBandwidth = 0; 6154 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6155 *PrefetchBandwidth = *PrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6156 ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]), 6157 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * 
UrgentBurstFactorCursorPre[k]); 6158 } 6159 6160 *PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding; 6161 *FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW; 6162 } 6163 6164 double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces, 6165 double ReturnBW, 6166 double ReadBandwidthLuma[], 6167 double ReadBandwidthChroma[], 6168 double PrefetchBandwidthLuma[], 6169 double PrefetchBandwidthChroma[], 6170 double cursor_bw[], 6171 double cursor_bw_pre[], 6172 unsigned int NumberOfDPP[], 6173 double UrgentBurstFactorLuma[], 6174 double UrgentBurstFactorChroma[], 6175 double UrgentBurstFactorCursor[], 6176 double UrgentBurstFactorLumaPre[], 6177 double UrgentBurstFactorChromaPre[], 6178 double UrgentBurstFactorCursorPre[]) 6179 { 6180 unsigned int k; 6181 double CalculateBandwidthAvailableForImmediateFlip_val = ReturnBW; 6182 6183 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6184 CalculateBandwidthAvailableForImmediateFlip_val = CalculateBandwidthAvailableForImmediateFlip_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6185 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6186 } 6187 6188 return CalculateBandwidthAvailableForImmediateFlip_val; 6189 } 6190 6191 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces, 6192 double ReturnBW, 6193 enum immediate_flip_requirement ImmediateFlipRequirement[], 6194 double final_flip_bw[], 6195 double ReadBandwidthLuma[], 6196 double ReadBandwidthChroma[], 6197 double PrefetchBandwidthLuma[], 6198 double PrefetchBandwidthChroma[], 6199 double cursor_bw[], 6200 double meta_row_bandwidth[], 6201 double dpte_row_bandwidth[], 6202 double cursor_bw_pre[], 6203 double 
prefetch_vmrow_bw[], 6204 unsigned int NumberOfDPP[], 6205 double UrgentBurstFactorLuma[], 6206 double UrgentBurstFactorChroma[], 6207 double UrgentBurstFactorCursor[], 6208 double UrgentBurstFactorLumaPre[], 6209 double UrgentBurstFactorChromaPre[], 6210 double UrgentBurstFactorCursorPre[], 6211 6212 /* output */ 6213 double *TotalBandwidth, 6214 double *FractionOfUrgentBandwidth, 6215 bool *ImmediateFlipBandwidthSupport) 6216 { 6217 unsigned int k; 6218 *TotalBandwidth = 0; 6219 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6220 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) { 6221 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6222 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6223 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6224 } else { 6225 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6226 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6227 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6228 } 6229 } 6230 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW); 6231 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW; 6232 } 6233 6234 bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces, 6235 double ReturnBW, 6236 double UrgentLatency, 6237 unsigned int SwathHeightY[], 6238 unsigned int SwathHeightC[], 6239 unsigned int 
SwathWidthY[], 6240 unsigned int SwathWidthC[], 6241 double BytePerPixelInDETY[], 6242 double BytePerPixelInDETC[], 6243 unsigned int DETBufferSizeY[], 6244 unsigned int DETBufferSizeC[], 6245 unsigned int NumOfDPP[], 6246 unsigned int HTotal[], 6247 double PixelClock[], 6248 double VRatioY[], 6249 double VRatioC[], 6250 enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX]) 6251 { 6252 int k; 6253 double SwathSizeAllSurfaces = 0; 6254 double SwathSizeAllSurfacesInFetchTimeUs; 6255 double DETSwathLatencyHidingUs; 6256 double DETSwathLatencyHidingYUs; 6257 double DETSwathLatencyHidingCUs; 6258 double SwathSizePerSurfaceY[DC__NUM_DPP__MAX]; 6259 double SwathSizePerSurfaceC[DC__NUM_DPP__MAX]; 6260 bool NotEnoughDETSwathFillLatencyHiding = false; 6261 6262 /* calculate sum of single swath size for all pipes in bytes */ 6263 for (k = 0; k < NumberOfActiveSurfaces; k++) { 6264 SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k]; 6265 6266 if (SwathHeightC[k] != 0) 6267 SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k]; 6268 else 6269 SwathSizePerSurfaceC[k] = 0; 6270 6271 SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k]; 6272 } 6273 6274 SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency; 6275 6276 /* ensure all DET - 1 swath can hide a fetch for all surfaces */ 6277 for (k = 0; k < NumberOfActiveSurfaces; k++) { 6278 double LineTime = HTotal[k] / PixelClock[k]; 6279 6280 /* only care if surface is not phantom */ 6281 if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 6282 DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime; 6283 6284 if (SwathHeightC[k] != 0) { 6285 DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - 
SwathHeightC[k]) / VRatioC[k] * LineTime; 6286 6287 DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs); 6288 } else { 6289 DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs; 6290 } 6291 6292 /* DET must be able to hide time to fetch 1 swath for each surface */ 6293 if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) { 6294 NotEnoughDETSwathFillLatencyHiding = true; 6295 break; 6296 } 6297 } 6298 } 6299 6300 return NotEnoughDETSwathFillLatencyHiding; 6301 } 6302