1 /* 2 * Copyright 2022 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 
21 * 22 * Authors: AMD 23 * 24 */ 25 #include "display_mode_vba_util_32.h" 26 #include "../dml_inline_defs.h" 27 #include "display_mode_vba_32.h" 28 #include "../display_mode_lib.h" 29 30 unsigned int dml32_dscceComputeDelay( 31 unsigned int bpc, 32 double BPP, 33 unsigned int sliceWidth, 34 unsigned int numSlices, 35 enum output_format_class pixelFormat, 36 enum output_encoder_class Output) 37 { 38 // valid bpc = source bits per component in the set of {8, 10, 12} 39 // valid bpp = increments of 1/16 of a bit 40 // min = 6/7/8 in N420/N422/444, respectively 41 // max = such that compression is 1:1 42 //valid sliceWidth = number of pixels per slice line, 43 // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 44 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 45 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 46 47 // fixed value 48 unsigned int rcModelSize = 8192; 49 50 // N422/N420 operate at 2 pixels per clock 51 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L, 52 Delay, pixels; 53 54 if (pixelFormat == dm_420) 55 pixelsPerClock = 2; 56 else if (pixelFormat == dm_n422) 57 pixelsPerClock = 2; 58 // #all other modes operate at 1 pixel per clock 59 else 60 pixelsPerClock = 1; 61 62 //initial transmit delay as per PPS 63 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); 64 65 //compute ssm delay 66 if (bpc == 8) 67 D = 81; 68 else if (bpc == 10) 69 D = 89; 70 else 71 D = 113; 72 73 //divide by pixel per cycle to compute slice width as seen by DSC 74 w = sliceWidth / pixelsPerClock; 75 76 //422 mode has an additional cycle of delay 77 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) 78 s = 0; 79 else 80 s = 1; 81 82 //main calculation for the dscce 83 ix = initalXmitDelay + 45; 84 wx = (w + 2) / 3; 85 p = 3 * wx - w; 86 l0 = ix / w; 87 a = ix + p * l0; 88 
ax = (a + 2) / 3 + D + 6 + 1; 89 L = (ax + wx - 1) / wx; 90 if ((ix % w) == 0 && p != 0) 91 lstall = 1; 92 else 93 lstall = 0; 94 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; 95 96 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 97 pixels = Delay * 3 * pixelsPerClock; 98 99 #ifdef __DML_VBA_DEBUG__ 100 dml_print("DML::%s: bpc: %d\n", __func__, bpc); 101 dml_print("DML::%s: BPP: %f\n", __func__, BPP); 102 dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth); 103 dml_print("DML::%s: numSlices: %d\n", __func__, numSlices); 104 dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat); 105 dml_print("DML::%s: Output: %d\n", __func__, Output); 106 dml_print("DML::%s: pixels: %d\n", __func__, pixels); 107 #endif 108 109 return pixels; 110 } 111 112 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) 113 { 114 unsigned int Delay = 0; 115 116 if (pixelFormat == dm_420) { 117 // sfr 118 Delay = Delay + 2; 119 // dsccif 120 Delay = Delay + 0; 121 // dscc - input deserializer 122 Delay = Delay + 3; 123 // dscc gets pixels every other cycle 124 Delay = Delay + 2; 125 // dscc - input cdc fifo 126 Delay = Delay + 12; 127 // dscc gets pixels every other cycle 128 Delay = Delay + 13; 129 // dscc - cdc uncertainty 130 Delay = Delay + 2; 131 // dscc - output cdc fifo 132 Delay = Delay + 7; 133 // dscc gets pixels every other cycle 134 Delay = Delay + 3; 135 // dscc - cdc uncertainty 136 Delay = Delay + 2; 137 // dscc - output serializer 138 Delay = Delay + 1; 139 // sft 140 Delay = Delay + 1; 141 } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) { 142 // sfr 143 Delay = Delay + 2; 144 // dsccif 145 Delay = Delay + 1; 146 // dscc - input deserializer 147 Delay = Delay + 5; 148 // dscc - input cdc fifo 149 Delay = Delay + 25; 150 // dscc - cdc uncertainty 151 Delay = Delay + 2; 152 // dscc - output cdc fifo 153 Delay = Delay + 10; 154 // dscc - cdc 
uncertainty 155 Delay = Delay + 2; 156 // dscc - output serializer 157 Delay = Delay + 1; 158 // sft 159 Delay = Delay + 1; 160 } else { 161 // sfr 162 Delay = Delay + 2; 163 // dsccif 164 Delay = Delay + 0; 165 // dscc - input deserializer 166 Delay = Delay + 3; 167 // dscc - input cdc fifo 168 Delay = Delay + 12; 169 // dscc - cdc uncertainty 170 Delay = Delay + 2; 171 // dscc - output cdc fifo 172 Delay = Delay + 7; 173 // dscc - output serializer 174 Delay = Delay + 1; 175 // dscc - cdc uncertainty 176 Delay = Delay + 2; 177 // sft 178 Delay = Delay + 1; 179 } 180 181 return Delay; 182 } 183 184 185 bool IsVertical(enum dm_rotation_angle Scan) 186 { 187 bool is_vert = false; 188 189 if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m) 190 is_vert = true; 191 else 192 is_vert = false; 193 return is_vert; 194 } 195 196 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput( 197 double HRatio, 198 double HRatioChroma, 199 double VRatio, 200 double VRatioChroma, 201 double MaxDCHUBToPSCLThroughput, 202 double MaxPSCLToLBThroughput, 203 double PixelClock, 204 enum source_format_class SourcePixelFormat, 205 unsigned int HTaps, 206 unsigned int HTapsChroma, 207 unsigned int VTaps, 208 unsigned int VTapsChroma, 209 210 /* output */ 211 double *PSCL_THROUGHPUT, 212 double *PSCL_THROUGHPUT_CHROMA, 213 double *DPPCLKUsingSingleDPP) 214 { 215 double DPPCLKUsingSingleDPPLuma; 216 double DPPCLKUsingSingleDPPChroma; 217 218 if (HRatio > 1) { 219 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / 220 dml_ceil((double) HTaps / 6.0, 1.0)); 221 } else { 222 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); 223 } 224 225 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio / 226 *PSCL_THROUGHPUT, 1); 227 228 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock) 229 DPPCLKUsingSingleDPPLuma = 2 * 
PixelClock; 230 231 if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 && 232 SourcePixelFormat != dm_rgbe_alpha)) { 233 *PSCL_THROUGHPUT_CHROMA = 0; 234 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma; 235 } else { 236 if (HRatioChroma > 1) { 237 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * 238 HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0)); 239 } else { 240 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput); 241 } 242 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma), 243 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1); 244 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock) 245 DPPCLKUsingSingleDPPChroma = 2 * PixelClock; 246 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma); 247 } 248 } 249 250 void dml32_CalculateBytePerPixelAndBlockSizes( 251 enum source_format_class SourcePixelFormat, 252 enum dm_swizzle_mode SurfaceTiling, 253 254 /* Output */ 255 unsigned int *BytePerPixelY, 256 unsigned int *BytePerPixelC, 257 double *BytePerPixelDETY, 258 double *BytePerPixelDETC, 259 unsigned int *BlockHeight256BytesY, 260 unsigned int *BlockHeight256BytesC, 261 unsigned int *BlockWidth256BytesY, 262 unsigned int *BlockWidth256BytesC, 263 unsigned int *MacroTileHeightY, 264 unsigned int *MacroTileHeightC, 265 unsigned int *MacroTileWidthY, 266 unsigned int *MacroTileWidthC) 267 { 268 if (SourcePixelFormat == dm_444_64) { 269 *BytePerPixelDETY = 8; 270 *BytePerPixelDETC = 0; 271 *BytePerPixelY = 8; 272 *BytePerPixelC = 0; 273 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) { 274 *BytePerPixelDETY = 4; 275 *BytePerPixelDETC = 0; 276 *BytePerPixelY = 4; 277 *BytePerPixelC = 0; 278 } else if (SourcePixelFormat == dm_444_16) { 279 *BytePerPixelDETY = 2; 280 *BytePerPixelDETC = 0; 281 
*BytePerPixelY = 2; 282 *BytePerPixelC = 0; 283 } else if (SourcePixelFormat == dm_444_8) { 284 *BytePerPixelDETY = 1; 285 *BytePerPixelDETC = 0; 286 *BytePerPixelY = 1; 287 *BytePerPixelC = 0; 288 } else if (SourcePixelFormat == dm_rgbe_alpha) { 289 *BytePerPixelDETY = 4; 290 *BytePerPixelDETC = 1; 291 *BytePerPixelY = 4; 292 *BytePerPixelC = 1; 293 } else if (SourcePixelFormat == dm_420_8) { 294 *BytePerPixelDETY = 1; 295 *BytePerPixelDETC = 2; 296 *BytePerPixelY = 1; 297 *BytePerPixelC = 2; 298 } else if (SourcePixelFormat == dm_420_12) { 299 *BytePerPixelDETY = 2; 300 *BytePerPixelDETC = 4; 301 *BytePerPixelY = 2; 302 *BytePerPixelC = 4; 303 } else { 304 *BytePerPixelDETY = 4.0 / 3; 305 *BytePerPixelDETC = 8.0 / 3; 306 *BytePerPixelY = 2; 307 *BytePerPixelC = 4; 308 } 309 #ifdef __DML_VBA_DEBUG__ 310 dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat); 311 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY); 312 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC); 313 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY); 314 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC); 315 #endif 316 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 317 || SourcePixelFormat == dm_444_16 318 || SourcePixelFormat == dm_444_8 319 || SourcePixelFormat == dm_mono_16 320 || SourcePixelFormat == dm_mono_8 321 || SourcePixelFormat == dm_rgbe)) { 322 if (SurfaceTiling == dm_sw_linear) 323 *BlockHeight256BytesY = 1; 324 else if (SourcePixelFormat == dm_444_64) 325 *BlockHeight256BytesY = 4; 326 else if (SourcePixelFormat == dm_444_8) 327 *BlockHeight256BytesY = 16; 328 else 329 *BlockHeight256BytesY = 8; 330 331 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 332 *BlockHeight256BytesC = 0; 333 *BlockWidth256BytesC = 0; 334 } else { 335 if (SurfaceTiling == dm_sw_linear) { 336 *BlockHeight256BytesY = 1; 337 *BlockHeight256BytesC = 1; 338 
} else if (SourcePixelFormat == dm_rgbe_alpha) { 339 *BlockHeight256BytesY = 8; 340 *BlockHeight256BytesC = 16; 341 } else if (SourcePixelFormat == dm_420_8) { 342 *BlockHeight256BytesY = 16; 343 *BlockHeight256BytesC = 8; 344 } else { 345 *BlockHeight256BytesY = 8; 346 *BlockHeight256BytesC = 8; 347 } 348 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 349 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; 350 } 351 #ifdef __DML_VBA_DEBUG__ 352 dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY); 353 dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY); 354 dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC); 355 dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC); 356 #endif 357 358 if (SurfaceTiling == dm_sw_linear) { 359 *MacroTileHeightY = *BlockHeight256BytesY; 360 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY; 361 *MacroTileHeightC = *BlockHeight256BytesC; 362 if (*MacroTileHeightC == 0) 363 *MacroTileWidthC = 0; 364 else 365 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC; 366 } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t || 367 SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) { 368 *MacroTileHeightY = 16 * *BlockHeight256BytesY; 369 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY; 370 *MacroTileHeightC = 16 * *BlockHeight256BytesC; 371 if (*MacroTileHeightC == 0) 372 *MacroTileWidthC = 0; 373 else 374 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC; 375 } else { 376 *MacroTileHeightY = 32 * *BlockHeight256BytesY; 377 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY; 378 *MacroTileHeightC = 32 * *BlockHeight256BytesC; 379 if (*MacroTileHeightC == 0) 380 *MacroTileWidthC = 0; 381 else 382 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC; 383 } 384 385 #ifdef 
__DML_VBA_DEBUG__ 386 dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY); 387 dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY); 388 dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC); 389 dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC); 390 #endif 391 } // CalculateBytePerPixelAndBlockSizes 392 393 void dml32_CalculateSwathAndDETConfiguration( 394 struct dml32_CalculateSwathAndDETConfiguration *st_vars, 395 unsigned int DETSizeOverride[], 396 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 397 unsigned int ConfigReturnBufferSizeInKByte, 398 unsigned int MaxTotalDETInKByte, 399 unsigned int MinCompressedBufferSizeInKByte, 400 double ForceSingleDPP, 401 unsigned int NumberOfActiveSurfaces, 402 unsigned int nomDETInKByte, 403 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 404 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment, 405 unsigned int PixelChunkSizeKBytes, 406 unsigned int ROBSizeKBytes, 407 unsigned int CompressedBufferSegmentSizeInkByteFinal, 408 enum output_encoder_class Output[], 409 double ReadBandwidthLuma[], 410 double ReadBandwidthChroma[], 411 double MaximumSwathWidthLuma[], 412 double MaximumSwathWidthChroma[], 413 enum dm_rotation_angle SourceRotation[], 414 bool ViewportStationary[], 415 enum source_format_class SourcePixelFormat[], 416 enum dm_swizzle_mode SurfaceTiling[], 417 unsigned int ViewportWidth[], 418 unsigned int ViewportHeight[], 419 unsigned int ViewportXStart[], 420 unsigned int ViewportYStart[], 421 unsigned int ViewportXStartC[], 422 unsigned int ViewportYStartC[], 423 unsigned int SurfaceWidthY[], 424 unsigned int SurfaceWidthC[], 425 unsigned int SurfaceHeightY[], 426 unsigned int SurfaceHeightC[], 427 unsigned int Read256BytesBlockHeightY[], 428 unsigned int Read256BytesBlockHeightC[], 429 unsigned int Read256BytesBlockWidthY[], 430 unsigned int Read256BytesBlockWidthC[], 431 enum 
odm_combine_mode ODMMode[], 432 unsigned int BlendingAndTiming[], 433 unsigned int BytePerPixY[], 434 unsigned int BytePerPixC[], 435 double BytePerPixDETY[], 436 double BytePerPixDETC[], 437 unsigned int HActive[], 438 double HRatio[], 439 double HRatioChroma[], 440 unsigned int DPPPerSurface[], 441 442 /* Output */ 443 unsigned int swath_width_luma_ub[], 444 unsigned int swath_width_chroma_ub[], 445 double SwathWidth[], 446 double SwathWidthChroma[], 447 unsigned int SwathHeightY[], 448 unsigned int SwathHeightC[], 449 unsigned int DETBufferSizeInKByte[], 450 unsigned int DETBufferSizeY[], 451 unsigned int DETBufferSizeC[], 452 bool *UnboundedRequestEnabled, 453 unsigned int *CompressedBufferSizeInkByte, 454 unsigned int *CompBufReservedSpaceKBytes, 455 bool *CompBufReservedSpaceNeedAdjustment, 456 bool ViewportSizeSupportPerSurface[], 457 bool *ViewportSizeSupport) 458 { 459 unsigned int k; 460 461 st_vars->TotalActiveDPP = 0; 462 st_vars->NoChromaSurfaces = true; 463 464 #ifdef __DML_VBA_DEBUG__ 465 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 466 dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes); 467 dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes); 468 #endif 469 dml32_CalculateSwathWidth(ForceSingleDPP, 470 NumberOfActiveSurfaces, 471 SourcePixelFormat, 472 SourceRotation, 473 ViewportStationary, 474 ViewportWidth, 475 ViewportHeight, 476 ViewportXStart, 477 ViewportYStart, 478 ViewportXStartC, 479 ViewportYStartC, 480 SurfaceWidthY, 481 SurfaceWidthC, 482 SurfaceHeightY, 483 SurfaceHeightC, 484 ODMMode, 485 BytePerPixY, 486 BytePerPixC, 487 Read256BytesBlockHeightY, 488 Read256BytesBlockHeightC, 489 Read256BytesBlockWidthY, 490 Read256BytesBlockWidthC, 491 BlendingAndTiming, 492 HActive, 493 HRatio, 494 DPPPerSurface, 495 496 /* Output */ 497 st_vars->SwathWidthdoubleDPP, 498 st_vars->SwathWidthdoubleDPPChroma, 499 SwathWidth, 500 SwathWidthChroma, 501 st_vars->MaximumSwathHeightY, 
502 st_vars->MaximumSwathHeightC, 503 swath_width_luma_ub, 504 swath_width_chroma_ub); 505 506 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 507 st_vars->RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * st_vars->MaximumSwathHeightY[k]; 508 st_vars->RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * st_vars->MaximumSwathHeightC[k]; 509 #ifdef __DML_VBA_DEBUG__ 510 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 511 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); 512 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]); 513 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, st_vars->MaximumSwathHeightY[k]); 514 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, 515 st_vars->RoundedUpMaxSwathSizeBytesY[k]); 516 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); 517 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]); 518 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, st_vars->MaximumSwathHeightC[k]); 519 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, 520 st_vars->RoundedUpMaxSwathSizeBytesC[k]); 521 #endif 522 523 if (SourcePixelFormat[k] == dm_420_10) { 524 st_vars->RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesY[k], 256); 525 st_vars->RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) st_vars->RoundedUpMaxSwathSizeBytesC[k], 256); 526 } 527 } 528 529 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 530 st_vars->TotalActiveDPP = st_vars->TotalActiveDPP + (ForceSingleDPP ? 
1 : DPPPerSurface[k]); 531 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 532 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) { 533 st_vars->NoChromaSurfaces = false; 534 } 535 } 536 537 // By default, just set the reserved space to 2 pixel chunks size 538 *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2; 539 540 // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data 541 // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio] 542 // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req 543 *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (st_vars->RoundedUpMaxSwathSizeBytesY[0]/512); 544 545 if (*CompBufReservedSpaceNeedAdjustment == 1) { 546 *CompBufReservedSpaceKBytes = ROBSizeKBytes - st_vars->RoundedUpMaxSwathSizeBytesY[0]/512; 547 } 548 549 #ifdef __DML_VBA_DEBUG__ 550 dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes); 551 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment); 552 #endif 553 554 *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, st_vars->TotalActiveDPP, st_vars->NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); 555 556 dml32_CalculateDETBufferSize(DETSizeOverride, 557 UseMALLForPStateChange, 558 ForceSingleDPP, 559 NumberOfActiveSurfaces, 560 *UnboundedRequestEnabled, 561 nomDETInKByte, 562 MaxTotalDETInKByte, 563 ConfigReturnBufferSizeInKByte, 564 MinCompressedBufferSizeInKByte, 565 CompressedBufferSegmentSizeInkByteFinal, 566 SourcePixelFormat, 567 ReadBandwidthLuma, 568 ReadBandwidthChroma, 569 st_vars->RoundedUpMaxSwathSizeBytesY, 570 
st_vars->RoundedUpMaxSwathSizeBytesC, 571 DPPPerSurface, 572 573 /* Output */ 574 DETBufferSizeInKByte, // per hubp pipe 575 CompressedBufferSizeInkByte); 576 577 #ifdef __DML_VBA_DEBUG__ 578 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, st_vars->TotalActiveDPP); 579 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 580 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 581 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); 582 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); 583 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 584 #endif 585 586 *ViewportSizeSupport = true; 587 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 588 589 st_vars->DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] == 590 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]); 591 #ifdef __DML_VBA_DEBUG__ 592 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k, 593 st_vars->DETBufferSizeInKByteForSwathCalculation); 594 #endif 595 596 if (st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] <= 597 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 598 SwathHeightY[k] = st_vars->MaximumSwathHeightY[k]; 599 SwathHeightC[k] = st_vars->MaximumSwathHeightC[k]; 600 st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k]; 601 st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k]; 602 } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] && 603 st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] <= 604 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 605 SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2; 606 SwathHeightC[k] = 
st_vars->MaximumSwathHeightC[k]; 607 st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2; 608 st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k]; 609 } else if (st_vars->RoundedUpMaxSwathSizeBytesY[k] < 1.5 * st_vars->RoundedUpMaxSwathSizeBytesC[k] && 610 st_vars->RoundedUpMaxSwathSizeBytesY[k] + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 <= 611 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) { 612 SwathHeightY[k] = st_vars->MaximumSwathHeightY[k]; 613 SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2; 614 st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k]; 615 st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2; 616 } else { 617 SwathHeightY[k] = st_vars->MaximumSwathHeightY[k] / 2; 618 SwathHeightC[k] = st_vars->MaximumSwathHeightC[k] / 2; 619 st_vars->RoundedUpSwathSizeBytesY = st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2; 620 st_vars->RoundedUpSwathSizeBytesC = st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2; 621 } 622 623 if ((st_vars->RoundedUpMaxSwathSizeBytesY[k] / 2 + st_vars->RoundedUpMaxSwathSizeBytesC[k] / 2 > 624 st_vars->DETBufferSizeInKByteForSwathCalculation * 1024 / 2) 625 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 && 626 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { 627 *ViewportSizeSupport = false; 628 ViewportSizeSupportPerSurface[k] = false; 629 } else { 630 ViewportSizeSupportPerSurface[k] = true; 631 } 632 633 if (SwathHeightC[k] == 0) { 634 #ifdef __DML_VBA_DEBUG__ 635 dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k); 636 #endif 637 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024; 638 DETBufferSizeC[k] = 0; 639 } else if (st_vars->RoundedUpSwathSizeBytesY <= 1.5 * st_vars->RoundedUpSwathSizeBytesC) { 640 #ifdef __DML_VBA_DEBUG__ 641 dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k); 642 #endif 643 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 
/ 2; 644 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2; 645 } else { 646 #ifdef __DML_VBA_DEBUG__ 647 dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k); 648 #endif 649 DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024); 650 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k]; 651 } 652 653 #ifdef __DML_VBA_DEBUG__ 654 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]); 655 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]); 656 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, 657 k, st_vars->RoundedUpMaxSwathSizeBytesY[k]); 658 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, 659 k, st_vars->RoundedUpMaxSwathSizeBytesC[k]); 660 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesY); 661 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, st_vars->RoundedUpSwathSizeBytesC); 662 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 663 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 664 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]); 665 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k, 666 ViewportSizeSupportPerSurface[k]); 667 #endif 668 669 } 670 } // CalculateSwathAndDETConfiguration 671 672 void dml32_CalculateSwathWidth( 673 bool ForceSingleDPP, 674 unsigned int NumberOfActiveSurfaces, 675 enum source_format_class SourcePixelFormat[], 676 enum dm_rotation_angle SourceRotation[], 677 bool ViewportStationary[], 678 unsigned int ViewportWidth[], 679 unsigned int ViewportHeight[], 680 unsigned int ViewportXStart[], 681 unsigned int ViewportYStart[], 682 unsigned int ViewportXStartC[], 683 unsigned int ViewportYStartC[], 684 unsigned int SurfaceWidthY[], 685 unsigned int 
SurfaceWidthC[], 686 unsigned int SurfaceHeightY[], 687 unsigned int SurfaceHeightC[], 688 enum odm_combine_mode ODMMode[], 689 unsigned int BytePerPixY[], 690 unsigned int BytePerPixC[], 691 unsigned int Read256BytesBlockHeightY[], 692 unsigned int Read256BytesBlockHeightC[], 693 unsigned int Read256BytesBlockWidthY[], 694 unsigned int Read256BytesBlockWidthC[], 695 unsigned int BlendingAndTiming[], 696 unsigned int HActive[], 697 double HRatio[], 698 unsigned int DPPPerSurface[], 699 700 /* Output */ 701 double SwathWidthdoubleDPPY[], 702 double SwathWidthdoubleDPPC[], 703 double SwathWidthY[], // per-pipe 704 double SwathWidthC[], // per-pipe 705 unsigned int MaximumSwathHeightY[], 706 unsigned int MaximumSwathHeightC[], 707 unsigned int swath_width_luma_ub[], // per-pipe 708 unsigned int swath_width_chroma_ub[]) // per-pipe 709 { 710 unsigned int k, j; 711 enum odm_combine_mode MainSurfaceODMMode; 712 713 unsigned int surface_width_ub_l; 714 unsigned int surface_height_ub_l; 715 unsigned int surface_width_ub_c; 716 unsigned int surface_height_ub_c; 717 718 #ifdef __DML_VBA_DEBUG__ 719 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 720 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 721 #endif 722 723 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 724 if (!IsVertical(SourceRotation[k])) 725 SwathWidthdoubleDPPY[k] = ViewportWidth[k]; 726 else 727 SwathWidthdoubleDPPY[k] = ViewportHeight[k]; 728 729 #ifdef __DML_VBA_DEBUG__ 730 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); 731 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); 732 #endif 733 734 MainSurfaceODMMode = ODMMode[k]; 735 for (j = 0; j < NumberOfActiveSurfaces; ++j) { 736 if (BlendingAndTiming[k] == j) 737 MainSurfaceODMMode = ODMMode[j]; 738 } 739 740 if (ForceSingleDPP) { 741 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 742 } else { 743 if (MainSurfaceODMMode == 
dm_odm_combine_mode_4to1) { 744 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], 745 dml_round(HActive[k] / 4.0 * HRatio[k])); 746 } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) { 747 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k], 748 dml_round(HActive[k] / 2.0 * HRatio[k])); 749 } else if (DPPPerSurface[k] == 2) { 750 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2; 751 } else { 752 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 753 } 754 } 755 756 #ifdef __DML_VBA_DEBUG__ 757 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]); 758 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]); 759 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode); 760 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]); 761 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]); 762 #endif 763 764 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 765 SourcePixelFormat[k] == dm_420_12) { 766 SwathWidthC[k] = SwathWidthY[k] / 2; 767 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2; 768 } else { 769 SwathWidthC[k] = SwathWidthY[k]; 770 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k]; 771 } 772 773 if (ForceSingleDPP == true) { 774 SwathWidthY[k] = SwathWidthdoubleDPPY[k]; 775 SwathWidthC[k] = SwathWidthdoubleDPPC[k]; 776 } 777 778 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); 779 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); 780 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); 781 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); 782 783 #ifdef __DML_VBA_DEBUG__ 784 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); 785 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l); 786 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, 
surface_width_ub_c); 787 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c); 788 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]); 789 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]); 790 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]); 791 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]); 792 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]); 793 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]); 794 #endif 795 796 if (!IsVertical(SourceRotation[k])) { 797 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; 798 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; 799 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 800 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 801 dml_floor(ViewportXStart[k] + 802 SwathWidthY[k] + 803 Read256BytesBlockWidthY[k] - 1, 804 Read256BytesBlockWidthY[k]) - 805 dml_floor(ViewportXStart[k], 806 Read256BytesBlockWidthY[k])); 807 } else { 808 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, 809 dml_ceil(SwathWidthY[k] - 1, 810 Read256BytesBlockWidthY[k]) + 811 Read256BytesBlockWidthY[k]); 812 } 813 if (BytePerPixC[k] > 0) { 814 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 815 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, 816 dml_floor(ViewportXStartC[k] + SwathWidthC[k] + 817 Read256BytesBlockWidthC[k] - 1, 818 Read256BytesBlockWidthC[k]) - 819 dml_floor(ViewportXStartC[k], 820 Read256BytesBlockWidthC[k])); 821 } else { 822 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c, 823 dml_ceil(SwathWidthC[k] - 1, 824 Read256BytesBlockWidthC[k]) + 825 Read256BytesBlockWidthC[k]); 826 } 827 } else { 828 swath_width_chroma_ub[k] = 0; 829 } 830 } else { 831 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; 
832 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; 833 834 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 835 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] + 836 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, 837 Read256BytesBlockHeightY[k]) - 838 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k])); 839 } else { 840 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1, 841 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); 842 } 843 if (BytePerPixC[k] > 0) { 844 if (ViewportStationary[k] && DPPPerSurface[k] == 1) { 845 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, 846 dml_floor(ViewportYStartC[k] + SwathWidthC[k] + 847 Read256BytesBlockHeightC[k] - 1, 848 Read256BytesBlockHeightC[k]) - 849 dml_floor(ViewportYStartC[k], 850 Read256BytesBlockHeightC[k])); 851 } else { 852 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c, 853 dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + 854 Read256BytesBlockHeightC[k]); 855 } 856 } else { 857 swath_width_chroma_ub[k] = 0; 858 } 859 } 860 861 #ifdef __DML_VBA_DEBUG__ 862 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]); 863 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]); 864 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]); 865 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]); 866 #endif 867 868 } 869 } // CalculateSwathWidth 870 871 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, 872 unsigned int TotalNumberOfActiveDPP, 873 bool NoChroma, 874 enum output_encoder_class Output, 875 enum dm_swizzle_mode SurfaceTiling, 876 bool CompBufReservedSpaceNeedAdjustment, 877 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) 878 { 879 bool ret_val = false; 880 881 ret_val = 
(UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && 882 TotalNumberOfActiveDPP == 1 && NoChroma); 883 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) 884 ret_val = false; 885 886 if (SurfaceTiling == dm_sw_linear) 887 ret_val = false; 888 889 if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment) 890 ret_val = false; 891 892 #ifdef __DML_VBA_DEBUG__ 893 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment); 894 dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment); 895 dml_print("DML::%s: ret_val = %d\n", __func__, ret_val); 896 #endif 897 898 return (ret_val); 899 } 900 901 void dml32_CalculateDETBufferSize( 902 unsigned int DETSizeOverride[], 903 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 904 bool ForceSingleDPP, 905 unsigned int NumberOfActiveSurfaces, 906 bool UnboundedRequestEnabled, 907 unsigned int nomDETInKByte, 908 unsigned int MaxTotalDETInKByte, 909 unsigned int ConfigReturnBufferSizeInKByte, 910 unsigned int MinCompressedBufferSizeInKByte, 911 unsigned int CompressedBufferSegmentSizeInkByteFinal, 912 enum source_format_class SourcePixelFormat[], 913 double ReadBandwidthLuma[], 914 double ReadBandwidthChroma[], 915 unsigned int RoundedUpMaxSwathSizeBytesY[], 916 unsigned int RoundedUpMaxSwathSizeBytesC[], 917 unsigned int DPPPerSurface[], 918 /* Output */ 919 unsigned int DETBufferSizeInKByte[], 920 unsigned int *CompressedBufferSizeInkByte) 921 { 922 unsigned int DETBufferSizePoolInKByte; 923 unsigned int NextDETBufferPieceInKByte; 924 bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX]; 925 bool NextPotentialSurfaceToAssignDETPieceFound; 926 unsigned int NextSurfaceToAssignDETPiece; 927 double TotalBandwidth; 928 double BandwidthOfSurfacesNotAssignedDETPiece; 
929 unsigned int max_minDET; 930 unsigned int minDET; 931 unsigned int minDET_pipe; 932 unsigned int j, k; 933 934 #ifdef __DML_VBA_DEBUG__ 935 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP); 936 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 937 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces); 938 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); 939 dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte); 940 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 941 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte); 942 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__, 943 CompressedBufferSegmentSizeInkByteFinal); 944 #endif 945 946 // Note: Will use default det size if that fits 2 swaths 947 if (UnboundedRequestEnabled) { 948 if (DETSizeOverride[0] > 0) { 949 DETBufferSizeInKByte[0] = DETSizeOverride[0]; 950 } else { 951 DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 * 952 ((double) RoundedUpMaxSwathSizeBytesY[0] + 953 (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0)); 954 } 955 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0]; 956 } else { 957 DETBufferSizePoolInKByte = MaxTotalDETInKByte; 958 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 959 DETBufferSizeInKByte[k] = nomDETInKByte; 960 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || 961 SourcePixelFormat[k] == dm_420_12) { 962 max_minDET = nomDETInKByte - 64; 963 } else { 964 max_minDET = nomDETInKByte; 965 } 966 minDET = 128; 967 minDET_pipe = 0; 968 969 // add DET resource until can hold 2 full swaths 970 while (minDET <= max_minDET && minDET_pipe == 0) { 971 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] + 972 (double) 
RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET) 973 minDET_pipe = minDET; 974 minDET = minDET + 64; 975 } 976 977 #ifdef __DML_VBA_DEBUG__ 978 dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET); 979 dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET); 980 dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe); 981 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k, 982 RoundedUpMaxSwathSizeBytesY[k]); 983 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k, 984 RoundedUpMaxSwathSizeBytesC[k]); 985 #endif 986 987 if (minDET_pipe == 0) { 988 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] + 989 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64)); 990 #ifdef __DML_VBA_DEBUG__ 991 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n", 992 __func__, k, minDET_pipe); 993 #endif 994 } 995 996 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { 997 DETBufferSizeInKByte[k] = 0; 998 } else if (DETSizeOverride[k] > 0) { 999 DETBufferSizeInKByte[k] = DETSizeOverride[k]; 1000 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - 1001 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k]; 1002 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) { 1003 DETBufferSizeInKByte[k] = minDET_pipe; 1004 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - 1005 (ForceSingleDPP ? 
1 : DPPPerSurface[k]) * minDET_pipe; 1006 } 1007 1008 #ifdef __DML_VBA_DEBUG__ 1009 dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 1010 dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]); 1011 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 1012 dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte); 1013 #endif 1014 } 1015 1016 TotalBandwidth = 0; 1017 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1018 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) 1019 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 1020 } 1021 #ifdef __DML_VBA_DEBUG__ 1022 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__); 1023 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) 1024 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]); 1025 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__); 1026 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth); 1027 #endif 1028 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth; 1029 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1030 1031 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) { 1032 DETPieceAssignedToThisSurfaceAlready[k] = true; 1033 } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 
1 : DPPPerSurface[k]) * 1034 (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >= 1035 ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) { 1036 DETPieceAssignedToThisSurfaceAlready[k] = true; 1037 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - 1038 ReadBandwidthLuma[k] - ReadBandwidthChroma[k]; 1039 } else { 1040 DETPieceAssignedToThisSurfaceAlready[k] = false; 1041 } 1042 #ifdef __DML_VBA_DEBUG__ 1043 dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k, 1044 DETPieceAssignedToThisSurfaceAlready[k]); 1045 dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, 1046 BandwidthOfSurfacesNotAssignedDETPiece); 1047 #endif 1048 } 1049 1050 for (j = 0; j < NumberOfActiveSurfaces; ++j) { 1051 NextPotentialSurfaceToAssignDETPieceFound = false; 1052 NextSurfaceToAssignDETPiece = 0; 1053 1054 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1055 #ifdef __DML_VBA_DEBUG__ 1056 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k, 1057 ReadBandwidthLuma[k]); 1058 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k, 1059 ReadBandwidthChroma[k]); 1060 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, 1061 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); 1062 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, 1063 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1064 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k, 1065 NextSurfaceToAssignDETPiece); 1066 #endif 1067 if (!DETPieceAssignedToThisSurfaceAlready[k] && 1068 (!NextPotentialSurfaceToAssignDETPieceFound || 1069 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < 1070 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1071 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) { 1072 NextSurfaceToAssignDETPiece = k; 1073 NextPotentialSurfaceToAssignDETPieceFound = 
true; 1074 } 1075 #ifdef __DML_VBA_DEBUG__ 1076 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n", 1077 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]); 1078 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n", 1079 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound); 1080 #endif 1081 } 1082 1083 if (NextPotentialSurfaceToAssignDETPieceFound) { 1084 // Note: To show the banker's rounding behavior in VBA and also the fact 1085 // that the DET buffer size varies due to precision issue 1086 // 1087 //double tmp1 = ((double) DETBufferSizePoolInKByte * 1088 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1089 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1090 // BandwidthOfSurfacesNotAssignedDETPiece / 1091 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1092 //double tmp2 = dml_round((double) DETBufferSizePoolInKByte * 1093 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1094 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1095 //BandwidthOfSurfacesNotAssignedDETPiece / 1096 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1097 // 1098 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1); 1099 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2); 1100 1101 NextDETBufferPieceInKByte = dml_min( 1102 dml_round((double) DETBufferSizePoolInKByte * 1103 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1104 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) / 1105 BandwidthOfSurfacesNotAssignedDETPiece / 1106 ((ForceSingleDPP ? 1 : 1107 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) * 1108 (ForceSingleDPP ? 1 : 1109 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0, 1110 dml_floor((double) DETBufferSizePoolInKByte, 1111 (ForceSingleDPP ? 1 : 1112 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)); 1113 1114 // Above calculation can assign the entire DET buffer allocation to a single pipe. 
1115 // We should limit the per-pipe DET size to the nominal / max per pipe. 1116 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { 1117 if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] < 1118 nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) { 1119 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) - 1120 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]; 1121 } else { 1122 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] 1123 // already has the max per-pipe value 1124 NextDETBufferPieceInKByte = 0; 1125 } 1126 } 1127 1128 #ifdef __DML_VBA_DEBUG__ 1129 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j, 1130 DETBufferSizePoolInKByte); 1131 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, 1132 NextSurfaceToAssignDETPiece); 1133 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j, 1134 NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]); 1135 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j, 1136 NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1137 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", 1138 __func__, j, BandwidthOfSurfacesNotAssignedDETPiece); 1139 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j, 1140 NextDETBufferPieceInKByte); 1141 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ", 1142 __func__, j, NextSurfaceToAssignDETPiece, 1143 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); 1144 #endif 1145 1146 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] = 1147 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] 1148 + NextDETBufferPieceInKByte 1149 / (ForceSingleDPP ? 
1 : DPPPerSurface[NextSurfaceToAssignDETPiece]); 1150 #ifdef __DML_VBA_DEBUG__ 1151 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]); 1152 #endif 1153 1154 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte; 1155 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true; 1156 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece - 1157 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] + 1158 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]); 1159 } 1160 } 1161 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte; 1162 } 1163 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; 1164 1165 #ifdef __DML_VBA_DEBUG__ 1166 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__); 1167 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 1168 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) { 1169 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n", 1170 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); 1171 } 1172 #endif 1173 } // CalculateDETBufferSize 1174 1175 void dml32_CalculateODMMode( 1176 unsigned int MaximumPixelsPerLinePerDSCUnit, 1177 unsigned int HActive, 1178 enum output_encoder_class Output, 1179 enum odm_combine_policy ODMUse, 1180 double StateDispclk, 1181 double MaxDispclk, 1182 bool DSCEnable, 1183 unsigned int TotalNumberOfActiveDPP, 1184 unsigned int MaxNumDPP, 1185 double PixelClock, 1186 double DISPCLKDPPCLKDSCCLKDownSpreading, 1187 double DISPCLKRampingMargin, 1188 double DISPCLKDPPCLKVCOSpeed, 1189 1190 /* Output */ 1191 bool *TotalAvailablePipesSupport, 1192 unsigned int *NumberOfDPP, 1193 enum odm_combine_mode *ODMMode, 1194 double *RequiredDISPCLKPerSurface) 1195 { 1196 1197 double SurfaceRequiredDISPCLKWithoutODMCombine; 1198 double 
SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1199 double SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1200 1201 SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled, 1202 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1203 MaxDispclk); 1204 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1, 1205 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1206 MaxDispclk); 1207 SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1, 1208 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed, 1209 MaxDispclk); 1210 *TotalAvailablePipesSupport = true; 1211 *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable 1212 1213 if (ODMUse == dm_odm_combine_policy_none) 1214 *ODMMode = dm_odm_combine_mode_disabled; 1215 1216 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine; 1217 *NumberOfDPP = 0; 1218 1219 // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care?? 
1220 // (ODMUse == "" || ODMUse == "CombineAsNeeded") 1221 1222 if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 || 1223 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk || 1224 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)))))) { 1225 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) { 1226 *ODMMode = dm_odm_combine_mode_4to1; 1227 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne; 1228 *NumberOfDPP = 4; 1229 } else { 1230 *TotalAvailablePipesSupport = false; 1231 } 1232 } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 || 1233 (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk && 1234 SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) || 1235 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)))))) { 1236 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) { 1237 *ODMMode = dm_odm_combine_mode_2to1; 1238 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne; 1239 *NumberOfDPP = 2; 1240 } else { 1241 *TotalAvailablePipesSupport = false; 1242 } 1243 } else { 1244 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP) 1245 *NumberOfDPP = 1; 1246 else 1247 *TotalAvailablePipesSupport = false; 1248 } 1249 } 1250 1251 double dml32_CalculateRequiredDispclk( 1252 enum odm_combine_mode ODMMode, 1253 double PixelClock, 1254 double DISPCLKDPPCLKDSCCLKDownSpreading, 1255 double DISPCLKRampingMargin, 1256 double DISPCLKDPPCLKVCOSpeed, 1257 double MaxDispclk) 1258 { 1259 double RequiredDispclk = 0.; 1260 double PixelClockAfterODM; 1261 double DISPCLKWithRampingRoundedToDFSGranularity; 1262 double DISPCLKWithoutRampingRoundedToDFSGranularity; 1263 double MaxDispclkRoundedDownToDFSGranularity; 1264 1265 if (ODMMode == dm_odm_combine_mode_4to1) 1266 PixelClockAfterODM = PixelClock / 4; 1267 else if (ODMMode == dm_odm_combine_mode_2to1) 1268 PixelClockAfterODM = PixelClock / 2; 1269 else 1270 PixelClockAfterODM = PixelClock; 1271 
1272 1273 DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( 1274 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100) 1275 * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed); 1276 1277 DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity( 1278 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed); 1279 1280 MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed); 1281 1282 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) 1283 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity; 1284 else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity) 1285 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity; 1286 else 1287 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity; 1288 1289 return RequiredDispclk; 1290 } 1291 1292 double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed) 1293 { 1294 if (Clock <= 0.0) 1295 return 0.0; 1296 1297 if (round_up) 1298 return VCOSpeed * 4.0 / dml_floor(VCOSpeed * 4.0 / Clock, 1.0); 1299 else 1300 return VCOSpeed * 4.0 / dml_ceil(VCOSpeed * 4.0 / Clock, 1.0); 1301 } 1302 1303 void dml32_CalculateOutputLink( 1304 double PHYCLKPerState, 1305 double PHYCLKD18PerState, 1306 double PHYCLKD32PerState, 1307 double Downspreading, 1308 bool IsMainSurfaceUsingTheIndicatedTiming, 1309 enum output_encoder_class Output, 1310 enum output_format_class OutputFormat, 1311 unsigned int HTotal, 1312 unsigned int HActive, 1313 double PixelClockBackEnd, 1314 double ForcedOutputLinkBPP, 1315 unsigned int DSCInputBitPerComponent, 1316 unsigned int NumberOfDSCSlices, 1317 double AudioSampleRate, 1318 unsigned int AudioSampleLayout, 1319 enum odm_combine_mode ODMModeNoDSC, 1320 enum odm_combine_mode ODMModeDSC, 1321 bool DSCEnable, 1322 unsigned int OutputLinkDPLanes, 1323 enum 
dm_output_link_dp_rate OutputLinkDPRate, 1324 1325 /* Output */ 1326 bool *RequiresDSC, 1327 double *RequiresFEC, 1328 double *OutBpp, 1329 enum dm_output_type *OutputType, 1330 enum dm_output_rate *OutputRate, 1331 unsigned int *RequiredSlots) 1332 { 1333 bool LinkDSCEnable; 1334 unsigned int dummy; 1335 *RequiresDSC = false; 1336 *RequiresFEC = false; 1337 *OutBpp = 0; 1338 *OutputType = dm_output_type_unknown; 1339 *OutputRate = dm_output_rate_unknown; 1340 1341 if (IsMainSurfaceUsingTheIndicatedTiming) { 1342 if (Output == dm_hdmi) { 1343 *RequiresDSC = false; 1344 *RequiresFEC = false; 1345 *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive, 1346 PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat, 1347 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1348 ODMModeNoDSC, ODMModeDSC, &dummy); 1349 //OutputTypeAndRate = "HDMI"; 1350 *OutputType = dm_output_type_hdmi; 1351 1352 } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) { 1353 if (DSCEnable == true) { 1354 *RequiresDSC = true; 1355 LinkDSCEnable = true; 1356 if (Output == dm_dp || Output == dm_dp2p0) 1357 *RequiresFEC = true; 1358 else 1359 *RequiresFEC = false; 1360 } else { 1361 *RequiresDSC = false; 1362 LinkDSCEnable = false; 1363 if (Output == dm_dp2p0) 1364 *RequiresFEC = true; 1365 else 1366 *RequiresFEC = false; 1367 } 1368 if (Output == dm_dp2p0) { 1369 *OutBpp = 0; 1370 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) && 1371 PHYCLKD32PerState >= 10000 / 32) { 1372 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000, 1373 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1374 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1375 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1376 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1377 if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true && 1378 
ForcedOutputLinkBPP == 0) { 1379 *RequiresDSC = true; 1380 LinkDSCEnable = true; 1381 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000, 1382 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1383 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1384 OutputFormat, DSCInputBitPerComponent, 1385 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1386 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1387 } 1388 //OutputTypeAndRate = Output & " UHBR10"; 1389 *OutputType = dm_output_type_dp2p0; 1390 *OutputRate = dm_output_rate_dp_rate_uhbr10; 1391 } 1392 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) && 1393 *OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) { 1394 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500, 1395 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1396 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1397 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1398 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1399 1400 if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true && 1401 ForcedOutputLinkBPP == 0) { 1402 *RequiresDSC = true; 1403 LinkDSCEnable = true; 1404 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500, 1405 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1406 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1407 OutputFormat, DSCInputBitPerComponent, 1408 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1409 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1410 } 1411 //OutputTypeAndRate = Output & " UHBR13p5"; 1412 *OutputType = dm_output_type_dp2p0; 1413 *OutputRate = dm_output_rate_dp_rate_uhbr13p5; 1414 } 1415 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) && 1416 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) { 1417 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000, 1418 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1419 
ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1420 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1421 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1422 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) { 1423 *RequiresDSC = true; 1424 LinkDSCEnable = true; 1425 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000, 1426 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1427 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1428 OutputFormat, DSCInputBitPerComponent, 1429 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1430 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1431 } 1432 //OutputTypeAndRate = Output & " UHBR20"; 1433 *OutputType = dm_output_type_dp2p0; 1434 *OutputRate = dm_output_rate_dp_rate_uhbr20; 1435 } 1436 } else { 1437 *OutBpp = 0; 1438 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) && 1439 PHYCLKPerState >= 270) { 1440 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700, 1441 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1442 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1443 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1444 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1445 if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true && 1446 ForcedOutputLinkBPP == 0) { 1447 *RequiresDSC = true; 1448 LinkDSCEnable = true; 1449 if (Output == dm_dp) 1450 *RequiresFEC = true; 1451 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700, 1452 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1453 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1454 OutputFormat, DSCInputBitPerComponent, 1455 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1456 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1457 } 1458 //OutputTypeAndRate = Output & " HBR"; 1459 *OutputType = (Output == dm_dp) ? 
dm_output_type_dp : dm_output_type_edp; 1460 *OutputRate = dm_output_rate_dp_rate_hbr; 1461 } 1462 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) && 1463 *OutBpp == 0 && PHYCLKPerState >= 540) { 1464 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400, 1465 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1466 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, 1467 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, 1468 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1469 1470 if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true && 1471 ForcedOutputLinkBPP == 0) { 1472 *RequiresDSC = true; 1473 LinkDSCEnable = true; 1474 if (Output == dm_dp) 1475 *RequiresFEC = true; 1476 1477 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400, 1478 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1479 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1480 OutputFormat, DSCInputBitPerComponent, 1481 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1482 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1483 } 1484 //OutputTypeAndRate = Output & " HBR2"; 1485 *OutputType = (Output == dm_dp) ? 
dm_output_type_dp : dm_output_type_edp; 1486 *OutputRate = dm_output_rate_dp_rate_hbr2; 1487 } 1488 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) { 1489 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100, 1490 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1491 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1492 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, 1493 AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, 1494 RequiredSlots); 1495 1496 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) { 1497 *RequiresDSC = true; 1498 LinkDSCEnable = true; 1499 if (Output == dm_dp) 1500 *RequiresFEC = true; 1501 1502 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100, 1503 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, 1504 ForcedOutputLinkBPP, LinkDSCEnable, Output, 1505 OutputFormat, DSCInputBitPerComponent, 1506 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout, 1507 ODMModeNoDSC, ODMModeDSC, RequiredSlots); 1508 } 1509 //OutputTypeAndRate = Output & " HBR3"; 1510 *OutputType = (Output == dm_dp) ? 
dm_output_type_dp : dm_output_type_edp; 1511 *OutputRate = dm_output_rate_dp_rate_hbr3; 1512 } 1513 } 1514 } 1515 } 1516 } 1517 1518 void dml32_CalculateDPPCLK( 1519 unsigned int NumberOfActiveSurfaces, 1520 double DISPCLKDPPCLKDSCCLKDownSpreading, 1521 double DISPCLKDPPCLKVCOSpeed, 1522 double DPPCLKUsingSingleDPP[], 1523 unsigned int DPPPerSurface[], 1524 1525 /* output */ 1526 double *GlobalDPPCLK, 1527 double Dppclk[]) 1528 { 1529 unsigned int k; 1530 *GlobalDPPCLK = 0; 1531 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1532 Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k] * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100); 1533 *GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[k]); 1534 } 1535 *GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed); 1536 for (k = 0; k < NumberOfActiveSurfaces; ++k) 1537 Dppclk[k] = *GlobalDPPCLK / 255 * dml_ceil(Dppclk[k] * 255.0 / *GlobalDPPCLK, 1.0); 1538 } 1539 1540 double dml32_TruncToValidBPP( 1541 double LinkBitRate, 1542 unsigned int Lanes, 1543 unsigned int HTotal, 1544 unsigned int HActive, 1545 double PixelClock, 1546 double DesiredBPP, 1547 bool DSCEnable, 1548 enum output_encoder_class Output, 1549 enum output_format_class Format, 1550 unsigned int DSCInputBitPerComponent, 1551 unsigned int DSCSlices, 1552 unsigned int AudioRate, 1553 unsigned int AudioLayout, 1554 enum odm_combine_mode ODMModeNoDSC, 1555 enum odm_combine_mode ODMModeDSC, 1556 /* Output */ 1557 unsigned int *RequiredSlots) 1558 { 1559 double MaxLinkBPP; 1560 unsigned int MinDSCBPP; 1561 double MaxDSCBPP; 1562 unsigned int NonDSCBPP0; 1563 unsigned int NonDSCBPP1; 1564 unsigned int NonDSCBPP2; 1565 unsigned int NonDSCBPP3; 1566 1567 if (Format == dm_420) { 1568 NonDSCBPP0 = 12; 1569 NonDSCBPP1 = 15; 1570 NonDSCBPP2 = 18; 1571 MinDSCBPP = 6; 1572 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; 1573 } else if (Format == dm_444) { 1574 NonDSCBPP0 = 18; 1575 NonDSCBPP1 = 24; 1576 NonDSCBPP2 = 30; 1577 NonDSCBPP3 = 36; 
1578 MinDSCBPP = 8; 1579 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; 1580 } else { 1581 if (Output == dm_hdmi) { 1582 NonDSCBPP0 = 24; 1583 NonDSCBPP1 = 24; 1584 NonDSCBPP2 = 24; 1585 } else { 1586 NonDSCBPP0 = 16; 1587 NonDSCBPP1 = 20; 1588 NonDSCBPP2 = 24; 1589 } 1590 if (Format == dm_n422) { 1591 MinDSCBPP = 7; 1592 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; 1593 } else { 1594 MinDSCBPP = 8; 1595 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; 1596 } 1597 } 1598 if (Output == dm_dp2p0) { 1599 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540; 1600 } else if (DSCEnable && Output == dm_dp) { 1601 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); 1602 } else { 1603 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; 1604 } 1605 1606 if (DSCEnable) { 1607 if (ODMModeDSC == dm_odm_combine_mode_4to1) 1608 MaxLinkBPP = dml_min(MaxLinkBPP, 16); 1609 else if (ODMModeDSC == dm_odm_combine_mode_2to1) 1610 MaxLinkBPP = dml_min(MaxLinkBPP, 32); 1611 else if (ODMModeDSC == dm_odm_split_mode_1to2) 1612 MaxLinkBPP = 2 * MaxLinkBPP; 1613 } else { 1614 if (ODMModeNoDSC == dm_odm_combine_mode_4to1) 1615 MaxLinkBPP = dml_min(MaxLinkBPP, 16); 1616 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1) 1617 MaxLinkBPP = dml_min(MaxLinkBPP, 32); 1618 else if (ODMModeNoDSC == dm_odm_split_mode_1to2) 1619 MaxLinkBPP = 2 * MaxLinkBPP; 1620 } 1621 1622 if (DesiredBPP == 0) { 1623 if (DSCEnable) { 1624 if (MaxLinkBPP < MinDSCBPP) 1625 return BPP_INVALID; 1626 else if (MaxLinkBPP >= MaxDSCBPP) 1627 return MaxDSCBPP; 1628 else 1629 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; 1630 } else { 1631 if (MaxLinkBPP >= NonDSCBPP3) 1632 return NonDSCBPP3; 1633 else if (MaxLinkBPP >= NonDSCBPP2) 1634 return NonDSCBPP2; 1635 else if (MaxLinkBPP >= NonDSCBPP1) 1636 return NonDSCBPP1; 1637 else if (MaxLinkBPP >= NonDSCBPP0) 1638 return 16.0; 1639 else 1640 return BPP_INVALID; 1641 } 1642 } else { 1643 if 
(!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || 1644 DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) || 1645 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) 1646 return BPP_INVALID; 1647 else 1648 return DesiredBPP; 1649 } 1650 1651 *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1); 1652 1653 return BPP_INVALID; 1654 } // TruncToValidBPP 1655 1656 double dml32_RequiredDTBCLK( 1657 bool DSCEnable, 1658 double PixelClock, 1659 enum output_format_class OutputFormat, 1660 double OutputBpp, 1661 unsigned int DSCSlices, 1662 unsigned int HTotal, 1663 unsigned int HActive, 1664 unsigned int AudioRate, 1665 unsigned int AudioLayout) 1666 { 1667 double PixelWordRate; 1668 double HCActive; 1669 double HCBlank; 1670 double AverageTribyteRate; 1671 double HActiveTribyteRate; 1672 1673 if (DSCEnable != true) 1674 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0); 1675 1676 PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2); 1677 HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp * 1678 dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1); 1679 HCBlank = 64 + 32 * 1680 dml_ceil(AudioRate * (AudioLayout == 1 ? 
1 : 0.25) * HTotal / (PixelClock * 1000), 1); 1681 AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal; 1682 HActiveTribyteRate = PixelWordRate * HCActive / HActive; 1683 return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002; 1684 } 1685 1686 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled, 1687 enum odm_combine_mode ODMMode, 1688 unsigned int DSCInputBitPerComponent, 1689 double OutputBpp, 1690 unsigned int HActive, 1691 unsigned int HTotal, 1692 unsigned int NumberOfDSCSlices, 1693 enum output_format_class OutputFormat, 1694 enum output_encoder_class Output, 1695 double PixelClock, 1696 double PixelClockBackEnd) 1697 { 1698 unsigned int DSCDelayRequirement_val; 1699 1700 if (DSCEnabled == true && OutputBpp != 0) { 1701 if (ODMMode == dm_odm_combine_mode_4to1) { 1702 DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1703 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4, 1704 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); 1705 } else if (ODMMode == dm_odm_combine_mode_2to1) { 1706 DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1707 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2, 1708 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output)); 1709 } else { 1710 DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, 1711 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices, 1712 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output); 1713 } 1714 1715 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) * 1716 dml_ceil(DSCDelayRequirement_val / HActive, 1); 1717 1718 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd; 1719 1720 } else { 1721 DSCDelayRequirement_val = 0; 1722 } 1723 1724 #ifdef __DML_VBA_DEBUG__ 1725 dml_print("DML::%s: DSCEnabled = 
%d\n", __func__, DSCEnabled); 1726 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp); 1727 dml_print("DML::%s: HActive = %d\n", __func__, HActive); 1728 dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat); 1729 dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent); 1730 dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices); 1731 dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val); 1732 #endif 1733 1734 return DSCDelayRequirement_val; 1735 } 1736 1737 void dml32_CalculateSurfaceSizeInMall( 1738 unsigned int NumberOfActiveSurfaces, 1739 unsigned int MALLAllocatedForDCN, 1740 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], 1741 bool DCCEnable[], 1742 bool ViewportStationary[], 1743 unsigned int ViewportXStartY[], 1744 unsigned int ViewportYStartY[], 1745 unsigned int ViewportXStartC[], 1746 unsigned int ViewportYStartC[], 1747 unsigned int ViewportWidthY[], 1748 unsigned int ViewportHeightY[], 1749 unsigned int BytesPerPixelY[], 1750 unsigned int ViewportWidthC[], 1751 unsigned int ViewportHeightC[], 1752 unsigned int BytesPerPixelC[], 1753 unsigned int SurfaceWidthY[], 1754 unsigned int SurfaceWidthC[], 1755 unsigned int SurfaceHeightY[], 1756 unsigned int SurfaceHeightC[], 1757 unsigned int Read256BytesBlockWidthY[], 1758 unsigned int Read256BytesBlockWidthC[], 1759 unsigned int Read256BytesBlockHeightY[], 1760 unsigned int Read256BytesBlockHeightC[], 1761 unsigned int ReadBlockWidthY[], 1762 unsigned int ReadBlockWidthC[], 1763 unsigned int ReadBlockHeightY[], 1764 unsigned int ReadBlockHeightC[], 1765 1766 /* Output */ 1767 unsigned int SurfaceSizeInMALL[], 1768 bool *ExceededMALLSize) 1769 { 1770 unsigned int TotalSurfaceSizeInMALL = 0; 1771 unsigned int k; 1772 1773 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1774 if (ViewportStationary[k]) { 1775 SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]), 
1776 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1, 1777 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k], 1778 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k], 1779 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] + 1780 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) - 1781 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k]; 1782 1783 if (ReadBlockWidthC[k] > 0) { 1784 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1785 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]), 1786 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 1787 ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) - 1788 dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) * 1789 dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]), 1790 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 1791 ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) - 1792 dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) * 1793 BytesPerPixelC[k]; 1794 } 1795 if (DCCEnable[k] == true) { 1796 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1797 dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]), 1798 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 * 1799 Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k]) 1800 - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k])) 1801 * dml_min(dml_ceil(SurfaceHeightY[k], 8 * 1802 Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] + 1803 ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 * 1804 Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 1805 * Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256; 1806 if (Read256BytesBlockWidthC[k] > 0) { 1807 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1808 dml_min(dml_ceil(SurfaceWidthC[k], 8 * 1809 Read256BytesBlockWidthC[k]), 1810 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8 1811 * Read256BytesBlockWidthC[k] - 1, 8 * 1812 Read256BytesBlockWidthC[k]) - 1813 
dml_floor(ViewportXStartC[k], 8 * 1814 Read256BytesBlockWidthC[k])) * 1815 dml_min(dml_ceil(SurfaceHeightC[k], 8 * 1816 Read256BytesBlockHeightC[k]), 1817 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] + 1818 8 * Read256BytesBlockHeightC[k] - 1, 8 * 1819 Read256BytesBlockHeightC[k]) - 1820 dml_floor(ViewportYStartC[k], 8 * 1821 Read256BytesBlockHeightC[k])) * 1822 BytesPerPixelC[k] / 256; 1823 } 1824 } 1825 } else { 1826 SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 1827 ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) * 1828 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 1829 ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * 1830 BytesPerPixelY[k]; 1831 if (ReadBlockWidthC[k] > 0) { 1832 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1833 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 1834 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) * 1835 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 1836 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * 1837 BytesPerPixelC[k]; 1838 } 1839 if (DCCEnable[k] == true) { 1840 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1841 dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 * 1842 Read256BytesBlockWidthY[k] - 1), 8 * 1843 Read256BytesBlockWidthY[k]) * 1844 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 * 1845 Read256BytesBlockHeightY[k] - 1), 8 * 1846 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256; 1847 1848 if (Read256BytesBlockWidthC[k] > 0) { 1849 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] + 1850 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 * 1851 Read256BytesBlockWidthC[k] - 1), 8 * 1852 Read256BytesBlockWidthC[k]) * 1853 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 * 1854 Read256BytesBlockHeightC[k] - 1), 8 * 1855 Read256BytesBlockHeightC[k]) * 1856 BytesPerPixelC[k] / 256; 1857 } 1858 } 1859 } 1860 } 1861 1862 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1863 if (UseMALLForStaticScreen[k] == 
dm_use_mall_static_screen_enable) 1864 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; 1865 } 1866 *ExceededMALLSize = (TotalSurfaceSizeInMALL <= MALLAllocatedForDCN * 1024 * 1024 ? false : true); 1867 } // CalculateSurfaceSizeInMall 1868 1869 void dml32_CalculateVMRowAndSwath( 1870 struct dml32_CalculateVMRowAndSwath *st_vars, 1871 unsigned int NumberOfActiveSurfaces, 1872 DmlPipe myPipe[], 1873 unsigned int SurfaceSizeInMALL[], 1874 unsigned int PTEBufferSizeInRequestsLuma, 1875 unsigned int PTEBufferSizeInRequestsChroma, 1876 unsigned int DCCMetaBufferSizeBytes, 1877 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[], 1878 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 1879 unsigned int MALLAllocatedForDCN, 1880 double SwathWidthY[], 1881 double SwathWidthC[], 1882 bool GPUVMEnable, 1883 bool HostVMEnable, 1884 unsigned int HostVMMaxNonCachedPageTableLevels, 1885 unsigned int GPUVMMaxPageTableLevels, 1886 unsigned int GPUVMMinPageSizeKBytes[], 1887 unsigned int HostVMMinPageSize, 1888 1889 /* Output */ 1890 bool PTEBufferSizeNotExceeded[], 1891 bool DCCMetaBufferSizeNotExceeded[], 1892 unsigned int dpte_row_width_luma_ub[], 1893 unsigned int dpte_row_width_chroma_ub[], 1894 unsigned int dpte_row_height_luma[], 1895 unsigned int dpte_row_height_chroma[], 1896 unsigned int dpte_row_height_linear_luma[], // VBA_DELTA 1897 unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA 1898 unsigned int meta_req_width[], 1899 unsigned int meta_req_width_chroma[], 1900 unsigned int meta_req_height[], 1901 unsigned int meta_req_height_chroma[], 1902 unsigned int meta_row_width[], 1903 unsigned int meta_row_width_chroma[], 1904 unsigned int meta_row_height[], 1905 unsigned int meta_row_height_chroma[], 1906 unsigned int vm_group_bytes[], 1907 unsigned int dpte_group_bytes[], 1908 unsigned int PixelPTEReqWidthY[], 1909 unsigned int PixelPTEReqHeightY[], 1910 unsigned int PTERequestSizeY[], 1911 unsigned int 
PixelPTEReqWidthC[], 1912 unsigned int PixelPTEReqHeightC[], 1913 unsigned int PTERequestSizeC[], 1914 unsigned int dpde0_bytes_per_frame_ub_l[], 1915 unsigned int meta_pte_bytes_per_frame_ub_l[], 1916 unsigned int dpde0_bytes_per_frame_ub_c[], 1917 unsigned int meta_pte_bytes_per_frame_ub_c[], 1918 double PrefetchSourceLinesY[], 1919 double PrefetchSourceLinesC[], 1920 double VInitPreFillY[], 1921 double VInitPreFillC[], 1922 unsigned int MaxNumSwathY[], 1923 unsigned int MaxNumSwathC[], 1924 double meta_row_bw[], 1925 double dpte_row_bw[], 1926 double PixelPTEBytesPerRow[], 1927 double PDEAndMetaPTEBytesFrame[], 1928 double MetaRowByte[], 1929 bool use_one_row_for_frame[], 1930 bool use_one_row_for_frame_flip[], 1931 bool UsesMALLForStaticScreen[], 1932 bool PTE_BUFFER_MODE[], 1933 unsigned int BIGK_FRAGMENT_SIZE[]) 1934 { 1935 unsigned int k; 1936 1937 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 1938 if (HostVMEnable == true) { 1939 vm_group_bytes[k] = 512; 1940 dpte_group_bytes[k] = 512; 1941 } else if (GPUVMEnable == true) { 1942 vm_group_bytes[k] = 2048; 1943 if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation)) 1944 dpte_group_bytes[k] = 512; 1945 else 1946 dpte_group_bytes[k] = 2048; 1947 } else { 1948 vm_group_bytes[k] = 0; 1949 dpte_group_bytes[k] = 0; 1950 } 1951 1952 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 || 1953 myPipe[k].SourcePixelFormat == dm_420_12 || 1954 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) { 1955 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) && 1956 !IsVertical(myPipe[k].SourceRotation)) { 1957 st_vars->PTEBufferSizeInRequestsForLuma[k] = 1958 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2; 1959 st_vars->PTEBufferSizeInRequestsForChroma[k] = st_vars->PTEBufferSizeInRequestsForLuma[k]; 1960 } else { 1961 st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma; 1962 
st_vars->PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma; 1963 } 1964 1965 st_vars->PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes( 1966 myPipe[k].ViewportStationary, 1967 myPipe[k].DCCEnable, 1968 myPipe[k].DPPPerSurface, 1969 myPipe[k].BlockHeight256BytesC, 1970 myPipe[k].BlockWidth256BytesC, 1971 myPipe[k].SourcePixelFormat, 1972 myPipe[k].SurfaceTiling, 1973 myPipe[k].BytePerPixelC, 1974 myPipe[k].SourceRotation, 1975 SwathWidthC[k], 1976 myPipe[k].ViewportHeightChroma, 1977 myPipe[k].ViewportXStartC, 1978 myPipe[k].ViewportYStartC, 1979 GPUVMEnable, 1980 HostVMEnable, 1981 HostVMMaxNonCachedPageTableLevels, 1982 GPUVMMaxPageTableLevels, 1983 GPUVMMinPageSizeKBytes[k], 1984 HostVMMinPageSize, 1985 st_vars->PTEBufferSizeInRequestsForChroma[k], 1986 myPipe[k].PitchC, 1987 myPipe[k].DCCMetaPitchC, 1988 myPipe[k].BlockWidthC, 1989 myPipe[k].BlockHeightC, 1990 1991 /* Output */ 1992 &st_vars->MetaRowByteC[k], 1993 &st_vars->PixelPTEBytesPerRowC[k], 1994 &dpte_row_width_chroma_ub[k], 1995 &dpte_row_height_chroma[k], 1996 &dpte_row_height_linear_chroma[k], 1997 &st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k], 1998 &st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k], 1999 &st_vars->dpte_row_height_chroma_one_row_per_frame[k], 2000 &meta_req_width_chroma[k], 2001 &meta_req_height_chroma[k], 2002 &meta_row_width_chroma[k], 2003 &meta_row_height_chroma[k], 2004 &PixelPTEReqWidthC[k], 2005 &PixelPTEReqHeightC[k], 2006 &PTERequestSizeC[k], 2007 &dpde0_bytes_per_frame_ub_c[k], 2008 &meta_pte_bytes_per_frame_ub_c[k]); 2009 2010 PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines( 2011 myPipe[k].VRatioChroma, 2012 myPipe[k].VTapsChroma, 2013 myPipe[k].InterlaceEnable, 2014 myPipe[k].ProgressiveToInterlaceUnitInOPP, 2015 myPipe[k].SwathHeightC, 2016 myPipe[k].SourceRotation, 2017 myPipe[k].ViewportStationary, 2018 SwathWidthC[k], 2019 myPipe[k].ViewportHeightChroma, 2020 myPipe[k].ViewportXStartC, 2021 myPipe[k].ViewportYStartC, 
2022 2023 /* Output */ 2024 &VInitPreFillC[k], 2025 &MaxNumSwathC[k]); 2026 } else { 2027 st_vars->PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma; 2028 st_vars->PTEBufferSizeInRequestsForChroma[k] = 0; 2029 st_vars->PixelPTEBytesPerRowC[k] = 0; 2030 st_vars->PDEAndMetaPTEBytesFrameC = 0; 2031 st_vars->MetaRowByteC[k] = 0; 2032 MaxNumSwathC[k] = 0; 2033 PrefetchSourceLinesC[k] = 0; 2034 st_vars->dpte_row_height_chroma_one_row_per_frame[k] = 0; 2035 st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0; 2036 st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0; 2037 } 2038 2039 st_vars->PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes( 2040 myPipe[k].ViewportStationary, 2041 myPipe[k].DCCEnable, 2042 myPipe[k].DPPPerSurface, 2043 myPipe[k].BlockHeight256BytesY, 2044 myPipe[k].BlockWidth256BytesY, 2045 myPipe[k].SourcePixelFormat, 2046 myPipe[k].SurfaceTiling, 2047 myPipe[k].BytePerPixelY, 2048 myPipe[k].SourceRotation, 2049 SwathWidthY[k], 2050 myPipe[k].ViewportHeight, 2051 myPipe[k].ViewportXStart, 2052 myPipe[k].ViewportYStart, 2053 GPUVMEnable, 2054 HostVMEnable, 2055 HostVMMaxNonCachedPageTableLevels, 2056 GPUVMMaxPageTableLevels, 2057 GPUVMMinPageSizeKBytes[k], 2058 HostVMMinPageSize, 2059 st_vars->PTEBufferSizeInRequestsForLuma[k], 2060 myPipe[k].PitchY, 2061 myPipe[k].DCCMetaPitchY, 2062 myPipe[k].BlockWidthY, 2063 myPipe[k].BlockHeightY, 2064 2065 /* Output */ 2066 &st_vars->MetaRowByteY[k], 2067 &st_vars->PixelPTEBytesPerRowY[k], 2068 &dpte_row_width_luma_ub[k], 2069 &dpte_row_height_luma[k], 2070 &dpte_row_height_linear_luma[k], 2071 &st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k], 2072 &st_vars->dpte_row_width_luma_ub_one_row_per_frame[k], 2073 &st_vars->dpte_row_height_luma_one_row_per_frame[k], 2074 &meta_req_width[k], 2075 &meta_req_height[k], 2076 &meta_row_width[k], 2077 &meta_row_height[k], 2078 &PixelPTEReqWidthY[k], 2079 &PixelPTEReqHeightY[k], 2080 &PTERequestSizeY[k], 2081 
&dpde0_bytes_per_frame_ub_l[k], 2082 &meta_pte_bytes_per_frame_ub_l[k]); 2083 2084 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines( 2085 myPipe[k].VRatio, 2086 myPipe[k].VTaps, 2087 myPipe[k].InterlaceEnable, 2088 myPipe[k].ProgressiveToInterlaceUnitInOPP, 2089 myPipe[k].SwathHeightY, 2090 myPipe[k].SourceRotation, 2091 myPipe[k].ViewportStationary, 2092 SwathWidthY[k], 2093 myPipe[k].ViewportHeight, 2094 myPipe[k].ViewportXStart, 2095 myPipe[k].ViewportYStart, 2096 2097 /* Output */ 2098 &VInitPreFillY[k], 2099 &MaxNumSwathY[k]); 2100 2101 PDEAndMetaPTEBytesFrame[k] = st_vars->PDEAndMetaPTEBytesFrameY + st_vars->PDEAndMetaPTEBytesFrameC; 2102 MetaRowByte[k] = st_vars->MetaRowByteY[k] + st_vars->MetaRowByteC[k]; 2103 2104 if (st_vars->PixelPTEBytesPerRowY[k] <= 64 * st_vars->PTEBufferSizeInRequestsForLuma[k] && 2105 st_vars->PixelPTEBytesPerRowC[k] <= 64 * st_vars->PTEBufferSizeInRequestsForChroma[k]) { 2106 PTEBufferSizeNotExceeded[k] = true; 2107 } else { 2108 PTEBufferSizeNotExceeded[k] = false; 2109 } 2110 2111 st_vars->one_row_per_frame_fits_in_buffer[k] = (st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * 2112 st_vars->PTEBufferSizeInRequestsForLuma[k] && 2113 st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * st_vars->PTEBufferSizeInRequestsForChroma[k]); 2114 } 2115 2116 dml32_CalculateMALLUseForStaticScreen( 2117 NumberOfActiveSurfaces, 2118 MALLAllocatedForDCN, 2119 UseMALLForStaticScreen, // mode 2120 SurfaceSizeInMALL, 2121 st_vars->one_row_per_frame_fits_in_buffer, 2122 /* Output */ 2123 UsesMALLForStaticScreen); // boolen 2124 2125 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2126 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || 2127 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || 2128 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || 2129 (GPUVMMinPageSizeKBytes[k] > 64); 2130 BIGK_FRAGMENT_SIZE[k] = 
dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12; 2131 } 2132 2133 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2134 #ifdef __DML_VBA_DEBUG__ 2135 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]); 2136 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); 2137 #endif 2138 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] || 2139 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) || 2140 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) || 2141 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation)); 2142 2143 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] && 2144 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame); 2145 2146 if (use_one_row_for_frame[k]) { 2147 dpte_row_height_luma[k] = st_vars->dpte_row_height_luma_one_row_per_frame[k]; 2148 dpte_row_width_luma_ub[k] = st_vars->dpte_row_width_luma_ub_one_row_per_frame[k]; 2149 st_vars->PixelPTEBytesPerRowY[k] = st_vars->PixelPTEBytesPerRowY_one_row_per_frame[k]; 2150 dpte_row_height_chroma[k] = st_vars->dpte_row_height_chroma_one_row_per_frame[k]; 2151 dpte_row_width_chroma_ub[k] = st_vars->dpte_row_width_chroma_ub_one_row_per_frame[k]; 2152 st_vars->PixelPTEBytesPerRowC[k] = st_vars->PixelPTEBytesPerRowC_one_row_per_frame[k]; 2153 PTEBufferSizeNotExceeded[k] = st_vars->one_row_per_frame_fits_in_buffer[k]; 2154 } 2155 2156 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes) 2157 DCCMetaBufferSizeNotExceeded[k] = true; 2158 else 2159 DCCMetaBufferSizeNotExceeded[k] = false; 2160 2161 PixelPTEBytesPerRow[k] = st_vars->PixelPTEBytesPerRowY[k] + st_vars->PixelPTEBytesPerRowC[k]; 2162 if (use_one_row_for_frame[k]) 2163 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2; 2164 2165 dml32_CalculateRowBandwidth( 2166 GPUVMEnable, 2167 myPipe[k].SourcePixelFormat, 2168 myPipe[k].VRatio, 2169 myPipe[k].VRatioChroma, 2170 
myPipe[k].DCCEnable, 2171 myPipe[k].HTotal / myPipe[k].PixelClock, 2172 st_vars->MetaRowByteY[k], st_vars->MetaRowByteC[k], 2173 meta_row_height[k], 2174 meta_row_height_chroma[k], 2175 st_vars->PixelPTEBytesPerRowY[k], 2176 st_vars->PixelPTEBytesPerRowC[k], 2177 dpte_row_height_luma[k], 2178 dpte_row_height_chroma[k], 2179 2180 /* Output */ 2181 &meta_row_bw[k], 2182 &dpte_row_bw[k]); 2183 #ifdef __DML_VBA_DEBUG__ 2184 dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]); 2185 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n", 2186 __func__, k, use_one_row_for_frame_flip[k]); 2187 dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n", 2188 __func__, k, UseMALLForPStateChange[k]); 2189 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]); 2190 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n", 2191 __func__, k, dpte_row_width_luma_ub[k]); 2192 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowY[k]); 2193 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n", 2194 __func__, k, dpte_row_height_chroma[k]); 2195 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n", 2196 __func__, k, dpte_row_width_chroma_ub[k]); 2197 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, st_vars->PixelPTEBytesPerRowC[k]); 2198 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]); 2199 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n", 2200 __func__, k, PTEBufferSizeNotExceeded[k]); 2201 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]); 2202 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]); 2203 #endif 2204 } 2205 } // CalculateVMRowAndSwath 2206 2207 unsigned int dml32_CalculateVMAndRowBytes( 2208 bool ViewportStationary, 2209 bool DCCEnable, 2210 unsigned int NumberOfDPPs, 2211 unsigned int 
BlockHeight256Bytes, 2212 unsigned int BlockWidth256Bytes, 2213 enum source_format_class SourcePixelFormat, 2214 unsigned int SurfaceTiling, 2215 unsigned int BytePerPixel, 2216 enum dm_rotation_angle SourceRotation, 2217 double SwathWidth, 2218 unsigned int ViewportHeight, 2219 unsigned int ViewportXStart, 2220 unsigned int ViewportYStart, 2221 bool GPUVMEnable, 2222 bool HostVMEnable, 2223 unsigned int HostVMMaxNonCachedPageTableLevels, 2224 unsigned int GPUVMMaxPageTableLevels, 2225 unsigned int GPUVMMinPageSizeKBytes, 2226 unsigned int HostVMMinPageSize, 2227 unsigned int PTEBufferSizeInRequests, 2228 unsigned int Pitch, 2229 unsigned int DCCMetaPitch, 2230 unsigned int MacroTileWidth, 2231 unsigned int MacroTileHeight, 2232 2233 /* Output */ 2234 unsigned int *MetaRowByte, 2235 unsigned int *PixelPTEBytesPerRow, 2236 unsigned int *dpte_row_width_ub, 2237 unsigned int *dpte_row_height, 2238 unsigned int *dpte_row_height_linear, 2239 unsigned int *PixelPTEBytesPerRow_one_row_per_frame, 2240 unsigned int *dpte_row_width_ub_one_row_per_frame, 2241 unsigned int *dpte_row_height_one_row_per_frame, 2242 unsigned int *MetaRequestWidth, 2243 unsigned int *MetaRequestHeight, 2244 unsigned int *meta_row_width, 2245 unsigned int *meta_row_height, 2246 unsigned int *PixelPTEReqWidth, 2247 unsigned int *PixelPTEReqHeight, 2248 unsigned int *PTERequestSize, 2249 unsigned int *DPDE0BytesFrame, 2250 unsigned int *MetaPTEBytesFrame) 2251 { 2252 unsigned int MPDEBytesFrame; 2253 unsigned int DCCMetaSurfaceBytes; 2254 unsigned int ExtraDPDEBytesFrame; 2255 unsigned int PDEAndMetaPTEBytesFrame; 2256 unsigned int HostVMDynamicLevels = 0; 2257 unsigned int MacroTileSizeBytes; 2258 unsigned int vp_height_meta_ub; 2259 unsigned int vp_height_dpte_ub; 2260 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. 
VBA doesn't calculate this 2261 2262 if (GPUVMEnable == true && HostVMEnable == true) { 2263 if (HostVMMinPageSize < 2048) 2264 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 2265 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) 2266 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 2267 else 2268 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 2269 } 2270 2271 *MetaRequestHeight = 8 * BlockHeight256Bytes; 2272 *MetaRequestWidth = 8 * BlockWidth256Bytes; 2273 if (SurfaceTiling == dm_sw_linear) { 2274 *meta_row_height = 32; 2275 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth) 2276 - dml_floor(ViewportXStart, *MetaRequestWidth); 2277 } else if (!IsVertical(SourceRotation)) { 2278 *meta_row_height = *MetaRequestHeight; 2279 if (ViewportStationary && NumberOfDPPs == 1) { 2280 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, 2281 *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth); 2282 } else { 2283 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; 2284 } 2285 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; 2286 } else { 2287 *meta_row_height = *MetaRequestWidth; 2288 if (ViewportStationary && NumberOfDPPs == 1) { 2289 *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1, 2290 *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight); 2291 } else { 2292 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; 2293 } 2294 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; 2295 } 2296 2297 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { 2298 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1, 2299 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * 
BlockHeight256Bytes); 2300 } else if (!IsVertical(SourceRotation)) { 2301 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; 2302 } else { 2303 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes; 2304 } 2305 2306 DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0; 2307 2308 if (GPUVMEnable == true) { 2309 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / 2310 (8 * 4.0 * 1024), 1) + 1) * 64; 2311 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1); 2312 } else { 2313 *MetaPTEBytesFrame = 0; 2314 MPDEBytesFrame = 0; 2315 } 2316 2317 if (DCCEnable != true) { 2318 *MetaPTEBytesFrame = 0; 2319 MPDEBytesFrame = 0; 2320 *MetaRowByte = 0; 2321 } 2322 2323 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight; 2324 2325 if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) { 2326 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) { 2327 vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight + 2328 MacroTileHeight - 1, MacroTileHeight) - 2329 dml_floor(ViewportYStart, MacroTileHeight); 2330 } else if (!IsVertical(SourceRotation)) { 2331 vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight; 2332 } else { 2333 vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight; 2334 } 2335 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) / 2336 (8 * 2097152), 1) + 1); 2337 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2); 2338 } else { 2339 *DPDE0BytesFrame = 0; 2340 ExtraDPDEBytesFrame = 0; 2341 vp_height_dpte_ub = 0; 2342 } 2343 2344 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; 2345 2346 #ifdef __DML_VBA_DEBUG__ 2347 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); 2348 dml_print("DML::%s: GPUVMEnable = 
%d\n", __func__, GPUVMEnable); 2349 dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear); 2350 dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel); 2351 dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels); 2352 dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes); 2353 dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes); 2354 dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight); 2355 dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth); 2356 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); 2357 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); 2358 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); 2359 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); 2360 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 2361 dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight); 2362 dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth); 2363 dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub); 2364 #endif 2365 2366 if (HostVMEnable == true) 2367 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); 2368 2369 if (SurfaceTiling == dm_sw_linear) { 2370 *PixelPTEReqHeight = 1; 2371 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; 2372 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel; 2373 *PTERequestSize = 64; 2374 } else if (GPUVMMinPageSizeKBytes == 4) { 2375 *PixelPTEReqHeight = 16 * BlockHeight256Bytes; 2376 *PixelPTEReqWidth = 16 * BlockWidth256Bytes; 2377 *PTERequestSize = 128; 2378 } else { 2379 *PixelPTEReqHeight = MacroTileHeight; 2380 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel); 2381 *PTERequestSize = 
64; 2382 } 2383 #ifdef __DML_VBA_DEBUG__ 2384 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); 2385 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame); 2386 dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight); 2387 dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth); 2388 dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear); 2389 dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize); 2390 dml_print("DML::%s: Pitch = %d\n", __func__, Pitch); 2391 #endif 2392 2393 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub; 2394 *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame / 2395 (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) * 2396 (double) *PixelPTEReqWidth; 2397 *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth * 2398 *PTERequestSize; 2399 2400 if (SurfaceTiling == dm_sw_linear) { 2401 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2402 *PixelPTEReqWidth / Pitch), 1)); 2403 #ifdef __DML_VBA_DEBUG__ 2404 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__, 2405 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch); 2406 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__, 2407 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch)); 2408 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__, 2409 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); 2410 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__, 2411 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2412 *PixelPTEReqWidth / Pitch), 1)); 2413 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); 2414 #endif 2415 *dpte_row_width_ub = 
dml_ceil(((double) Pitch * (double) *dpte_row_height - 1), 2416 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth; 2417 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize; 2418 2419 // VBA_DELTA, VBA doesn't have programming value for pte row height linear. 2420 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * 2421 PixelPTEReqWidth_linear / Pitch), 1); 2422 if (*dpte_row_height_linear > 128) 2423 *dpte_row_height_linear = 128; 2424 2425 } else if (!IsVertical(SourceRotation)) { 2426 *dpte_row_height = *PixelPTEReqHeight; 2427 2428 if (GPUVMMinPageSizeKBytes > 64) { 2429 *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) / 2430 *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 2431 } else if (ViewportStationary && (NumberOfDPPs == 1)) { 2432 *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth + 2433 *PixelPTEReqWidth - 1, *PixelPTEReqWidth) - 2434 dml_floor(ViewportXStart, *PixelPTEReqWidth); 2435 } else { 2436 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * 2437 *PixelPTEReqWidth; 2438 } 2439 2440 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 2441 } else { 2442 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth); 2443 2444 if (ViewportStationary && (NumberOfDPPs == 1)) { 2445 *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1, 2446 *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight); 2447 } else { 2448 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) 2449 * *PixelPTEReqHeight; 2450 } 2451 2452 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; 2453 } 2454 2455 if (GPUVMEnable != true) 2456 *PixelPTEBytesPerRow = 0; 2457 if (HostVMEnable == true) 2458 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); 2459 2460 #ifdef __DML_VBA_DEBUG__ 
2461 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes); 2462 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height); 2463 dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear); 2464 dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub); 2465 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow); 2466 dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests); 2467 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame); 2468 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n", 2469 __func__, *dpte_row_width_ub_one_row_per_frame); 2470 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n", 2471 __func__, *PixelPTEBytesPerRow_one_row_per_frame); 2472 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", 2473 *MetaPTEBytesFrame); 2474 #endif 2475 2476 return PDEAndMetaPTEBytesFrame; 2477 } // CalculateVMAndRowBytes 2478 2479 double dml32_CalculatePrefetchSourceLines( 2480 double VRatio, 2481 unsigned int VTaps, 2482 bool Interlace, 2483 bool ProgressiveToInterlaceUnitInOPP, 2484 unsigned int SwathHeight, 2485 enum dm_rotation_angle SourceRotation, 2486 bool ViewportStationary, 2487 double SwathWidth, 2488 unsigned int ViewportHeight, 2489 unsigned int ViewportXStart, 2490 unsigned int ViewportYStart, 2491 2492 /* Output */ 2493 double *VInitPreFill, 2494 unsigned int *MaxNumSwath) 2495 { 2496 2497 unsigned int vp_start_rot; 2498 unsigned int sw0_tmp; 2499 unsigned int MaxPartialSwath; 2500 double numLines; 2501 2502 #ifdef __DML_VBA_DEBUG__ 2503 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); 2504 dml_print("DML::%s: VTaps = %d\n", __func__, VTaps); 2505 dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart); 2506 dml_print("DML::%s: ViewportYStart = %d\n", __func__, 
ViewportYStart); 2507 dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary); 2508 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); 2509 #endif 2510 if (ProgressiveToInterlaceUnitInOPP) 2511 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1); 2512 else 2513 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); 2514 2515 if (ViewportStationary) { 2516 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) { 2517 vp_start_rot = SwathHeight - 2518 (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1); 2519 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) { 2520 vp_start_rot = ViewportXStart; 2521 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) { 2522 vp_start_rot = SwathHeight - 2523 (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1); 2524 } else { 2525 vp_start_rot = ViewportYStart; 2526 } 2527 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight); 2528 if (sw0_tmp < *VInitPreFill) 2529 *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1; 2530 else 2531 *MaxNumSwath = 1; 2532 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight); 2533 } else { 2534 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1; 2535 if (*VInitPreFill > 1) 2536 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight); 2537 else 2538 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight); 2539 } 2540 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath; 2541 2542 #ifdef __DML_VBA_DEBUG__ 2543 dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot); 2544 dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill); 2545 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); 2546 dml_print("DML::%s: 
MaxNumSwath = %d\n", __func__, *MaxNumSwath); 2547 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines); 2548 #endif 2549 return numLines; 2550 2551 } // CalculatePrefetchSourceLines 2552 2553 void dml32_CalculateMALLUseForStaticScreen( 2554 unsigned int NumberOfActiveSurfaces, 2555 unsigned int MALLAllocatedForDCNFinal, 2556 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen, 2557 unsigned int SurfaceSizeInMALL[], 2558 bool one_row_per_frame_fits_in_buffer[], 2559 2560 /* output */ 2561 bool UsesMALLForStaticScreen[]) 2562 { 2563 unsigned int k; 2564 unsigned int SurfaceToAddToMALL; 2565 bool CanAddAnotherSurfaceToMALL; 2566 unsigned int TotalSurfaceSizeInMALL; 2567 2568 TotalSurfaceSizeInMALL = 0; 2569 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2570 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable); 2571 if (UsesMALLForStaticScreen[k]) 2572 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k]; 2573 #ifdef __DML_VBA_DEBUG__ 2574 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]); 2575 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL); 2576 #endif 2577 } 2578 2579 SurfaceToAddToMALL = 0; 2580 CanAddAnotherSurfaceToMALL = true; 2581 while (CanAddAnotherSurfaceToMALL) { 2582 CanAddAnotherSurfaceToMALL = false; 2583 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2584 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 && 2585 !UsesMALLForStaticScreen[k] && 2586 UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable && 2587 one_row_per_frame_fits_in_buffer[k] && 2588 (!CanAddAnotherSurfaceToMALL || 2589 SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) { 2590 CanAddAnotherSurfaceToMALL = true; 2591 SurfaceToAddToMALL = k; 2592 #ifdef __DML_VBA_DEBUG__ 2593 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, 
optimize)\n", 2594 __func__, k, UseMALLForStaticScreen[k]); 2595 #endif 2596 } 2597 } 2598 if (CanAddAnotherSurfaceToMALL) { 2599 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true; 2600 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL]; 2601 2602 #ifdef __DML_VBA_DEBUG__ 2603 dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL); 2604 dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL); 2605 #endif 2606 2607 } 2608 } 2609 } 2610 2611 void dml32_CalculateRowBandwidth( 2612 bool GPUVMEnable, 2613 enum source_format_class SourcePixelFormat, 2614 double VRatio, 2615 double VRatioChroma, 2616 bool DCCEnable, 2617 double LineTime, 2618 unsigned int MetaRowByteLuma, 2619 unsigned int MetaRowByteChroma, 2620 unsigned int meta_row_height_luma, 2621 unsigned int meta_row_height_chroma, 2622 unsigned int PixelPTEBytesPerRowLuma, 2623 unsigned int PixelPTEBytesPerRowChroma, 2624 unsigned int dpte_row_height_luma, 2625 unsigned int dpte_row_height_chroma, 2626 /* Output */ 2627 double *meta_row_bw, 2628 double *dpte_row_bw) 2629 { 2630 if (DCCEnable != true) { 2631 *meta_row_bw = 0; 2632 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || 2633 SourcePixelFormat == dm_rgbe_alpha) { 2634 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * 2635 MetaRowByteChroma / (meta_row_height_chroma * LineTime); 2636 } else { 2637 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 2638 } 2639 2640 if (GPUVMEnable != true) { 2641 *dpte_row_bw = 0; 2642 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || 2643 SourcePixelFormat == dm_rgbe_alpha) { 2644 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + 2645 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * 
LineTime); 2646 } else { 2647 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 2648 } 2649 } 2650 2651 double dml32_CalculateUrgentLatency( 2652 double UrgentLatencyPixelDataOnly, 2653 double UrgentLatencyPixelMixedWithVMData, 2654 double UrgentLatencyVMDataOnly, 2655 bool DoUrgentLatencyAdjustment, 2656 double UrgentLatencyAdjustmentFabricClockComponent, 2657 double UrgentLatencyAdjustmentFabricClockReference, 2658 double FabricClock) 2659 { 2660 double ret; 2661 2662 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); 2663 if (DoUrgentLatencyAdjustment == true) { 2664 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * 2665 (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); 2666 } 2667 return ret; 2668 } 2669 2670 void dml32_CalculateUrgentBurstFactor( 2671 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, 2672 unsigned int swath_width_luma_ub, 2673 unsigned int swath_width_chroma_ub, 2674 unsigned int SwathHeightY, 2675 unsigned int SwathHeightC, 2676 double LineTime, 2677 double UrgentLatency, 2678 double CursorBufferSize, 2679 unsigned int CursorWidth, 2680 unsigned int CursorBPP, 2681 double VRatio, 2682 double VRatioC, 2683 double BytePerPixelInDETY, 2684 double BytePerPixelInDETC, 2685 unsigned int DETBufferSizeY, 2686 unsigned int DETBufferSizeC, 2687 /* Output */ 2688 double *UrgentBurstFactorCursor, 2689 double *UrgentBurstFactorLuma, 2690 double *UrgentBurstFactorChroma, 2691 bool *NotEnoughUrgentLatencyHiding) 2692 { 2693 double LinesInDETLuma; 2694 double LinesInDETChroma; 2695 unsigned int LinesInCursorBuffer; 2696 double CursorBufferSizeInTime; 2697 double DETBufferSizeInTimeLuma; 2698 double DETBufferSizeInTimeChroma; 2699 2700 *NotEnoughUrgentLatencyHiding = 0; 2701 2702 if (CursorWidth > 0) { 2703 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / 2704 (CursorWidth * CursorBPP / 8.0)), 
1.0); 2705 if (VRatio > 0) { 2706 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; 2707 if (CursorBufferSizeInTime - UrgentLatency <= 0) { 2708 *NotEnoughUrgentLatencyHiding = 1; 2709 *UrgentBurstFactorCursor = 0; 2710 } else { 2711 *UrgentBurstFactorCursor = CursorBufferSizeInTime / 2712 (CursorBufferSizeInTime - UrgentLatency); 2713 } 2714 } else { 2715 *UrgentBurstFactorCursor = 1; 2716 } 2717 } 2718 2719 LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 : 2720 DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub; 2721 2722 if (VRatio > 0) { 2723 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; 2724 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { 2725 *NotEnoughUrgentLatencyHiding = 1; 2726 *UrgentBurstFactorLuma = 0; 2727 } else { 2728 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); 2729 } 2730 } else { 2731 *UrgentBurstFactorLuma = 1; 2732 } 2733 2734 if (BytePerPixelInDETC > 0) { 2735 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 
2736 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC 2737 / swath_width_chroma_ub; 2738 2739 if (VRatio > 0) { 2740 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; 2741 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { 2742 *NotEnoughUrgentLatencyHiding = 1; 2743 *UrgentBurstFactorChroma = 0; 2744 } else { 2745 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma 2746 / (DETBufferSizeInTimeChroma - UrgentLatency); 2747 } 2748 } else { 2749 *UrgentBurstFactorChroma = 1; 2750 } 2751 } 2752 } // CalculateUrgentBurstFactor 2753 2754 void dml32_CalculateDCFCLKDeepSleep( 2755 unsigned int NumberOfActiveSurfaces, 2756 unsigned int BytePerPixelY[], 2757 unsigned int BytePerPixelC[], 2758 double VRatio[], 2759 double VRatioChroma[], 2760 double SwathWidthY[], 2761 double SwathWidthC[], 2762 unsigned int DPPPerSurface[], 2763 double HRatio[], 2764 double HRatioChroma[], 2765 double PixelClock[], 2766 double PSCL_THROUGHPUT[], 2767 double PSCL_THROUGHPUT_CHROMA[], 2768 double Dppclk[], 2769 double ReadBandwidthLuma[], 2770 double ReadBandwidthChroma[], 2771 unsigned int ReturnBusWidth, 2772 2773 /* Output */ 2774 double *DCFClkDeepSleep) 2775 { 2776 unsigned int k; 2777 double DisplayPipeLineDeliveryTimeLuma; 2778 double DisplayPipeLineDeliveryTimeChroma; 2779 double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX]; 2780 double ReadBandwidth = 0.0; 2781 2782 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2783 2784 if (VRatio[k] <= 1) { 2785 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k] 2786 / PixelClock[k]; 2787 } else { 2788 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 2789 } 2790 if (BytePerPixelC[k] == 0) { 2791 DisplayPipeLineDeliveryTimeChroma = 0; 2792 } else { 2793 if (VRatioChroma[k] <= 1) { 2794 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * 2795 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 2796 } else { 2797 
DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] 2798 / Dppclk[k]; 2799 } 2800 } 2801 2802 if (BytePerPixelC[k] > 0) { 2803 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * 2804 BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 2805 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 2806 32.0 / DisplayPipeLineDeliveryTimeChroma); 2807 } else { 2808 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 2809 64.0 / DisplayPipeLineDeliveryTimeLuma; 2810 } 2811 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16); 2812 2813 #ifdef __DML_VBA_DEBUG__ 2814 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]); 2815 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]); 2816 #endif 2817 } 2818 2819 for (k = 0; k < NumberOfActiveSurfaces; ++k) 2820 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 2821 2822 *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth); 2823 2824 #ifdef __DML_VBA_DEBUG__ 2825 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__); 2826 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth); 2827 dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth); 2828 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep); 2829 #endif 2830 2831 for (k = 0; k < NumberOfActiveSurfaces; ++k) 2832 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]); 2833 #ifdef __DML_VBA_DEBUG__ 2834 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep); 2835 #endif 2836 } // CalculateDCFCLKDeepSleep 2837 2838 double dml32_CalculateWriteBackDelay( 2839 enum source_format_class WritebackPixelFormat, 2840 double WritebackHRatio, 2841 double 
WritebackVRatio, 2842 unsigned int WritebackVTaps, 2843 unsigned int WritebackDestinationWidth, 2844 unsigned int WritebackDestinationHeight, 2845 unsigned int WritebackSourceHeight, 2846 unsigned int HTotal) 2847 { 2848 double CalculateWriteBackDelay; 2849 double Line_length; 2850 double Output_lines_last_notclamped; 2851 double WritebackVInit; 2852 2853 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; 2854 Line_length = dml_max((double) WritebackDestinationWidth, 2855 dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps); 2856 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - 2857 dml_ceil(((double)WritebackSourceHeight - 2858 (double) WritebackVInit) / (double)WritebackVRatio, 1.0); 2859 if (Output_lines_last_notclamped < 0) { 2860 CalculateWriteBackDelay = 0; 2861 } else { 2862 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + 2863 (HTotal - WritebackDestinationWidth) + 80; 2864 } 2865 return CalculateWriteBackDelay; 2866 } 2867 2868 void dml32_UseMinimumDCFCLK( 2869 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 2870 bool DRRDisplay[], 2871 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 2872 unsigned int MaxInterDCNTileRepeaters, 2873 unsigned int MaxPrefetchMode, 2874 double DRAMClockChangeLatencyFinal, 2875 double FCLKChangeLatency, 2876 double SREnterPlusExitTime, 2877 unsigned int ReturnBusWidth, 2878 unsigned int RoundTripPingLatencyCycles, 2879 unsigned int ReorderingBytes, 2880 unsigned int PixelChunkSizeInKByte, 2881 unsigned int MetaChunkSize, 2882 bool GPUVMEnable, 2883 unsigned int GPUVMMaxPageTableLevels, 2884 bool HostVMEnable, 2885 unsigned int NumberOfActiveSurfaces, 2886 double HostVMMinPageSize, 2887 unsigned int HostVMMaxNonCachedPageTableLevels, 2888 bool DynamicMetadataVMEnabled, 2889 bool ImmediateFlipRequirement, 2890 bool ProgressiveToInterlaceUnitInOPP, 2891 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation, 2892 double 
PercentOfIdealSDPPortBWReceivedAfterUrgLatency, 2893 unsigned int VTotal[], 2894 unsigned int VActive[], 2895 unsigned int DynamicMetadataTransmittedBytes[], 2896 unsigned int DynamicMetadataLinesBeforeActiveRequired[], 2897 bool Interlace[], 2898 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX], 2899 double RequiredDISPCLK[][2], 2900 double UrgLatency[], 2901 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX], 2902 double ProjectedDCFClkDeepSleep[][2], 2903 double MaximumVStartup[][2][DC__NUM_DPP__MAX], 2904 unsigned int TotalNumberOfActiveDPP[][2], 2905 unsigned int TotalNumberOfDCCActiveDPP[][2], 2906 unsigned int dpte_group_bytes[], 2907 double PrefetchLinesY[][2][DC__NUM_DPP__MAX], 2908 double PrefetchLinesC[][2][DC__NUM_DPP__MAX], 2909 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX], 2910 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX], 2911 unsigned int BytePerPixelY[], 2912 unsigned int BytePerPixelC[], 2913 unsigned int HTotal[], 2914 double PixelClock[], 2915 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX], 2916 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX], 2917 double MetaRowBytes[][2][DC__NUM_DPP__MAX], 2918 bool DynamicMetadataEnable[], 2919 double ReadBandwidthLuma[], 2920 double ReadBandwidthChroma[], 2921 double DCFCLKPerState[], 2922 /* Output */ 2923 double DCFCLKState[][2]) 2924 { 2925 unsigned int i, j, k; 2926 unsigned int dummy1; 2927 double dummy2, dummy3; 2928 double NormalEfficiency; 2929 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; 2930 2931 NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0; 2932 for (i = 0; i < DC__VOLTAGE_STATES; ++i) { 2933 for (j = 0; j <= 1; ++j) { 2934 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; 2935 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; 2936 double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX]; 2937 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; 2938 double MinimumTWait = 
0.0; 2939 double DPTEBandwidth; 2940 double DCFCLKRequiredForAverageBandwidth; 2941 unsigned int ExtraLatencyBytes; 2942 double ExtraLatencyCycles; 2943 double DCFCLKRequiredForPeakBandwidth; 2944 unsigned int NoOfDPPState[DC__NUM_DPP__MAX]; 2945 double MinimumTvmPlus2Tr0; 2946 2947 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; 2948 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2949 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 2950 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] 2951 / (15.75 * HTotal[k] / PixelClock[k]); 2952 } 2953 2954 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) 2955 NoOfDPPState[k] = NoOfDPP[i][j][k]; 2956 2957 DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]; 2958 DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth); 2959 2960 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes, 2961 TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte, 2962 TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable, 2963 NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize, 2964 HostVMMaxNonCachedPageTableLevels); 2965 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ 2966 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth; 2967 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 2968 double DCFCLKCyclesRequiredInPrefetch; 2969 double PrefetchTime; 2970 2971 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] 2972 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k] 2973 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] 2974 * BytePerPixelC[k]) / NormalEfficiency 2975 / ReturnBusWidth; 2976 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] 2977 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency 2978 / NormalEfficiency / ReturnBusWidth 2979 * (GPUVMMaxPageTableLevels > 2 ? 
1 : 0) 2980 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency 2981 / ReturnBusWidth 2982 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth 2983 + PixelDCFCLKCyclesRequiredInPrefetch[k]; 2984 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) 2985 * HTotal[k] / PixelClock[k]; 2986 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true && 2987 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ? 2988 UrgLatency[i] * GPUVMMaxPageTableLevels * 2989 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; 2990 2991 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode, 2992 UseMALLForPStateChange[k], 2993 SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 2994 DRRDisplay[k], 2995 DRAMClockChangeLatencyFinal, 2996 FCLKChangeLatency, 2997 UrgLatency[i], 2998 SREnterPlusExitTime); 2999 3000 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - 3001 MinimumTWait - UrgLatency[i] * 3002 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : 3003 GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ? 
3004 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) - 3005 DynamicMetadataVMExtraLatency[k]; 3006 3007 if (PrefetchTime > 0) { 3008 double ExpectedVRatioPrefetch; 3009 3010 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime * 3011 PixelDCFCLKCyclesRequiredInPrefetch[k] / 3012 DCFCLKCyclesRequiredInPrefetch); 3013 DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] * 3014 PixelDCFCLKCyclesRequiredInPrefetch[k] / 3015 PrefetchPixelLinesTime[k] * 3016 dml_max(1.0, ExpectedVRatioPrefetch) * 3017 dml_max(1.0, ExpectedVRatioPrefetch / 4); 3018 if (HostVMEnable == true || ImmediateFlipRequirement == true) { 3019 DCFCLKRequiredForPeakBandwidthPerSurface[k] = 3020 DCFCLKRequiredForPeakBandwidthPerSurface[k] + 3021 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / 3022 NormalEfficiency / ReturnBusWidth; 3023 } 3024 } else { 3025 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; 3026 } 3027 if (DynamicMetadataEnable[k] == true) { 3028 double TSetupPipe; 3029 double TdmbfPipe; 3030 double TdmsksPipe; 3031 double TdmecPipe; 3032 double AllowedTimeForUrgentExtraLatency; 3033 3034 dml32_CalculateVUpdateAndDynamicMetadataParameters( 3035 MaxInterDCNTileRepeaters, 3036 RequiredDPPCLKPerSurface[i][j][k], 3037 RequiredDISPCLK[i][j], 3038 ProjectedDCFClkDeepSleep[i][j], 3039 PixelClock[k], 3040 HTotal[k], 3041 VTotal[k] - VActive[k], 3042 DynamicMetadataTransmittedBytes[k], 3043 DynamicMetadataLinesBeforeActiveRequired[k], 3044 Interlace[k], 3045 ProgressiveToInterlaceUnitInOPP, 3046 3047 /* output */ 3048 &TSetupPipe, 3049 &TdmbfPipe, 3050 &TdmecPipe, 3051 &TdmsksPipe, 3052 &dummy1, 3053 &dummy2, 3054 &dummy3); 3055 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / 3056 PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - 3057 TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; 3058 if (AllowedTimeForUrgentExtraLatency > 0) 3059 DCFCLKRequiredForPeakBandwidthPerSurface[k] = 3060 
dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k], 3061 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); 3062 else 3063 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i]; 3064 } 3065 } 3066 DCFCLKRequiredForPeakBandwidth = 0; 3067 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) { 3068 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + 3069 DCFCLKRequiredForPeakBandwidthPerSurface[k]; 3070 } 3071 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ? 3072 (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) * 3073 (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0); 3074 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3075 double MaximumTvmPlus2Tr0PlusTsw; 3076 3077 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / 3078 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; 3079 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { 3080 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i]; 3081 } else { 3082 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth, 3083 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - 3084 MinimumTvmPlus2Tr0 - 3085 PrefetchPixelLinesTime[k] / 4), 3086 (2 * ExtraLatencyCycles + 3087 PixelDCFCLKCyclesRequiredInPrefetch[k]) / 3088 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); 3089 } 3090 } 3091 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * 3092 dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); 3093 } 3094 } 3095 } 3096 3097 unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes, 3098 unsigned int TotalNumberOfActiveDPP, 3099 unsigned int PixelChunkSizeInKByte, 3100 unsigned int TotalNumberOfDCCActiveDPP, 3101 unsigned int MetaChunkSize, 3102 bool GPUVMEnable, 3103 bool HostVMEnable, 3104 unsigned int NumberOfActiveSurfaces, 3105 unsigned int NumberOfDPP[], 3106 unsigned int dpte_group_bytes[], 3107 double 
HostVMInefficiencyFactor, 3108 double HostVMMinPageSize, 3109 unsigned int HostVMMaxNonCachedPageTableLevels) 3110 { 3111 unsigned int k; 3112 double ret; 3113 unsigned int HostVMDynamicLevels; 3114 3115 if (GPUVMEnable == true && HostVMEnable == true) { 3116 if (HostVMMinPageSize < 2048) 3117 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 3118 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) 3119 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 3120 else 3121 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 3122 } else { 3123 HostVMDynamicLevels = 0; 3124 } 3125 3126 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + 3127 TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; 3128 3129 if (GPUVMEnable == true) { 3130 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 3131 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * 3132 (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; 3133 } 3134 } 3135 return ret; 3136 } 3137 3138 void dml32_CalculateVUpdateAndDynamicMetadataParameters( 3139 unsigned int MaxInterDCNTileRepeaters, 3140 double Dppclk, 3141 double Dispclk, 3142 double DCFClkDeepSleep, 3143 double PixelClock, 3144 unsigned int HTotal, 3145 unsigned int VBlank, 3146 unsigned int DynamicMetadataTransmittedBytes, 3147 unsigned int DynamicMetadataLinesBeforeActiveRequired, 3148 unsigned int InterlaceEnable, 3149 bool ProgressiveToInterlaceUnitInOPP, 3150 3151 /* output */ 3152 double *TSetup, 3153 double *Tdmbf, 3154 double *Tdmec, 3155 double *Tdmsks, 3156 unsigned int *VUpdateOffsetPix, 3157 double *VUpdateWidthPix, 3158 double *VReadyOffsetPix) 3159 { 3160 double TotalRepeaterDelayTime; 3161 3162 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk); 3163 *VUpdateWidthPix = 3164 dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0); 3165 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / 
Dppclk, 3166 TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0); 3167 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0); 3168 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; 3169 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk; 3170 *Tdmec = HTotal / PixelClock; 3171 3172 if (DynamicMetadataLinesBeforeActiveRequired == 0) 3173 *Tdmsks = VBlank * HTotal / PixelClock / 2.0; 3174 else 3175 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; 3176 3177 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) 3178 *Tdmsks = *Tdmsks / 2; 3179 #ifdef __DML_VBA_DEBUG__ 3180 dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix); 3181 dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix); 3182 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix); 3183 3184 dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n", 3185 __func__, DynamicMetadataLinesBeforeActiveRequired); 3186 dml_print("DML::%s: VBlank = %d\n", __func__, VBlank); 3187 dml_print("DML::%s: HTotal = %d\n", __func__, HTotal); 3188 dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock); 3189 dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks); 3190 #endif 3191 } 3192 3193 double dml32_CalculateTWait( 3194 unsigned int PrefetchMode, 3195 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange, 3196 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 3197 bool DRRDisplay, 3198 double DRAMClockChangeLatency, 3199 double FCLKChangeLatency, 3200 double UrgentLatency, 3201 double SREnterPlusExitTime) 3202 { 3203 double TWait = 0.0; 3204 3205 if (PrefetchMode == 0 && 3206 !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) && 3207 !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) && 3208 !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) && 3209 
!(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) { 3210 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); 3211 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) { 3212 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency); 3213 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) { 3214 TWait = dml_max(SREnterPlusExitTime, UrgentLatency); 3215 } else { 3216 TWait = UrgentLatency; 3217 } 3218 3219 #ifdef __DML_VBA_DEBUG__ 3220 dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode); 3221 dml_print("DML::%s: TWait = %f\n", __func__, TWait); 3222 #endif 3223 return TWait; 3224 } // CalculateTWait 3225 3226 // Function: get_return_bw_mbps 3227 // Megabyte per second 3228 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc, 3229 const int VoltageLevel, 3230 const bool HostVMEnable, 3231 const double DCFCLK, 3232 const double FabricClock, 3233 const double DRAMSpeed) 3234 { 3235 double ReturnBW = 0.; 3236 double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK; 3237 double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes; 3238 double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes; 3239 double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, 3240 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, 3241 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe : 3242 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100); 3243 double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100, 3244 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100, 3245 IdealDRAMBandwidth * (VoltageLevel < 2 ? 
soc->pct_ideal_dram_bw_after_urgent_strobe : 3246 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100); 3247 3248 if (HostVMEnable != true) 3249 ReturnBW = PixelDataOnlyReturnBW; 3250 else 3251 ReturnBW = PixelMixedWithVMDataReturnBW; 3252 3253 #ifdef __DML_VBA_DEBUG__ 3254 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel); 3255 dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable); 3256 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 3257 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); 3258 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); 3259 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth); 3260 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth); 3261 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth); 3262 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW); 3263 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW); 3264 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW); 3265 #endif 3266 return ReturnBW; 3267 } 3268 3269 // Function: get_return_bw_mbps_vm_only 3270 // Megabyte per second 3271 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc, 3272 const int VoltageLevel, 3273 const double DCFCLK, 3274 const double FabricClock, 3275 const double DRAMSpeed) 3276 { 3277 double VMDataOnlyReturnBW = dml_min3( 3278 soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0, 3279 FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes 3280 * soc->pct_ideal_sdp_bw_after_urgent / 100.0, 3281 DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes 3282 * (VoltageLevel < 2 ? 
3283 soc->pct_ideal_dram_bw_after_urgent_strobe : 3284 soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0); 3285 #ifdef __DML_VBA_DEBUG__ 3286 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel); 3287 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 3288 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock); 3289 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed); 3290 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); 3291 #endif 3292 return VMDataOnlyReturnBW; 3293 } 3294 3295 double dml32_CalculateExtraLatency( 3296 unsigned int RoundTripPingLatencyCycles, 3297 unsigned int ReorderingBytes, 3298 double DCFCLK, 3299 unsigned int TotalNumberOfActiveDPP, 3300 unsigned int PixelChunkSizeInKByte, 3301 unsigned int TotalNumberOfDCCActiveDPP, 3302 unsigned int MetaChunkSize, 3303 double ReturnBW, 3304 bool GPUVMEnable, 3305 bool HostVMEnable, 3306 unsigned int NumberOfActiveSurfaces, 3307 unsigned int NumberOfDPP[], 3308 unsigned int dpte_group_bytes[], 3309 double HostVMInefficiencyFactor, 3310 double HostVMMinPageSize, 3311 unsigned int HostVMMaxNonCachedPageTableLevels) 3312 { 3313 double ExtraLatencyBytes; 3314 double ExtraLatency; 3315 3316 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes( 3317 ReorderingBytes, 3318 TotalNumberOfActiveDPP, 3319 PixelChunkSizeInKByte, 3320 TotalNumberOfDCCActiveDPP, 3321 MetaChunkSize, 3322 GPUVMEnable, 3323 HostVMEnable, 3324 NumberOfActiveSurfaces, 3325 NumberOfDPP, 3326 dpte_group_bytes, 3327 HostVMInefficiencyFactor, 3328 HostVMMinPageSize, 3329 HostVMMaxNonCachedPageTableLevels); 3330 3331 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; 3332 3333 #ifdef __DML_VBA_DEBUG__ 3334 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); 3335 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); 3336 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, 
ExtraLatencyBytes); 3337 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); 3338 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); 3339 #endif 3340 3341 return ExtraLatency; 3342 } // CalculateExtraLatency 3343 3344 bool dml32_CalculatePrefetchSchedule( 3345 struct dml32_CalculatePrefetchSchedule *st_vars, 3346 double HostVMInefficiencyFactor, 3347 DmlPipe *myPipe, 3348 unsigned int DSCDelay, 3349 double DPPCLKDelaySubtotalPlusCNVCFormater, 3350 double DPPCLKDelaySCL, 3351 double DPPCLKDelaySCLLBOnly, 3352 double DPPCLKDelayCNVCCursor, 3353 double DISPCLKDelaySubtotal, 3354 unsigned int DPP_RECOUT_WIDTH, 3355 enum output_format_class OutputFormat, 3356 unsigned int MaxInterDCNTileRepeaters, 3357 unsigned int VStartup, 3358 unsigned int MaxVStartup, 3359 unsigned int GPUVMPageTableLevels, 3360 bool GPUVMEnable, 3361 bool HostVMEnable, 3362 unsigned int HostVMMaxNonCachedPageTableLevels, 3363 double HostVMMinPageSize, 3364 bool DynamicMetadataEnable, 3365 bool DynamicMetadataVMEnabled, 3366 int DynamicMetadataLinesBeforeActiveRequired, 3367 unsigned int DynamicMetadataTransmittedBytes, 3368 double UrgentLatency, 3369 double UrgentExtraLatency, 3370 double TCalc, 3371 unsigned int PDEAndMetaPTEBytesFrame, 3372 unsigned int MetaRowByte, 3373 unsigned int PixelPTEBytesPerRow, 3374 double PrefetchSourceLinesY, 3375 unsigned int SwathWidthY, 3376 unsigned int VInitPreFillY, 3377 unsigned int MaxNumSwathY, 3378 double PrefetchSourceLinesC, 3379 unsigned int SwathWidthC, 3380 unsigned int VInitPreFillC, 3381 unsigned int MaxNumSwathC, 3382 unsigned int swath_width_luma_ub, 3383 unsigned int swath_width_chroma_ub, 3384 unsigned int SwathHeightY, 3385 unsigned int SwathHeightC, 3386 double TWait, 3387 /* Output */ 3388 double *DSTXAfterScaler, 3389 double *DSTYAfterScaler, 3390 double *DestinationLinesForPrefetch, 3391 double *PrefetchBandwidth, 3392 double *DestinationLinesToRequestVMInVBlank, 3393 double *DestinationLinesToRequestRowInVBlank, 3394 
double *VRatioPrefetchY, 3395 double *VRatioPrefetchC, 3396 double *RequiredPrefetchPixDataBWLuma, 3397 double *RequiredPrefetchPixDataBWChroma, 3398 bool *NotEnoughTimeForDynamicMetadata, 3399 double *Tno_bw, 3400 double *prefetch_vmrow_bw, 3401 double *Tdmdl_vm, 3402 double *Tdmdl, 3403 double *TSetup, 3404 unsigned int *VUpdateOffsetPix, 3405 double *VUpdateWidthPix, 3406 double *VReadyOffsetPix) 3407 { 3408 bool MyError = false; 3409 3410 st_vars->TimeForFetchingMetaPTE = 0; 3411 st_vars->TimeForFetchingRowInVBlank = 0; 3412 st_vars->LinesToRequestPrefetchPixelData = 0; 3413 st_vars->max_vratio_pre = __DML_MAX_VRATIO_PRE__; 3414 st_vars->Tsw_est1 = 0; 3415 st_vars->Tsw_est3 = 0; 3416 3417 if (GPUVMEnable == true && HostVMEnable == true) 3418 st_vars->HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 3419 else 3420 st_vars->HostVMDynamicLevelsTrips = 0; 3421 #ifdef __DML_VBA_DEBUG__ 3422 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 3423 dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels); 3424 dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable); 3425 dml_print("DML::%s: HostVMEnable=%d HostVMInefficiencyFactor=%f\n", 3426 __func__, HostVMEnable, HostVMInefficiencyFactor); 3427 #endif 3428 dml32_CalculateVUpdateAndDynamicMetadataParameters( 3429 MaxInterDCNTileRepeaters, 3430 myPipe->Dppclk, 3431 myPipe->Dispclk, 3432 myPipe->DCFClkDeepSleep, 3433 myPipe->PixelClock, 3434 myPipe->HTotal, 3435 myPipe->VBlank, 3436 DynamicMetadataTransmittedBytes, 3437 DynamicMetadataLinesBeforeActiveRequired, 3438 myPipe->InterlaceEnable, 3439 myPipe->ProgressiveToInterlaceUnitInOPP, 3440 TSetup, 3441 3442 /* output */ 3443 &st_vars->Tdmbf, 3444 &st_vars->Tdmec, 3445 &st_vars->Tdmsks, 3446 VUpdateOffsetPix, 3447 VUpdateWidthPix, 3448 VReadyOffsetPix); 3449 3450 st_vars->LineTime = myPipe->HTotal / myPipe->PixelClock; 3451 st_vars->trip_to_mem = UrgentLatency; 3452 st_vars->Tvm_trips = UrgentExtraLatency 
+ st_vars->trip_to_mem * (GPUVMPageTableLevels * (st_vars->HostVMDynamicLevelsTrips + 1) - 1); 3453 3454 if (DynamicMetadataVMEnabled == true) 3455 *Tdmdl = TWait + st_vars->Tvm_trips + st_vars->trip_to_mem; 3456 else 3457 *Tdmdl = TWait + UrgentExtraLatency; 3458 3459 #ifdef __DML_VBA_ALLOW_DELTA__ 3460 if (DynamicMetadataEnable == false) 3461 *Tdmdl = 0.0; 3462 #endif 3463 3464 if (DynamicMetadataEnable == true) { 3465 if (VStartup * st_vars->LineTime < *TSetup + *Tdmdl + st_vars->Tdmbf + st_vars->Tdmec + st_vars->Tdmsks) { 3466 *NotEnoughTimeForDynamicMetadata = true; 3467 #ifdef __DML_VBA_DEBUG__ 3468 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 3469 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", 3470 __func__, st_vars->Tdmbf); 3471 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec); 3472 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", 3473 __func__, st_vars->Tdmsks); 3474 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", 3475 __func__, *Tdmdl); 3476 #endif 3477 } else { 3478 *NotEnoughTimeForDynamicMetadata = false; 3479 } 3480 } else { 3481 *NotEnoughTimeForDynamicMetadata = false; 3482 } 3483 3484 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && 3485 GPUVMEnable == true ? 
TWait + st_vars->Tvm_trips : 0); 3486 3487 if (myPipe->ScalerEnabled) 3488 st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; 3489 else 3490 st_vars->DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; 3491 3492 st_vars->DPPCycles = st_vars->DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; 3493 3494 st_vars->DISPCLKCycles = DISPCLKDelaySubtotal; 3495 3496 if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0) 3497 return true; 3498 3499 *DSTXAfterScaler = st_vars->DPPCycles * myPipe->PixelClock / myPipe->Dppclk + st_vars->DISPCLKCycles * 3500 myPipe->PixelClock / myPipe->Dispclk + DSCDelay; 3501 3502 *DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0) 3503 + (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH 3504 + ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ? 3505 myPipe->HActive / 2 : 0) 3506 + ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0); 3507 3508 #ifdef __DML_VBA_DEBUG__ 3509 dml_print("DML::%s: DPPCycles: %d\n", __func__, st_vars->DPPCycles); 3510 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 3511 dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk); 3512 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, st_vars->DISPCLKCycles); 3513 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk); 3514 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 3515 dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode); 3516 dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH); 3517 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); 3518 #endif 3519 3520 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) 3521 *DSTYAfterScaler = 1; 3522 else 3523 *DSTYAfterScaler = 0; 3524 3525 st_vars->DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 
3526 *DSTYAfterScaler = dml_floor(st_vars->DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 3527 *DSTXAfterScaler = st_vars->DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 3528 #ifdef __DML_VBA_DEBUG__ 3529 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 3530 dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler); 3531 #endif 3532 3533 MyError = false; 3534 3535 st_vars->Tr0_trips = st_vars->trip_to_mem * (st_vars->HostVMDynamicLevelsTrips + 1); 3536 3537 if (GPUVMEnable == true) { 3538 st_vars->Tvm_trips_rounded = dml_ceil(4.0 * st_vars->Tvm_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; 3539 st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; 3540 if (GPUVMPageTableLevels >= 3) { 3541 *Tno_bw = UrgentExtraLatency + st_vars->trip_to_mem * 3542 (double) ((GPUVMPageTableLevels - 2) * (st_vars->HostVMDynamicLevelsTrips + 1) - 1); 3543 } else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) { 3544 st_vars->Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / st_vars->LineTime, 1.0) / 3545 4.0 * st_vars->LineTime; // VBA_ERROR 3546 *Tno_bw = UrgentExtraLatency; 3547 } else { 3548 *Tno_bw = 0; 3549 } 3550 } else if (myPipe->DCCEnable == true) { 3551 st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0; 3552 st_vars->Tr0_trips_rounded = dml_ceil(4.0 * st_vars->Tr0_trips / st_vars->LineTime, 1.0) / 4.0 * st_vars->LineTime; 3553 *Tno_bw = 0; 3554 } else { 3555 st_vars->Tvm_trips_rounded = st_vars->LineTime / 4.0; 3556 st_vars->Tr0_trips_rounded = st_vars->LineTime / 2.0; 3557 *Tno_bw = 0; 3558 } 3559 st_vars->Tvm_trips_rounded = dml_max(st_vars->Tvm_trips_rounded, st_vars->LineTime / 4.0); 3560 st_vars->Tr0_trips_rounded = dml_max(st_vars->Tr0_trips_rounded, st_vars->LineTime / 4.0); 3561 3562 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 3563 || 
myPipe->SourcePixelFormat == dm_420_12) { 3564 st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 3565 } else { 3566 st_vars->bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 3567 } 3568 3569 st_vars->prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY 3570 + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 3571 st_vars->prefetch_bw_oto = dml_max(st_vars->bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface, 3572 st_vars->prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * st_vars->LineTime)); 3573 3574 st_vars->min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / st_vars->max_vratio_pre; 3575 st_vars->min_Lsw = dml_max(st_vars->min_Lsw, 1.0); 3576 st_vars->Lsw_oto = dml_ceil(4.0 * dml_max(st_vars->prefetch_sw_bytes / st_vars->prefetch_bw_oto / st_vars->LineTime, st_vars->min_Lsw), 1.0) / 4.0; 3577 3578 if (GPUVMEnable == true) { 3579 st_vars->Tvm_oto = dml_max3( 3580 st_vars->Tvm_trips, 3581 *Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / st_vars->prefetch_bw_oto, 3582 st_vars->LineTime / 4.0); 3583 } else 3584 st_vars->Tvm_oto = st_vars->LineTime / 4.0; 3585 3586 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 3587 st_vars->Tr0_oto = dml_max4( 3588 st_vars->Tr0_trips, 3589 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto, 3590 (st_vars->LineTime - st_vars->Tvm_oto)/2.0, 3591 st_vars->LineTime / 4.0); 3592 #ifdef __DML_VBA_DEBUG__ 3593 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, 3594 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / st_vars->prefetch_bw_oto); 3595 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, st_vars->Tr0_trips); 3596 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, st_vars->LineTime - st_vars->Tvm_oto); 3597 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, st_vars->LineTime / 4); 3598 #endif 3599 } else 3600 st_vars->Tr0_oto = 
(st_vars->LineTime - st_vars->Tvm_oto) / 2.0; 3601 3602 st_vars->Tvm_oto_lines = dml_ceil(4.0 * st_vars->Tvm_oto / st_vars->LineTime, 1) / 4.0; 3603 st_vars->Tr0_oto_lines = dml_ceil(4.0 * st_vars->Tr0_oto / st_vars->LineTime, 1) / 4.0; 3604 st_vars->dst_y_prefetch_oto = st_vars->Tvm_oto_lines + 2 * st_vars->Tr0_oto_lines + st_vars->Lsw_oto; 3605 3606 st_vars->dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / st_vars->LineTime - 3607 (*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal); 3608 3609 #ifdef __DML_VBA_DEBUG__ 3610 dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal); 3611 dml_print("DML::%s: min_Lsw = %f\n", __func__, st_vars->min_Lsw); 3612 dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw); 3613 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency); 3614 dml_print("DML::%s: trip_to_mem = %f\n", __func__, st_vars->trip_to_mem); 3615 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3616 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3617 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3618 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC); 3619 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 3620 dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub); 3621 dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, st_vars->prefetch_sw_bytes); 3622 dml_print("DML::%s: bytes_pp = %f\n", __func__, st_vars->bytes_pp); 3623 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 3624 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 3625 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 3626 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 3627 dml_print("DML::%s: Tvm_trips 
= %f\n", __func__, st_vars->Tvm_trips); 3628 dml_print("DML::%s: Tr0_trips = %f\n", __func__, st_vars->Tr0_trips); 3629 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, st_vars->prefetch_bw_oto); 3630 dml_print("DML::%s: Tr0_oto = %f\n", __func__, st_vars->Tr0_oto); 3631 dml_print("DML::%s: Tvm_oto = %f\n", __func__, st_vars->Tvm_oto); 3632 dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, st_vars->Tvm_oto_lines); 3633 dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, st_vars->Tr0_oto_lines); 3634 dml_print("DML::%s: Lsw_oto = %f\n", __func__, st_vars->Lsw_oto); 3635 dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, st_vars->dst_y_prefetch_oto); 3636 dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, st_vars->dst_y_prefetch_equ); 3637 #endif 3638 3639 st_vars->dst_y_prefetch_equ = dml_floor(4.0 * (st_vars->dst_y_prefetch_equ + 0.125), 1) / 4.0; 3640 st_vars->Tpre_rounded = st_vars->dst_y_prefetch_equ * st_vars->LineTime; 3641 #ifdef __DML_VBA_DEBUG__ 3642 dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, st_vars->dst_y_prefetch_equ); 3643 dml_print("DML::%s: LineTime: %f\n", __func__, st_vars->LineTime); 3644 dml_print("DML::%s: VStartup: %d\n", __func__, VStartup); 3645 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", 3646 __func__, VStartup * st_vars->LineTime); 3647 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup); 3648 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc); 3649 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, st_vars->Tdmbf); 3650 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, st_vars->Tdmec); 3651 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm); 3652 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, 
*Tdmdl); 3653 dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n", 3654 __func__, *DSTYAfterScaler); 3655 #endif 3656 st_vars->dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, 3657 MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 3658 3659 if (st_vars->prefetch_sw_bytes < st_vars->dep_bytes) 3660 st_vars->prefetch_sw_bytes = 2 * st_vars->dep_bytes; 3661 3662 *PrefetchBandwidth = 0; 3663 *DestinationLinesToRequestVMInVBlank = 0; 3664 *DestinationLinesToRequestRowInVBlank = 0; 3665 *VRatioPrefetchY = 0; 3666 *VRatioPrefetchC = 0; 3667 *RequiredPrefetchPixDataBWLuma = 0; 3668 if (st_vars->dst_y_prefetch_equ > 1) { 3669 double PrefetchBandwidth1; 3670 double PrefetchBandwidth2; 3671 double PrefetchBandwidth3; 3672 double PrefetchBandwidth4; 3673 3674 if (st_vars->Tpre_rounded - *Tno_bw > 0) { 3675 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3676 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3677 + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - *Tno_bw); 3678 st_vars->Tsw_est1 = st_vars->prefetch_sw_bytes / PrefetchBandwidth1; 3679 } else 3680 PrefetchBandwidth1 = 0; 3681 3682 if (VStartup == MaxVStartup && (st_vars->Tsw_est1 / st_vars->LineTime < st_vars->min_Lsw) 3683 && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw > 0) { 3684 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte 3685 + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3686 / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - *Tno_bw); 3687 } 3688 3689 if (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded > 0) 3690 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + st_vars->prefetch_sw_bytes) / 3691 (st_vars->Tpre_rounded - *Tno_bw - 2 * st_vars->Tr0_trips_rounded); 3692 else 3693 
PrefetchBandwidth2 = 0; 3694 3695 if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded > 0) { 3696 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 3697 + st_vars->prefetch_sw_bytes) / (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded); 3698 st_vars->Tsw_est3 = st_vars->prefetch_sw_bytes / PrefetchBandwidth3; 3699 } else 3700 PrefetchBandwidth3 = 0; 3701 3702 3703 if (VStartup == MaxVStartup && 3704 (st_vars->Tsw_est3 / st_vars->LineTime < st_vars->min_Lsw) && st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * 3705 st_vars->LineTime - st_vars->Tvm_trips_rounded > 0) { 3706 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3707 / (st_vars->Tpre_rounded - st_vars->min_Lsw * st_vars->LineTime - 0.75 * st_vars->LineTime - st_vars->Tvm_trips_rounded); 3708 } 3709 3710 if (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded > 0) { 3711 PrefetchBandwidth4 = st_vars->prefetch_sw_bytes / 3712 (st_vars->Tpre_rounded - st_vars->Tvm_trips_rounded - 2 * st_vars->Tr0_trips_rounded); 3713 } else { 3714 PrefetchBandwidth4 = 0; 3715 } 3716 3717 #ifdef __DML_VBA_DEBUG__ 3718 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, st_vars->Tpre_rounded); 3719 dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw); 3720 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, st_vars->Tvm_trips_rounded); 3721 dml_print("DML::%s: Tsw_est1: %f\n", __func__, st_vars->Tsw_est1); 3722 dml_print("DML::%s: Tsw_est3: %f\n", __func__, st_vars->Tsw_est3); 3723 dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1); 3724 dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2); 3725 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); 3726 dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4); 3727 #endif 3728 { 3729 bool Case1OK; 3730 bool Case2OK; 3731 bool Case3OK; 3732 3733 
if (PrefetchBandwidth1 > 0) { 3734 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 3735 >= st_vars->Tvm_trips_rounded 3736 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3737 / PrefetchBandwidth1 >= st_vars->Tr0_trips_rounded) { 3738 Case1OK = true; 3739 } else { 3740 Case1OK = false; 3741 } 3742 } else { 3743 Case1OK = false; 3744 } 3745 3746 if (PrefetchBandwidth2 > 0) { 3747 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 3748 >= st_vars->Tvm_trips_rounded 3749 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) 3750 / PrefetchBandwidth2 < st_vars->Tr0_trips_rounded) { 3751 Case2OK = true; 3752 } else { 3753 Case2OK = false; 3754 } 3755 } else { 3756 Case2OK = false; 3757 } 3758 3759 if (PrefetchBandwidth3 > 0) { 3760 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < 3761 st_vars->Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * 3762 HostVMInefficiencyFactor) / PrefetchBandwidth3 >= 3763 st_vars->Tr0_trips_rounded) { 3764 Case3OK = true; 3765 } else { 3766 Case3OK = false; 3767 } 3768 } else { 3769 Case3OK = false; 3770 } 3771 3772 if (Case1OK) 3773 st_vars->prefetch_bw_equ = PrefetchBandwidth1; 3774 else if (Case2OK) 3775 st_vars->prefetch_bw_equ = PrefetchBandwidth2; 3776 else if (Case3OK) 3777 st_vars->prefetch_bw_equ = PrefetchBandwidth3; 3778 else 3779 st_vars->prefetch_bw_equ = PrefetchBandwidth4; 3780 3781 #ifdef __DML_VBA_DEBUG__ 3782 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 3783 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 3784 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 3785 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, st_vars->prefetch_bw_equ); 3786 #endif 3787 3788 if (st_vars->prefetch_bw_equ > 0) { 3789 if (GPUVMEnable == true) { 3790 st_vars->Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * 3791 HostVMInefficiencyFactor / 
st_vars->prefetch_bw_equ, 3792 st_vars->Tvm_trips, st_vars->LineTime / 4); 3793 } else { 3794 st_vars->Tvm_equ = st_vars->LineTime / 4; 3795 } 3796 3797 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 3798 st_vars->Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow * 3799 HostVMInefficiencyFactor) / st_vars->prefetch_bw_equ, st_vars->Tr0_trips, 3800 (st_vars->LineTime - st_vars->Tvm_equ) / 2, st_vars->LineTime / 4); 3801 } else { 3802 st_vars->Tr0_equ = (st_vars->LineTime - st_vars->Tvm_equ) / 2; 3803 } 3804 } else { 3805 st_vars->Tvm_equ = 0; 3806 st_vars->Tr0_equ = 0; 3807 #ifdef __DML_VBA_DEBUG__ 3808 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__); 3809 #endif 3810 } 3811 } 3812 3813 if (st_vars->dst_y_prefetch_oto < st_vars->dst_y_prefetch_equ) { 3814 *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_oto; 3815 st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_oto; 3816 st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_oto; 3817 *PrefetchBandwidth = st_vars->prefetch_bw_oto; 3818 } else { 3819 *DestinationLinesForPrefetch = st_vars->dst_y_prefetch_equ; 3820 st_vars->TimeForFetchingMetaPTE = st_vars->Tvm_equ; 3821 st_vars->TimeForFetchingRowInVBlank = st_vars->Tr0_equ; 3822 *PrefetchBandwidth = st_vars->prefetch_bw_equ; 3823 } 3824 3825 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * st_vars->TimeForFetchingMetaPTE / st_vars->LineTime, 1.0) / 4.0; 3826 3827 *DestinationLinesToRequestRowInVBlank = 3828 dml_ceil(4.0 * st_vars->TimeForFetchingRowInVBlank / st_vars->LineTime, 1.0) / 4.0; 3829 3830 st_vars->LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - 3831 *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 3832 3833 #ifdef __DML_VBA_DEBUG__ 3834 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 3835 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 3836 __func__, *DestinationLinesToRequestVMInVBlank); 3837 
dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, st_vars->TimeForFetchingRowInVBlank); 3838 dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); 3839 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", 3840 __func__, *DestinationLinesToRequestRowInVBlank); 3841 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3842 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, st_vars->LinesToRequestPrefetchPixelData); 3843 #endif 3844 3845 if (st_vars->LinesToRequestPrefetchPixelData >= 1 && st_vars->prefetch_bw_equ > 0) { 3846 *VRatioPrefetchY = (double) PrefetchSourceLinesY / st_vars->LinesToRequestPrefetchPixelData; 3847 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 3848 #ifdef __DML_VBA_DEBUG__ 3849 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 3850 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); 3851 dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY); 3852 #endif 3853 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 3854 if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 3855 *VRatioPrefetchY = 3856 dml_max((double) PrefetchSourceLinesY / 3857 st_vars->LinesToRequestPrefetchPixelData, 3858 (double) MaxNumSwathY * SwathHeightY / 3859 (st_vars->LinesToRequestPrefetchPixelData - 3860 (VInitPreFillY - 3.0) / 2.0)); 3861 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 3862 } else { 3863 MyError = true; 3864 *VRatioPrefetchY = 0; 3865 } 3866 #ifdef __DML_VBA_DEBUG__ 3867 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 3868 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 3869 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); 3870 #endif 3871 } 3872 3873 *VRatioPrefetchC = (double) PrefetchSourceLinesC / st_vars->LinesToRequestPrefetchPixelData; 3874 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3875 
3876 #ifdef __DML_VBA_DEBUG__ 3877 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 3878 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); 3879 dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC); 3880 #endif 3881 if ((SwathHeightC > 4)) { 3882 if (st_vars->LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 3883 *VRatioPrefetchC = 3884 dml_max(*VRatioPrefetchC, 3885 (double) MaxNumSwathC * SwathHeightC / 3886 (st_vars->LinesToRequestPrefetchPixelData - 3887 (VInitPreFillC - 3.0) / 2.0)); 3888 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 3889 } else { 3890 MyError = true; 3891 *VRatioPrefetchC = 0; 3892 } 3893 #ifdef __DML_VBA_DEBUG__ 3894 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 3895 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 3896 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); 3897 #endif 3898 } 3899 3900 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY 3901 / st_vars->LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub 3902 / st_vars->LineTime; 3903 3904 #ifdef __DML_VBA_DEBUG__ 3905 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 3906 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 3907 dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); 3908 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", 3909 __func__, *RequiredPrefetchPixDataBWLuma); 3910 #endif 3911 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / 3912 st_vars->LinesToRequestPrefetchPixelData 3913 * myPipe->BytePerPixelC 3914 * swath_width_chroma_ub / st_vars->LineTime; 3915 } else { 3916 MyError = true; 3917 #ifdef __DML_VBA_DEBUG__ 3918 dml_print("DML:%s: MyErr set. 
LinesToRequestPrefetchPixelData: %f, should be > 0\n", 3919 __func__, st_vars->LinesToRequestPrefetchPixelData); 3920 #endif 3921 *VRatioPrefetchY = 0; 3922 *VRatioPrefetchC = 0; 3923 *RequiredPrefetchPixDataBWLuma = 0; 3924 *RequiredPrefetchPixDataBWChroma = 0; 3925 } 3926 #ifdef __DML_VBA_DEBUG__ 3927 dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 3928 (double)st_vars->LinesToRequestPrefetchPixelData * st_vars->LineTime + 3929 2.0*st_vars->TimeForFetchingRowInVBlank + st_vars->TimeForFetchingMetaPTE); 3930 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", st_vars->TimeForFetchingMetaPTE); 3931 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", 3932 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime); 3933 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 3934 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * st_vars->LineTime - 3935 st_vars->TimeForFetchingMetaPTE - 2*st_vars->TimeForFetchingRowInVBlank - (*DSTYAfterScaler + 3936 ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * st_vars->LineTime - TWait - TCalc - *TSetup); 3937 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", 3938 PixelPTEBytesPerRow); 3939 #endif 3940 } else { 3941 MyError = true; 3942 #ifdef __DML_VBA_DEBUG__ 3943 dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", 3944 __func__, st_vars->dst_y_prefetch_equ); 3945 #endif 3946 } 3947 3948 { 3949 double prefetch_vm_bw; 3950 double prefetch_row_bw; 3951 3952 if (PDEAndMetaPTEBytesFrame == 0) { 3953 prefetch_vm_bw = 0; 3954 } else if (*DestinationLinesToRequestVMInVBlank > 0) { 3955 #ifdef __DML_VBA_DEBUG__ 3956 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 3957 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, 
HostVMInefficiencyFactor); 3958 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", 3959 __func__, *DestinationLinesToRequestVMInVBlank); 3960 dml_print("DML::%s: LineTime = %f\n", __func__, st_vars->LineTime); 3961 #endif 3962 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / 3963 (*DestinationLinesToRequestVMInVBlank * st_vars->LineTime); 3964 #ifdef __DML_VBA_DEBUG__ 3965 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 3966 #endif 3967 } else { 3968 prefetch_vm_bw = 0; 3969 MyError = true; 3970 #ifdef __DML_VBA_DEBUG__ 3971 dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", 3972 __func__, *DestinationLinesToRequestVMInVBlank); 3973 #endif 3974 } 3975 3976 if (MetaRowByte + PixelPTEBytesPerRow == 0) { 3977 prefetch_row_bw = 0; 3978 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 3979 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / 3980 (*DestinationLinesToRequestRowInVBlank * st_vars->LineTime); 3981 3982 #ifdef __DML_VBA_DEBUG__ 3983 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 3984 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 3985 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", 3986 __func__, *DestinationLinesToRequestRowInVBlank); 3987 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 3988 #endif 3989 } else { 3990 prefetch_row_bw = 0; 3991 MyError = true; 3992 #ifdef __DML_VBA_DEBUG__ 3993 dml_print("DML::%s: MyErr set. 
DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", 3994 __func__, *DestinationLinesToRequestRowInVBlank); 3995 #endif 3996 } 3997 3998 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 3999 } 4000 4001 if (MyError) { 4002 *PrefetchBandwidth = 0; 4003 st_vars->TimeForFetchingMetaPTE = 0; 4004 st_vars->TimeForFetchingRowInVBlank = 0; 4005 *DestinationLinesToRequestVMInVBlank = 0; 4006 *DestinationLinesToRequestRowInVBlank = 0; 4007 *DestinationLinesForPrefetch = 0; 4008 st_vars->LinesToRequestPrefetchPixelData = 0; 4009 *VRatioPrefetchY = 0; 4010 *VRatioPrefetchC = 0; 4011 *RequiredPrefetchPixDataBWLuma = 0; 4012 *RequiredPrefetchPixDataBWChroma = 0; 4013 } 4014 4015 return MyError; 4016 } // CalculatePrefetchSchedule 4017 4018 void dml32_CalculateFlipSchedule( 4019 double HostVMInefficiencyFactor, 4020 double UrgentExtraLatency, 4021 double UrgentLatency, 4022 unsigned int GPUVMMaxPageTableLevels, 4023 bool HostVMEnable, 4024 unsigned int HostVMMaxNonCachedPageTableLevels, 4025 bool GPUVMEnable, 4026 double HostVMMinPageSize, 4027 double PDEAndMetaPTEBytesPerFrame, 4028 double MetaRowBytes, 4029 double DPTEBytesPerRow, 4030 double BandwidthAvailableForImmediateFlip, 4031 unsigned int TotImmediateFlipBytes, 4032 enum source_format_class SourcePixelFormat, 4033 double LineTime, 4034 double VRatio, 4035 double VRatioChroma, 4036 double Tno_bw, 4037 bool DCCEnable, 4038 unsigned int dpte_row_height, 4039 unsigned int meta_row_height, 4040 unsigned int dpte_row_height_chroma, 4041 unsigned int meta_row_height_chroma, 4042 bool use_one_row_for_frame_flip, 4043 4044 /* Output */ 4045 double *DestinationLinesToRequestVMInImmediateFlip, 4046 double *DestinationLinesToRequestRowInImmediateFlip, 4047 double *final_flip_bw, 4048 bool *ImmediateFlipSupportedForPipe) 4049 { 4050 double min_row_time = 0.0; 4051 unsigned int HostVMDynamicLevelsTrips; 4052 double TimeForFetchingMetaPTEImmediateFlip; 4053 double TimeForFetchingRowInVBlankImmediateFlip; 4054 
double ImmediateFlipBW; 4055 4056 if (GPUVMEnable == true && HostVMEnable == true) 4057 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 4058 else 4059 HostVMDynamicLevelsTrips = 0; 4060 4061 #ifdef __DML_VBA_DEBUG__ 4062 dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes); 4063 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip); 4064 #endif 4065 4066 if (TotImmediateFlipBytes > 0) { 4067 if (use_one_row_for_frame_flip) { 4068 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) * 4069 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 4070 } else { 4071 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * 4072 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 4073 } 4074 if (GPUVMEnable == true) { 4075 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame * 4076 HostVMInefficiencyFactor / ImmediateFlipBW, 4077 UrgentExtraLatency + UrgentLatency * 4078 (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), 4079 LineTime / 4.0); 4080 } else { 4081 TimeForFetchingMetaPTEImmediateFlip = 0; 4082 } 4083 if ((GPUVMEnable == true || DCCEnable == true)) { 4084 TimeForFetchingRowInVBlankImmediateFlip = dml_max3( 4085 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, 4086 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0); 4087 } else { 4088 TimeForFetchingRowInVBlankImmediateFlip = 0; 4089 } 4090 4091 *DestinationLinesToRequestVMInImmediateFlip = 4092 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0; 4093 *DestinationLinesToRequestRowInImmediateFlip = 4094 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0; 4095 4096 if (GPUVMEnable == true) { 4097 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / 4098 
(*DestinationLinesToRequestVMInImmediateFlip * LineTime), 4099 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / 4100 (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); 4101 } else if ((GPUVMEnable == true || DCCEnable == true)) { 4102 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / 4103 (*DestinationLinesToRequestRowInImmediateFlip * LineTime); 4104 } else { 4105 *final_flip_bw = 0; 4106 } 4107 } else { 4108 TimeForFetchingMetaPTEImmediateFlip = 0; 4109 TimeForFetchingRowInVBlankImmediateFlip = 0; 4110 *DestinationLinesToRequestVMInImmediateFlip = 0; 4111 *DestinationLinesToRequestRowInImmediateFlip = 0; 4112 *final_flip_bw = 0; 4113 } 4114 4115 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) { 4116 if (GPUVMEnable == true && DCCEnable != true) { 4117 min_row_time = dml_min(dpte_row_height * 4118 LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma); 4119 } else if (GPUVMEnable != true && DCCEnable == true) { 4120 min_row_time = dml_min(meta_row_height * 4121 LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); 4122 } else { 4123 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height * 4124 LineTime / VRatio, dpte_row_height_chroma * LineTime / 4125 VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma); 4126 } 4127 } else { 4128 if (GPUVMEnable == true && DCCEnable != true) { 4129 min_row_time = dpte_row_height * LineTime / VRatio; 4130 } else if (GPUVMEnable != true && DCCEnable == true) { 4131 min_row_time = meta_row_height * LineTime / VRatio; 4132 } else { 4133 min_row_time = 4134 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); 4135 } 4136 } 4137 4138 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 4139 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip 4140 > 
min_row_time) { 4141 *ImmediateFlipSupportedForPipe = false; 4142 } else { 4143 *ImmediateFlipSupportedForPipe = true; 4144 } 4145 4146 #ifdef __DML_VBA_DEBUG__ 4147 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable); 4148 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable); 4149 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", 4150 __func__, *DestinationLinesToRequestVMInImmediateFlip); 4151 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", 4152 __func__, *DestinationLinesToRequestRowInImmediateFlip); 4153 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); 4154 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", 4155 __func__, TimeForFetchingRowInVBlankImmediateFlip); 4156 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); 4157 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe); 4158 #endif 4159 } // CalculateFlipSchedule 4160 4161 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( 4162 struct dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport *st_vars, 4163 bool USRRetrainingRequiredFinal, 4164 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 4165 unsigned int PrefetchMode, 4166 unsigned int NumberOfActiveSurfaces, 4167 unsigned int MaxLineBufferLines, 4168 unsigned int LineBufferSize, 4169 unsigned int WritebackInterfaceBufferSize, 4170 double DCFCLK, 4171 double ReturnBW, 4172 bool SynchronizeTimingsFinal, 4173 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal, 4174 bool DRRDisplay[], 4175 unsigned int dpte_group_bytes[], 4176 unsigned int meta_row_height[], 4177 unsigned int meta_row_height_chroma[], 4178 SOCParametersList mmSOCParameters, 4179 unsigned int WritebackChunkSize, 4180 double SOCCLK, 4181 double DCFClkDeepSleep, 4182 unsigned int DETBufferSizeY[], 4183 unsigned int DETBufferSizeC[], 4184 unsigned int 
SwathHeightY[], 4185 unsigned int SwathHeightC[], 4186 unsigned int LBBitPerPixel[], 4187 double SwathWidthY[], 4188 double SwathWidthC[], 4189 double HRatio[], 4190 double HRatioChroma[], 4191 unsigned int VTaps[], 4192 unsigned int VTapsChroma[], 4193 double VRatio[], 4194 double VRatioChroma[], 4195 unsigned int HTotal[], 4196 unsigned int VTotal[], 4197 unsigned int VActive[], 4198 double PixelClock[], 4199 unsigned int BlendingAndTiming[], 4200 unsigned int DPPPerSurface[], 4201 double BytePerPixelDETY[], 4202 double BytePerPixelDETC[], 4203 double DSTXAfterScaler[], 4204 double DSTYAfterScaler[], 4205 bool WritebackEnable[], 4206 enum source_format_class WritebackPixelFormat[], 4207 double WritebackDestinationWidth[], 4208 double WritebackDestinationHeight[], 4209 double WritebackSourceHeight[], 4210 bool UnboundedRequestEnabled, 4211 unsigned int CompressedBufferSizeInkByte, 4212 4213 /* Output */ 4214 Watermarks *Watermark, 4215 enum clock_change_support *DRAMClockChangeSupport, 4216 double MaxActiveDRAMClockChangeLatencySupported[], 4217 unsigned int SubViewportLinesNeededInMALL[], 4218 enum dm_fclock_change_support *FCLKChangeSupport, 4219 double *MinActiveFCLKChangeLatencySupported, 4220 bool *USRRetrainingSupport, 4221 double ActiveDRAMClockChangeLatencyMargin[]) 4222 { 4223 unsigned int i, j, k; 4224 4225 st_vars->SurfaceWithMinActiveFCLKChangeMargin = 0; 4226 st_vars->DRAMClockChangeSupportNumber = 0; 4227 st_vars->DRAMClockChangeMethod = 0; 4228 st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false; 4229 st_vars->MinActiveFCLKChangeMargin = 0.; 4230 st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.; 4231 st_vars->TotalPixelBW = 0.0; 4232 st_vars->TotalActiveWriteback = 0; 4233 4234 Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency; 4235 Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency 4236 + mmSOCParameters.USRRetrainingLatency + 
mmSOCParameters.SMNLatency; 4237 Watermark->DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + Watermark->UrgentWatermark; 4238 Watermark->FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + Watermark->UrgentWatermark; 4239 Watermark->StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency 4240 + 10 / DCFClkDeepSleep; 4241 Watermark->StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency 4242 + 10 / DCFClkDeepSleep; 4243 Watermark->Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency 4244 + 10 / DCFClkDeepSleep; 4245 Watermark->Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time 4246 + mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep; 4247 4248 #ifdef __DML_VBA_DEBUG__ 4249 dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency); 4250 dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency); 4251 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency); 4252 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, Watermark->UrgentWatermark); 4253 dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, Watermark->USRRetrainingWatermark); 4254 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, Watermark->DRAMClockChangeWatermark); 4255 dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, Watermark->FCLKChangeWatermark); 4256 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, Watermark->StutterExitWatermark); 4257 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, Watermark->StutterEnterPlusExitWatermark); 4258 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, Watermark->Z8StutterExitWatermark); 4259 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", 4260 __func__, Watermark->Z8StutterEnterPlusExitWatermark); 4261 #endif 4262 4263 4264 
st_vars->TotalActiveWriteback = 0; 4265 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4266 if (WritebackEnable[k] == true) 4267 st_vars->TotalActiveWriteback = st_vars->TotalActiveWriteback + 1; 4268 } 4269 4270 if (st_vars->TotalActiveWriteback <= 1) { 4271 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency; 4272 } else { 4273 Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency 4274 + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 4275 } 4276 if (USRRetrainingRequiredFinal) 4277 Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark 4278 + mmSOCParameters.USRRetrainingLatency; 4279 4280 if (st_vars->TotalActiveWriteback <= 1) { 4281 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency 4282 + mmSOCParameters.WritebackLatency; 4283 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency 4284 + mmSOCParameters.WritebackLatency; 4285 } else { 4286 Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency 4287 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 4288 Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency 4289 + mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024 / 32 / SOCCLK; 4290 } 4291 4292 if (USRRetrainingRequiredFinal) 4293 Watermark->WritebackDRAMClockChangeWatermark = Watermark->WritebackDRAMClockChangeWatermark 4294 + mmSOCParameters.USRRetrainingLatency; 4295 4296 if (USRRetrainingRequiredFinal) 4297 Watermark->WritebackFCLKChangeWatermark = Watermark->WritebackFCLKChangeWatermark 4298 + mmSOCParameters.USRRetrainingLatency; 4299 4300 #ifdef __DML_VBA_DEBUG__ 4301 dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", 4302 __func__, Watermark->WritebackDRAMClockChangeWatermark); 4303 dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, Watermark->WritebackFCLKChangeWatermark); 4304 dml_print("DML::%s: WritebackUrgentWatermark 
= %f\n", __func__, Watermark->WritebackUrgentWatermark); 4305 dml_print("DML::%s: USRRetrainingRequiredFinal = %d\n", __func__, USRRetrainingRequiredFinal); 4306 dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency); 4307 #endif 4308 4309 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4310 st_vars->TotalPixelBW = st_vars->TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + 4311 SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]); 4312 } 4313 4314 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4315 4316 st_vars->LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1); 4317 st_vars->LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1); 4318 4319 4320 #ifdef __DML_VBA_DEBUG__ 4321 dml_print("DML::%s: k=%d, MaxLineBufferLines = %d\n", __func__, k, MaxLineBufferLines); 4322 dml_print("DML::%s: k=%d, LineBufferSize = %d\n", __func__, k, LineBufferSize); 4323 dml_print("DML::%s: k=%d, LBBitPerPixel = %d\n", __func__, k, LBBitPerPixel[k]); 4324 dml_print("DML::%s: k=%d, HRatio = %f\n", __func__, k, HRatio[k]); 4325 dml_print("DML::%s: k=%d, VTaps = %d\n", __func__, k, VTaps[k]); 4326 #endif 4327 4328 st_vars->EffectiveLBLatencyHidingY = st_vars->LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]); 4329 st_vars->EffectiveLBLatencyHidingC = st_vars->LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); 4330 st_vars->EffectiveDETBufferSizeY = DETBufferSizeY[k]; 4331 4332 if (UnboundedRequestEnabled) { 4333 st_vars->EffectiveDETBufferSizeY = st_vars->EffectiveDETBufferSizeY 4334 + CompressedBufferSizeInkByte * 1024 4335 * (SwathWidthY[k] * BytePerPixelDETY[k] * 
VRatio[k]) 4336 / (HTotal[k] / PixelClock[k]) / st_vars->TotalPixelBW; 4337 } 4338 4339 st_vars->LinesInDETY[k] = (double) st_vars->EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 4340 st_vars->LinesInDETYRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETY[k], SwathHeightY[k]); 4341 st_vars->FullDETBufferingTimeY = st_vars->LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; 4342 4343 st_vars->ActiveClockChangeLatencyHidingY = st_vars->EffectiveLBLatencyHidingY + st_vars->FullDETBufferingTimeY 4344 - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k]; 4345 4346 if (NumberOfActiveSurfaces > 1) { 4347 st_vars->ActiveClockChangeLatencyHidingY = st_vars->ActiveClockChangeLatencyHidingY 4348 - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k] 4349 / PixelClock[k] / VRatio[k]; 4350 } 4351 4352 if (BytePerPixelDETC[k] > 0) { 4353 st_vars->LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 4354 st_vars->LinesInDETCRoundedDownToSwath[k] = dml_floor(st_vars->LinesInDETC[k], SwathHeightC[k]); 4355 st_vars->FullDETBufferingTimeC = st_vars->LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) 4356 / VRatioChroma[k]; 4357 st_vars->ActiveClockChangeLatencyHidingC = st_vars->EffectiveLBLatencyHidingC + st_vars->FullDETBufferingTimeC 4358 - (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] 4359 / PixelClock[k]; 4360 if (NumberOfActiveSurfaces > 1) { 4361 st_vars->ActiveClockChangeLatencyHidingC = st_vars->ActiveClockChangeLatencyHidingC 4362 - (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k] 4363 / PixelClock[k] / VRatioChroma[k]; 4364 } 4365 st_vars->ActiveClockChangeLatencyHiding = dml_min(st_vars->ActiveClockChangeLatencyHidingY, 4366 st_vars->ActiveClockChangeLatencyHidingC); 4367 } else { 4368 st_vars->ActiveClockChangeLatencyHiding = st_vars->ActiveClockChangeLatencyHidingY; 4369 } 4370 4371 
ActiveDRAMClockChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark 4372 - Watermark->DRAMClockChangeWatermark; 4373 st_vars->ActiveFCLKChangeLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark 4374 - Watermark->FCLKChangeWatermark; 4375 st_vars->USRRetrainingLatencyMargin[k] = st_vars->ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark; 4376 4377 if (WritebackEnable[k]) { 4378 st_vars->WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024 4379 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] 4380 / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); 4381 if (WritebackPixelFormat[k] == dm_444_64) 4382 st_vars->WritebackLatencyHiding = st_vars->WritebackLatencyHiding / 2; 4383 4384 st_vars->WritebackDRAMClockChangeLatencyMargin = st_vars->WritebackLatencyHiding 4385 - Watermark->WritebackDRAMClockChangeWatermark; 4386 4387 st_vars->WritebackFCLKChangeLatencyMargin = st_vars->WritebackLatencyHiding 4388 - Watermark->WritebackFCLKChangeWatermark; 4389 4390 ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k], 4391 st_vars->WritebackFCLKChangeLatencyMargin); 4392 st_vars->ActiveFCLKChangeLatencyMargin[k] = dml_min(st_vars->ActiveFCLKChangeLatencyMargin[k], 4393 st_vars->WritebackDRAMClockChangeLatencyMargin); 4394 } 4395 MaxActiveDRAMClockChangeLatencySupported[k] = 4396 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ? 
4397 0 : 4398 (ActiveDRAMClockChangeLatencyMargin[k] 4399 + mmSOCParameters.DRAMClockChangeLatency); 4400 } 4401 4402 for (i = 0; i < NumberOfActiveSurfaces; ++i) { 4403 for (j = 0; j < NumberOfActiveSurfaces; ++j) { 4404 if (i == j || 4405 (BlendingAndTiming[i] == i && BlendingAndTiming[j] == i) || 4406 (BlendingAndTiming[j] == j && BlendingAndTiming[i] == j) || 4407 (BlendingAndTiming[i] == BlendingAndTiming[j] && BlendingAndTiming[i] != i) || 4408 (SynchronizeTimingsFinal && PixelClock[i] == PixelClock[j] && 4409 HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] && 4410 VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal && 4411 (DRRDisplay[i] || DRRDisplay[j]))) { 4412 st_vars->SynchronizedSurfaces[i][j] = true; 4413 } else { 4414 st_vars->SynchronizedSurfaces[i][j] = false; 4415 } 4416 } 4417 } 4418 4419 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4420 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4421 (!st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin || 4422 st_vars->ActiveFCLKChangeLatencyMargin[k] < st_vars->MinActiveFCLKChangeMargin)) { 4423 st_vars->FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true; 4424 st_vars->MinActiveFCLKChangeMargin = st_vars->ActiveFCLKChangeLatencyMargin[k]; 4425 st_vars->SurfaceWithMinActiveFCLKChangeMargin = k; 4426 } 4427 } 4428 4429 *MinActiveFCLKChangeLatencySupported = st_vars->MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency; 4430 4431 st_vars->SameTimingForFCLKChange = true; 4432 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4433 if (!st_vars->SynchronizedSurfaces[k][st_vars->SurfaceWithMinActiveFCLKChangeMargin]) { 4434 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4435 (st_vars->SameTimingForFCLKChange || 4436 st_vars->ActiveFCLKChangeLatencyMargin[k] < 4437 st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) { 4438 st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 
st_vars->ActiveFCLKChangeLatencyMargin[k]; 4439 } 4440 st_vars->SameTimingForFCLKChange = false; 4441 } 4442 } 4443 4444 if (st_vars->MinActiveFCLKChangeMargin > 0) { 4445 *FCLKChangeSupport = dm_fclock_change_vactive; 4446 } else if ((st_vars->SameTimingForFCLKChange || st_vars->SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) && 4447 (PrefetchMode <= 1)) { 4448 *FCLKChangeSupport = dm_fclock_change_vblank; 4449 } else { 4450 *FCLKChangeSupport = dm_fclock_change_unsupported; 4451 } 4452 4453 *USRRetrainingSupport = true; 4454 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4455 if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) && 4456 (st_vars->USRRetrainingLatencyMargin[k] < 0)) { 4457 *USRRetrainingSupport = false; 4458 } 4459 } 4460 4461 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4462 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame && 4463 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport && 4464 UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe && 4465 ActiveDRAMClockChangeLatencyMargin[k] < 0) { 4466 if (PrefetchMode > 0) { 4467 st_vars->DRAMClockChangeSupportNumber = 2; 4468 } else if (st_vars->DRAMClockChangeSupportNumber == 0) { 4469 st_vars->DRAMClockChangeSupportNumber = 1; 4470 st_vars->LastSurfaceWithoutMargin = k; 4471 } else if (st_vars->DRAMClockChangeSupportNumber == 1 && 4472 !st_vars->SynchronizedSurfaces[st_vars->LastSurfaceWithoutMargin][k]) { 4473 st_vars->DRAMClockChangeSupportNumber = 2; 4474 } 4475 } 4476 } 4477 4478 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4479 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame) 4480 st_vars->DRAMClockChangeMethod = 1; 4481 else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) 4482 st_vars->DRAMClockChangeMethod = 2; 4483 } 4484 4485 if (st_vars->DRAMClockChangeMethod == 0) { 4486 if (st_vars->DRAMClockChangeSupportNumber == 0) 4487 *DRAMClockChangeSupport = 
dm_dram_clock_change_vactive; 4488 else if (st_vars->DRAMClockChangeSupportNumber == 1) 4489 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 4490 else 4491 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4492 } else if (st_vars->DRAMClockChangeMethod == 1) { 4493 if (st_vars->DRAMClockChangeSupportNumber == 0) 4494 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame; 4495 else if (st_vars->DRAMClockChangeSupportNumber == 1) 4496 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame; 4497 else 4498 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4499 } else { 4500 if (st_vars->DRAMClockChangeSupportNumber == 0) 4501 *DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp; 4502 else if (st_vars->DRAMClockChangeSupportNumber == 1) 4503 *DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp; 4504 else 4505 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 4506 } 4507 4508 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4509 unsigned int dst_y_pstate; 4510 unsigned int src_y_pstate_l; 4511 unsigned int src_y_pstate_c; 4512 unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c; 4513 4514 dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1); 4515 src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]); 4516 src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + st_vars->LBLatencyHidingSourceLinesY[k]; 4517 sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k]; 4518 4519 #ifdef __DML_VBA_DEBUG__ 4520 dml_print("DML::%s: k=%d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 4521 dml_print("DML::%s: k=%d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 4522 dml_print("DML::%s: k=%d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); 4523 dml_print("DML::%s: k=%d, SwathHeightY = 
%d\n", __func__, k, SwathHeightY[k]); 4524 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY = %d\n", __func__, k, st_vars->LBLatencyHidingSourceLinesY[k]); 4525 dml_print("DML::%s: k=%d, dst_y_pstate = %d\n", __func__, k, dst_y_pstate); 4526 dml_print("DML::%s: k=%d, src_y_pstate_l = %d\n", __func__, k, src_y_pstate_l); 4527 dml_print("DML::%s: k=%d, src_y_ahead_l = %d\n", __func__, k, src_y_ahead_l); 4528 dml_print("DML::%s: k=%d, meta_row_height = %d\n", __func__, k, meta_row_height[k]); 4529 dml_print("DML::%s: k=%d, sub_vp_lines_l = %d\n", __func__, k, sub_vp_lines_l); 4530 #endif 4531 SubViewportLinesNeededInMALL[k] = sub_vp_lines_l; 4532 4533 if (BytePerPixelDETC[k] > 0) { 4534 src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]); 4535 src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + st_vars->LBLatencyHidingSourceLinesC[k]; 4536 sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k]; 4537 SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c); 4538 4539 #ifdef __DML_VBA_DEBUG__ 4540 dml_print("DML::%s: k=%d, src_y_pstate_c = %d\n", __func__, k, src_y_pstate_c); 4541 dml_print("DML::%s: k=%d, src_y_ahead_c = %d\n", __func__, k, src_y_ahead_c); 4542 dml_print("DML::%s: k=%d, meta_row_height_chroma = %d\n", __func__, k, meta_row_height_chroma[k]); 4543 dml_print("DML::%s: k=%d, sub_vp_lines_c = %d\n", __func__, k, sub_vp_lines_c); 4544 #endif 4545 } 4546 } 4547 #ifdef __DML_VBA_DEBUG__ 4548 dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport); 4549 dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport); 4550 dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n", 4551 __func__, *MinActiveFCLKChangeLatencySupported); 4552 dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport); 4553 #endif 4554 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport 
4555 4556 double dml32_CalculateWriteBackDISPCLK( 4557 enum source_format_class WritebackPixelFormat, 4558 double PixelClock, 4559 double WritebackHRatio, 4560 double WritebackVRatio, 4561 unsigned int WritebackHTaps, 4562 unsigned int WritebackVTaps, 4563 unsigned int WritebackSourceWidth, 4564 unsigned int WritebackDestinationWidth, 4565 unsigned int HTotal, 4566 unsigned int WritebackLineBufferSize, 4567 double DISPCLKDPPCLKVCOSpeed) 4568 { 4569 double DISPCLK_H, DISPCLK_V, DISPCLK_HB; 4570 4571 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; 4572 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; 4573 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * 4574 WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; 4575 return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed); 4576 } 4577 4578 void dml32_CalculateMinAndMaxPrefetchMode( 4579 enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal, 4580 unsigned int *MinPrefetchMode, 4581 unsigned int *MaxPrefetchMode) 4582 { 4583 if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) { 4584 *MinPrefetchMode = 3; 4585 *MaxPrefetchMode = 3; 4586 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) { 4587 *MinPrefetchMode = 2; 4588 *MaxPrefetchMode = 2; 4589 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) { 4590 *MinPrefetchMode = 1; 4591 *MaxPrefetchMode = 1; 4592 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) { 4593 *MinPrefetchMode = 0; 4594 *MaxPrefetchMode = 0; 4595 } else if (AllowForPStateChangeOrStutterInVBlankFinal == 4596 dm_prefetch_support_uclk_fclk_and_stutter_if_possible) { 4597 *MinPrefetchMode = 0; 4598 *MaxPrefetchMode = 3; 4599 } else { 4600 *MinPrefetchMode = 0; 4601 
*MaxPrefetchMode = 3; 4602 } 4603 } // CalculateMinAndMaxPrefetchMode 4604 4605 void dml32_CalculatePixelDeliveryTimes( 4606 unsigned int NumberOfActiveSurfaces, 4607 double VRatio[], 4608 double VRatioChroma[], 4609 double VRatioPrefetchY[], 4610 double VRatioPrefetchC[], 4611 unsigned int swath_width_luma_ub[], 4612 unsigned int swath_width_chroma_ub[], 4613 unsigned int DPPPerSurface[], 4614 double HRatio[], 4615 double HRatioChroma[], 4616 double PixelClock[], 4617 double PSCL_THROUGHPUT[], 4618 double PSCL_THROUGHPUT_CHROMA[], 4619 double Dppclk[], 4620 unsigned int BytePerPixelC[], 4621 enum dm_rotation_angle SourceRotation[], 4622 unsigned int NumberOfCursors[], 4623 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 4624 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 4625 unsigned int BlockWidth256BytesY[], 4626 unsigned int BlockHeight256BytesY[], 4627 unsigned int BlockWidth256BytesC[], 4628 unsigned int BlockHeight256BytesC[], 4629 4630 /* Output */ 4631 double DisplayPipeLineDeliveryTimeLuma[], 4632 double DisplayPipeLineDeliveryTimeChroma[], 4633 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 4634 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 4635 double DisplayPipeRequestDeliveryTimeLuma[], 4636 double DisplayPipeRequestDeliveryTimeChroma[], 4637 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 4638 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 4639 double CursorRequestDeliveryTime[], 4640 double CursorRequestDeliveryTimePrefetch[]) 4641 { 4642 double req_per_swath_ub; 4643 unsigned int k; 4644 4645 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4646 4647 #ifdef __DML_VBA_DEBUG__ 4648 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); 4649 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); 4650 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); 4651 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); 4652 dml_print("DML::%s: k=%d : 
swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]); 4653 dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]); 4654 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]); 4655 dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]); 4656 dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]); 4657 dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]); 4658 dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]); 4659 #endif 4660 4661 if (VRatio[k] <= 1) { 4662 DisplayPipeLineDeliveryTimeLuma[k] = 4663 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; 4664 } else { 4665 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 4666 } 4667 4668 if (BytePerPixelC[k] == 0) { 4669 DisplayPipeLineDeliveryTimeChroma[k] = 0; 4670 } else { 4671 if (VRatioChroma[k] <= 1) { 4672 DisplayPipeLineDeliveryTimeChroma[k] = 4673 swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 4674 } else { 4675 DisplayPipeLineDeliveryTimeChroma[k] = 4676 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k]; 4677 } 4678 } 4679 4680 if (VRatioPrefetchY[k] <= 1) { 4681 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = 4682 swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k]; 4683 } else { 4684 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = 4685 swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k]; 4686 } 4687 4688 if (BytePerPixelC[k] == 0) { 4689 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; 4690 } else { 4691 if (VRatioPrefetchC[k] <= 1) { 4692 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * 4693 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k]; 4694 } else { 4695 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 4696 swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / 
Dppclk[k]; 4697 } 4698 } 4699 #ifdef __DML_VBA_DEBUG__ 4700 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", 4701 __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); 4702 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", 4703 __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); 4704 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", 4705 __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); 4706 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", 4707 __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); 4708 #endif 4709 } 4710 4711 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4712 if (!IsVertical(SourceRotation[k])) 4713 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; 4714 else 4715 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; 4716 #ifdef __DML_VBA_DEBUG__ 4717 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub); 4718 #endif 4719 4720 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; 4721 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = 4722 DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; 4723 if (BytePerPixelC[k] == 0) { 4724 DisplayPipeRequestDeliveryTimeChroma[k] = 0; 4725 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; 4726 } else { 4727 if (!IsVertical(SourceRotation[k])) 4728 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; 4729 else 4730 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; 4731 #ifdef __DML_VBA_DEBUG__ 4732 dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub); 4733 #endif 4734 DisplayPipeRequestDeliveryTimeChroma[k] = 4735 DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; 4736 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 4737 DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; 4738 } 4739 #ifdef 
__DML_VBA_DEBUG__ 4740 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", 4741 __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); 4742 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", 4743 __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); 4744 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", 4745 __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); 4746 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", 4747 __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); 4748 #endif 4749 } 4750 4751 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 4752 unsigned int cursor_req_per_width; 4753 4754 cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] / 4755 256.0 / 8.0, 1.0); 4756 if (NumberOfCursors[k] > 0) { 4757 if (VRatio[k] <= 1) { 4758 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] / 4759 HRatio[k] / PixelClock[k] / cursor_req_per_width; 4760 } else { 4761 CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] / 4762 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; 4763 } 4764 if (VRatioPrefetchY[k] <= 1) { 4765 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] / 4766 HRatio[k] / PixelClock[k] / cursor_req_per_width; 4767 } else { 4768 CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] / 4769 PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width; 4770 } 4771 } else { 4772 CursorRequestDeliveryTime[k] = 0; 4773 CursorRequestDeliveryTimePrefetch[k] = 0; 4774 } 4775 #ifdef __DML_VBA_DEBUG__ 4776 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", 4777 __func__, k, NumberOfCursors[k]); 4778 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", 4779 __func__, k, CursorRequestDeliveryTime[k]); 4780 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", 4781 __func__, k, CursorRequestDeliveryTimePrefetch[k]); 4782 #endif 4783 } 4784 } // 
// CalculatePixelDeliveryTimes

/*
 * dml32_CalculateMetaAndPTETimes - fill the per-surface meta-chunk and
 * PTE-group timing arrays for the first NumberOfActiveSurfaces entries.
 *
 * Loop 1: DST_Y_PER_{PTE,META}_ROW_NOM_{L,C} = row height / vertical ratio;
 *         the chroma values are 0 when BytePerPixelC[k] is 0.
 * Loop 2: when DCCEnable[k] is set, derives a count of meta chunks per row
 *         and divides the nominal / vblank / immediate-flip row time by it;
 *         all six meta-chunk outputs are 0 otherwise.
 * Loop 3: when GPUVMEnable is set, derives a count of PTE groups per row and
 *         divides the nominal / vblank / immediate-flip row time by it;
 *         all six PTE-group outputs are 0 otherwise.
 *
 * Parameters listed after the "Output" marker are written; everything before
 * it is only read.
 */
void dml32_CalculateMetaAndPTETimes(
		bool use_one_row_for_frame[],
		unsigned int NumberOfActiveSurfaces,
		bool GPUVMEnable,
		unsigned int MetaChunkSize,
		unsigned int MinMetaChunkSizeBytes,
		unsigned int HTotal[],
		double VRatio[],
		double VRatioChroma[],
		double DestinationLinesToRequestRowInVBlank[],
		double DestinationLinesToRequestRowInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		unsigned int BytePerPixelY[],
		unsigned int BytePerPixelC[],
		enum dm_rotation_angle SourceRotation[],
		unsigned int dpte_row_height[],
		unsigned int dpte_row_height_chroma[],
		unsigned int meta_row_width[],
		unsigned int meta_row_width_chroma[],
		unsigned int meta_row_height[],
		unsigned int meta_row_height_chroma[],
		unsigned int meta_req_width[],
		unsigned int meta_req_width_chroma[],
		unsigned int meta_req_height[],
		unsigned int meta_req_height_chroma[],
		unsigned int dpte_group_bytes[],
		unsigned int PTERequestSizeY[],
		unsigned int PTERequestSizeC[],
		unsigned int PixelPTEReqWidthY[],
		unsigned int PixelPTEReqHeightY[],
		unsigned int PixelPTEReqWidthC[],
		unsigned int PixelPTEReqHeightC[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],

		/* Output */
		double DST_Y_PER_PTE_ROW_NOM_L[],
		double DST_Y_PER_PTE_ROW_NOM_C[],
		double DST_Y_PER_META_ROW_NOM_L[],
		double DST_Y_PER_META_ROW_NOM_C[],
		double TimePerMetaChunkNominal[],
		double TimePerChromaMetaChunkNominal[],
		double TimePerMetaChunkVBlank[],
		double TimePerChromaMetaChunkVBlank[],
		double TimePerMetaChunkFlip[],
		double TimePerChromaMetaChunkFlip[],
		double time_per_pte_group_nom_luma[],
		double time_per_pte_group_vblank_luma[],
		double time_per_pte_group_flip_luma[],
		double time_per_pte_group_nom_chroma[],
		double time_per_pte_group_vblank_chroma[],
		double time_per_pte_group_flip_chroma[])
{
	unsigned int meta_chunk_width;
	unsigned int min_meta_chunk_width;
	unsigned int meta_chunk_per_row_int;
	unsigned int meta_row_remainder;
	unsigned int meta_chunk_threshold;
	unsigned int meta_chunks_per_row_ub;
	unsigned int meta_chunk_width_chroma;
	unsigned int min_meta_chunk_width_chroma;
	unsigned int meta_chunk_per_row_int_chroma;
	unsigned int meta_row_remainder_chroma;
	unsigned int meta_chunk_threshold_chroma;
	unsigned int meta_chunks_per_row_ub_chroma;
	unsigned int dpte_group_width_luma;
	unsigned int dpte_groups_per_row_luma_ub;
	unsigned int dpte_group_width_chroma;
	unsigned int dpte_groups_per_row_chroma_ub;
	unsigned int k;

	/* Loop 1: row heights divided by the vertical ratio. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
		if (BytePerPixelC[k] == 0)
			DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
		else
			DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
		if (BytePerPixelC[k] == 0)
			DST_Y_PER_META_ROW_NOM_C[k] = 0;
		else
			DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
	}

	/* Loop 2: meta chunk timing (only meaningful with DCC enabled). */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (DCCEnable[k] == true) {
			/* All of the following is unsigned integer arithmetic. */
			meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
			min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
			meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
			meta_row_remainder = meta_row_width[k] % meta_chunk_width;
			/*
			 * NOTE(review): unsigned subtraction; this wraps to a large value if
			 * the request dimension exceeds 2 * min_meta_chunk_width - confirm
			 * callers never hit that.
			 */
			if (!IsVertical(SourceRotation[k]))
				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
			else
				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];

			/* One extra chunk beyond the whole chunks, or two when the remainder exceeds the threshold. */
			if (meta_row_remainder <= meta_chunk_threshold)
				meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
			else
				meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;

			TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
			TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
			TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
			if (BytePerPixelC[k] == 0) {
				TimePerChromaMetaChunkNominal[k] = 0;
				TimePerChromaMetaChunkVBlank[k] = 0;
				TimePerChromaMetaChunkFlip[k] = 0;
			} else {
				/* Same derivation as luma, using the chroma-plane inputs. */
				meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
						meta_row_height_chroma[k];
				min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
						meta_row_height_chroma[k];
				/* Unlike luma, divided as double and truncated on assignment to unsigned. */
				meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
						meta_chunk_width_chroma;
				meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
				if (!IsVertical(SourceRotation[k])) {
					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
							meta_req_width_chroma[k];
				} else {
					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
							meta_req_height_chroma[k];
				}
				if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
				else
					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;

				TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
				TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
				TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
			}
		} else {
			TimePerMetaChunkNominal[k] = 0;
			TimePerMetaChunkVBlank[k] = 0;
			TimePerMetaChunkFlip[k] = 0;
			TimePerChromaMetaChunkNominal[k] = 0;
			TimePerChromaMetaChunkVBlank[k] = 0;
			TimePerChromaMetaChunkFlip[k] = 0;
		}
	}

	/* Loop 3: PTE group timing (only meaningful with GPUVM enabled). */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (GPUVMEnable == true) {
			/* Computed in double, truncated when stored into the unsigned local. */
			if (!IsVertical(SourceRotation[k])) {
				dpte_group_width_luma = (double) dpte_group_bytes[k] /
						(double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
			} else {
				dpte_group_width_luma = (double) dpte_group_bytes[k] /
						(double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
			}

			/* With use_one_row_for_frame the ratio is halved before rounding up. */
			if (use_one_row_for_frame[k]) {
				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
						(double) dpte_group_width_luma / 2.0, 1.0);
			} else {
				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
						(double) dpte_group_width_luma, 1.0);
			}
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n",
					__func__, k, use_one_row_for_frame[k]);
			dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n",
					__func__, k, dpte_group_bytes[k]);
			dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n",
					__func__, k, PTERequestSizeY[k]);
			dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n",
					__func__, k, PixelPTEReqWidthY[k]);
			dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n",
					__func__, k, PixelPTEReqHeightY[k]);
			dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n",
					__func__, k, dpte_row_width_luma_ub[k]);
			dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n",
					__func__, k, dpte_group_width_luma);
			dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n",
					__func__, k, dpte_groups_per_row_luma_ub);
#endif

			time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
			time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
			time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
			if (BytePerPixelC[k] == 0) {
				time_per_pte_group_nom_chroma[k] = 0;
				time_per_pte_group_vblank_chroma[k] = 0;
				time_per_pte_group_flip_chroma[k] = 0;
			} else {
				/* Same derivation as luma, using the chroma-plane inputs. */
				if (!IsVertical(SourceRotation[k])) {
					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
							(double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
				} else {
					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
							(double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
				}

				if (use_one_row_for_frame[k]) {
					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
							(double) dpte_group_width_chroma / 2.0, 1.0);
				} else {
					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
							(double) dpte_group_width_chroma, 1.0);
				}
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n",
						__func__, k, dpte_row_width_chroma_ub[k]);
				dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n",
						__func__, k, dpte_group_width_chroma);
				dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n",
						__func__, k, dpte_groups_per_row_chroma_ub);
#endif
				time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
				time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
				time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
			}
		} else {
			time_per_pte_group_nom_luma[k] = 0;
			time_per_pte_group_vblank_luma[k] = 0;
			time_per_pte_group_flip_luma[k] = 0;
			time_per_pte_group_nom_chroma[k] = 0;
			time_per_pte_group_vblank_chroma[k] = 0;
			time_per_pte_group_flip_chroma[k] = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n",
				__func__, k, DestinationLinesToRequestRowInVBlank[k]);
		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n",
				__func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n",
				__func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n",
				__func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n",
				__func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n",
				__func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n",
				__func__, k, TimePerMetaChunkNominal[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n",
				__func__, k, TimePerMetaChunkVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n",
				__func__, k, TimePerMetaChunkFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n",
				__func__, k, TimePerChromaMetaChunkNominal[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n",
				__func__, k, TimePerChromaMetaChunkVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n",
				__func__, k, TimePerChromaMetaChunkFlip[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n",
				__func__, k, time_per_pte_group_nom_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n",
				__func__, k, time_per_pte_group_vblank_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n",
				__func__, k, time_per_pte_group_flip_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n",
				__func__, k, time_per_pte_group_nom_chroma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
				__func__, k, time_per_pte_group_vblank_chroma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n",
				__func__, k, time_per_pte_group_flip_chroma[k]);
#endif
	}
} // CalculateMetaAndPTETimes

/*
 * dml32_CalculateVMGroupAndRequestTimes - fill the per-surface VM group and
 * VM request time arrays for the first NumberOfActiveSurfaces entries.
 *
 * A surface gets non-zero outputs only when GPUVMEnable is set and either
 * DCCEnable[k] is set or GPUVMMaxPageTableLevels > 1; otherwise all four
 * outputs for that surface are 0.
 *
 * For such a surface a group count and a request count are derived from the
 * dpde0 and/or meta PTE byte totals (which ones depends on DCCEnable[k] and
 * GPUVMMaxPageTableLevels; chroma totals are included only when
 * BytePerPixelC[k] > 0). The vblank / immediate-flip VM line budgets are then
 * converted with HTotal / PixelClock and divided by those counts. All four
 * results are halved when GPUVMMaxPageTableLevels > 2.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are not read here.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActiveSurfaces,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		unsigned int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],
		unsigned int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		unsigned int meta_pte_bytes_per_frame_ub_l[],
		unsigned int meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int k;
	unsigned int num_group_per_lower_vm_stage;
	unsigned int num_req_per_lower_vm_stage;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) {
			/*
			 * Group count: each byte total is divided by vm_group_bytes[k] and
			 * rounded up before summing.
			 */
			if (DCCEnable[k] == false) {
				/* No DCC: dpde0 bytes only. */
				if (BytePerPixelC[k] > 0) {
					num_group_per_lower_vm_stage = dml_ceil(
							(double) (dpde0_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1.0) +
							dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
				} else {
					num_group_per_lower_vm_stage = dml_ceil(
							(double) (dpde0_bytes_per_frame_ub_l[k]) /
							(double) (vm_group_bytes[k]), 1.0);
				}
			} else {
				if (GPUVMMaxPageTableLevels == 1) {
					/* DCC with a single page-table level: meta PTE bytes only. */
					if (BytePerPixelC[k] > 0) {
						num_group_per_lower_vm_stage = dml_ceil(
								(double) (meta_pte_bytes_per_frame_ub_l[k]) /
								(double) (vm_group_bytes[k]), 1.0) +
								dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
								(double) (vm_group_bytes[k]), 1.0);
					} else {
						num_group_per_lower_vm_stage = dml_ceil(
								(double) (meta_pte_bytes_per_frame_ub_l[k]) /
								(double) (vm_group_bytes[k]), 1.0);
					}
				} else {
					/*
					 * DCC with other level counts: dpde0 and meta PTE bytes, plus a
					 * constant (2 with chroma, 1 without).
					 */
					if (BytePerPixelC[k] > 0) {
						num_group_per_lower_vm_stage = 2 + dml_ceil(
								(double) (dpde0_bytes_per_frame_ub_l[k]) /
								(double) (vm_group_bytes[k]), 1) +
								dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
								(double) (vm_group_bytes[k]), 1) +
								dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
								(double) (vm_group_bytes[k]), 1) +
								dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
								(double) (vm_group_bytes[k]), 1);
					} else {
						num_group_per_lower_vm_stage = 1 + dml_ceil(
								(double) (dpde0_bytes_per_frame_ub_l[k]) /
								(double) (vm_group_bytes[k]), 1) + dml_ceil(
								(double) (meta_pte_bytes_per_frame_ub_l[k]) /
								(double) (vm_group_bytes[k]), 1);
					}
				}
			}

			/*
			 * Request count: the same byte totals, each divided by 64 using
			 * unsigned integer division.
			 */
			if (DCCEnable[k] == false) {
				if (BytePerPixelC[k] > 0) {
					num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 +
							dpde0_bytes_per_frame_ub_c[k] / 64;
				} else {
					num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64;
				}
			} else {
				if (GPUVMMaxPageTableLevels == 1) {
					if (BytePerPixelC[k] > 0) {
						num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 +
								meta_pte_bytes_per_frame_ub_c[k] / 64;
					} else {
						num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64;
					}
				} else {
					if (BytePerPixelC[k] > 0) {
						num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
								64 + dpde0_bytes_per_frame_ub_c[k] / 64 +
								meta_pte_bytes_per_frame_ub_l[k] / 64 +
								meta_pte_bytes_per_frame_ub_c[k] / 64;
					} else {
						num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] /
								64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
					}
				}
			}

			/* Lines * HTotal / PixelClock, shared evenly across the groups / requests. */
			TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
			TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
			TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
			TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;

			/* More than two page-table levels: every time is halved. */
			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
				TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
				TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
				TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
			}

		} else {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes

void dml32_CalculateDCCConfiguration(
		bool DCCEnabled,
		bool DCCProgrammingAssumesScanDirectionUnknown,
		enum source_format_class SourcePixelFormat,
		unsigned int SurfaceWidthLuma,
		unsigned int SurfaceWidthChroma,
		unsigned int SurfaceHeightLuma,
		unsigned int SurfaceHeightChroma,
		unsigned int nomDETInKByte,
		unsigned int RequestHeight256ByteLuma,
		unsigned int RequestHeight256ByteChroma,
		enum dm_swizzle_mode TilingFormat,
		unsigned int BytePerPixelY,
		unsigned int BytePerPixelC,
		double BytePerPixelDETY,
		double BytePerPixelDETC,
		enum dm_rotation_angle SourceRotation,
		/* Output */
		unsigned int *MaxUncompressedBlockLuma,
		unsigned int *MaxUncompressedBlockChroma,
		unsigned int *MaxCompressedBlockLuma,
		unsigned int *MaxCompressedBlockChroma,
		unsigned int *IndependentBlockLuma,
		unsigned int *IndependentBlockChroma)
{
	typedef enum {
		REQ_256Bytes,
		REQ_128BytesNonContiguous,
		REQ_128BytesContiguous,
		REQ_NA
	} RequestType;

	RequestType RequestLuma;
	RequestType RequestChroma;

	unsigned int segment_order_horz_contiguous_luma;
unsigned int segment_order_horz_contiguous_chroma; 5251 unsigned int segment_order_vert_contiguous_luma; 5252 unsigned int segment_order_vert_contiguous_chroma; 5253 unsigned int req128_horz_wc_l; 5254 unsigned int req128_horz_wc_c; 5255 unsigned int req128_vert_wc_l; 5256 unsigned int req128_vert_wc_c; 5257 unsigned int MAS_vp_horz_limit; 5258 unsigned int MAS_vp_vert_limit; 5259 unsigned int max_vp_horz_width; 5260 unsigned int max_vp_vert_height; 5261 unsigned int eff_surf_width_l; 5262 unsigned int eff_surf_width_c; 5263 unsigned int eff_surf_height_l; 5264 unsigned int eff_surf_height_c; 5265 unsigned int full_swath_bytes_horz_wc_l; 5266 unsigned int full_swath_bytes_horz_wc_c; 5267 unsigned int full_swath_bytes_vert_wc_l; 5268 unsigned int full_swath_bytes_vert_wc_c; 5269 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024; 5270 5271 unsigned int yuv420; 5272 unsigned int horz_div_l; 5273 unsigned int horz_div_c; 5274 unsigned int vert_div_l; 5275 unsigned int vert_div_c; 5276 5277 unsigned int swath_buf_size; 5278 double detile_buf_vp_horz_limit; 5279 double detile_buf_vp_vert_limit; 5280 5281 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || 5282 SourcePixelFormat == dm_420_12) ? 
1 : 0); 5283 horz_div_l = 1; 5284 horz_div_c = 1; 5285 vert_div_l = 1; 5286 vert_div_c = 1; 5287 5288 if (BytePerPixelY == 1) 5289 vert_div_l = 0; 5290 if (BytePerPixelC == 1) 5291 vert_div_c = 0; 5292 5293 if (BytePerPixelC == 0) { 5294 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256; 5295 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * 5296 BytePerPixelY / (1 + horz_div_l)); 5297 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / 5298 (1 + vert_div_l)); 5299 } else { 5300 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256; 5301 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * 5302 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma * 5303 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); 5304 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / 5305 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / 5306 (1 + vert_div_c) / (1 + yuv420)); 5307 } 5308 5309 if (SourcePixelFormat == dm_420_10) { 5310 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; 5311 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; 5312 } 5313 5314 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); 5315 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); 5316 5317 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144; 5318 MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144); 5319 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); 5320 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); 5321 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); 5322 eff_surf_width_c = eff_surf_width_l / (1 + yuv420); 5323 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? 
max_vp_vert_height : SurfaceHeightLuma); 5324 eff_surf_height_c = eff_surf_height_l / (1 + yuv420); 5325 5326 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; 5327 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; 5328 if (BytePerPixelC > 0) { 5329 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; 5330 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; 5331 } else { 5332 full_swath_bytes_horz_wc_c = 0; 5333 full_swath_bytes_vert_wc_c = 0; 5334 } 5335 5336 if (SourcePixelFormat == dm_420_10) { 5337 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0); 5338 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0); 5339 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0); 5340 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0); 5341 } 5342 5343 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5344 req128_horz_wc_l = 0; 5345 req128_horz_wc_c = 0; 5346 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + 5347 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5348 req128_horz_wc_l = 0; 5349 req128_horz_wc_c = 1; 5350 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * 5351 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) { 5352 req128_horz_wc_l = 1; 5353 req128_horz_wc_c = 0; 5354 } else { 5355 req128_horz_wc_l = 1; 5356 req128_horz_wc_c = 1; 5357 } 5358 5359 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5360 req128_vert_wc_l = 0; 5361 req128_vert_wc_c = 0; 5362 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * 5363 
full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5364 req128_vert_wc_l = 0; 5365 req128_vert_wc_c = 1; 5366 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && 5367 full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) { 5368 req128_vert_wc_l = 1; 5369 req128_vert_wc_c = 0; 5370 } else { 5371 req128_vert_wc_l = 1; 5372 req128_vert_wc_c = 1; 5373 } 5374 5375 if (BytePerPixelY == 2) { 5376 segment_order_horz_contiguous_luma = 0; 5377 segment_order_vert_contiguous_luma = 1; 5378 } else { 5379 segment_order_horz_contiguous_luma = 1; 5380 segment_order_vert_contiguous_luma = 0; 5381 } 5382 5383 if (BytePerPixelC == 2) { 5384 segment_order_horz_contiguous_chroma = 0; 5385 segment_order_vert_contiguous_chroma = 1; 5386 } else { 5387 segment_order_horz_contiguous_chroma = 1; 5388 segment_order_vert_contiguous_chroma = 0; 5389 } 5390 #ifdef __DML_VBA_DEBUG__ 5391 dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled); 5392 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte); 5393 dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC); 5394 dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l); 5395 dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c); 5396 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l); 5397 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c); 5398 dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma); 5399 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n", 5400 __func__, segment_order_horz_contiguous_chroma); 5401 #endif 5402 5403 if (DCCProgrammingAssumesScanDirectionUnknown == true) { 5404 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) 5405 RequestLuma = REQ_256Bytes; 5406 else if ((req128_horz_wc_l == 1 && 
segment_order_horz_contiguous_luma == 0) || 5407 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) 5408 RequestLuma = REQ_128BytesNonContiguous; 5409 else 5410 RequestLuma = REQ_128BytesContiguous; 5411 5412 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) 5413 RequestChroma = REQ_256Bytes; 5414 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || 5415 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) 5416 RequestChroma = REQ_128BytesNonContiguous; 5417 else 5418 RequestChroma = REQ_128BytesContiguous; 5419 5420 } else if (!IsVertical(SourceRotation)) { 5421 if (req128_horz_wc_l == 0) 5422 RequestLuma = REQ_256Bytes; 5423 else if (segment_order_horz_contiguous_luma == 0) 5424 RequestLuma = REQ_128BytesNonContiguous; 5425 else 5426 RequestLuma = REQ_128BytesContiguous; 5427 5428 if (req128_horz_wc_c == 0) 5429 RequestChroma = REQ_256Bytes; 5430 else if (segment_order_horz_contiguous_chroma == 0) 5431 RequestChroma = REQ_128BytesNonContiguous; 5432 else 5433 RequestChroma = REQ_128BytesContiguous; 5434 5435 } else { 5436 if (req128_vert_wc_l == 0) 5437 RequestLuma = REQ_256Bytes; 5438 else if (segment_order_vert_contiguous_luma == 0) 5439 RequestLuma = REQ_128BytesNonContiguous; 5440 else 5441 RequestLuma = REQ_128BytesContiguous; 5442 5443 if (req128_vert_wc_c == 0) 5444 RequestChroma = REQ_256Bytes; 5445 else if (segment_order_vert_contiguous_chroma == 0) 5446 RequestChroma = REQ_128BytesNonContiguous; 5447 else 5448 RequestChroma = REQ_128BytesContiguous; 5449 } 5450 5451 if (RequestLuma == REQ_256Bytes) { 5452 *MaxUncompressedBlockLuma = 256; 5453 *MaxCompressedBlockLuma = 256; 5454 *IndependentBlockLuma = 0; 5455 } else if (RequestLuma == REQ_128BytesContiguous) { 5456 *MaxUncompressedBlockLuma = 256; 5457 *MaxCompressedBlockLuma = 128; 5458 *IndependentBlockLuma = 128; 5459 } else { 5460 *MaxUncompressedBlockLuma = 256; 5461 *MaxCompressedBlockLuma = 64; 5462 *IndependentBlockLuma = 64; 5463 } 
5464 5465 if (RequestChroma == REQ_256Bytes) { 5466 *MaxUncompressedBlockChroma = 256; 5467 *MaxCompressedBlockChroma = 256; 5468 *IndependentBlockChroma = 0; 5469 } else if (RequestChroma == REQ_128BytesContiguous) { 5470 *MaxUncompressedBlockChroma = 256; 5471 *MaxCompressedBlockChroma = 128; 5472 *IndependentBlockChroma = 128; 5473 } else { 5474 *MaxUncompressedBlockChroma = 256; 5475 *MaxCompressedBlockChroma = 64; 5476 *IndependentBlockChroma = 64; 5477 } 5478 5479 if (DCCEnabled != true || BytePerPixelC == 0) { 5480 *MaxUncompressedBlockChroma = 0; 5481 *MaxCompressedBlockChroma = 0; 5482 *IndependentBlockChroma = 0; 5483 } 5484 5485 if (DCCEnabled != true) { 5486 *MaxUncompressedBlockLuma = 0; 5487 *MaxCompressedBlockLuma = 0; 5488 *IndependentBlockLuma = 0; 5489 } 5490 5491 #ifdef __DML_VBA_DEBUG__ 5492 dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma); 5493 dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma); 5494 dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma); 5495 dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma); 5496 dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma); 5497 dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma); 5498 #endif 5499 5500 } // CalculateDCCConfiguration 5501 5502 void dml32_CalculateStutterEfficiency( 5503 unsigned int CompressedBufferSizeInkByte, 5504 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[], 5505 bool UnboundedRequestEnabled, 5506 unsigned int MetaFIFOSizeInKEntries, 5507 unsigned int ZeroSizeBufferEntries, 5508 unsigned int PixelChunkSizeInKByte, 5509 unsigned int NumberOfActiveSurfaces, 5510 unsigned int ROBBufferSizeInKByte, 5511 double TotalDataReadBandwidth, 5512 double DCFCLK, 5513 double ReturnBW, 5514 unsigned int CompbufReservedSpace64B, 5515 unsigned int 
CompbufReservedSpaceZs, 5516 double SRExitTime, 5517 double SRExitZ8Time, 5518 bool SynchronizeTimingsFinal, 5519 unsigned int BlendingAndTiming[], 5520 double StutterEnterPlusExitWatermark, 5521 double Z8StutterEnterPlusExitWatermark, 5522 bool ProgressiveToInterlaceUnitInOPP, 5523 bool Interlace[], 5524 double MinTTUVBlank[], 5525 unsigned int DPPPerSurface[], 5526 unsigned int DETBufferSizeY[], 5527 unsigned int BytePerPixelY[], 5528 double BytePerPixelDETY[], 5529 double SwathWidthY[], 5530 unsigned int SwathHeightY[], 5531 unsigned int SwathHeightC[], 5532 double NetDCCRateLuma[], 5533 double NetDCCRateChroma[], 5534 double DCCFractionOfZeroSizeRequestsLuma[], 5535 double DCCFractionOfZeroSizeRequestsChroma[], 5536 unsigned int HTotal[], 5537 unsigned int VTotal[], 5538 double PixelClock[], 5539 double VRatio[], 5540 enum dm_rotation_angle SourceRotation[], 5541 unsigned int BlockHeight256BytesY[], 5542 unsigned int BlockWidth256BytesY[], 5543 unsigned int BlockHeight256BytesC[], 5544 unsigned int BlockWidth256BytesC[], 5545 unsigned int DCCYMaxUncompressedBlock[], 5546 unsigned int DCCCMaxUncompressedBlock[], 5547 unsigned int VActive[], 5548 bool DCCEnable[], 5549 bool WritebackEnable[], 5550 double ReadBandwidthSurfaceLuma[], 5551 double ReadBandwidthSurfaceChroma[], 5552 double meta_row_bw[], 5553 double dpte_row_bw[], 5554 5555 /* Output */ 5556 double *StutterEfficiencyNotIncludingVBlank, 5557 double *StutterEfficiency, 5558 unsigned int *NumberOfStutterBurstsPerFrame, 5559 double *Z8StutterEfficiencyNotIncludingVBlank, 5560 double *Z8StutterEfficiency, 5561 unsigned int *Z8NumberOfStutterBurstsPerFrame, 5562 double *StutterPeriod, 5563 bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE) 5564 { 5565 5566 bool FoundCriticalSurface = false; 5567 unsigned int SwathSizeCriticalSurface = 0; 5568 unsigned int LastChunkOfSwathSize; 5569 unsigned int MissingPartOfLastSwathOfDETSize; 5570 double LastZ8StutterPeriod = 0.0; 5571 double LastStutterPeriod = 0.0; 5572 unsigned 
int TotalNumberOfActiveOTG = 0; 5573 double doublePixelClock; 5574 unsigned int doubleHTotal; 5575 unsigned int doubleVTotal; 5576 bool SameTiming = true; 5577 double DETBufferingTimeY; 5578 double SwathWidthYCriticalSurface = 0.0; 5579 double SwathHeightYCriticalSurface = 0.0; 5580 double VActiveTimeCriticalSurface = 0.0; 5581 double FrameTimeCriticalSurface = 0.0; 5582 unsigned int BytePerPixelYCriticalSurface = 0; 5583 double LinesToFinishSwathTransferStutterCriticalSurface = 0.0; 5584 unsigned int DETBufferSizeYCriticalSurface = 0; 5585 double MinTTUVBlankCriticalSurface = 0.0; 5586 unsigned int BlockWidth256BytesYCriticalSurface = 0; 5587 bool doublePlaneCriticalSurface = 0; 5588 bool doublePipeCriticalSurface = 0; 5589 double TotalCompressedReadBandwidth; 5590 double TotalRowReadBandwidth; 5591 double AverageDCCCompressionRate; 5592 double EffectiveCompressedBufferSize; 5593 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; 5594 double StutterBurstTime; 5595 unsigned int TotalActiveWriteback; 5596 double LinesInDETY; 5597 double LinesInDETYRoundedDownToSwath; 5598 double MaximumEffectiveCompressionLuma; 5599 double MaximumEffectiveCompressionChroma; 5600 double TotalZeroSizeRequestReadBandwidth; 5601 double TotalZeroSizeCompressedReadBandwidth; 5602 double AverageDCCZeroSizeFraction; 5603 double AverageZeroSizeCompressionRate; 5604 unsigned int k; 5605 5606 TotalZeroSizeRequestReadBandwidth = 0; 5607 TotalZeroSizeCompressedReadBandwidth = 0; 5608 TotalRowReadBandwidth = 0; 5609 TotalCompressedReadBandwidth = 0; 5610 5611 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5612 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5613 if (DCCEnable[k] == true) { 5614 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k]) 5615 || (!IsVertical(SourceRotation[k]) 5616 && BlockHeight256BytesY[k] > SwathHeightY[k]) 5617 || DCCYMaxUncompressedBlock[k] < 256) { 5618 MaximumEffectiveCompressionLuma = 2; 5619 
} else { 5620 MaximumEffectiveCompressionLuma = 4; 5621 } 5622 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 5623 + ReadBandwidthSurfaceLuma[k] 5624 / dml_min(NetDCCRateLuma[k], 5625 MaximumEffectiveCompressionLuma); 5626 #ifdef __DML_VBA_DEBUG__ 5627 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", 5628 __func__, k, ReadBandwidthSurfaceLuma[k]); 5629 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n", 5630 __func__, k, NetDCCRateLuma[k]); 5631 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n", 5632 __func__, k, MaximumEffectiveCompressionLuma); 5633 #endif 5634 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth 5635 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; 5636 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 5637 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] 5638 / MaximumEffectiveCompressionLuma; 5639 5640 if (ReadBandwidthSurfaceChroma[k] > 0) { 5641 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k]) 5642 || (!IsVertical(SourceRotation[k]) 5643 && BlockHeight256BytesC[k] > SwathHeightC[k]) 5644 || DCCCMaxUncompressedBlock[k] < 256) { 5645 MaximumEffectiveCompressionChroma = 2; 5646 } else { 5647 MaximumEffectiveCompressionChroma = 4; 5648 } 5649 TotalCompressedReadBandwidth = 5650 TotalCompressedReadBandwidth 5651 + ReadBandwidthSurfaceChroma[k] 5652 / dml_min(NetDCCRateChroma[k], 5653 MaximumEffectiveCompressionChroma); 5654 #ifdef __DML_VBA_DEBUG__ 5655 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n", 5656 __func__, k, ReadBandwidthSurfaceChroma[k]); 5657 dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n", 5658 __func__, k, NetDCCRateChroma[k]); 5659 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n", 5660 __func__, k, MaximumEffectiveCompressionChroma); 5661 #endif 5662 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth 5663 + 
ReadBandwidthSurfaceChroma[k] 5664 * DCCFractionOfZeroSizeRequestsChroma[k]; 5665 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 5666 + ReadBandwidthSurfaceChroma[k] 5667 * DCCFractionOfZeroSizeRequestsChroma[k] 5668 / MaximumEffectiveCompressionChroma; 5669 } 5670 } else { 5671 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 5672 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k]; 5673 } 5674 TotalRowReadBandwidth = TotalRowReadBandwidth 5675 + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]); 5676 } 5677 } 5678 5679 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; 5680 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; 5681 5682 #ifdef __DML_VBA_DEBUG__ 5683 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled); 5684 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); 5685 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); 5686 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", 5687 __func__, TotalZeroSizeCompressedReadBandwidth); 5688 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); 5689 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); 5690 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 5691 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); 5692 dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B); 5693 dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs); 5694 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); 5695 #endif 5696 if (AverageDCCZeroSizeFraction == 1) { 5697 
AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth 5698 / TotalZeroSizeCompressedReadBandwidth; 5699 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64 5700 * AverageZeroSizeCompressionRate 5701 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 5702 * AverageZeroSizeCompressionRate; 5703 } else if (AverageDCCZeroSizeFraction > 0) { 5704 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth 5705 / TotalZeroSizeCompressedReadBandwidth; 5706 EffectiveCompressedBufferSize = dml_min( 5707 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 5708 (double) MetaFIFOSizeInKEntries * 1024 * 64 5709 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate 5710 + 1 / AverageDCCCompressionRate)) 5711 + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64) 5712 * AverageDCCCompressionRate, 5713 ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 5714 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 5715 5716 #ifdef __DML_VBA_DEBUG__ 5717 dml_print("DML::%s: min 1 = %f\n", __func__, 5718 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 5719 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 / 5720 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / 5721 AverageDCCCompressionRate)); 5722 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 - 5723 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate); 5724 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 / 5725 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 5726 #endif 5727 } else { 5728 EffectiveCompressedBufferSize = dml_min( 5729 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 5730 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) 5731 + ((double) ROBBufferSizeInKByte * 1024 - 
CompbufReservedSpace64B * 64) 5732 * AverageDCCCompressionRate; 5733 5734 #ifdef __DML_VBA_DEBUG__ 5735 dml_print("DML::%s: min 1 = %f\n", __func__, 5736 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 5737 dml_print("DML::%s: min 2 = %f\n", __func__, 5738 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); 5739 #endif 5740 } 5741 5742 #ifdef __DML_VBA_DEBUG__ 5743 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); 5744 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); 5745 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 5746 #endif 5747 5748 *StutterPeriod = 0; 5749 5750 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5751 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5752 LinesInDETY = ((double) DETBufferSizeY[k] 5753 + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) 5754 * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth) 5755 / BytePerPixelDETY[k] / SwathWidthY[k]; 5756 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); 5757 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k]) 5758 / VRatio[k]; 5759 #ifdef __DML_VBA_DEBUG__ 5760 dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]); 5761 dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 5762 dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]); 5763 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n", 5764 __func__, k, ReadBandwidthSurfaceLuma[k]); 5765 dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); 5766 dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY); 5767 dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n", 5768 __func__, k, 
LinesInDETYRoundedDownToSwath); 5769 dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]); 5770 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); 5771 dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]); 5772 dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); 5773 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]); 5774 #endif 5775 5776 if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) { 5777 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; 5778 5779 FoundCriticalSurface = true; 5780 *StutterPeriod = DETBufferingTimeY; 5781 FrameTimeCriticalSurface = ( 5782 isInterlaceTiming ? 5783 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k]) 5784 * (double) HTotal[k] / PixelClock[k]; 5785 VActiveTimeCriticalSurface = ( 5786 isInterlaceTiming ? 5787 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k]) 5788 * (double) HTotal[k] / PixelClock[k]; 5789 BytePerPixelYCriticalSurface = BytePerPixelY[k]; 5790 SwathWidthYCriticalSurface = SwathWidthY[k]; 5791 SwathHeightYCriticalSurface = SwathHeightY[k]; 5792 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k]; 5793 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k] 5794 - (LinesInDETY - LinesInDETYRoundedDownToSwath); 5795 DETBufferSizeYCriticalSurface = DETBufferSizeY[k]; 5796 MinTTUVBlankCriticalSurface = MinTTUVBlank[k]; 5797 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0); 5798 doublePipeCriticalSurface = (DPPPerSurface[k] == 1); 5799 5800 #ifdef __DML_VBA_DEBUG__ 5801 dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n", 5802 __func__, k, FoundCriticalSurface); 5803 dml_print("DML::%s: k=%0d, StutterPeriod = %f\n", 5804 __func__, k, *StutterPeriod); 5805 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n", 5806 __func__, k, MinTTUVBlankCriticalSurface); 5807 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = 
%f\n", 5808 __func__, k, FrameTimeCriticalSurface); 5809 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n", 5810 __func__, k, VActiveTimeCriticalSurface); 5811 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n", 5812 __func__, k, BytePerPixelYCriticalSurface); 5813 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n", 5814 __func__, k, SwathWidthYCriticalSurface); 5815 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n", 5816 __func__, k, SwathHeightYCriticalSurface); 5817 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n", 5818 __func__, k, BlockWidth256BytesYCriticalSurface); 5819 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n", 5820 __func__, k, doublePlaneCriticalSurface); 5821 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n", 5822 __func__, k, doublePipeCriticalSurface); 5823 dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n", 5824 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface); 5825 #endif 5826 } 5827 } 5828 } 5829 5830 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, 5831 EffectiveCompressedBufferSize); 5832 #ifdef __DML_VBA_DEBUG__ 5833 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); 5834 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 5835 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", 5836 __func__, *StutterPeriod * TotalDataReadBandwidth); 5837 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 5838 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, 5839 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); 5840 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 5841 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, 
TotalDataReadBandwidth); 5842 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); 5843 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 5844 #endif 5845 5846 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate 5847 / ReturnBW 5848 + (*StutterPeriod * TotalDataReadBandwidth 5849 - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) 5850 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; 5851 #ifdef __DML_VBA_DEBUG__ 5852 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 5853 AverageDCCCompressionRate / ReturnBW); 5854 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", 5855 __func__, (*StutterPeriod * TotalDataReadBandwidth)); 5856 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - 5857 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); 5858 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); 5859 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 5860 #endif 5861 StutterBurstTime = dml_max(StutterBurstTime, 5862 LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface 5863 * SwathWidthYCriticalSurface / ReturnBW); 5864 5865 #ifdef __DML_VBA_DEBUG__ 5866 dml_print("DML::%s: Time to finish residue swath=%f\n", 5867 __func__, 5868 LinesToFinishSwathTransferStutterCriticalSurface * 5869 BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW); 5870 #endif 5871 5872 TotalActiveWriteback = 0; 5873 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5874 if (WritebackEnable[k]) 5875 TotalActiveWriteback = TotalActiveWriteback + 1; 5876 } 5877 5878 if (TotalActiveWriteback == 0) { 5879 #ifdef __DML_VBA_DEBUG__ 5880 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 5881 dml_print("DML::%s: SRExitZ8Time 
= %f\n", __func__, SRExitZ8Time); 5882 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); 5883 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 5884 #endif 5885 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 5886 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; 5887 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 5888 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; 5889 *NumberOfStutterBurstsPerFrame = ( 5890 *StutterEfficiencyNotIncludingVBlank > 0 ? 5891 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); 5892 *Z8NumberOfStutterBurstsPerFrame = ( 5893 *Z8StutterEfficiencyNotIncludingVBlank > 0 ? 5894 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0); 5895 } else { 5896 *StutterEfficiencyNotIncludingVBlank = 0.; 5897 *Z8StutterEfficiencyNotIncludingVBlank = 0.; 5898 *NumberOfStutterBurstsPerFrame = 0; 5899 *Z8NumberOfStutterBurstsPerFrame = 0; 5900 } 5901 #ifdef __DML_VBA_DEBUG__ 5902 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface); 5903 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", 5904 __func__, *StutterEfficiencyNotIncludingVBlank); 5905 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", 5906 __func__, *Z8StutterEfficiencyNotIncludingVBlank); 5907 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); 5908 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 5909 #endif 5910 5911 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 5912 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) { 5913 if (BlendingAndTiming[k] == k) { 5914 if (TotalNumberOfActiveOTG == 0) { 5915 doublePixelClock = PixelClock[k]; 5916 doubleHTotal = HTotal[k]; 5917 doubleVTotal = VTotal[k]; 5918 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k] 5919 || 
doubleVTotal != VTotal[k]) { 5920 SameTiming = false; 5921 } 5922 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 5923 } 5924 } 5925 } 5926 5927 if (*StutterEfficiencyNotIncludingVBlank > 0) { 5928 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 5929 5930 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming 5931 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) { 5932 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime 5933 + StutterBurstTime * VActiveTimeCriticalSurface 5934 / *StutterPeriod) / FrameTimeCriticalSurface) * 100; 5935 } else { 5936 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank; 5937 } 5938 } else { 5939 *StutterEfficiency = 0; 5940 } 5941 5942 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { 5943 LastZ8StutterPeriod = VActiveTimeCriticalSurface 5944 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 5945 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod + 5946 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) { 5947 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime 5948 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100; 5949 } else { 5950 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; 5951 } 5952 } else { 5953 *Z8StutterEfficiency = 0.; 5954 } 5955 5956 #ifdef __DML_VBA_DEBUG__ 5957 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); 5958 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); 5959 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 5960 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 5961 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); 5962 dml_print("DML::%s: Z8StutterEfficiency = 
%f\n", __func__, *Z8StutterEfficiency); 5963 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", 5964 __func__, *StutterEfficiencyNotIncludingVBlank); 5965 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 5966 #endif 5967 5968 SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface 5969 * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface); 5970 LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024); 5971 MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface) 5972 - DETBufferSizeYCriticalSurface; 5973 5974 *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1) 5975 && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0) 5976 && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0) 5977 && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize)); 5978 5979 #ifdef __DML_VBA_DEBUG__ 5980 dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface); 5981 dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize); 5982 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize); 5983 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE); 5984 #endif 5985 } // CalculateStutterEfficiency 5986 5987 void dml32_CalculateMaxDETAndMinCompressedBufferSize( 5988 unsigned int ConfigReturnBufferSizeInKByte, 5989 unsigned int ROBBufferSizeInKByte, 5990 unsigned int MaxNumDPP, 5991 bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size 5992 unsigned int nomDETInKByteOverrideValue, // VBA_DELTA 5993 5994 /* Output */ 5995 unsigned int *MaxTotalDETInKByte, 5996 unsigned int *nomDETInKByte, 5997 unsigned int 
*MinCompressedBufferSizeInKByte) 5998 { 5999 bool det_buff_size_override_en = nomDETInKByteOverrideEnable; 6000 unsigned int det_buff_size_override_val = nomDETInKByteOverrideValue; 6001 6002 *MaxTotalDETInKByte = dml_ceil(((double)ConfigReturnBufferSizeInKByte + 6003 (double) ROBBufferSizeInKByte) * 4.0 / 5.0, 64); 6004 *nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64); 6005 *MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte; 6006 6007 #ifdef __DML_VBA_DEBUG__ 6008 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte); 6009 dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte); 6010 dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP); 6011 dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte); 6012 dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte); 6013 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte); 6014 #endif 6015 6016 if (det_buff_size_override_en) { 6017 *nomDETInKByte = det_buff_size_override_val; 6018 #ifdef __DML_VBA_DEBUG__ 6019 dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte); 6020 #endif 6021 } 6022 } // CalculateMaxDETAndMinCompressedBufferSize 6023 6024 bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces, 6025 double ReturnBW, 6026 bool NotUrgentLatencyHiding[], 6027 double ReadBandwidthLuma[], 6028 double ReadBandwidthChroma[], 6029 double cursor_bw[], 6030 double meta_row_bandwidth[], 6031 double dpte_row_bandwidth[], 6032 unsigned int NumberOfDPP[], 6033 double UrgentBurstFactorLuma[], 6034 double UrgentBurstFactorChroma[], 6035 double UrgentBurstFactorCursor[]) 6036 { 6037 unsigned int k; 6038 bool NotEnoughUrgentLatencyHiding = false; 6039 bool CalculateVActiveBandwithSupport_val = false; 6040 double VActiveBandwith = 0; 6041 6042 for (k 
= 0; k < NumberOfActiveSurfaces; ++k) { 6043 if (NotUrgentLatencyHiding[k]) { 6044 NotEnoughUrgentLatencyHiding = true; 6045 } 6046 } 6047 6048 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6049 VActiveBandwith = VActiveBandwith + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * meta_row_bandwidth[k] + NumberOfDPP[k] * dpte_row_bandwidth[k]; 6050 } 6051 6052 CalculateVActiveBandwithSupport_val = (VActiveBandwith <= ReturnBW) && !NotEnoughUrgentLatencyHiding; 6053 6054 #ifdef __DML_VBA_DEBUG__ 6055 dml_print("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, NotEnoughUrgentLatencyHiding); 6056 dml_print("DML::%s: VActiveBandwith = %f\n", __func__, VActiveBandwith); 6057 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 6058 dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, CalculateVActiveBandwithSupport_val); 6059 #endif 6060 return CalculateVActiveBandwithSupport_val; 6061 } 6062 6063 void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces, 6064 double ReturnBW, 6065 bool NotUrgentLatencyHiding[], 6066 double ReadBandwidthLuma[], 6067 double ReadBandwidthChroma[], 6068 double PrefetchBandwidthLuma[], 6069 double PrefetchBandwidthChroma[], 6070 double cursor_bw[], 6071 double meta_row_bandwidth[], 6072 double dpte_row_bandwidth[], 6073 double cursor_bw_pre[], 6074 double prefetch_vmrow_bw[], 6075 unsigned int NumberOfDPP[], 6076 double UrgentBurstFactorLuma[], 6077 double UrgentBurstFactorChroma[], 6078 double UrgentBurstFactorCursor[], 6079 double UrgentBurstFactorLumaPre[], 6080 double UrgentBurstFactorChromaPre[], 6081 double UrgentBurstFactorCursorPre[], 6082 6083 /* output */ 6084 double *PrefetchBandwidth, 6085 double *FractionOfUrgentBandwidth, 6086 bool *PrefetchBandwidthSupport) 6087 { 6088 unsigned int k; 6089 bool NotEnoughUrgentLatencyHiding = false; 6090 for (k = 0; k < 
NumberOfActiveSurfaces; ++k) { 6091 if (NotUrgentLatencyHiding[k]) { 6092 NotEnoughUrgentLatencyHiding = true; 6093 } 6094 } 6095 6096 *PrefetchBandwidth = 0; 6097 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6098 *PrefetchBandwidth = *PrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6099 ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]), 6100 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6101 } 6102 6103 *PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding; 6104 *FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW; 6105 } 6106 6107 double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces, 6108 double ReturnBW, 6109 double ReadBandwidthLuma[], 6110 double ReadBandwidthChroma[], 6111 double PrefetchBandwidthLuma[], 6112 double PrefetchBandwidthChroma[], 6113 double cursor_bw[], 6114 double cursor_bw_pre[], 6115 unsigned int NumberOfDPP[], 6116 double UrgentBurstFactorLuma[], 6117 double UrgentBurstFactorChroma[], 6118 double UrgentBurstFactorCursor[], 6119 double UrgentBurstFactorLumaPre[], 6120 double UrgentBurstFactorChromaPre[], 6121 double UrgentBurstFactorCursorPre[]) 6122 { 6123 unsigned int k; 6124 double CalculateBandwidthAvailableForImmediateFlip_val = ReturnBW; 6125 6126 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6127 CalculateBandwidthAvailableForImmediateFlip_val = CalculateBandwidthAvailableForImmediateFlip_val - dml_max(ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6128 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + 
PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6129 } 6130 6131 return CalculateBandwidthAvailableForImmediateFlip_val; 6132 } 6133 6134 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces, 6135 double ReturnBW, 6136 enum immediate_flip_requirement ImmediateFlipRequirement[], 6137 double final_flip_bw[], 6138 double ReadBandwidthLuma[], 6139 double ReadBandwidthChroma[], 6140 double PrefetchBandwidthLuma[], 6141 double PrefetchBandwidthChroma[], 6142 double cursor_bw[], 6143 double meta_row_bandwidth[], 6144 double dpte_row_bandwidth[], 6145 double cursor_bw_pre[], 6146 double prefetch_vmrow_bw[], 6147 unsigned int NumberOfDPP[], 6148 double UrgentBurstFactorLuma[], 6149 double UrgentBurstFactorChroma[], 6150 double UrgentBurstFactorCursor[], 6151 double UrgentBurstFactorLumaPre[], 6152 double UrgentBurstFactorChromaPre[], 6153 double UrgentBurstFactorCursorPre[], 6154 6155 /* output */ 6156 double *TotalBandwidth, 6157 double *FractionOfUrgentBandwidth, 6158 bool *ImmediateFlipBandwidthSupport) 6159 { 6160 unsigned int k; 6161 *TotalBandwidth = 0; 6162 for (k = 0; k < NumberOfActiveSurfaces; ++k) { 6163 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) { 6164 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6165 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6166 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6167 } else { 6168 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k], 6169 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * 
UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k], 6170 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]); 6171 } 6172 } 6173 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW); 6174 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW; 6175 } 6176