1 /*
2  * Copyright 2022 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: AMD
23  *
24  */
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
29 
30 #define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096
31 
32 unsigned int dml32_dscceComputeDelay(
33 		unsigned int bpc,
34 		double BPP,
35 		unsigned int sliceWidth,
36 		unsigned int numSlices,
37 		enum output_format_class pixelFormat,
38 		enum output_encoder_class Output)
39 {
40 	// valid bpc         = source bits per component in the set of {8, 10, 12}
41 	// valid bpp         = increments of 1/16 of a bit
42 	//                    min = 6/7/8 in N420/N422/444, respectively
43 	//                    max = such that compression is 1:1
44 	//valid sliceWidth  = number of pixels per slice line,
45 	//	must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
46 	//valid numSlices   = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
47 	//valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
48 
49 	// fixed value
50 	unsigned int rcModelSize = 8192;
51 
52 	// N422/N420 operate at 2 pixels per clock
53 	unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
54 	Delay, pixels;
55 
56 	if (pixelFormat == dm_420)
57 		pixelsPerClock = 2;
58 	else if (pixelFormat == dm_n422)
59 		pixelsPerClock = 2;
60 	// #all other modes operate at 1 pixel per clock
61 	else
62 		pixelsPerClock = 1;
63 
64 	//initial transmit delay as per PPS
65 	initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
66 
67 	//compute ssm delay
68 	if (bpc == 8)
69 		D = 81;
70 	else if (bpc == 10)
71 		D = 89;
72 	else
73 		D = 113;
74 
75 	//divide by pixel per cycle to compute slice width as seen by DSC
76 	w = sliceWidth / pixelsPerClock;
77 
78 	//422 mode has an additional cycle of delay
79 	if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
80 		s = 0;
81 	else
82 		s = 1;
83 
84 	//main calculation for the dscce
85 	ix = initalXmitDelay + 45;
86 	wx = (w + 2) / 3;
87 	p = 3 * wx - w;
88 	l0 = ix / w;
89 	a = ix + p * l0;
90 	ax = (a + 2) / 3 + D + 6 + 1;
91 	L = (ax + wx - 1) / wx;
92 	if ((ix % w) == 0 && p != 0)
93 		lstall = 1;
94 	else
95 		lstall = 0;
96 	Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
97 
98 	//dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
99 	pixels = Delay * 3 * pixelsPerClock;
100 
101 #ifdef __DML_VBA_DEBUG__
102 	dml_print("DML::%s: bpc: %d\n", __func__, bpc);
103 	dml_print("DML::%s: BPP: %f\n", __func__, BPP);
104 	dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
105 	dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
106 	dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
107 	dml_print("DML::%s: Output: %d\n", __func__, Output);
108 	dml_print("DML::%s: pixels: %d\n", __func__, pixels);
109 #endif
110 
111 	return pixels;
112 }
113 
114 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
115 {
116 	unsigned int Delay = 0;
117 
118 	if (pixelFormat == dm_420) {
119 		//   sfr
120 		Delay = Delay + 2;
121 		//   dsccif
122 		Delay = Delay + 0;
123 		//   dscc - input deserializer
124 		Delay = Delay + 3;
125 		//   dscc gets pixels every other cycle
126 		Delay = Delay + 2;
127 		//   dscc - input cdc fifo
128 		Delay = Delay + 12;
129 		//   dscc gets pixels every other cycle
130 		Delay = Delay + 13;
131 		//   dscc - cdc uncertainty
132 		Delay = Delay + 2;
133 		//   dscc - output cdc fifo
134 		Delay = Delay + 7;
135 		//   dscc gets pixels every other cycle
136 		Delay = Delay + 3;
137 		//   dscc - cdc uncertainty
138 		Delay = Delay + 2;
139 		//   dscc - output serializer
140 		Delay = Delay + 1;
141 		//   sft
142 		Delay = Delay + 1;
143 	} else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
144 		//   sfr
145 		Delay = Delay + 2;
146 		//   dsccif
147 		Delay = Delay + 1;
148 		//   dscc - input deserializer
149 		Delay = Delay + 5;
150 		//  dscc - input cdc fifo
151 		Delay = Delay + 25;
152 		//   dscc - cdc uncertainty
153 		Delay = Delay + 2;
154 		//   dscc - output cdc fifo
155 		Delay = Delay + 10;
156 		//   dscc - cdc uncertainty
157 		Delay = Delay + 2;
158 		//   dscc - output serializer
159 		Delay = Delay + 1;
160 		//   sft
161 		Delay = Delay + 1;
162 	} else {
163 		//   sfr
164 		Delay = Delay + 2;
165 		//   dsccif
166 		Delay = Delay + 0;
167 		//   dscc - input deserializer
168 		Delay = Delay + 3;
169 		//   dscc - input cdc fifo
170 		Delay = Delay + 12;
171 		//   dscc - cdc uncertainty
172 		Delay = Delay + 2;
173 		//   dscc - output cdc fifo
174 		Delay = Delay + 7;
175 		//   dscc - output serializer
176 		Delay = Delay + 1;
177 		//   dscc - cdc uncertainty
178 		Delay = Delay + 2;
179 		//   sft
180 		Delay = Delay + 1;
181 	}
182 
183 	return Delay;
184 }
185 
186 
187 bool IsVertical(enum dm_rotation_angle Scan)
188 {
189 	bool is_vert = false;
190 
191 	if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
192 		is_vert = true;
193 	else
194 		is_vert = false;
195 	return is_vert;
196 }
197 
198 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
199 		double HRatio,
200 		double HRatioChroma,
201 		double VRatio,
202 		double VRatioChroma,
203 		double MaxDCHUBToPSCLThroughput,
204 		double MaxPSCLToLBThroughput,
205 		double PixelClock,
206 		enum source_format_class SourcePixelFormat,
207 		unsigned int HTaps,
208 		unsigned int HTapsChroma,
209 		unsigned int VTaps,
210 		unsigned int VTapsChroma,
211 
212 		/* output */
213 		double *PSCL_THROUGHPUT,
214 		double *PSCL_THROUGHPUT_CHROMA,
215 		double *DPPCLKUsingSingleDPP)
216 {
217 	double DPPCLKUsingSingleDPPLuma;
218 	double DPPCLKUsingSingleDPPChroma;
219 
220 	if (HRatio > 1) {
221 		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
222 				dml_ceil((double) HTaps / 6.0, 1.0));
223 	} else {
224 		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
225 	}
226 
227 	DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
228 			*PSCL_THROUGHPUT, 1);
229 
230 	if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
231 		DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
232 
233 	if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
234 			SourcePixelFormat != dm_rgbe_alpha)) {
235 		*PSCL_THROUGHPUT_CHROMA = 0;
236 		*DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
237 	} else {
238 		if (HRatioChroma > 1) {
239 			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
240 					HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
241 		} else {
242 			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
243 		}
244 		DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
245 				HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
246 		if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
247 			DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
248 		*DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
249 	}
250 }
251 
252 void dml32_CalculateBytePerPixelAndBlockSizes(
253 		enum source_format_class SourcePixelFormat,
254 		enum dm_swizzle_mode SurfaceTiling,
255 
256 		/* Output */
257 		unsigned int *BytePerPixelY,
258 		unsigned int *BytePerPixelC,
259 		double  *BytePerPixelDETY,
260 		double  *BytePerPixelDETC,
261 		unsigned int *BlockHeight256BytesY,
262 		unsigned int *BlockHeight256BytesC,
263 		unsigned int *BlockWidth256BytesY,
264 		unsigned int *BlockWidth256BytesC,
265 		unsigned int *MacroTileHeightY,
266 		unsigned int *MacroTileHeightC,
267 		unsigned int *MacroTileWidthY,
268 		unsigned int *MacroTileWidthC)
269 {
270 	if (SourcePixelFormat == dm_444_64) {
271 		*BytePerPixelDETY = 8;
272 		*BytePerPixelDETC = 0;
273 		*BytePerPixelY = 8;
274 		*BytePerPixelC = 0;
275 	} else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
276 		*BytePerPixelDETY = 4;
277 		*BytePerPixelDETC = 0;
278 		*BytePerPixelY = 4;
279 		*BytePerPixelC = 0;
280 	} else if (SourcePixelFormat == dm_444_16) {
281 		*BytePerPixelDETY = 2;
282 		*BytePerPixelDETC = 0;
283 		*BytePerPixelY = 2;
284 		*BytePerPixelC = 0;
285 	} else if (SourcePixelFormat == dm_444_8) {
286 		*BytePerPixelDETY = 1;
287 		*BytePerPixelDETC = 0;
288 		*BytePerPixelY = 1;
289 		*BytePerPixelC = 0;
290 	} else if (SourcePixelFormat == dm_rgbe_alpha) {
291 		*BytePerPixelDETY = 4;
292 		*BytePerPixelDETC = 1;
293 		*BytePerPixelY = 4;
294 		*BytePerPixelC = 1;
295 	} else if (SourcePixelFormat == dm_420_8) {
296 		*BytePerPixelDETY = 1;
297 		*BytePerPixelDETC = 2;
298 		*BytePerPixelY = 1;
299 		*BytePerPixelC = 2;
300 	} else if (SourcePixelFormat == dm_420_12) {
301 		*BytePerPixelDETY = 2;
302 		*BytePerPixelDETC = 4;
303 		*BytePerPixelY = 2;
304 		*BytePerPixelC = 4;
305 	} else {
306 		*BytePerPixelDETY = 4.0 / 3;
307 		*BytePerPixelDETC = 8.0 / 3;
308 		*BytePerPixelY = 2;
309 		*BytePerPixelC = 4;
310 	}
311 #ifdef __DML_VBA_DEBUG__
312 	dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
313 	dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
314 	dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
315 	dml_print("DML::%s: BytePerPixelY    = %d\n", __func__, *BytePerPixelY);
316 	dml_print("DML::%s: BytePerPixelC    = %d\n", __func__, *BytePerPixelC);
317 #endif
318 	if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
319 			|| SourcePixelFormat == dm_444_16
320 			|| SourcePixelFormat == dm_444_8
321 			|| SourcePixelFormat == dm_mono_16
322 			|| SourcePixelFormat == dm_mono_8
323 			|| SourcePixelFormat == dm_rgbe)) {
324 		if (SurfaceTiling == dm_sw_linear)
325 			*BlockHeight256BytesY = 1;
326 		else if (SourcePixelFormat == dm_444_64)
327 			*BlockHeight256BytesY = 4;
328 		else if (SourcePixelFormat == dm_444_8)
329 			*BlockHeight256BytesY = 16;
330 		else
331 			*BlockHeight256BytesY = 8;
332 
333 		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
334 		*BlockHeight256BytesC = 0;
335 		*BlockWidth256BytesC = 0;
336 	} else {
337 		if (SurfaceTiling == dm_sw_linear) {
338 			*BlockHeight256BytesY = 1;
339 			*BlockHeight256BytesC = 1;
340 		} else if (SourcePixelFormat == dm_rgbe_alpha) {
341 			*BlockHeight256BytesY = 8;
342 			*BlockHeight256BytesC = 16;
343 		} else if (SourcePixelFormat == dm_420_8) {
344 			*BlockHeight256BytesY = 16;
345 			*BlockHeight256BytesC = 8;
346 		} else {
347 			*BlockHeight256BytesY = 8;
348 			*BlockHeight256BytesC = 8;
349 		}
350 		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
351 		*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
352 	}
353 #ifdef __DML_VBA_DEBUG__
354 	dml_print("DML::%s: BlockWidth256BytesY  = %d\n", __func__, *BlockWidth256BytesY);
355 	dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
356 	dml_print("DML::%s: BlockWidth256BytesC  = %d\n", __func__, *BlockWidth256BytesC);
357 	dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
358 #endif
359 
360 	if (SurfaceTiling == dm_sw_linear) {
361 		*MacroTileHeightY = *BlockHeight256BytesY;
362 		*MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
363 		*MacroTileHeightC = *BlockHeight256BytesC;
364 		if (*MacroTileHeightC == 0)
365 			*MacroTileWidthC = 0;
366 		else
367 			*MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
368 	} else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
369 			SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
370 		*MacroTileHeightY = 16 * *BlockHeight256BytesY;
371 		*MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
372 		*MacroTileHeightC = 16 * *BlockHeight256BytesC;
373 		if (*MacroTileHeightC == 0)
374 			*MacroTileWidthC = 0;
375 		else
376 			*MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
377 	} else {
378 		*MacroTileHeightY = 32 * *BlockHeight256BytesY;
379 		*MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
380 		*MacroTileHeightC = 32 * *BlockHeight256BytesC;
381 		if (*MacroTileHeightC == 0)
382 			*MacroTileWidthC = 0;
383 		else
384 			*MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385 	}
386 
387 #ifdef __DML_VBA_DEBUG__
388 	dml_print("DML::%s: MacroTileWidthY  = %d\n", __func__, *MacroTileWidthY);
389 	dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
390 	dml_print("DML::%s: MacroTileWidthC  = %d\n", __func__, *MacroTileWidthC);
391 	dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
392 #endif
393 } // CalculateBytePerPixelAndBlockSizes
394 
395 void dml32_CalculateSwathAndDETConfiguration(
396 		unsigned int DETSizeOverride[],
397 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
398 		unsigned int ConfigReturnBufferSizeInKByte,
399 		unsigned int MaxTotalDETInKByte,
400 		unsigned int MinCompressedBufferSizeInKByte,
401 		double ForceSingleDPP,
402 		unsigned int NumberOfActiveSurfaces,
403 		unsigned int nomDETInKByte,
404 		enum unbounded_requesting_policy UseUnboundedRequestingFinal,
405 		bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
406 		unsigned int PixelChunkSizeKBytes,
407 		unsigned int ROBSizeKBytes,
408 		unsigned int CompressedBufferSegmentSizeInkByteFinal,
409 		enum output_encoder_class Output[],
410 		double ReadBandwidthLuma[],
411 		double ReadBandwidthChroma[],
412 		double MaximumSwathWidthLuma[],
413 		double MaximumSwathWidthChroma[],
414 		enum dm_rotation_angle SourceRotation[],
415 		bool ViewportStationary[],
416 		enum source_format_class SourcePixelFormat[],
417 		enum dm_swizzle_mode SurfaceTiling[],
418 		unsigned int ViewportWidth[],
419 		unsigned int ViewportHeight[],
420 		unsigned int ViewportXStart[],
421 		unsigned int ViewportYStart[],
422 		unsigned int ViewportXStartC[],
423 		unsigned int ViewportYStartC[],
424 		unsigned int SurfaceWidthY[],
425 		unsigned int SurfaceWidthC[],
426 		unsigned int SurfaceHeightY[],
427 		unsigned int SurfaceHeightC[],
428 		unsigned int Read256BytesBlockHeightY[],
429 		unsigned int Read256BytesBlockHeightC[],
430 		unsigned int Read256BytesBlockWidthY[],
431 		unsigned int Read256BytesBlockWidthC[],
432 		enum odm_combine_mode ODMMode[],
433 		unsigned int BlendingAndTiming[],
434 		unsigned int BytePerPixY[],
435 		unsigned int BytePerPixC[],
436 		double BytePerPixDETY[],
437 		double BytePerPixDETC[],
438 		unsigned int HActive[],
439 		double HRatio[],
440 		double HRatioChroma[],
441 		unsigned int DPPPerSurface[],
442 
443 		/* Output */
444 		unsigned int swath_width_luma_ub[],
445 		unsigned int swath_width_chroma_ub[],
446 		double SwathWidth[],
447 		double SwathWidthChroma[],
448 		unsigned int SwathHeightY[],
449 		unsigned int SwathHeightC[],
450 		unsigned int DETBufferSizeInKByte[],
451 		unsigned int DETBufferSizeY[],
452 		unsigned int DETBufferSizeC[],
453 		bool *UnboundedRequestEnabled,
454 		unsigned int *CompressedBufferSizeInkByte,
455 		unsigned int *CompBufReservedSpaceKBytes,
456 		bool *CompBufReservedSpaceNeedAdjustment,
457 		bool ViewportSizeSupportPerSurface[],
458 		bool *ViewportSizeSupport)
459 {
460 	unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
461 	unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
462 	unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
463 	unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
464 	unsigned int RoundedUpSwathSizeBytesY;
465 	unsigned int RoundedUpSwathSizeBytesC;
466 	double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
467 	double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
468 	unsigned int k;
469 	unsigned int TotalActiveDPP = 0;
470 	bool NoChromaSurfaces = true;
471 	unsigned int DETBufferSizeInKByteForSwathCalculation;
472 
473 #ifdef __DML_VBA_DEBUG__
474 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
475 	dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
476 	dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
477 #endif
478 	dml32_CalculateSwathWidth(ForceSingleDPP,
479 			NumberOfActiveSurfaces,
480 			SourcePixelFormat,
481 			SourceRotation,
482 			ViewportStationary,
483 			ViewportWidth,
484 			ViewportHeight,
485 			ViewportXStart,
486 			ViewportYStart,
487 			ViewportXStartC,
488 			ViewportYStartC,
489 			SurfaceWidthY,
490 			SurfaceWidthC,
491 			SurfaceHeightY,
492 			SurfaceHeightC,
493 			ODMMode,
494 			BytePerPixY,
495 			BytePerPixC,
496 			Read256BytesBlockHeightY,
497 			Read256BytesBlockHeightC,
498 			Read256BytesBlockWidthY,
499 			Read256BytesBlockWidthC,
500 			BlendingAndTiming,
501 			HActive,
502 			HRatio,
503 			DPPPerSurface,
504 
505 			/* Output */
506 			SwathWidthdoubleDPP,
507 			SwathWidthdoubleDPPChroma,
508 			SwathWidth,
509 			SwathWidthChroma,
510 			MaximumSwathHeightY,
511 			MaximumSwathHeightC,
512 			swath_width_luma_ub,
513 			swath_width_chroma_ub);
514 
515 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
516 		RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
517 		RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
518 #ifdef __DML_VBA_DEBUG__
519 		dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
520 		dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
521 		dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
522 		dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
523 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
524 				RoundedUpMaxSwathSizeBytesY[k]);
525 		dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
526 		dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
527 		dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
528 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
529 				RoundedUpMaxSwathSizeBytesC[k]);
530 #endif
531 
532 		if (SourcePixelFormat[k] == dm_420_10) {
533 			RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
534 			RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
535 		}
536 	}
537 
538 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
539 		TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
540 		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
541 				SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
542 			NoChromaSurfaces = false;
543 		}
544 	}
545 
546 	// By default, just set the reserved space to 2 pixel chunks size
547 	*CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
548 
549 	// if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
550 	// - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
551 	// - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
552 	*CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
553 
554 	if (*CompBufReservedSpaceNeedAdjustment == 1) {
555 		*CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
556 	}
557 
558 	#ifdef __DML_VBA_DEBUG__
559 		dml_print("DML::%s: CompBufReservedSpaceKBytes          = %d\n",  __func__, *CompBufReservedSpaceKBytes);
560 		dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, *CompBufReservedSpaceNeedAdjustment);
561 	#endif
562 
563 	*UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
564 
565 	dml32_CalculateDETBufferSize(DETSizeOverride,
566 			UseMALLForPStateChange,
567 			ForceSingleDPP,
568 			NumberOfActiveSurfaces,
569 			*UnboundedRequestEnabled,
570 			nomDETInKByte,
571 			MaxTotalDETInKByte,
572 			ConfigReturnBufferSizeInKByte,
573 			MinCompressedBufferSizeInKByte,
574 			CompressedBufferSegmentSizeInkByteFinal,
575 			SourcePixelFormat,
576 			ReadBandwidthLuma,
577 			ReadBandwidthChroma,
578 			RoundedUpMaxSwathSizeBytesY,
579 			RoundedUpMaxSwathSizeBytesC,
580 			DPPPerSurface,
581 
582 			/* Output */
583 			DETBufferSizeInKByte,    // per hubp pipe
584 			CompressedBufferSizeInkByte);
585 
586 #ifdef __DML_VBA_DEBUG__
587 	dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
588 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
589 	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
590 	dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
591 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
592 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
593 #endif
594 
595 	*ViewportSizeSupport = true;
596 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
597 
598 		DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
599 				dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
600 #ifdef __DML_VBA_DEBUG__
601 		dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
602 				DETBufferSizeInKByteForSwathCalculation);
603 #endif
604 
605 		if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
606 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
607 			SwathHeightY[k] = MaximumSwathHeightY[k];
608 			SwathHeightC[k] = MaximumSwathHeightC[k];
609 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
610 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
611 		} else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
612 				RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
613 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
614 			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
615 			SwathHeightC[k] = MaximumSwathHeightC[k];
616 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
617 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
618 		} else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
619 				RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
620 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
621 			SwathHeightY[k] = MaximumSwathHeightY[k];
622 			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
623 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
624 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
625 		} else {
626 			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
627 			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
628 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
629 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
630 		}
631 
632 		if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
633 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
634 				|| SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
635 						SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
636 			*ViewportSizeSupport = false;
637 			ViewportSizeSupportPerSurface[k] = false;
638 		} else {
639 			ViewportSizeSupportPerSurface[k] = true;
640 		}
641 
642 		if (SwathHeightC[k] == 0) {
643 #ifdef __DML_VBA_DEBUG__
644 			dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
645 #endif
646 			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
647 			DETBufferSizeC[k] = 0;
648 		} else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
649 #ifdef __DML_VBA_DEBUG__
650 			dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
651 #endif
652 			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
653 			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
654 		} else {
655 #ifdef __DML_VBA_DEBUG__
656 			dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
657 #endif
658 			DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
659 			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
660 		}
661 
662 #ifdef __DML_VBA_DEBUG__
663 		dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
664 		dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
665 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
666 				k, RoundedUpMaxSwathSizeBytesY[k]);
667 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
668 				k, RoundedUpMaxSwathSizeBytesC[k]);
669 		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
670 		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
671 		dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
672 		dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
673 		dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
674 		dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
675 				ViewportSizeSupportPerSurface[k]);
676 #endif
677 
678 	}
679 } // CalculateSwathAndDETConfiguration
680 
681 void dml32_CalculateSwathWidth(
682 		bool				ForceSingleDPP,
683 		unsigned int			NumberOfActiveSurfaces,
684 		enum source_format_class	SourcePixelFormat[],
685 		enum dm_rotation_angle		SourceRotation[],
686 		bool				ViewportStationary[],
687 		unsigned int			ViewportWidth[],
688 		unsigned int			ViewportHeight[],
689 		unsigned int			ViewportXStart[],
690 		unsigned int			ViewportYStart[],
691 		unsigned int			ViewportXStartC[],
692 		unsigned int			ViewportYStartC[],
693 		unsigned int			SurfaceWidthY[],
694 		unsigned int			SurfaceWidthC[],
695 		unsigned int			SurfaceHeightY[],
696 		unsigned int			SurfaceHeightC[],
697 		enum odm_combine_mode		ODMMode[],
698 		unsigned int			BytePerPixY[],
699 		unsigned int			BytePerPixC[],
700 		unsigned int			Read256BytesBlockHeightY[],
701 		unsigned int			Read256BytesBlockHeightC[],
702 		unsigned int			Read256BytesBlockWidthY[],
703 		unsigned int			Read256BytesBlockWidthC[],
704 		unsigned int			BlendingAndTiming[],
705 		unsigned int			HActive[],
706 		double				HRatio[],
707 		unsigned int			DPPPerSurface[],
708 
709 		/* Output */
710 		double				SwathWidthdoubleDPPY[],
711 		double				SwathWidthdoubleDPPC[],
712 		double				SwathWidthY[], // per-pipe
713 		double				SwathWidthC[], // per-pipe
714 		unsigned int			MaximumSwathHeightY[],
715 		unsigned int			MaximumSwathHeightC[],
716 		unsigned int			swath_width_luma_ub[], // per-pipe
717 		unsigned int			swath_width_chroma_ub[]) // per-pipe
718 {
719 	unsigned int k, j;
720 	enum odm_combine_mode MainSurfaceODMMode;
721 
722 	unsigned int surface_width_ub_l;
723 	unsigned int surface_height_ub_l;
724 	unsigned int surface_width_ub_c = 0;
725 	unsigned int surface_height_ub_c = 0;
726 
727 #ifdef __DML_VBA_DEBUG__
728 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
729 	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
730 #endif
731 
732 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
733 		if (!IsVertical(SourceRotation[k]))
734 			SwathWidthdoubleDPPY[k] = ViewportWidth[k];
735 		else
736 			SwathWidthdoubleDPPY[k] = ViewportHeight[k];
737 
738 #ifdef __DML_VBA_DEBUG__
739 		dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
740 		dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
741 #endif
742 
743 		MainSurfaceODMMode = ODMMode[k];
744 		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
745 			if (BlendingAndTiming[k] == j)
746 				MainSurfaceODMMode = ODMMode[j];
747 		}
748 
749 		if (ForceSingleDPP) {
750 			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
751 		} else {
752 			if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
753 				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
754 						dml_round(HActive[k] / 4.0 * HRatio[k]));
755 			} else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
756 				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
757 						dml_round(HActive[k] / 2.0 * HRatio[k]));
758 			} else if (DPPPerSurface[k] == 2) {
759 				SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
760 			} else {
761 				SwathWidthY[k] = SwathWidthdoubleDPPY[k];
762 			}
763 		}
764 
765 #ifdef __DML_VBA_DEBUG__
766 		dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
767 		dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
768 		dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
769 		dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
770 		dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
771 #endif
772 
773 		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
774 				SourcePixelFormat[k] == dm_420_12) {
775 			SwathWidthC[k] = SwathWidthY[k] / 2;
776 			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
777 		} else {
778 			SwathWidthC[k] = SwathWidthY[k];
779 			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
780 		}
781 
782 		if (ForceSingleDPP == true) {
783 			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
784 			SwathWidthC[k] = SwathWidthdoubleDPPC[k];
785 		}
786 
787 		surface_width_ub_l  = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
788 		surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
789 
790 		if (!IsVertical(SourceRotation[k])) {
791 			MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
792 			MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
793 			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
794 				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
795 						dml_floor(ViewportXStart[k] +
796 								SwathWidthY[k] +
797 								Read256BytesBlockWidthY[k] - 1,
798 								Read256BytesBlockWidthY[k]) -
799 								dml_floor(ViewportXStart[k],
800 								Read256BytesBlockWidthY[k]));
801 			} else {
802 				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
803 						dml_ceil(SwathWidthY[k] - 1,
804 								Read256BytesBlockWidthY[k]) +
805 								Read256BytesBlockWidthY[k]);
806 			}
807 			if (BytePerPixC[k] > 0) {
808 				surface_width_ub_c  = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
809 				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
810 					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
811 							dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
812 									Read256BytesBlockWidthC[k] - 1,
813 									Read256BytesBlockWidthC[k]) -
814 									dml_floor(ViewportXStartC[k],
815 									Read256BytesBlockWidthC[k]));
816 				} else {
817 					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
818 							dml_ceil(SwathWidthC[k] - 1,
819 								Read256BytesBlockWidthC[k]) +
820 								Read256BytesBlockWidthC[k]);
821 				}
822 			} else {
823 				swath_width_chroma_ub[k] = 0;
824 			}
825 		} else {
826 			MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
827 			MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
828 
829 			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
830 				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
831 						SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
832 						Read256BytesBlockHeightY[k]) -
833 						dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
834 			} else {
835 				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
836 						Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
837 			}
838 			if (BytePerPixC[k] > 0) {
839 				surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
840 				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
841 					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
842 							dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
843 									Read256BytesBlockHeightC[k] - 1,
844 									Read256BytesBlockHeightC[k]) -
845 									dml_floor(ViewportYStartC[k],
846 											Read256BytesBlockHeightC[k]));
847 				} else {
848 					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
849 							dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
850 							Read256BytesBlockHeightC[k]);
851 				}
852 			} else {
853 				swath_width_chroma_ub[k] = 0;
854 			}
855 		}
856 
857 #ifdef __DML_VBA_DEBUG__
858 		dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
859 		dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
860 		dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
861 		dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
862 		dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
863 		dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
864 		dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
865 		dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
866 		dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
867 		dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
868 		dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
869 		dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
870 		dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
871 		dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
872 #endif
873 
874 	}
875 } // CalculateSwathWidth
876 
877 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
878 			unsigned int TotalNumberOfActiveDPP,
879 			bool NoChroma,
880 			enum output_encoder_class Output,
881 			enum dm_swizzle_mode SurfaceTiling,
882 			bool CompBufReservedSpaceNeedAdjustment,
883 			bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
884 {
885 	bool ret_val = false;
886 
887 	ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
888 			TotalNumberOfActiveDPP == 1 && NoChroma);
889 	if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
890 		ret_val = false;
891 
892 	if (SurfaceTiling == dm_sw_linear)
893 		ret_val = false;
894 
895 	if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
896 		ret_val = false;
897 
898 #ifdef __DML_VBA_DEBUG__
899 	dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, CompBufReservedSpaceNeedAdjustment);
900 	dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
901 	dml_print("DML::%s: ret_val = %d\n",  __func__, ret_val);
902 #endif
903 
904 	return (ret_val);
905 }
906 
907 void dml32_CalculateDETBufferSize(
908 		unsigned int DETSizeOverride[],
909 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
910 		bool ForceSingleDPP,
911 		unsigned int NumberOfActiveSurfaces,
912 		bool UnboundedRequestEnabled,
913 		unsigned int nomDETInKByte,
914 		unsigned int MaxTotalDETInKByte,
915 		unsigned int ConfigReturnBufferSizeInKByte,
916 		unsigned int MinCompressedBufferSizeInKByte,
917 		unsigned int CompressedBufferSegmentSizeInkByteFinal,
918 		enum source_format_class SourcePixelFormat[],
919 		double ReadBandwidthLuma[],
920 		double ReadBandwidthChroma[],
921 		unsigned int RoundedUpMaxSwathSizeBytesY[],
922 		unsigned int RoundedUpMaxSwathSizeBytesC[],
923 		unsigned int DPPPerSurface[],
924 		/* Output */
925 		unsigned int DETBufferSizeInKByte[],
926 		unsigned int *CompressedBufferSizeInkByte)
927 {
928 	unsigned int DETBufferSizePoolInKByte;
929 	unsigned int NextDETBufferPieceInKByte;
930 	bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
931 	bool NextPotentialSurfaceToAssignDETPieceFound;
932 	unsigned int NextSurfaceToAssignDETPiece;
933 	double TotalBandwidth;
934 	double BandwidthOfSurfacesNotAssignedDETPiece;
935 	unsigned int max_minDET;
936 	unsigned int minDET;
937 	unsigned int minDET_pipe;
938 	unsigned int j, k;
939 
940 #ifdef __DML_VBA_DEBUG__
941 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
942 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
943 	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
944 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
945 	dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
946 	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
947 	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
948 	dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
949 			CompressedBufferSegmentSizeInkByteFinal);
950 #endif
951 
952 	// Note: Will use default det size if that fits 2 swaths
953 	if (UnboundedRequestEnabled) {
954 		if (DETSizeOverride[0] > 0) {
955 			DETBufferSizeInKByte[0] = DETSizeOverride[0];
956 		} else {
957 			DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
958 					((double) RoundedUpMaxSwathSizeBytesY[0] +
959 							(double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
960 		}
961 		*CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
962 	} else {
963 		DETBufferSizePoolInKByte = MaxTotalDETInKByte;
964 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
965 			DETBufferSizeInKByte[k] = nomDETInKByte;
966 			if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
967 					SourcePixelFormat[k] == dm_420_12) {
968 				max_minDET = nomDETInKByte - 64;
969 			} else {
970 				max_minDET = nomDETInKByte;
971 			}
972 			minDET = 128;
973 			minDET_pipe = 0;
974 
975 			// add DET resource until can hold 2 full swaths
976 			while (minDET <= max_minDET && minDET_pipe == 0) {
977 				if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
978 						(double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
979 					minDET_pipe = minDET;
980 				minDET = minDET + 64;
981 			}
982 
983 #ifdef __DML_VBA_DEBUG__
984 			dml_print("DML::%s: k=%0d minDET        = %d\n", __func__, k, minDET);
985 			dml_print("DML::%s: k=%0d max_minDET    = %d\n", __func__, k, max_minDET);
986 			dml_print("DML::%s: k=%0d minDET_pipe   = %d\n", __func__, k, minDET_pipe);
987 			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
988 					RoundedUpMaxSwathSizeBytesY[k]);
989 			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
990 					RoundedUpMaxSwathSizeBytesC[k]);
991 #endif
992 
993 			if (minDET_pipe == 0) {
994 				minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
995 						(double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
996 #ifdef __DML_VBA_DEBUG__
997 				dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
998 						__func__, k, minDET_pipe);
999 #endif
1000 			}
1001 
1002 			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1003 				DETBufferSizeInKByte[k] = 0;
1004 			} else if (DETSizeOverride[k] > 0) {
1005 				DETBufferSizeInKByte[k] = DETSizeOverride[k];
1006 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1007 						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1008 			} else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1009 				DETBufferSizeInKByte[k] = minDET_pipe;
1010 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1011 						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1012 			}
1013 
1014 #ifdef __DML_VBA_DEBUG__
1015 			dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1016 			dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1017 			dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1018 			dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
1019 #endif
1020 		}
1021 
1022 		TotalBandwidth = 0;
1023 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1024 			if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1025 				TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1026 		}
1027 #ifdef __DML_VBA_DEBUG__
1028 		dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1029 		for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1030 			dml_print("DML::%s: k=%d DETBufferSizeInKByte   = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1031 		dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1032 		dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
1033 #endif
1034 		BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1035 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1036 
1037 			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1038 				DETPieceAssignedToThisSurfaceAlready[k] = true;
1039 			} else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1040 					(double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1041 					((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1042 				DETPieceAssignedToThisSurfaceAlready[k] = true;
1043 				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1044 						ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1045 			} else {
1046 				DETPieceAssignedToThisSurfaceAlready[k] = false;
1047 			}
1048 #ifdef __DML_VBA_DEBUG__
1049 			dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1050 					DETPieceAssignedToThisSurfaceAlready[k]);
1051 			dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1052 					BandwidthOfSurfacesNotAssignedDETPiece);
1053 #endif
1054 		}
1055 
1056 		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1057 			NextPotentialSurfaceToAssignDETPieceFound = false;
1058 			NextSurfaceToAssignDETPiece = 0;
1059 
1060 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1061 #ifdef __DML_VBA_DEBUG__
1062 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1063 						ReadBandwidthLuma[k]);
1064 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1065 						ReadBandwidthChroma[k]);
1066 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1067 						ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1068 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1069 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1070 				dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1071 						NextSurfaceToAssignDETPiece);
1072 #endif
1073 				if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1074 						(!NextPotentialSurfaceToAssignDETPieceFound ||
1075 						ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1076 						ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1077 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1078 					NextSurfaceToAssignDETPiece = k;
1079 					NextPotentialSurfaceToAssignDETPieceFound = true;
1080 				}
1081 #ifdef __DML_VBA_DEBUG__
1082 				dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1083 						__func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1084 				dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1085 						__func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1086 #endif
1087 			}
1088 
1089 			if (NextPotentialSurfaceToAssignDETPieceFound) {
1090 				// Note: To show the banker's rounding behavior in VBA and also the fact
1091 				// that the DET buffer size varies due to precision issue
1092 				//
1093 				//double tmp1 =  ((double) DETBufferSizePoolInKByte *
1094 				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1095 				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1096 				// BandwidthOfSurfacesNotAssignedDETPiece /
1097 				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1098 				//double tmp2 =  dml_round((double) DETBufferSizePoolInKByte *
1099 				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1100 				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1101 				 //BandwidthOfSurfacesNotAssignedDETPiece /
1102 				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1103 				//
1104 				//dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1105 				//dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1106 
1107 				NextDETBufferPieceInKByte = dml_min(
1108 					dml_round((double) DETBufferSizePoolInKByte *
1109 						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1110 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1111 						BandwidthOfSurfacesNotAssignedDETPiece /
1112 						((ForceSingleDPP ? 1 :
1113 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1114 						(ForceSingleDPP ? 1 :
1115 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1116 						dml_floor((double) DETBufferSizePoolInKByte,
1117 						(ForceSingleDPP ? 1 :
1118 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1119 
1120 				// Above calculation can assign the entire DET buffer allocation to a single pipe.
1121 				// We should limit the per-pipe DET size to the nominal / max per pipe.
1122 				if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1123 					if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1124 							nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1125 						NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1126 								DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1127 					} else {
1128 						// Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1129 						// already has the max per-pipe value
1130 						NextDETBufferPieceInKByte = 0;
1131 					}
1132 				}
1133 
1134 #ifdef __DML_VBA_DEBUG__
1135 				dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1136 					DETBufferSizePoolInKByte);
1137 				dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1138 					NextSurfaceToAssignDETPiece);
1139 				dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1140 					NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1141 				dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1142 					NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1143 				dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1144 					__func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1145 				dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1146 					NextDETBufferPieceInKByte);
1147 				dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1148 					__func__, j, NextSurfaceToAssignDETPiece,
1149 					DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1150 #endif
1151 
1152 				DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1153 						DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1154 						+ NextDETBufferPieceInKByte
1155 						/ (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1156 #ifdef __DML_VBA_DEBUG__
1157 				dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1158 #endif
1159 
1160 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1161 				DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1162 				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1163 						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1164 								ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1165 			}
1166 		}
1167 		*CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1168 	}
1169 	*CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1170 
1171 #ifdef __DML_VBA_DEBUG__
1172 	dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1173 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1174 	for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1175 		dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1176 				__func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1177 	}
1178 #endif
1179 } // CalculateDETBufferSize
1180 
1181 void dml32_CalculateODMMode(
1182 		unsigned int MaximumPixelsPerLinePerDSCUnit,
1183 		unsigned int HActive,
1184 		enum output_format_class OutFormat,
1185 		enum output_encoder_class Output,
1186 		enum odm_combine_policy ODMUse,
1187 		double StateDispclk,
1188 		double MaxDispclk,
1189 		bool DSCEnable,
1190 		unsigned int TotalNumberOfActiveDPP,
1191 		unsigned int MaxNumDPP,
1192 		double PixelClock,
1193 		double DISPCLKDPPCLKDSCCLKDownSpreading,
1194 		double DISPCLKRampingMargin,
1195 		double DISPCLKDPPCLKVCOSpeed,
1196 		unsigned int NumberOfDSCSlices,
1197 
1198 		/* Output */
1199 		bool *TotalAvailablePipesSupport,
1200 		unsigned int *NumberOfDPP,
1201 		enum odm_combine_mode *ODMMode,
1202 		double *RequiredDISPCLKPerSurface)
1203 {
1204 
1205 	double SurfaceRequiredDISPCLKWithoutODMCombine;
1206 	double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1207 	double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1208 
1209 	SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1210 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1211 			MaxDispclk);
1212 	SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1213 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1214 			MaxDispclk);
1215 	SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1216 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1217 			MaxDispclk);
1218 	*TotalAvailablePipesSupport = true;
1219 	*ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1220 
1221 	if (ODMUse == dm_odm_combine_policy_none)
1222 		*ODMMode = dm_odm_combine_mode_disabled;
1223 
1224 	*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1225 	*NumberOfDPP = 0;
1226 
1227 	// FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1228 	// (ODMUse == "" || ODMUse == "CombineAsNeeded")
1229 
1230 	if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1231 			((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1232 					(DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
1233 					|| NumberOfDSCSlices > 8)))) {
1234 		if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1235 			*ODMMode = dm_odm_combine_mode_4to1;
1236 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1237 			*NumberOfDPP = 4;
1238 		} else {
1239 			*TotalAvailablePipesSupport = false;
1240 		}
1241 	} else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1242 			(((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1243 					SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1244 					(DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
1245 					|| (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
1246 		if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1247 			*ODMMode = dm_odm_combine_mode_2to1;
1248 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1249 			*NumberOfDPP = 2;
1250 		} else {
1251 			*TotalAvailablePipesSupport = false;
1252 		}
1253 	} else {
1254 		if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1255 			*NumberOfDPP = 1;
1256 		else
1257 			*TotalAvailablePipesSupport = false;
1258 	}
1259 	if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH &&
1260 			ODMUse != dm_odm_combine_policy_4to1) {
1261 		if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) {
1262 			*ODMMode = dm_odm_combine_mode_disabled;
1263 			*NumberOfDPP = 0;
1264 			*TotalAvailablePipesSupport = false;
1265 		} else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 ||
1266 				*ODMMode == dm_odm_combine_mode_4to1) {
1267 			*ODMMode = dm_odm_combine_mode_4to1;
1268 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1269 			*NumberOfDPP = 4;
1270 		} else {
1271 			*ODMMode = dm_odm_combine_mode_2to1;
1272 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1273 			*NumberOfDPP = 2;
1274 		}
1275 	}
1276 	if (Output == dm_hdmi && OutFormat == dm_420 &&
1277 			HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) {
1278 		*ODMMode = dm_odm_combine_mode_disabled;
1279 		*NumberOfDPP = 0;
1280 		*TotalAvailablePipesSupport = false;
1281 	}
1282 }
1283 
1284 double dml32_CalculateRequiredDispclk(
1285 		enum odm_combine_mode ODMMode,
1286 		double PixelClock,
1287 		double DISPCLKDPPCLKDSCCLKDownSpreading,
1288 		double DISPCLKRampingMargin,
1289 		double DISPCLKDPPCLKVCOSpeed,
1290 		double MaxDispclk)
1291 {
1292 	double RequiredDispclk = 0.;
1293 	double PixelClockAfterODM;
1294 	double DISPCLKWithRampingRoundedToDFSGranularity;
1295 	double DISPCLKWithoutRampingRoundedToDFSGranularity;
1296 	double MaxDispclkRoundedDownToDFSGranularity;
1297 
1298 	if (ODMMode == dm_odm_combine_mode_4to1)
1299 		PixelClockAfterODM = PixelClock / 4;
1300 	else if (ODMMode == dm_odm_combine_mode_2to1)
1301 		PixelClockAfterODM = PixelClock / 2;
1302 	else
1303 		PixelClockAfterODM = PixelClock;
1304 
1305 
1306 	DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1307 			PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1308 					* (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1309 
1310 	DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1311 			PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1312 
1313 	MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1314 
1315 	if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1316 		RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1317 	else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1318 		RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1319 	else
1320 		RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1321 
1322 	return RequiredDispclk;
1323 }
1324 
/*
 * Round a clock to a value of the form (VCOSpeed * 4) / divider.
 *
 * Returns 0.0 for a non-positive Clock. Otherwise the quotient
 * VCOSpeed * 4 / Clock is passed through dml_floor() (round_up) or
 * dml_ceil() (!round_up) with a granularity of 1.0, and VCOSpeed * 4 divided
 * by that result is returned.
 * NOTE(review): presumably dml_floor/dml_ceil with 1.0 yield whole-number
 * dividers, which makes round_up produce a clock >= Clock - confirm against
 * the helper definitions.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double Divider;

	if (Clock <= 0.0)
		return 0.0;

	Divider = VCOSpeed * 4.0 / Clock;
	/* A smaller divider yields a higher clock, hence floor when rounding up. */
	Divider = round_up ? dml_floor(Divider, 1.0) : dml_ceil(Divider, 1.0);

	return VCOSpeed * 4.0 / Divider;
}
1335 
1336 void dml32_CalculateOutputLink(
1337 		double PHYCLKPerState,
1338 		double PHYCLKD18PerState,
1339 		double PHYCLKD32PerState,
1340 		double Downspreading,
1341 		bool IsMainSurfaceUsingTheIndicatedTiming,
1342 		enum output_encoder_class Output,
1343 		enum output_format_class OutputFormat,
1344 		unsigned int HTotal,
1345 		unsigned int HActive,
1346 		double PixelClockBackEnd,
1347 		double ForcedOutputLinkBPP,
1348 		unsigned int DSCInputBitPerComponent,
1349 		unsigned int NumberOfDSCSlices,
1350 		double AudioSampleRate,
1351 		unsigned int AudioSampleLayout,
1352 		enum odm_combine_mode ODMModeNoDSC,
1353 		enum odm_combine_mode ODMModeDSC,
1354 		bool DSCEnable,
1355 		unsigned int OutputLinkDPLanes,
1356 		enum dm_output_link_dp_rate OutputLinkDPRate,
1357 
1358 		/* Output */
1359 		bool *RequiresDSC,
1360 		double *RequiresFEC,
1361 		double  *OutBpp,
1362 		enum dm_output_type *OutputType,
1363 		enum dm_output_rate *OutputRate,
1364 		unsigned int *RequiredSlots)
1365 {
1366 	bool LinkDSCEnable;
1367 	unsigned int dummy;
1368 	*RequiresDSC = false;
1369 	*RequiresFEC = false;
1370 	*OutBpp = 0;
1371 	*OutputType = dm_output_type_unknown;
1372 	*OutputRate = dm_output_rate_unknown;
1373 
1374 	if (IsMainSurfaceUsingTheIndicatedTiming) {
1375 		if (Output == dm_hdmi) {
1376 			*RequiresDSC = false;
1377 			*RequiresFEC = false;
1378 			*OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1379 					PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1380 					DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1381 					ODMModeNoDSC, ODMModeDSC, &dummy);
1382 			//OutputTypeAndRate = "HDMI";
1383 			*OutputType = dm_output_type_hdmi;
1384 
1385 		} else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1386 			if (DSCEnable == true) {
1387 				*RequiresDSC = true;
1388 				LinkDSCEnable = true;
1389 				if (Output == dm_dp || Output == dm_dp2p0)
1390 					*RequiresFEC = true;
1391 				else
1392 					*RequiresFEC = false;
1393 			} else {
1394 				*RequiresDSC = false;
1395 				LinkDSCEnable = false;
1396 				if (Output == dm_dp2p0)
1397 					*RequiresFEC = true;
1398 				else
1399 					*RequiresFEC = false;
1400 			}
1401 			if (Output == dm_dp2p0) {
1402 				*OutBpp = 0;
1403 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1404 						PHYCLKD32PerState >= 10000 / 32) {
1405 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1406 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1407 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1408 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1409 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1410 					if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
1411 							ForcedOutputLinkBPP == 0) {
1412 						*RequiresDSC = true;
1413 						LinkDSCEnable = true;
1414 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1415 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1416 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1417 								OutputFormat, DSCInputBitPerComponent,
1418 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1419 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1420 					}
1421 					//OutputTypeAndRate = Output & " UHBR10";
1422 					*OutputType = dm_output_type_dp2p0;
1423 					*OutputRate = dm_output_rate_dp_rate_uhbr10;
1424 				}
1425 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1426 						*OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
1427 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1428 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1429 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1430 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1431 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1432 
1433 					if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1434 							ForcedOutputLinkBPP == 0) {
1435 						*RequiresDSC = true;
1436 						LinkDSCEnable = true;
1437 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1438 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1439 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1440 								OutputFormat, DSCInputBitPerComponent,
1441 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1442 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1443 					}
1444 					//OutputTypeAndRate = Output & " UHBR13p5";
1445 					*OutputType = dm_output_type_dp2p0;
1446 					*OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1447 				}
1448 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1449 						*OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1450 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1451 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1452 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1453 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1454 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1455 					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1456 						*RequiresDSC = true;
1457 						LinkDSCEnable = true;
1458 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1459 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1460 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1461 								OutputFormat, DSCInputBitPerComponent,
1462 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1463 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1464 					}
1465 					//OutputTypeAndRate = Output & " UHBR20";
1466 					*OutputType = dm_output_type_dp2p0;
1467 					*OutputRate = dm_output_rate_dp_rate_uhbr20;
1468 				}
1469 			} else {
1470 				*OutBpp = 0;
1471 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1472 						PHYCLKPerState >= 270) {
1473 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1474 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1475 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1476 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1477 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1478 					if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1479 							ForcedOutputLinkBPP == 0) {
1480 						*RequiresDSC = true;
1481 						LinkDSCEnable = true;
1482 						if (Output == dm_dp)
1483 							*RequiresFEC = true;
1484 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1485 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1486 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1487 								OutputFormat, DSCInputBitPerComponent,
1488 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1489 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1490 					}
1491 					//OutputTypeAndRate = Output & " HBR";
1492 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1493 					*OutputRate = dm_output_rate_dp_rate_hbr;
1494 				}
1495 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1496 						*OutBpp == 0 && PHYCLKPerState >= 540) {
1497 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1498 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1499 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1500 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1501 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1502 
1503 					if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1504 							ForcedOutputLinkBPP == 0) {
1505 						*RequiresDSC = true;
1506 						LinkDSCEnable = true;
1507 						if (Output == dm_dp)
1508 							*RequiresFEC = true;
1509 
1510 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1511 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1512 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1513 								OutputFormat, DSCInputBitPerComponent,
1514 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1515 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1516 					}
1517 					//OutputTypeAndRate = Output & " HBR2";
1518 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1519 					*OutputRate = dm_output_rate_dp_rate_hbr2;
1520 				}
1521 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1522 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1523 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1524 							ForcedOutputLinkBPP, LinkDSCEnable, Output,
1525 							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1526 							AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1527 							RequiredSlots);
1528 
1529 					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1530 						*RequiresDSC = true;
1531 						LinkDSCEnable = true;
1532 						if (Output == dm_dp)
1533 							*RequiresFEC = true;
1534 
1535 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1536 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1537 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1538 								OutputFormat, DSCInputBitPerComponent,
1539 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1540 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1541 					}
1542 					//OutputTypeAndRate = Output & " HBR3";
1543 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1544 					*OutputRate = dm_output_rate_dp_rate_hbr3;
1545 				}
1546 			}
1547 		}
1548 	}
1549 }
1550 
/*
 * Compute the DPP clock of every active surface and the global DPP clock.
 *
 * Pass 1: each surface's single-DPP clock is divided by the number of DPPs
 * serving that surface and scaled up by the down-spreading percentage; the
 * largest result is tracked.
 * The largest value is then passed through dml32_RoundToDFSGranularity()
 * and stored in *GlobalDPPCLK.
 * Pass 2: every per-surface clock is snapped with dml_ceil() to a multiple of
 * GlobalDPPCLK / 255.
 *
 * Inputs:
 *   NumberOfActiveSurfaces            number of entries processed in the arrays
 *   DISPCLKDPPCLKDSCCLKDownSpreading  down-spreading, in percent
 *   DISPCLKDPPCLKVCOSpeed             forwarded to dml32_RoundToDFSGranularity()
 *   DPPCLKUsingSingleDPP[]            per-surface clock assuming one DPP
 *   DPPPerSurface[]                   number of DPPs per surface
 * Outputs:
 *   *GlobalDPPCLK                     rounded maximum of the per-surface clocks
 *   Dppclk[]                          per-surface DPP clocks
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	const double SpreadFactor = 1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100;
	double HighestDppclk = 0;
	double ClockStep;
	unsigned int surf;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		Dppclk[surf] = DPPCLKUsingSingleDPP[surf] / DPPPerSurface[surf] * SpreadFactor;
		HighestDppclk = dml_max(HighestDppclk, Dppclk[surf]);
	}

	HighestDppclk = dml32_RoundToDFSGranularity(HighestDppclk, 1, DISPCLKDPPCLKVCOSpeed);
	*GlobalDPPCLK = HighestDppclk;

	/* Per-surface clocks are expressed in 1/255 steps of the global clock. */
	ClockStep = HighestDppclk / 255;
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf)
		Dppclk[surf] = ClockStep * dml_ceil(Dppclk[surf] * 255.0 / HighestDppclk, 1.0);
}
1572 
1573 double dml32_TruncToValidBPP(
1574 		double LinkBitRate,
1575 		unsigned int Lanes,
1576 		unsigned int HTotal,
1577 		unsigned int HActive,
1578 		double PixelClock,
1579 		double DesiredBPP,
1580 		bool DSCEnable,
1581 		enum output_encoder_class Output,
1582 		enum output_format_class Format,
1583 		unsigned int DSCInputBitPerComponent,
1584 		unsigned int DSCSlices,
1585 		unsigned int AudioRate,
1586 		unsigned int AudioLayout,
1587 		enum odm_combine_mode ODMModeNoDSC,
1588 		enum odm_combine_mode ODMModeDSC,
1589 		/* Output */
1590 		unsigned int *RequiredSlots)
1591 {
1592 	double    MaxLinkBPP;
1593 	unsigned int   MinDSCBPP;
1594 	double    MaxDSCBPP;
1595 	unsigned int   NonDSCBPP0;
1596 	unsigned int   NonDSCBPP1;
1597 	unsigned int   NonDSCBPP2;
1598 	unsigned int   NonDSCBPP3;
1599 
1600 	if (Format == dm_420) {
1601 		NonDSCBPP0 = 12;
1602 		NonDSCBPP1 = 15;
1603 		NonDSCBPP2 = 18;
1604 		MinDSCBPP = 6;
1605 		MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
1606 	} else if (Format == dm_444) {
1607 		NonDSCBPP0 = 18;
1608 		NonDSCBPP1 = 24;
1609 		NonDSCBPP2 = 30;
1610 		NonDSCBPP3 = 36;
1611 		MinDSCBPP = 8;
1612 		MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1613 	} else {
1614 		if (Output == dm_hdmi) {
1615 			NonDSCBPP0 = 24;
1616 			NonDSCBPP1 = 24;
1617 			NonDSCBPP2 = 24;
1618 		} else {
1619 			NonDSCBPP0 = 16;
1620 			NonDSCBPP1 = 20;
1621 			NonDSCBPP2 = 24;
1622 		}
1623 		if (Format == dm_n422) {
1624 			MinDSCBPP = 7;
1625 			MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1626 		} else {
1627 			MinDSCBPP = 8;
1628 			MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1629 		}
1630 	}
1631 	if (Output == dm_dp2p0) {
1632 		MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1633 	} else if (DSCEnable && Output == dm_dp) {
1634 		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1635 	} else {
1636 		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1637 	}
1638 
1639 	if (DSCEnable) {
1640 		if (ODMModeDSC == dm_odm_combine_mode_4to1)
1641 			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1642 		else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1643 			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1644 		else if (ODMModeDSC == dm_odm_split_mode_1to2)
1645 			MaxLinkBPP = 2 * MaxLinkBPP;
1646 	} else {
1647 		if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1648 			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1649 		else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1650 			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1651 		else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1652 			MaxLinkBPP = 2 * MaxLinkBPP;
1653 	}
1654 
1655 	if (DesiredBPP == 0) {
1656 		if (DSCEnable) {
1657 			if (MaxLinkBPP < MinDSCBPP)
1658 				return BPP_INVALID;
1659 			else if (MaxLinkBPP >= MaxDSCBPP)
1660 				return MaxDSCBPP;
1661 			else
1662 				return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1663 		} else {
1664 			if (MaxLinkBPP >= NonDSCBPP3)
1665 				return NonDSCBPP3;
1666 			else if (MaxLinkBPP >= NonDSCBPP2)
1667 				return NonDSCBPP2;
1668 			else if (MaxLinkBPP >= NonDSCBPP1)
1669 				return NonDSCBPP1;
1670 			else if (MaxLinkBPP >= NonDSCBPP0)
1671 				return 16.0;
1672 			else
1673 				return BPP_INVALID;
1674 		}
1675 	} else {
1676 		if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1677 				DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) ||
1678 				(DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1679 			return BPP_INVALID;
1680 		else
1681 			return DesiredBPP;
1682 	}
1683 
1684 	*RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1685 
1686 	return BPP_INVALID;
1687 } // TruncToValidBPP
1688 
1689 double dml32_RequiredDTBCLK(
1690 		bool              DSCEnable,
1691 		double               PixelClock,
1692 		enum output_format_class  OutputFormat,
1693 		double               OutputBpp,
1694 		unsigned int              DSCSlices,
1695 		unsigned int                 HTotal,
1696 		unsigned int                 HActive,
1697 		unsigned int              AudioRate,
1698 		unsigned int              AudioLayout)
1699 {
1700 	double PixelWordRate;
1701 	double HCActive;
1702 	double HCBlank;
1703 	double AverageTribyteRate;
1704 	double HActiveTribyteRate;
1705 
1706 	if (DSCEnable != true)
1707 		return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1708 
1709 	PixelWordRate = PixelClock /  (OutputFormat == dm_444 ? 1 : 2);
1710 	HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1711 			dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1712 	HCBlank = 64 + 32 *
1713 			dml_ceil(AudioRate *  (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1714 	AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1715 	HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1716 	return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
1717 }
1718 
1719 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1720 		enum odm_combine_mode ODMMode,
1721 		unsigned int DSCInputBitPerComponent,
1722 		double OutputBpp,
1723 		unsigned int HActive,
1724 		unsigned int HTotal,
1725 		unsigned int NumberOfDSCSlices,
1726 		enum output_format_class  OutputFormat,
1727 		enum output_encoder_class Output,
1728 		double PixelClock,
1729 		double PixelClockBackEnd,
1730 		double dsc_delay_factor_wa)
1731 {
1732 	unsigned int DSCDelayRequirement_val;
1733 
1734 	if (DSCEnabled == true && OutputBpp != 0) {
1735 		if (ODMMode == dm_odm_combine_mode_4to1) {
1736 			DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1737 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1738 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1739 		} else if (ODMMode == dm_odm_combine_mode_2to1) {
1740 			DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1741 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1742 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1743 		} else {
1744 			DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1745 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1746 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
1747 		}
1748 
1749 		DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1750 				dml_ceil((double)DSCDelayRequirement_val / HActive, 1);
1751 
1752 		DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1753 
1754 	} else {
1755 		DSCDelayRequirement_val = 0;
1756 	}
1757 
1758 #ifdef __DML_VBA_DEBUG__
1759 	dml_print("DML::%s: DSCEnabled              = %d\n", __func__, DSCEnabled);
1760 	dml_print("DML::%s: OutputBpp               = %f\n", __func__, OutputBpp);
1761 	dml_print("DML::%s: HActive                 = %d\n", __func__, HActive);
1762 	dml_print("DML::%s: OutputFormat            = %d\n", __func__, OutputFormat);
1763 	dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1764 	dml_print("DML::%s: NumberOfDSCSlices       = %d\n", __func__, NumberOfDSCSlices);
1765 	dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1766 #endif
1767 
1768 	return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1);
1769 }
1770 
1771 void dml32_CalculateSurfaceSizeInMall(
1772 		unsigned int NumberOfActiveSurfaces,
1773 		unsigned int MALLAllocatedForDCN,
1774 		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1775 		bool DCCEnable[],
1776 		bool ViewportStationary[],
1777 		unsigned int ViewportXStartY[],
1778 		unsigned int ViewportYStartY[],
1779 		unsigned int ViewportXStartC[],
1780 		unsigned int ViewportYStartC[],
1781 		unsigned int ViewportWidthY[],
1782 		unsigned int ViewportHeightY[],
1783 		unsigned int BytesPerPixelY[],
1784 		unsigned int ViewportWidthC[],
1785 		unsigned int ViewportHeightC[],
1786 		unsigned int BytesPerPixelC[],
1787 		unsigned int SurfaceWidthY[],
1788 		unsigned int SurfaceWidthC[],
1789 		unsigned int SurfaceHeightY[],
1790 		unsigned int SurfaceHeightC[],
1791 		unsigned int Read256BytesBlockWidthY[],
1792 		unsigned int Read256BytesBlockWidthC[],
1793 		unsigned int Read256BytesBlockHeightY[],
1794 		unsigned int Read256BytesBlockHeightC[],
1795 		unsigned int ReadBlockWidthY[],
1796 		unsigned int ReadBlockWidthC[],
1797 		unsigned int ReadBlockHeightY[],
1798 		unsigned int ReadBlockHeightC[],
1799 
1800 		/* Output */
1801 		unsigned int    SurfaceSizeInMALL[],
1802 		bool *ExceededMALLSize)
1803 {
1804 	unsigned int TotalSurfaceSizeInMALL  = 0;
1805 	unsigned int k;
1806 
1807 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1808 		if (ViewportStationary[k]) {
1809 			SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1810 					dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1811 						ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1812 						ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1813 						ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1814 						ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1815 						dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
1816 
1817 			if (ReadBlockWidthC[k] > 0) {
1818 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1819 						dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1820 							dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1821 							ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1822 							dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1823 							dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1824 							dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1825 							ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1826 							dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
1827 							BytesPerPixelC[k];
1828 			}
1829 			if (DCCEnable[k] == true) {
1830 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1831 						dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]),
1832 							dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1833 							Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1834 							- dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1835 							* dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1836 							Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1837 							ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1838 							Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8
1839 							* Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
1840 				if (Read256BytesBlockWidthC[k] > 0) {
1841 					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1842 							dml_min(dml_ceil(SurfaceWidthC[k], 8 *
1843 								Read256BytesBlockWidthC[k]),
1844 								dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1845 								* Read256BytesBlockWidthC[k] - 1, 8 *
1846 								Read256BytesBlockWidthC[k]) -
1847 								dml_floor(ViewportXStartC[k], 8 *
1848 								Read256BytesBlockWidthC[k])) *
1849 								dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1850 								Read256BytesBlockHeightC[k]),
1851 								dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1852 								8 * Read256BytesBlockHeightC[k] - 1, 8 *
1853 								Read256BytesBlockHeightC[k]) -
1854 								dml_floor(ViewportYStartC[k], 8 *
1855 								Read256BytesBlockHeightC[k])) *
1856 								BytesPerPixelC[k] / 256;
1857 				}
1858 			}
1859 		} else {
1860 			SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1861 					ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1862 					dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1863 							ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1864 							BytesPerPixelY[k];
1865 			if (ReadBlockWidthC[k] > 0) {
1866 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1867 						dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1868 								ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1869 						dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1870 								ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
1871 								BytesPerPixelC[k];
1872 			}
1873 			if (DCCEnable[k] == true) {
1874 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1875 						dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 *
1876 								Read256BytesBlockWidthY[k] - 1), 8 *
1877 								Read256BytesBlockWidthY[k]) *
1878 						dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1879 								Read256BytesBlockHeightY[k] - 1), 8 *
1880 								Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256;
1881 
1882 				if (Read256BytesBlockWidthC[k] > 0) {
1883 					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1884 							dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 *
1885 									Read256BytesBlockWidthC[k] - 1), 8 *
1886 									Read256BytesBlockWidthC[k]) *
1887 							dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1888 									Read256BytesBlockHeightC[k] - 1), 8 *
1889 									Read256BytesBlockHeightC[k]) *
1890 									BytesPerPixelC[k] / 256;
1891 				}
1892 			}
1893 		}
1894 	}
1895 
1896 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1897 		if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1898 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
1899 	}
1900 	*ExceededMALLSize =  (TotalSurfaceSizeInMALL > MALLAllocatedForDCN * 1024 * 1024);
1901 } // CalculateSurfaceSizeInMall
1902 
1903 void dml32_CalculateVMRowAndSwath(
1904 		unsigned int NumberOfActiveSurfaces,
1905 		DmlPipe myPipe[],
1906 		unsigned int SurfaceSizeInMALL[],
1907 		unsigned int PTEBufferSizeInRequestsLuma,
1908 		unsigned int PTEBufferSizeInRequestsChroma,
1909 		unsigned int DCCMetaBufferSizeBytes,
1910 		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1911 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1912 		unsigned int MALLAllocatedForDCN,
1913 		double SwathWidthY[],
1914 		double SwathWidthC[],
1915 		bool GPUVMEnable,
1916 		bool HostVMEnable,
1917 		unsigned int HostVMMaxNonCachedPageTableLevels,
1918 		unsigned int GPUVMMaxPageTableLevels,
1919 		unsigned int GPUVMMinPageSizeKBytes[],
1920 		unsigned int HostVMMinPageSize,
1921 
1922 		/* Output */
1923 		bool PTEBufferSizeNotExceeded[],
1924 		bool DCCMetaBufferSizeNotExceeded[],
1925 		unsigned int dpte_row_width_luma_ub[],
1926 		unsigned int dpte_row_width_chroma_ub[],
1927 		unsigned int dpte_row_height_luma[],
1928 		unsigned int dpte_row_height_chroma[],
1929 		unsigned int dpte_row_height_linear_luma[],     // VBA_DELTA
1930 		unsigned int dpte_row_height_linear_chroma[],   // VBA_DELTA
1931 		unsigned int meta_req_width[],
1932 		unsigned int meta_req_width_chroma[],
1933 		unsigned int meta_req_height[],
1934 		unsigned int meta_req_height_chroma[],
1935 		unsigned int meta_row_width[],
1936 		unsigned int meta_row_width_chroma[],
1937 		unsigned int meta_row_height[],
1938 		unsigned int meta_row_height_chroma[],
1939 		unsigned int vm_group_bytes[],
1940 		unsigned int dpte_group_bytes[],
1941 		unsigned int PixelPTEReqWidthY[],
1942 		unsigned int PixelPTEReqHeightY[],
1943 		unsigned int PTERequestSizeY[],
1944 		unsigned int PixelPTEReqWidthC[],
1945 		unsigned int PixelPTEReqHeightC[],
1946 		unsigned int PTERequestSizeC[],
1947 		unsigned int dpde0_bytes_per_frame_ub_l[],
1948 		unsigned int meta_pte_bytes_per_frame_ub_l[],
1949 		unsigned int dpde0_bytes_per_frame_ub_c[],
1950 		unsigned int meta_pte_bytes_per_frame_ub_c[],
1951 		double PrefetchSourceLinesY[],
1952 		double PrefetchSourceLinesC[],
1953 		double VInitPreFillY[],
1954 		double VInitPreFillC[],
1955 		unsigned int MaxNumSwathY[],
1956 		unsigned int MaxNumSwathC[],
1957 		double meta_row_bw[],
1958 		double dpte_row_bw[],
1959 		double PixelPTEBytesPerRow[],
1960 		double PDEAndMetaPTEBytesFrame[],
1961 		double MetaRowByte[],
1962 		bool use_one_row_for_frame[],
1963 		bool use_one_row_for_frame_flip[],
1964 		bool UsesMALLForStaticScreen[],
1965 		bool PTE_BUFFER_MODE[],
1966 		unsigned int BIGK_FRAGMENT_SIZE[])
1967 {
1968 	unsigned int k;
1969 	unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
1970 	unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
1971 	unsigned int PDEAndMetaPTEBytesFrameY;
1972 	unsigned int PDEAndMetaPTEBytesFrameC;
1973 	unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
1974 	unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
1975 	unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
1976 	unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
1977 	unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
1978 	unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
1979 	unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1980 	unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
1981 	unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1982 	unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
1983 	bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
1984 
1985 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1986 		if (HostVMEnable == true) {
1987 			vm_group_bytes[k] = 512;
1988 			dpte_group_bytes[k] = 512;
1989 		} else if (GPUVMEnable == true) {
1990 			vm_group_bytes[k] = 2048;
1991 			if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1992 				dpte_group_bytes[k] = 512;
1993 			else
1994 				dpte_group_bytes[k] = 2048;
1995 		} else {
1996 			vm_group_bytes[k] = 0;
1997 			dpte_group_bytes[k] = 0;
1998 		}
1999 
2000 		if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
2001 				myPipe[k].SourcePixelFormat == dm_420_12 ||
2002 				myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
2003 			if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
2004 					!IsVertical(myPipe[k].SourceRotation)) {
2005 				PTEBufferSizeInRequestsForLuma[k] =
2006 						(PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
2007 				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
2008 			} else {
2009 				PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
2010 				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
2011 			}
2012 
2013 			PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
2014 					myPipe[k].ViewportStationary,
2015 					myPipe[k].DCCEnable,
2016 					myPipe[k].DPPPerSurface,
2017 					myPipe[k].BlockHeight256BytesC,
2018 					myPipe[k].BlockWidth256BytesC,
2019 					myPipe[k].SourcePixelFormat,
2020 					myPipe[k].SurfaceTiling,
2021 					myPipe[k].BytePerPixelC,
2022 					myPipe[k].SourceRotation,
2023 					SwathWidthC[k],
2024 					myPipe[k].ViewportHeightChroma,
2025 					myPipe[k].ViewportXStartC,
2026 					myPipe[k].ViewportYStartC,
2027 					GPUVMEnable,
2028 					HostVMEnable,
2029 					HostVMMaxNonCachedPageTableLevels,
2030 					GPUVMMaxPageTableLevels,
2031 					GPUVMMinPageSizeKBytes[k],
2032 					HostVMMinPageSize,
2033 					PTEBufferSizeInRequestsForChroma[k],
2034 					myPipe[k].PitchC,
2035 					myPipe[k].DCCMetaPitchC,
2036 					myPipe[k].BlockWidthC,
2037 					myPipe[k].BlockHeightC,
2038 
2039 					/* Output */
2040 					&MetaRowByteC[k],
2041 					&PixelPTEBytesPerRowC[k],
2042 					&dpte_row_width_chroma_ub[k],
2043 					&dpte_row_height_chroma[k],
2044 					&dpte_row_height_linear_chroma[k],
2045 					&PixelPTEBytesPerRowC_one_row_per_frame[k],
2046 					&dpte_row_width_chroma_ub_one_row_per_frame[k],
2047 					&dpte_row_height_chroma_one_row_per_frame[k],
2048 					&meta_req_width_chroma[k],
2049 					&meta_req_height_chroma[k],
2050 					&meta_row_width_chroma[k],
2051 					&meta_row_height_chroma[k],
2052 					&PixelPTEReqWidthC[k],
2053 					&PixelPTEReqHeightC[k],
2054 					&PTERequestSizeC[k],
2055 					&dpde0_bytes_per_frame_ub_c[k],
2056 					&meta_pte_bytes_per_frame_ub_c[k]);
2057 
2058 			PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2059 					myPipe[k].VRatioChroma,
2060 					myPipe[k].VTapsChroma,
2061 					myPipe[k].InterlaceEnable,
2062 					myPipe[k].ProgressiveToInterlaceUnitInOPP,
2063 					myPipe[k].SwathHeightC,
2064 					myPipe[k].SourceRotation,
2065 					myPipe[k].ViewportStationary,
2066 					SwathWidthC[k],
2067 					myPipe[k].ViewportHeightChroma,
2068 					myPipe[k].ViewportXStartC,
2069 					myPipe[k].ViewportYStartC,
2070 
2071 					/* Output */
2072 					&VInitPreFillC[k],
2073 					&MaxNumSwathC[k]);
2074 		} else {
2075 			PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2076 			PTEBufferSizeInRequestsForChroma[k] = 0;
2077 			PixelPTEBytesPerRowC[k] = 0;
2078 			PDEAndMetaPTEBytesFrameC = 0;
2079 			MetaRowByteC[k] = 0;
2080 			MaxNumSwathC[k] = 0;
2081 			PrefetchSourceLinesC[k] = 0;
2082 			dpte_row_height_chroma_one_row_per_frame[k] = 0;
2083 			dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2084 			PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2085 		}
2086 
2087 		PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2088 				myPipe[k].ViewportStationary,
2089 				myPipe[k].DCCEnable,
2090 				myPipe[k].DPPPerSurface,
2091 				myPipe[k].BlockHeight256BytesY,
2092 				myPipe[k].BlockWidth256BytesY,
2093 				myPipe[k].SourcePixelFormat,
2094 				myPipe[k].SurfaceTiling,
2095 				myPipe[k].BytePerPixelY,
2096 				myPipe[k].SourceRotation,
2097 				SwathWidthY[k],
2098 				myPipe[k].ViewportHeight,
2099 				myPipe[k].ViewportXStart,
2100 				myPipe[k].ViewportYStart,
2101 				GPUVMEnable,
2102 				HostVMEnable,
2103 				HostVMMaxNonCachedPageTableLevels,
2104 				GPUVMMaxPageTableLevels,
2105 				GPUVMMinPageSizeKBytes[k],
2106 				HostVMMinPageSize,
2107 				PTEBufferSizeInRequestsForLuma[k],
2108 				myPipe[k].PitchY,
2109 				myPipe[k].DCCMetaPitchY,
2110 				myPipe[k].BlockWidthY,
2111 				myPipe[k].BlockHeightY,
2112 
2113 				/* Output */
2114 				&MetaRowByteY[k],
2115 				&PixelPTEBytesPerRowY[k],
2116 				&dpte_row_width_luma_ub[k],
2117 				&dpte_row_height_luma[k],
2118 				&dpte_row_height_linear_luma[k],
2119 				&PixelPTEBytesPerRowY_one_row_per_frame[k],
2120 				&dpte_row_width_luma_ub_one_row_per_frame[k],
2121 				&dpte_row_height_luma_one_row_per_frame[k],
2122 				&meta_req_width[k],
2123 				&meta_req_height[k],
2124 				&meta_row_width[k],
2125 				&meta_row_height[k],
2126 				&PixelPTEReqWidthY[k],
2127 				&PixelPTEReqHeightY[k],
2128 				&PTERequestSizeY[k],
2129 				&dpde0_bytes_per_frame_ub_l[k],
2130 				&meta_pte_bytes_per_frame_ub_l[k]);
2131 
2132 		PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2133 				myPipe[k].VRatio,
2134 				myPipe[k].VTaps,
2135 				myPipe[k].InterlaceEnable,
2136 				myPipe[k].ProgressiveToInterlaceUnitInOPP,
2137 				myPipe[k].SwathHeightY,
2138 				myPipe[k].SourceRotation,
2139 				myPipe[k].ViewportStationary,
2140 				SwathWidthY[k],
2141 				myPipe[k].ViewportHeight,
2142 				myPipe[k].ViewportXStart,
2143 				myPipe[k].ViewportYStart,
2144 
2145 				/* Output */
2146 				&VInitPreFillY[k],
2147 				&MaxNumSwathY[k]);
2148 
2149 		PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2150 		MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
2151 
2152 		if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
2153 				PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
2154 			PTEBufferSizeNotExceeded[k] = true;
2155 		} else {
2156 			PTEBufferSizeNotExceeded[k] = false;
2157 		}
2158 
2159 		one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2160 			PTEBufferSizeInRequestsForLuma[k] &&
2161 			PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
2162 	}
2163 
2164 	dml32_CalculateMALLUseForStaticScreen(
2165 			NumberOfActiveSurfaces,
2166 			MALLAllocatedForDCN,
2167 			UseMALLForStaticScreen,   // mode
2168 			SurfaceSizeInMALL,
2169 			one_row_per_frame_fits_in_buffer,
2170 			/* Output */
2171 			UsesMALLForStaticScreen); // boolen
2172 
2173 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2174 		PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2175 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2176 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2177 				(GPUVMMinPageSizeKBytes[k] > 64);
2178 		BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
2179 	}
2180 
2181 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2182 #ifdef __DML_VBA_DEBUG__
2183 		dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n",  __func__, k, SurfaceSizeInMALL[k]);
2184 		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2185 #endif
2186 		use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2187 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2188 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2189 				(GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
2190 
2191 		use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2192 				!(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
2193 
2194 		if (use_one_row_for_frame[k]) {
2195 			dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
2196 			dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
2197 			PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
2198 			dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
2199 			dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
2200 			PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
2201 			PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
2202 		}
2203 
2204 		if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2205 			DCCMetaBufferSizeNotExceeded[k] = true;
2206 		else
2207 			DCCMetaBufferSizeNotExceeded[k] = false;
2208 
2209 		PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
2210 		if (use_one_row_for_frame[k])
2211 			PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
2212 
2213 		dml32_CalculateRowBandwidth(
2214 				GPUVMEnable,
2215 				myPipe[k].SourcePixelFormat,
2216 				myPipe[k].VRatio,
2217 				myPipe[k].VRatioChroma,
2218 				myPipe[k].DCCEnable,
2219 				myPipe[k].HTotal / myPipe[k].PixelClock,
2220 				MetaRowByteY[k], MetaRowByteC[k],
2221 				meta_row_height[k],
2222 				meta_row_height_chroma[k],
2223 				PixelPTEBytesPerRowY[k],
2224 				PixelPTEBytesPerRowC[k],
2225 				dpte_row_height_luma[k],
2226 				dpte_row_height_chroma[k],
2227 
2228 				/* Output */
2229 				&meta_row_bw[k],
2230 				&dpte_row_bw[k]);
2231 #ifdef __DML_VBA_DEBUG__
2232 		dml_print("DML::%s: k=%d, use_one_row_for_frame        = %d\n",  __func__, k, use_one_row_for_frame[k]);
2233 		dml_print("DML::%s: k=%d, use_one_row_for_frame_flip   = %d\n",
2234 				__func__, k, use_one_row_for_frame_flip[k]);
2235 		dml_print("DML::%s: k=%d, UseMALLForPStateChange       = %d\n",
2236 				__func__, k, UseMALLForPStateChange[k]);
2237 		dml_print("DML::%s: k=%d, dpte_row_height_luma         = %d\n",  __func__, k, dpte_row_height_luma[k]);
2238 		dml_print("DML::%s: k=%d, dpte_row_width_luma_ub       = %d\n",
2239 				__func__, k, dpte_row_width_luma_ub[k]);
2240 		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY         = %d\n",  __func__, k, PixelPTEBytesPerRowY[k]);
2241 		dml_print("DML::%s: k=%d, dpte_row_height_chroma       = %d\n",
2242 				__func__, k, dpte_row_height_chroma[k]);
2243 		dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub     = %d\n",
2244 				__func__, k, dpte_row_width_chroma_ub[k]);
2245 		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC         = %d\n",  __func__, k, PixelPTEBytesPerRowC[k]);
2246 		dml_print("DML::%s: k=%d, PixelPTEBytesPerRow          = %d\n",  __func__, k, PixelPTEBytesPerRow[k]);
2247 		dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded     = %d\n",
2248 				__func__, k, PTEBufferSizeNotExceeded[k]);
2249 		dml_print("DML::%s: k=%d, PTE_BUFFER_MODE              = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2250 		dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE           = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2251 #endif
2252 	}
2253 } // CalculateVMRowAndSwath
2254 
2255 unsigned int dml32_CalculateVMAndRowBytes(
2256 		bool ViewportStationary,
2257 		bool DCCEnable,
2258 		unsigned int NumberOfDPPs,
2259 		unsigned int BlockHeight256Bytes,
2260 		unsigned int BlockWidth256Bytes,
2261 		enum source_format_class SourcePixelFormat,
2262 		unsigned int SurfaceTiling,
2263 		unsigned int BytePerPixel,
2264 		enum dm_rotation_angle SourceRotation,
2265 		double SwathWidth,
2266 		unsigned int ViewportHeight,
2267 		unsigned int    ViewportXStart,
2268 		unsigned int    ViewportYStart,
2269 		bool GPUVMEnable,
2270 		bool HostVMEnable,
2271 		unsigned int HostVMMaxNonCachedPageTableLevels,
2272 		unsigned int GPUVMMaxPageTableLevels,
2273 		unsigned int GPUVMMinPageSizeKBytes,
2274 		unsigned int HostVMMinPageSize,
2275 		unsigned int PTEBufferSizeInRequests,
2276 		unsigned int Pitch,
2277 		unsigned int DCCMetaPitch,
2278 		unsigned int MacroTileWidth,
2279 		unsigned int MacroTileHeight,
2280 
2281 		/* Output */
2282 		unsigned int *MetaRowByte,
2283 		unsigned int *PixelPTEBytesPerRow,
2284 		unsigned int    *dpte_row_width_ub,
2285 		unsigned int *dpte_row_height,
2286 		unsigned int *dpte_row_height_linear,
2287 		unsigned int    *PixelPTEBytesPerRow_one_row_per_frame,
2288 		unsigned int    *dpte_row_width_ub_one_row_per_frame,
2289 		unsigned int    *dpte_row_height_one_row_per_frame,
2290 		unsigned int *MetaRequestWidth,
2291 		unsigned int *MetaRequestHeight,
2292 		unsigned int *meta_row_width,
2293 		unsigned int *meta_row_height,
2294 		unsigned int *PixelPTEReqWidth,
2295 		unsigned int *PixelPTEReqHeight,
2296 		unsigned int *PTERequestSize,
2297 		unsigned int    *DPDE0BytesFrame,
2298 		unsigned int    *MetaPTEBytesFrame)
2299 {
2300 	unsigned int MPDEBytesFrame;
2301 	unsigned int DCCMetaSurfaceBytes;
2302 	unsigned int ExtraDPDEBytesFrame;
2303 	unsigned int PDEAndMetaPTEBytesFrame;
2304 	unsigned int HostVMDynamicLevels = 0;
2305 	unsigned int    MacroTileSizeBytes;
2306 	unsigned int    vp_height_meta_ub;
2307 	unsigned int    vp_height_dpte_ub;
2308 	unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2309 
2310 	if (GPUVMEnable == true && HostVMEnable == true) {
2311 		if (HostVMMinPageSize < 2048)
2312 			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2313 		else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2314 			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2315 		else
2316 			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2317 	}
2318 
2319 	*MetaRequestHeight = 8 * BlockHeight256Bytes;
2320 	*MetaRequestWidth = 8 * BlockWidth256Bytes;
2321 	if (SurfaceTiling == dm_sw_linear) {
2322 		*meta_row_height = 32;
2323 		*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2324 				- dml_floor(ViewportXStart, *MetaRequestWidth);
2325 	} else if (!IsVertical(SourceRotation)) {
2326 		*meta_row_height = *MetaRequestHeight;
2327 		if (ViewportStationary && NumberOfDPPs == 1) {
2328 			*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2329 					*MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2330 		} else {
2331 			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2332 		}
2333 		*MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2334 	} else {
2335 		*meta_row_height = *MetaRequestWidth;
2336 		if (ViewportStationary && NumberOfDPPs == 1) {
2337 			*meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2338 					*MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2339 		} else {
2340 			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2341 		}
2342 		*MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2343 	}
2344 
2345 	if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2346 		vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2347 				64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2348 	} else if (!IsVertical(SourceRotation)) {
2349 		vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2350 	} else {
2351 		vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2352 	}
2353 
2354 	DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2355 
2356 	if (GPUVMEnable == true) {
2357 		*MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2358 				(8 * 4.0 * 1024), 1) + 1) * 64;
2359 		MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2360 	} else {
2361 		*MetaPTEBytesFrame = 0;
2362 		MPDEBytesFrame = 0;
2363 	}
2364 
2365 	if (DCCEnable != true) {
2366 		*MetaPTEBytesFrame = 0;
2367 		MPDEBytesFrame = 0;
2368 		*MetaRowByte = 0;
2369 	}
2370 
2371 	MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2372 
2373 	if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2374 		if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2375 			vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2376 					MacroTileHeight - 1, MacroTileHeight) -
2377 					dml_floor(ViewportYStart, MacroTileHeight);
2378 		} else if (!IsVertical(SourceRotation)) {
2379 			vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2380 		} else {
2381 			vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2382 		}
2383 		*DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2384 				(8 * 2097152), 1) + 1);
2385 		ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2386 	} else {
2387 		*DPDE0BytesFrame = 0;
2388 		ExtraDPDEBytesFrame = 0;
2389 		vp_height_dpte_ub = 0;
2390 	}
2391 
2392 	PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2393 
2394 #ifdef __DML_VBA_DEBUG__
2395 	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2396 	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2397 	dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2398 	dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2399 	dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2400 	dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2401 	dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2402 	dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2403 	dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2404 	dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2405 	dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2406 	dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2407 	dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2408 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2409 	dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2410 	dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2411 	dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2412 #endif
2413 
2414 	if (HostVMEnable == true)
2415 		PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2416 
2417 	if (SurfaceTiling == dm_sw_linear) {
2418 		*PixelPTEReqHeight = 1;
2419 		*PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2420 		PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2421 		*PTERequestSize = 64;
2422 	} else if (GPUVMMinPageSizeKBytes == 4) {
2423 		*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2424 		*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2425 		*PTERequestSize = 128;
2426 	} else {
2427 		*PixelPTEReqHeight = MacroTileHeight;
2428 		*PixelPTEReqWidth = 8 *  1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2429 		*PTERequestSize = 64;
2430 	}
2431 #ifdef __DML_VBA_DEBUG__
2432 	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2433 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2434 	dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2435 	dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2436 	dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2437 	dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2438 	dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2439 #endif
2440 
2441 	*dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2442 	*dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2443 			(double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2444 					(double) *PixelPTEReqWidth;
2445 	*PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2446 			*PTERequestSize;
2447 
2448 	if (SurfaceTiling == dm_sw_linear) {
2449 		*dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2450 				*PixelPTEReqWidth / Pitch), 1));
2451 #ifdef __DML_VBA_DEBUG__
2452 		dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2453 				PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2454 		dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2455 				dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2456 		dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2457 				dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2458 		dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2459 				1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2460 						*PixelPTEReqWidth / Pitch), 1));
2461 		dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2462 #endif
2463 		*dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2464 				(double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2465 		*PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2466 
2467 		// VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2468 		*dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2469 				PixelPTEReqWidth_linear / Pitch), 1);
2470 		if (*dpte_row_height_linear > 128)
2471 			*dpte_row_height_linear = 128;
2472 
2473 	} else if (!IsVertical(SourceRotation)) {
2474 		*dpte_row_height = *PixelPTEReqHeight;
2475 
2476 		if (GPUVMMinPageSizeKBytes > 64) {
2477 			*dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2478 					*PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2479 		} else if (ViewportStationary && (NumberOfDPPs == 1)) {
2480 			*dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2481 					*PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2482 					dml_floor(ViewportXStart, *PixelPTEReqWidth);
2483 		} else {
2484 			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2485 					*PixelPTEReqWidth;
2486 		}
2487 
2488 		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2489 	} else {
2490 		*dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2491 
2492 		if (ViewportStationary && (NumberOfDPPs == 1)) {
2493 			*dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2494 					*PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2495 		} else {
2496 			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2497 					* *PixelPTEReqHeight;
2498 		}
2499 
2500 		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2501 	}
2502 
2503 	if (GPUVMEnable != true)
2504 		*PixelPTEBytesPerRow = 0;
2505 	if (HostVMEnable == true)
2506 		*PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2507 
2508 #ifdef __DML_VBA_DEBUG__
2509 	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2510 	dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2511 	dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2512 	dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2513 	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2514 	dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2515 	dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2516 	dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2517 			__func__, *dpte_row_width_ub_one_row_per_frame);
2518 	dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2519 			__func__, *PixelPTEBytesPerRow_one_row_per_frame);
2520 	dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2521 			*MetaPTEBytesFrame);
2522 #endif
2523 
2524 	return PDEAndMetaPTEBytesFrame;
2525 } // CalculateVMAndRowBytes
2526 
2527 double dml32_CalculatePrefetchSourceLines(
2528 		double VRatio,
2529 		unsigned int VTaps,
2530 		bool Interlace,
2531 		bool ProgressiveToInterlaceUnitInOPP,
2532 		unsigned int SwathHeight,
2533 		enum dm_rotation_angle SourceRotation,
2534 		bool ViewportStationary,
2535 		double SwathWidth,
2536 		unsigned int ViewportHeight,
2537 		unsigned int ViewportXStart,
2538 		unsigned int ViewportYStart,
2539 
2540 		/* Output */
2541 		double *VInitPreFill,
2542 		unsigned int *MaxNumSwath)
2543 {
2544 
2545 	unsigned int vp_start_rot;
2546 	unsigned int sw0_tmp;
2547 	unsigned int MaxPartialSwath;
2548 	double numLines;
2549 
2550 #ifdef __DML_VBA_DEBUG__
2551 	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2552 	dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2553 	dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2554 	dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2555 	dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2556 	dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2557 #endif
2558 	if (ProgressiveToInterlaceUnitInOPP)
2559 		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2560 	else
2561 		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2562 
2563 	if (ViewportStationary) {
2564 		if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2565 			vp_start_rot = SwathHeight -
2566 					(((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2567 		} else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2568 			vp_start_rot = ViewportXStart;
2569 		} else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2570 			vp_start_rot = SwathHeight -
2571 					(((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2572 		} else {
2573 			vp_start_rot = ViewportYStart;
2574 		}
2575 		sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2576 		if (sw0_tmp < *VInitPreFill)
2577 			*MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2578 		else
2579 			*MaxNumSwath = 1;
2580 		MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2581 	} else {
2582 		*MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2583 		if (*VInitPreFill > 1)
2584 			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2585 		else
2586 			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2587 	}
2588 	numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2589 
2590 #ifdef __DML_VBA_DEBUG__
2591 	dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2592 	dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2593 	dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2594 	dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2595 	dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2596 #endif
2597 	return numLines;
2598 
2599 } // CalculatePrefetchSourceLines
2600 
2601 void dml32_CalculateMALLUseForStaticScreen(
2602 		unsigned int NumberOfActiveSurfaces,
2603 		unsigned int MALLAllocatedForDCNFinal,
2604 		enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2605 		unsigned int SurfaceSizeInMALL[],
2606 		bool one_row_per_frame_fits_in_buffer[],
2607 
2608 		/* output */
2609 		bool UsesMALLForStaticScreen[])
2610 {
2611 	unsigned int k;
2612 	unsigned int SurfaceToAddToMALL;
2613 	bool CanAddAnotherSurfaceToMALL;
2614 	unsigned int TotalSurfaceSizeInMALL;
2615 
2616 	TotalSurfaceSizeInMALL = 0;
2617 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2618 		UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2619 		if (UsesMALLForStaticScreen[k])
2620 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2621 #ifdef __DML_VBA_DEBUG__
2622 		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2623 		dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n",  __func__, k, TotalSurfaceSizeInMALL);
2624 #endif
2625 	}
2626 
2627 	SurfaceToAddToMALL = 0;
2628 	CanAddAnotherSurfaceToMALL = true;
2629 	while (CanAddAnotherSurfaceToMALL) {
2630 		CanAddAnotherSurfaceToMALL = false;
2631 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2632 			if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2633 					!UsesMALLForStaticScreen[k] &&
2634 					UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2635 					one_row_per_frame_fits_in_buffer[k] &&
2636 					(!CanAddAnotherSurfaceToMALL ||
2637 					SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2638 				CanAddAnotherSurfaceToMALL = true;
2639 				SurfaceToAddToMALL = k;
2640 #ifdef __DML_VBA_DEBUG__
2641 				dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2642 						__func__, k, UseMALLForStaticScreen[k]);
2643 #endif
2644 			}
2645 		}
2646 		if (CanAddAnotherSurfaceToMALL) {
2647 			UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2648 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2649 
2650 #ifdef __DML_VBA_DEBUG__
2651 			dml_print("DML::%s: SurfaceToAddToMALL       = %d\n",  __func__, SurfaceToAddToMALL);
2652 			dml_print("DML::%s: TotalSurfaceSizeInMALL   = %d\n",  __func__, TotalSurfaceSizeInMALL);
2653 #endif
2654 
2655 		}
2656 	}
2657 }
2658 
2659 void dml32_CalculateRowBandwidth(
2660 		bool GPUVMEnable,
2661 		enum source_format_class SourcePixelFormat,
2662 		double VRatio,
2663 		double VRatioChroma,
2664 		bool DCCEnable,
2665 		double LineTime,
2666 		unsigned int MetaRowByteLuma,
2667 		unsigned int MetaRowByteChroma,
2668 		unsigned int meta_row_height_luma,
2669 		unsigned int meta_row_height_chroma,
2670 		unsigned int PixelPTEBytesPerRowLuma,
2671 		unsigned int PixelPTEBytesPerRowChroma,
2672 		unsigned int dpte_row_height_luma,
2673 		unsigned int dpte_row_height_chroma,
2674 		/* Output */
2675 		double *meta_row_bw,
2676 		double *dpte_row_bw)
2677 {
2678 	if (DCCEnable != true) {
2679 		*meta_row_bw = 0;
2680 	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2681 			SourcePixelFormat == dm_rgbe_alpha) {
2682 		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2683 				MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2684 	} else {
2685 		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2686 	}
2687 
2688 	if (GPUVMEnable != true) {
2689 		*dpte_row_bw = 0;
2690 	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2691 			SourcePixelFormat == dm_rgbe_alpha) {
2692 		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2693 				VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2694 	} else {
2695 		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2696 	}
2697 }
2698 
/*
 * Return the urgent latency: the largest of the three supplied latencies,
 * optionally increased by
 *   UrgentLatencyAdjustmentFabricClockComponent *
 *   (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 * when DoUrgentLatencyAdjustment is set.
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool   DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double worst_latency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return worst_latency;

	return worst_latency + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2717 
2718 void dml32_CalculateUrgentBurstFactor(
2719 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2720 		unsigned int    swath_width_luma_ub,
2721 		unsigned int    swath_width_chroma_ub,
2722 		unsigned int SwathHeightY,
2723 		unsigned int SwathHeightC,
2724 		double  LineTime,
2725 		double  UrgentLatency,
2726 		double  CursorBufferSize,
2727 		unsigned int CursorWidth,
2728 		unsigned int CursorBPP,
2729 		double  VRatio,
2730 		double  VRatioC,
2731 		double  BytePerPixelInDETY,
2732 		double  BytePerPixelInDETC,
2733 		unsigned int    DETBufferSizeY,
2734 		unsigned int    DETBufferSizeC,
2735 		/* Output */
2736 		double *UrgentBurstFactorCursor,
2737 		double *UrgentBurstFactorLuma,
2738 		double *UrgentBurstFactorChroma,
2739 		bool   *NotEnoughUrgentLatencyHiding)
2740 {
2741 	double       LinesInDETLuma;
2742 	double       LinesInDETChroma;
2743 	unsigned int LinesInCursorBuffer;
2744 	double       CursorBufferSizeInTime;
2745 	double       DETBufferSizeInTimeLuma;
2746 	double       DETBufferSizeInTimeChroma;
2747 
2748 	*NotEnoughUrgentLatencyHiding = 0;
2749 
2750 	if (CursorWidth > 0) {
2751 		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2752 				(CursorWidth * CursorBPP / 8.0)), 1.0);
2753 		if (VRatio > 0) {
2754 			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2755 			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2756 				*NotEnoughUrgentLatencyHiding = 1;
2757 				*UrgentBurstFactorCursor = 0;
2758 			} else {
2759 				*UrgentBurstFactorCursor = CursorBufferSizeInTime /
2760 						(CursorBufferSizeInTime - UrgentLatency);
2761 			}
2762 		} else {
2763 			*UrgentBurstFactorCursor = 1;
2764 		}
2765 	}
2766 
2767 	LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2768 			DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2769 
2770 	if (VRatio > 0) {
2771 		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2772 		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2773 			*NotEnoughUrgentLatencyHiding = 1;
2774 			*UrgentBurstFactorLuma = 0;
2775 		} else {
2776 			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2777 		}
2778 	} else {
2779 		*UrgentBurstFactorLuma = 1;
2780 	}
2781 
2782 	if (BytePerPixelInDETC > 0) {
2783 		LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2784 					1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2785 					/ swath_width_chroma_ub;
2786 
2787 		if (VRatio > 0) {
2788 			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2789 			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2790 				*NotEnoughUrgentLatencyHiding = 1;
2791 				*UrgentBurstFactorChroma = 0;
2792 			} else {
2793 				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2794 						/ (DETBufferSizeInTimeChroma - UrgentLatency);
2795 			}
2796 		} else {
2797 			*UrgentBurstFactorChroma = 1;
2798 		}
2799 	}
2800 } // CalculateUrgentBurstFactor
2801 
2802 void dml32_CalculateDCFCLKDeepSleep(
2803 		unsigned int NumberOfActiveSurfaces,
2804 		unsigned int BytePerPixelY[],
2805 		unsigned int BytePerPixelC[],
2806 		double VRatio[],
2807 		double VRatioChroma[],
2808 		double SwathWidthY[],
2809 		double SwathWidthC[],
2810 		unsigned int DPPPerSurface[],
2811 		double HRatio[],
2812 		double HRatioChroma[],
2813 		double PixelClock[],
2814 		double PSCL_THROUGHPUT[],
2815 		double PSCL_THROUGHPUT_CHROMA[],
2816 		double Dppclk[],
2817 		double ReadBandwidthLuma[],
2818 		double ReadBandwidthChroma[],
2819 		unsigned int ReturnBusWidth,
2820 
2821 		/* Output */
2822 		double *DCFClkDeepSleep)
2823 {
2824 	unsigned int k;
2825 	double   DisplayPipeLineDeliveryTimeLuma;
2826 	double   DisplayPipeLineDeliveryTimeChroma;
2827 	double   DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2828 	double ReadBandwidth = 0.0;
2829 
2830 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2831 
2832 		if (VRatio[k] <= 1) {
2833 			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2834 					/ PixelClock[k];
2835 		} else {
2836 			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2837 		}
2838 		if (BytePerPixelC[k] == 0) {
2839 			DisplayPipeLineDeliveryTimeChroma = 0;
2840 		} else {
2841 			if (VRatioChroma[k] <= 1) {
2842 				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2843 						DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2844 			} else {
2845 				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2846 						/ Dppclk[k];
2847 			}
2848 		}
2849 
2850 		if (BytePerPixelC[k] > 0) {
2851 			DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2852 					BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2853 					__DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2854 					32.0 / DisplayPipeLineDeliveryTimeChroma);
2855 		} else {
2856 			DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2857 					64.0 / DisplayPipeLineDeliveryTimeLuma;
2858 		}
2859 		DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2860 
2861 #ifdef __DML_VBA_DEBUG__
2862 		dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2863 		dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2864 #endif
2865 	}
2866 
2867 	for (k = 0; k < NumberOfActiveSurfaces; ++k)
2868 		ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2869 
2870 	*DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2871 
2872 #ifdef __DML_VBA_DEBUG__
2873 	dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2874 	dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2875 	dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2876 	dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2877 #endif
2878 
2879 	for (k = 0; k < NumberOfActiveSurfaces; ++k)
2880 		*DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2881 #ifdef __DML_VBA_DEBUG__
2882 	dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2883 #endif
2884 } // CalculateDCFCLKDeepSleep
2885 
2886 double dml32_CalculateWriteBackDelay(
2887 		enum source_format_class WritebackPixelFormat,
2888 		double WritebackHRatio,
2889 		double WritebackVRatio,
2890 		unsigned int WritebackVTaps,
2891 		unsigned int         WritebackDestinationWidth,
2892 		unsigned int         WritebackDestinationHeight,
2893 		unsigned int         WritebackSourceHeight,
2894 		unsigned int HTotal)
2895 {
2896 	double CalculateWriteBackDelay;
2897 	double Line_length;
2898 	double Output_lines_last_notclamped;
2899 	double WritebackVInit;
2900 
2901 	WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
2902 	Line_length = dml_max((double) WritebackDestinationWidth,
2903 			dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
2904 	Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
2905 			dml_ceil(((double)WritebackSourceHeight -
2906 					(double) WritebackVInit) / (double)WritebackVRatio, 1.0);
2907 	if (Output_lines_last_notclamped < 0) {
2908 		CalculateWriteBackDelay = 0;
2909 	} else {
2910 		CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
2911 				(HTotal - WritebackDestinationWidth) + 80;
2912 	}
2913 	return CalculateWriteBackDelay;
2914 }
2915 
2916 void dml32_UseMinimumDCFCLK(
2917 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2918 		bool DRRDisplay[],
2919 		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2920 		unsigned int MaxInterDCNTileRepeaters,
2921 		unsigned int MaxPrefetchMode,
2922 		double DRAMClockChangeLatencyFinal,
2923 		double FCLKChangeLatency,
2924 		double SREnterPlusExitTime,
2925 		unsigned int ReturnBusWidth,
2926 		unsigned int RoundTripPingLatencyCycles,
2927 		unsigned int ReorderingBytes,
2928 		unsigned int PixelChunkSizeInKByte,
2929 		unsigned int MetaChunkSize,
2930 		bool GPUVMEnable,
2931 		unsigned int GPUVMMaxPageTableLevels,
2932 		bool HostVMEnable,
2933 		unsigned int NumberOfActiveSurfaces,
2934 		double HostVMMinPageSize,
2935 		unsigned int HostVMMaxNonCachedPageTableLevels,
2936 		bool DynamicMetadataVMEnabled,
2937 		bool ImmediateFlipRequirement,
2938 		bool ProgressiveToInterlaceUnitInOPP,
2939 		double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2940 		double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2941 		unsigned int VTotal[],
2942 		unsigned int VActive[],
2943 		unsigned int DynamicMetadataTransmittedBytes[],
2944 		unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2945 		bool Interlace[],
2946 		double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2947 		double RequiredDISPCLK[][2],
2948 		double UrgLatency[],
2949 		unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2950 		double ProjectedDCFClkDeepSleep[][2],
2951 		double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2952 		unsigned int TotalNumberOfActiveDPP[][2],
2953 		unsigned int TotalNumberOfDCCActiveDPP[][2],
2954 		unsigned int dpte_group_bytes[],
2955 		double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2956 		double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2957 		unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2958 		unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2959 		unsigned int BytePerPixelY[],
2960 		unsigned int BytePerPixelC[],
2961 		unsigned int HTotal[],
2962 		double PixelClock[],
2963 		double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2964 		double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2965 		double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2966 		bool DynamicMetadataEnable[],
2967 		double ReadBandwidthLuma[],
2968 		double ReadBandwidthChroma[],
2969 		double DCFCLKPerState[],
2970 		/* Output */
2971 		double DCFCLKState[][2])
2972 {
2973 	unsigned int i, j, k;
2974 	unsigned int     dummy1;
2975 	double dummy2, dummy3;
2976 	double   NormalEfficiency;
2977 	double   TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2978 
2979 	NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2980 	for  (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2981 		for  (j = 0; j <= 1; ++j) {
2982 			double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2983 			double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2984 			double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2985 			double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2986 			double MinimumTWait = 0.0;
2987 			double DPTEBandwidth;
2988 			double DCFCLKRequiredForAverageBandwidth;
2989 			unsigned int ExtraLatencyBytes;
2990 			double ExtraLatencyCycles;
2991 			double DCFCLKRequiredForPeakBandwidth;
2992 			unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2993 			double MinimumTvmPlus2Tr0;
2994 
2995 			TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
2996 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2997 				TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
2998 						+ NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
2999 								/ (15.75 * HTotal[k] / PixelClock[k]);
3000 			}
3001 
3002 			for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
3003 				NoOfDPPState[k] = NoOfDPP[i][j][k];
3004 
3005 			DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
3006 			DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
3007 
3008 			ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
3009 					TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
3010 					TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
3011 					NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
3012 					HostVMMaxNonCachedPageTableLevels);
3013 			ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
3014 					+ ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
3015 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3016 				double DCFCLKCyclesRequiredInPrefetch;
3017 				double PrefetchTime;
3018 
3019 				PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
3020 						* swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
3021 						+ PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
3022 								* BytePerPixelC[k]) / NormalEfficiency
3023 						/ ReturnBusWidth;
3024 				DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
3025 						+ PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
3026 								/ NormalEfficiency / ReturnBusWidth
3027 								* (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
3028 						+ 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
3029 								/ ReturnBusWidth
3030 						+ 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
3031 						+ PixelDCFCLKCyclesRequiredInPrefetch[k];
3032 				PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
3033 						* HTotal[k] / PixelClock[k];
3034 				DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
3035 						DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
3036 						UrgLatency[i] * GPUVMMaxPageTableLevels *
3037 						(HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
3038 
3039 				MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
3040 						UseMALLForPStateChange[k],
3041 						SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3042 						DRRDisplay[k],
3043 						DRAMClockChangeLatencyFinal,
3044 						FCLKChangeLatency,
3045 						UrgLatency[i],
3046 						SREnterPlusExitTime);
3047 
3048 				PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3049 						MinimumTWait - UrgLatency[i] *
3050 						((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3051 						GPUVMMaxPageTableLevels - 2) *  (HostVMEnable == true ?
3052 						HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3053 						DynamicMetadataVMExtraLatency[k];
3054 
3055 				if (PrefetchTime > 0) {
3056 					double ExpectedVRatioPrefetch;
3057 
3058 					ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3059 							PixelDCFCLKCyclesRequiredInPrefetch[k] /
3060 							DCFCLKCyclesRequiredInPrefetch);
3061 					DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3062 							PixelDCFCLKCyclesRequiredInPrefetch[k] /
3063 							PrefetchPixelLinesTime[k] *
3064 							dml_max(1.0, ExpectedVRatioPrefetch) *
3065 							dml_max(1.0, ExpectedVRatioPrefetch / 4);
3066 					if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3067 						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3068 								DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3069 								NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3070 								NormalEfficiency / ReturnBusWidth;
3071 					}
3072 				} else {
3073 					DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3074 				}
3075 				if (DynamicMetadataEnable[k] == true) {
3076 					double TSetupPipe;
3077 					double TdmbfPipe;
3078 					double TdmsksPipe;
3079 					double TdmecPipe;
3080 					double AllowedTimeForUrgentExtraLatency;
3081 
3082 					dml32_CalculateVUpdateAndDynamicMetadataParameters(
3083 							MaxInterDCNTileRepeaters,
3084 							RequiredDPPCLKPerSurface[i][j][k],
3085 							RequiredDISPCLK[i][j],
3086 							ProjectedDCFClkDeepSleep[i][j],
3087 							PixelClock[k],
3088 							HTotal[k],
3089 							VTotal[k] - VActive[k],
3090 							DynamicMetadataTransmittedBytes[k],
3091 							DynamicMetadataLinesBeforeActiveRequired[k],
3092 							Interlace[k],
3093 							ProgressiveToInterlaceUnitInOPP,
3094 
3095 							/* output */
3096 							&TSetupPipe,
3097 							&TdmbfPipe,
3098 							&TdmecPipe,
3099 							&TdmsksPipe,
3100 							&dummy1,
3101 							&dummy2,
3102 							&dummy3);
3103 					AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3104 							PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3105 							TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3106 					if (AllowedTimeForUrgentExtraLatency > 0)
3107 						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3108 								dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3109 								ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3110 					else
3111 						DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3112 				}
3113 			}
3114 			DCFCLKRequiredForPeakBandwidth = 0;
3115 			for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3116 				DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3117 						DCFCLKRequiredForPeakBandwidthPerSurface[k];
3118 			}
3119 			MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3120 					(HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3121 					(HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3122 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3123 				double MaximumTvmPlus2Tr0PlusTsw;
3124 
3125 				MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3126 						PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3127 				if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3128 					DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3129 				} else {
3130 					DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3131 							2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3132 								MinimumTvmPlus2Tr0 -
3133 								PrefetchPixelLinesTime[k] / 4),
3134 							(2 * ExtraLatencyCycles +
3135 								PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3136 								(MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3137 				}
3138 			}
3139 			DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3140 					dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3141 		}
3142 	}
3143 }
3144 
/*
 * dml32_CalculateExtraLatencyBytes - byte count behind the extra latency term.
 *
 * Adds ReorderingBytes to the pixel and meta chunk bytes of the active DPPs
 * (the chunk sizes are multiplied by 1024) and, when GPUVMEnable is set, to
 * the per-surface NumberOfDPP[k] * dpte_group_bytes[k] scaled by
 * (1 + 8 * host VM levels) and by HostVMInefficiencyFactor.
 *
 * The host VM level count is non-zero only when both GPUVMEnable and
 * HostVMEnable are set. It equals HostVMMaxNonCachedPageTableLevels for
 * HostVMMinPageSize below 2048, one less for sizes below 1048576 and two
 * less otherwise, never dropping below zero.
 *
 * The sum is accumulated as a double and converted to unsigned int on return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int surface;
	unsigned int host_vm_levels = 0;
	unsigned int chunk_kbytes;
	unsigned int group_scale;
	double total_bytes;

	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048) {
			host_vm_levels = HostVMMaxNonCachedPageTableLevels;
		} else {
			/* One level fewer below 1048576, two fewer from there on. */
			int fewer_levels = (HostVMMinPageSize < 1048576) ? 1 : 2;

			host_vm_levels = dml_max(0,
					(int) HostVMMaxNonCachedPageTableLevels - fewer_levels);
		}
	}

	/* Chunk sizes are combined as integers first, then scaled by 1024. */
	chunk_kbytes = TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize;
	total_bytes = ReorderingBytes + chunk_kbytes * 1024.0;

	if (!GPUVMEnable)
		return total_bytes;

	group_scale = 1 + 8 * host_vm_levels;
	for (surface = 0; surface < NumberOfActiveSurfaces; ++surface)
		total_bytes += NumberOfDPP[surface] * dpte_group_bytes[surface] *
				group_scale * HostVMInefficiencyFactor;

	return total_bytes;
}
3185 
/*
 * dml32_CalculateVUpdateAndDynamicMetadataParameters - VUpdate/VReady pixel
 * offsets and the dynamic metadata timing terms derived from them.
 *
 * Outputs:
 *   VUpdateWidthPix  - ceil((14 / DCFClkDeepSleep + 12 / Dppclk +
 *                      repeater delay) * PixelClock)
 *   VReadyOffsetPix  - ceil(max(150 / Dppclk, repeater delay +
 *                      20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *   VUpdateOffsetPix - ceil(HTotal / 4)
 *   TSetup           - the three pixel counts above summed, over PixelClock
 *   Tdmbf            - DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   Tdmec            - HTotal / PixelClock
 *   Tdmsks           - DynamicMetadataLinesBeforeActiveRequired lines, or
 *                      half of VBlank lines when that count is 0, expressed
 *                      as time; halved again when InterlaceEnable is 1 and
 *                      ProgressiveToInterlaceUnitInOPP is false
 *
 * The repeater delay is MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 * The clocks are used as divisors without any check for zero.
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix  =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix  = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	/* With no explicit line requirement, half of the vertical blank is used. */
	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so they need %f. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3240 
3241 double dml32_CalculateTWait(
3242 		unsigned int PrefetchMode,
3243 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3244 		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3245 		bool DRRDisplay,
3246 		double DRAMClockChangeLatency,
3247 		double FCLKChangeLatency,
3248 		double UrgentLatency,
3249 		double SREnterPlusExitTime)
3250 {
3251 	double TWait = 0.0;
3252 
3253 	if (PrefetchMode == 0 &&
3254 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3255 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3256 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3257 			!(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3258 		TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3259 	} else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3260 		TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3261 	} else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3262 		TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3263 	} else {
3264 		TWait = UrgentLatency;
3265 	}
3266 
3267 #ifdef __DML_VBA_DEBUG__
3268 	dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3269 	dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3270 #endif
3271 	return TWait;
3272 } // CalculateTWait
3273 
3274 // Function: get_return_bw_mbps
3275 // Megabyte per second
3276 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3277 		const int VoltageLevel,
3278 		const bool HostVMEnable,
3279 		const double DCFCLK,
3280 		const double FabricClock,
3281 		const double DRAMSpeed)
3282 {
3283 	double ReturnBW = 0.;
3284 	double IdealSDPPortBandwidth    = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3285 	double IdealFabricBandwidth     = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3286 	double IdealDRAMBandwidth       = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3287 	double PixelDataOnlyReturnBW    = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3288 			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3289 			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe  :
3290 					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3291 	double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3292 			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3293 			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3294 					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3295 
3296 	if (HostVMEnable != true)
3297 		ReturnBW = PixelDataOnlyReturnBW;
3298 	else
3299 		ReturnBW = PixelMixedWithVMDataReturnBW;
3300 
3301 #ifdef __DML_VBA_DEBUG__
3302 	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3303 	dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3304 	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3305 	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3306 	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3307 	dml_print("DML::%s: IdealSDPPortBandwidth        = %f\n", __func__, IdealSDPPortBandwidth);
3308 	dml_print("DML::%s: IdealFabricBandwidth         = %f\n", __func__, IdealFabricBandwidth);
3309 	dml_print("DML::%s: IdealDRAMBandwidth           = %f\n", __func__, IdealDRAMBandwidth);
3310 	dml_print("DML::%s: PixelDataOnlyReturnBW        = %f\n", __func__, PixelDataOnlyReturnBW);
3311 	dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3312 	dml_print("DML::%s: ReturnBW                     = %f MBps\n", __func__, ReturnBW);
3313 #endif
3314 	return ReturnBW;
3315 }
3316 
3317 // Function: get_return_bw_mbps_vm_only
3318 // Megabyte per second
3319 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3320 		const int VoltageLevel,
3321 		const double DCFCLK,
3322 		const double FabricClock,
3323 		const double DRAMSpeed)
3324 {
3325 	double VMDataOnlyReturnBW = dml_min3(
3326 			soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3327 			FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3328 					* soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3329 			DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3330 					* (VoltageLevel < 2 ?
3331 							soc->pct_ideal_dram_bw_after_urgent_strobe :
3332 							soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3333 #ifdef __DML_VBA_DEBUG__
3334 	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3335 	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3336 	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3337 	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3338 	dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3339 #endif
3340 	return VMDataOnlyReturnBW;
3341 }
3342 
3343 double dml32_CalculateExtraLatency(
3344 		unsigned int RoundTripPingLatencyCycles,
3345 		unsigned int ReorderingBytes,
3346 		double DCFCLK,
3347 		unsigned int TotalNumberOfActiveDPP,
3348 		unsigned int PixelChunkSizeInKByte,
3349 		unsigned int TotalNumberOfDCCActiveDPP,
3350 		unsigned int MetaChunkSize,
3351 		double ReturnBW,
3352 		bool GPUVMEnable,
3353 		bool HostVMEnable,
3354 		unsigned int NumberOfActiveSurfaces,
3355 		unsigned int NumberOfDPP[],
3356 		unsigned int dpte_group_bytes[],
3357 		double HostVMInefficiencyFactor,
3358 		double HostVMMinPageSize,
3359 		unsigned int HostVMMaxNonCachedPageTableLevels)
3360 {
3361 	double ExtraLatencyBytes;
3362 	double ExtraLatency;
3363 
3364 	ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3365 			ReorderingBytes,
3366 			TotalNumberOfActiveDPP,
3367 			PixelChunkSizeInKByte,
3368 			TotalNumberOfDCCActiveDPP,
3369 			MetaChunkSize,
3370 			GPUVMEnable,
3371 			HostVMEnable,
3372 			NumberOfActiveSurfaces,
3373 			NumberOfDPP,
3374 			dpte_group_bytes,
3375 			HostVMInefficiencyFactor,
3376 			HostVMMinPageSize,
3377 			HostVMMaxNonCachedPageTableLevels);
3378 
3379 	ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3380 
3381 #ifdef __DML_VBA_DEBUG__
3382 	dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3383 	dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3384 	dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3385 	dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3386 	dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3387 #endif
3388 
3389 	return ExtraLatency;
3390 } // CalculateExtraLatency
3391 
3392 bool dml32_CalculatePrefetchSchedule(
3393 		struct vba_vars_st *v,
3394 		unsigned int k,
3395 		double HostVMInefficiencyFactor,
3396 		DmlPipe *myPipe,
3397 		unsigned int DSCDelay,
3398 		unsigned int DPP_RECOUT_WIDTH,
3399 		unsigned int VStartup,
3400 		unsigned int MaxVStartup,
3401 		double UrgentLatency,
3402 		double UrgentExtraLatency,
3403 		double TCalc,
3404 		unsigned int PDEAndMetaPTEBytesFrame,
3405 		unsigned int MetaRowByte,
3406 		unsigned int PixelPTEBytesPerRow,
3407 		double PrefetchSourceLinesY,
3408 		unsigned int SwathWidthY,
3409 		unsigned int VInitPreFillY,
3410 		unsigned int MaxNumSwathY,
3411 		double PrefetchSourceLinesC,
3412 		unsigned int SwathWidthC,
3413 		unsigned int VInitPreFillC,
3414 		unsigned int MaxNumSwathC,
3415 		unsigned int swath_width_luma_ub,
3416 		unsigned int swath_width_chroma_ub,
3417 		unsigned int SwathHeightY,
3418 		unsigned int SwathHeightC,
3419 		double TWait,
3420 		double TPreReq,
3421 		/* Output */
3422 		double   *DSTXAfterScaler,
3423 		double   *DSTYAfterScaler,
3424 		double *DestinationLinesForPrefetch,
3425 		double *PrefetchBandwidth,
3426 		double *DestinationLinesToRequestVMInVBlank,
3427 		double *DestinationLinesToRequestRowInVBlank,
3428 		double *VRatioPrefetchY,
3429 		double *VRatioPrefetchC,
3430 		double *RequiredPrefetchPixDataBWLuma,
3431 		double *RequiredPrefetchPixDataBWChroma,
3432 		bool   *NotEnoughTimeForDynamicMetadata,
3433 		double *Tno_bw,
3434 		double *prefetch_vmrow_bw,
3435 		double *Tdmdl_vm,
3436 		double *Tdmdl,
3437 		double *TSetup,
3438 		unsigned int   *VUpdateOffsetPix,
3439 		double   *VUpdateWidthPix,
3440 		double   *VReadyOffsetPix)
3441 {
3442 	double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
3443 	bool MyError = false;
3444 	unsigned int DPPCycles, DISPCLKCycles;
3445 	double DSTTotalPixelsAfterScaler;
3446 	double LineTime;
3447 	double dst_y_prefetch_equ;
3448 	double prefetch_bw_oto;
3449 	double Tvm_oto;
3450 	double Tr0_oto;
3451 	double Tvm_oto_lines;
3452 	double Tr0_oto_lines;
3453 	double dst_y_prefetch_oto;
3454 	double TimeForFetchingMetaPTE = 0;
3455 	double TimeForFetchingRowInVBlank = 0;
3456 	double LinesToRequestPrefetchPixelData = 0;
3457 	unsigned int HostVMDynamicLevelsTrips;
3458 	double  trip_to_mem;
3459 	double  Tvm_trips;
3460 	double  Tr0_trips;
3461 	double  Tvm_trips_rounded;
3462 	double  Tr0_trips_rounded;
3463 	double  Lsw_oto;
3464 	double  Tpre_rounded;
3465 	double  prefetch_bw_equ;
3466 	double  Tvm_equ;
3467 	double  Tr0_equ;
3468 	double  Tdmbf;
3469 	double  Tdmec;
3470 	double  Tdmsks;
3471 	double  prefetch_sw_bytes;
3472 	double  bytes_pp;
3473 	double  dep_bytes;
3474 	unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__;
3475 	double  min_Lsw;
3476 	double  Tsw_est1 = 0;
3477 	double  Tsw_est3 = 0;
3478 	double  TPreMargin = 0;
3479 
3480 	if (v->GPUVMEnable == true && v->HostVMEnable == true)
3481 		HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3482 	else
3483 		HostVMDynamicLevelsTrips = 0;
3484 #ifdef __DML_VBA_DEBUG__
3485 	dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable);
3486 	dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels);
3487 	dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3488 	dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3489 			__func__, v->HostVMEnable, HostVMInefficiencyFactor);
3490 #endif
3491 	dml32_CalculateVUpdateAndDynamicMetadataParameters(
3492 			v->MaxInterDCNTileRepeaters,
3493 			myPipe->Dppclk,
3494 			myPipe->Dispclk,
3495 			myPipe->DCFClkDeepSleep,
3496 			myPipe->PixelClock,
3497 			myPipe->HTotal,
3498 			myPipe->VBlank,
3499 			v->DynamicMetadataTransmittedBytes[k],
3500 			v->DynamicMetadataLinesBeforeActiveRequired[k],
3501 			myPipe->InterlaceEnable,
3502 			myPipe->ProgressiveToInterlaceUnitInOPP,
3503 			TSetup,
3504 
3505 			/* output */
3506 			&Tdmbf,
3507 			&Tdmec,
3508 			&Tdmsks,
3509 			VUpdateOffsetPix,
3510 			VUpdateWidthPix,
3511 			VReadyOffsetPix);
3512 
3513 	LineTime = myPipe->HTotal / myPipe->PixelClock;
3514 	trip_to_mem = UrgentLatency;
3515 	Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
3516 
3517 	if (v->DynamicMetadataVMEnabled == true)
3518 		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
3519 	else
3520 		*Tdmdl = TWait + UrgentExtraLatency;
3521 
3522 #ifdef __DML_VBA_ALLOW_DELTA__
3523 	if (v->DynamicMetadataEnable[k] == false)
3524 		*Tdmdl = 0.0;
3525 #endif
3526 
3527 	if (v->DynamicMetadataEnable[k] == true) {
3528 		if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
3529 			*NotEnoughTimeForDynamicMetadata = true;
3530 #ifdef __DML_VBA_DEBUG__
3531 			dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3532 			dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3533 					__func__, Tdmbf);
3534 			dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3535 			dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3536 					__func__, Tdmsks);
3537 			dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3538 					__func__, *Tdmdl);
3539 #endif
3540 		} else {
3541 			*NotEnoughTimeForDynamicMetadata = false;
3542 		}
3543 	} else {
3544 		*NotEnoughTimeForDynamicMetadata = false;
3545 	}
3546 
3547 	*Tdmdl_vm =  (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true &&
3548 			v->GPUVMEnable == true ? TWait + Tvm_trips : 0);
3549 
3550 	if (myPipe->ScalerEnabled)
3551 		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
3552 	else
3553 		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;
3554 
3555 	DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;
3556 
3557 	DISPCLKCycles = v->DISPCLKDelaySubtotal;
3558 
3559 	if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3560 		return true;
3561 
3562 	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
3563 			myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3564 
3565 	*DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3566 			+ (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3567 			+ ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3568 					myPipe->HActive / 2 : 0)
3569 			+ ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3570 
3571 #ifdef __DML_VBA_DEBUG__
3572 	dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
3573 	dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3574 	dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3575 	dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
3576 	dml_print("DML::%s: DISPCLK: %f\n", __func__,  myPipe->Dispclk);
3577 	dml_print("DML::%s: DSCDelay: %d\n", __func__,  DSCDelay);
3578 	dml_print("DML::%s: ODMMode: %d\n", __func__,  myPipe->ODMMode);
3579 	dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3580 	dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__,  *DSTXAfterScaler);
3581 #endif
3582 
3583 	if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3584 		*DSTYAfterScaler = 1;
3585 	else
3586 		*DSTYAfterScaler = 0;
3587 
3588 	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3589 	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3590 	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3591 #ifdef __DML_VBA_DEBUG__
3592 	dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__,  *DSTXAfterScaler);
3593 	dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3594 #endif
3595 
3596 	MyError = false;
3597 
3598 	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
3599 
3600 	if (v->GPUVMEnable == true) {
3601 		Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
3602 		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3603 		if (v->GPUVMMaxPageTableLevels >= 3) {
3604 			*Tno_bw = UrgentExtraLatency + trip_to_mem *
3605 					(double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
3606 		} else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) {
3607 			Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
3608 					4.0 * LineTime; // VBA_ERROR
3609 			*Tno_bw = UrgentExtraLatency;
3610 		} else {
3611 			*Tno_bw = 0;
3612 		}
3613 	} else if (myPipe->DCCEnable == true) {
3614 		Tvm_trips_rounded = LineTime / 4.0;
3615 		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3616 		*Tno_bw = 0;
3617 	} else {
3618 		Tvm_trips_rounded = LineTime / 4.0;
3619 		Tr0_trips_rounded = LineTime / 2.0;
3620 		*Tno_bw = 0;
3621 	}
3622 	Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
3623 	Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
3624 
3625 	if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3626 			|| myPipe->SourcePixelFormat == dm_420_12) {
3627 		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3628 	} else {
3629 		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3630 	}
3631 
3632 	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3633 			+ PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3634 	prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3635 			prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
3636 
3637 	min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
3638 	min_Lsw = dml_max(min_Lsw, 1.0);
3639 	Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
3640 
3641 	if (v->GPUVMEnable == true) {
3642 		Tvm_oto = dml_max3(
3643 				Tvm_trips,
3644 				*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
3645 				LineTime / 4.0);
3646 	} else
3647 		Tvm_oto = LineTime / 4.0;
3648 
3649 	if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3650 		Tr0_oto = dml_max4(
3651 				Tr0_trips,
3652 				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
3653 				(LineTime - Tvm_oto)/2.0,
3654 				LineTime / 4.0);
3655 #ifdef __DML_VBA_DEBUG__
3656 		dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3657 				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
3658 		dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
3659 		dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
3660 		dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
3661 #endif
3662 	} else
3663 		Tr0_oto = (LineTime - Tvm_oto) / 2.0;
3664 
3665 	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
3666 	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
3667 	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
3668 
3669 	dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
3670 			(*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3671 
3672 #ifdef __DML_VBA_DEBUG__
3673 	dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3674 	dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
3675 	dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3676 	dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3677 	dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
3678 	dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3679 	dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3680 	dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3681 	dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3682 	dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3683 	dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3684 	dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
3685 	dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
3686 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3687 	dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3688 	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3689 	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3690 	dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
3691 	dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
3692 	dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
3693 	dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
3694 	dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
3695 	dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
3696 	dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
3697 	dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
3698 	dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
3699 	dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
3700 #endif
3701 
3702 	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
3703 	Tpre_rounded = dst_y_prefetch_equ * LineTime;
3704 
3705 	TPreMargin = Tpre_rounded - TPreReq;
3706 #ifdef __DML_VBA_DEBUG__
3707 	dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
3708 	dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
3709 	dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3710 	dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3711 			__func__, VStartup * LineTime);
3712 	dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3713 	dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3714 	dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
3715 	dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3716 	dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3717 	dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3718 	dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3719 			__func__, *DSTYAfterScaler);
3720 #endif
3721 	dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3722 			MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3723 
3724 	if (prefetch_sw_bytes < dep_bytes)
3725 		prefetch_sw_bytes = 2 * dep_bytes;
3726 
3727 	*PrefetchBandwidth = 0;
3728 	*DestinationLinesToRequestVMInVBlank = 0;
3729 	*DestinationLinesToRequestRowInVBlank = 0;
3730 	*VRatioPrefetchY = 0;
3731 	*VRatioPrefetchC = 0;
3732 	*RequiredPrefetchPixDataBWLuma = 0;
3733 	if (dst_y_prefetch_equ > 1 && TPreMargin > 0.0) {
3734 		double PrefetchBandwidth1;
3735 		double PrefetchBandwidth2;
3736 		double PrefetchBandwidth3;
3737 		double PrefetchBandwidth4;
3738 
3739 		if (Tpre_rounded - *Tno_bw > 0) {
3740 			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3741 					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3742 					+ prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
3743 			Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
3744 		} else
3745 			PrefetchBandwidth1 = 0;
3746 
3747 		if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
3748 				&& Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
3749 			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3750 					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3751 					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
3752 		}
3753 
3754 		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
3755 			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
3756 			(Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
3757 		else
3758 			PrefetchBandwidth2 = 0;
3759 
3760 		if (Tpre_rounded - Tvm_trips_rounded > 0) {
3761 			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3762 					+ prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
3763 			Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
3764 		} else
3765 			PrefetchBandwidth3 = 0;
3766 
3767 
3768 		if (VStartup == MaxVStartup &&
3769 				(Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
3770 				LineTime - Tvm_trips_rounded > 0) {
3771 			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3772 					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
3773 		}
3774 
3775 		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
3776 			PrefetchBandwidth4 = prefetch_sw_bytes /
3777 					(Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
3778 		} else {
3779 			PrefetchBandwidth4 = 0;
3780 		}
3781 
3782 #ifdef __DML_VBA_DEBUG__
3783 		dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
3784 		dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3785 		dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
3786 		dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
3787 		dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
3788 		dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3789 		dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3790 		dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3791 		dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3792 #endif
3793 		{
3794 			bool Case1OK;
3795 			bool Case2OK;
3796 			bool Case3OK;
3797 
3798 			if (PrefetchBandwidth1 > 0) {
3799 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3800 						>= Tvm_trips_rounded
3801 						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3802 								/ PrefetchBandwidth1 >= Tr0_trips_rounded) {
3803 					Case1OK = true;
3804 				} else {
3805 					Case1OK = false;
3806 				}
3807 			} else {
3808 				Case1OK = false;
3809 			}
3810 
3811 			if (PrefetchBandwidth2 > 0) {
3812 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3813 						>= Tvm_trips_rounded
3814 						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3815 						/ PrefetchBandwidth2 < Tr0_trips_rounded) {
3816 					Case2OK = true;
3817 				} else {
3818 					Case2OK = false;
3819 				}
3820 			} else {
3821 				Case2OK = false;
3822 			}
3823 
3824 			if (PrefetchBandwidth3 > 0) {
3825 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3826 						Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3827 								HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3828 								Tr0_trips_rounded) {
3829 					Case3OK = true;
3830 				} else {
3831 					Case3OK = false;
3832 				}
3833 			} else {
3834 				Case3OK = false;
3835 			}
3836 
3837 			if (Case1OK)
3838 				prefetch_bw_equ = PrefetchBandwidth1;
3839 			else if (Case2OK)
3840 				prefetch_bw_equ = PrefetchBandwidth2;
3841 			else if (Case3OK)
3842 				prefetch_bw_equ = PrefetchBandwidth3;
3843 			else
3844 				prefetch_bw_equ = PrefetchBandwidth4;
3845 
3846 #ifdef __DML_VBA_DEBUG__
3847 			dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3848 			dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3849 			dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3850 			dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
3851 #endif
3852 
3853 			if (prefetch_bw_equ > 0) {
3854 				if (v->GPUVMEnable == true) {
3855 					Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3856 							HostVMInefficiencyFactor / prefetch_bw_equ,
3857 							Tvm_trips, LineTime / 4);
3858 				} else {
3859 					Tvm_equ = LineTime / 4;
3860 				}
3861 
3862 				if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3863 					Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3864 							HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
3865 							(LineTime - Tvm_equ) / 2, LineTime / 4);
3866 				} else {
3867 					Tr0_equ = (LineTime - Tvm_equ) / 2;
3868 				}
3869 			} else {
3870 				Tvm_equ = 0;
3871 				Tr0_equ = 0;
3872 #ifdef __DML_VBA_DEBUG__
3873 				dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3874 #endif
3875 			}
3876 		}
3877 
3878 		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
3879 			if (dst_y_prefetch_oto * LineTime < TPreReq) {
3880 				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
3881 			} else {
3882 				*DestinationLinesForPrefetch = dst_y_prefetch_oto;
3883 			}
3884 			TimeForFetchingMetaPTE = Tvm_oto;
3885 			TimeForFetchingRowInVBlank = Tr0_oto;
3886 			*PrefetchBandwidth = prefetch_bw_oto;
3887 		} else {
3888 			*DestinationLinesForPrefetch = dst_y_prefetch_equ;
3889 			TimeForFetchingMetaPTE = Tvm_equ;
3890 			TimeForFetchingRowInVBlank = Tr0_equ;
3891 			*PrefetchBandwidth = prefetch_bw_equ;
3892 		}
3893 
3894 		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
3895 
3896 		*DestinationLinesToRequestRowInVBlank =
3897 				dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
3898 
3899 		LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
3900 				*DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3901 
3902 #ifdef __DML_VBA_DEBUG__
3903 		dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3904 		dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3905 				__func__, *DestinationLinesToRequestVMInVBlank);
3906 		dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
3907 		dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3908 		dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3909 				__func__, *DestinationLinesToRequestRowInVBlank);
3910 		dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3911 		dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
3912 #endif
3913 
3914 		if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
3915 			*VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
3916 			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3917 #ifdef __DML_VBA_DEBUG__
3918 			dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3919 			dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3920 			dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3921 #endif
3922 			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3923 				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3924 					*VRatioPrefetchY =
3925 							dml_max((double) PrefetchSourceLinesY /
3926 									LinesToRequestPrefetchPixelData,
3927 									(double) MaxNumSwathY * SwathHeightY /
3928 									(LinesToRequestPrefetchPixelData -
3929 									(VInitPreFillY - 3.0) / 2.0));
3930 					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3931 				} else {
3932 					MyError = true;
3933 					*VRatioPrefetchY = 0;
3934 				}
3935 #ifdef __DML_VBA_DEBUG__
3936 				dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3937 				dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3938 				dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3939 #endif
3940 			}
3941 
3942 			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
3943 			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3944 
3945 #ifdef __DML_VBA_DEBUG__
3946 			dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3947 			dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3948 			dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3949 #endif
3950 			if ((SwathHeightC > 4)) {
3951 				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3952 					*VRatioPrefetchC =
3953 						dml_max(*VRatioPrefetchC,
3954 							(double) MaxNumSwathC * SwathHeightC /
3955 							(LinesToRequestPrefetchPixelData -
3956 							(VInitPreFillC - 3.0) / 2.0));
3957 					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3958 				} else {
3959 					MyError = true;
3960 					*VRatioPrefetchC = 0;
3961 				}
3962 #ifdef __DML_VBA_DEBUG__
3963 				dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3964 				dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3965 				dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3966 #endif
3967 			}
3968 
3969 			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
3970 					/ LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
3971 					/ LineTime;
3972 
3973 #ifdef __DML_VBA_DEBUG__
3974 			dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3975 			dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3976 			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3977 			dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
3978 					__func__, *RequiredPrefetchPixDataBWLuma);
3979 #endif
3980 			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
3981 					LinesToRequestPrefetchPixelData
3982 					* myPipe->BytePerPixelC
3983 					* swath_width_chroma_ub / LineTime;
3984 		} else {
3985 			MyError = true;
3986 #ifdef __DML_VBA_DEBUG__
3987 			dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
3988 					__func__, LinesToRequestPrefetchPixelData);
3989 #endif
3990 			*VRatioPrefetchY = 0;
3991 			*VRatioPrefetchC = 0;
3992 			*RequiredPrefetchPixDataBWLuma = 0;
3993 			*RequiredPrefetchPixDataBWChroma = 0;
3994 		}
3995 #ifdef __DML_VBA_DEBUG__
3996 		dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
3997 			(double)LinesToRequestPrefetchPixelData * LineTime +
3998 			2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
3999 		dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
4000 		dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
4001 			(*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
4002 		dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
4003 		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
4004 			TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
4005 			((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
4006 		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
4007 				PixelPTEBytesPerRow);
4008 #endif
4009 	} else {
4010 		MyError = true;
4011 #ifdef __DML_VBA_DEBUG__
4012 		dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
4013 				__func__, dst_y_prefetch_equ);
4014 #endif
4015 	}
4016 
4017 	{
4018 		double prefetch_vm_bw;
4019 		double prefetch_row_bw;
4020 
4021 		if (PDEAndMetaPTEBytesFrame == 0) {
4022 			prefetch_vm_bw = 0;
4023 		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
4024 #ifdef __DML_VBA_DEBUG__
4025 			dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
4026 			dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
4027 			dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
4028 					__func__, *DestinationLinesToRequestVMInVBlank);
4029 			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
4030 #endif
4031 			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
4032 					(*DestinationLinesToRequestVMInVBlank * LineTime);
4033 #ifdef __DML_VBA_DEBUG__
4034 			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
4035 #endif
4036 		} else {
4037 			prefetch_vm_bw = 0;
4038 			MyError = true;
4039 #ifdef __DML_VBA_DEBUG__
4040 			dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
4041 					__func__, *DestinationLinesToRequestVMInVBlank);
4042 #endif
4043 		}
4044 
4045 		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
4046 			prefetch_row_bw = 0;
4047 		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
4048 			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
4049 					(*DestinationLinesToRequestRowInVBlank * LineTime);
4050 
4051 #ifdef __DML_VBA_DEBUG__
4052 			dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
4053 			dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
4054 			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
4055 					__func__, *DestinationLinesToRequestRowInVBlank);
4056 			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
4057 #endif
4058 		} else {
4059 			prefetch_row_bw = 0;
4060 			MyError = true;
4061 #ifdef __DML_VBA_DEBUG__
4062 			dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
4063 					__func__, *DestinationLinesToRequestRowInVBlank);
4064 #endif
4065 		}
4066 
4067 		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
4068 	}
4069 
4070 	if (MyError) {
4071 		*PrefetchBandwidth = 0;
4072 		TimeForFetchingMetaPTE = 0;
4073 		TimeForFetchingRowInVBlank = 0;
4074 		*DestinationLinesToRequestVMInVBlank = 0;
4075 		*DestinationLinesToRequestRowInVBlank = 0;
4076 		*DestinationLinesForPrefetch = 0;
4077 		LinesToRequestPrefetchPixelData = 0;
4078 		*VRatioPrefetchY = 0;
4079 		*VRatioPrefetchC = 0;
4080 		*RequiredPrefetchPixDataBWLuma = 0;
4081 		*RequiredPrefetchPixDataBWChroma = 0;
4082 	}
4083 
4084 	return MyError;
4085 } // CalculatePrefetchSchedule
4086 
4087 void dml32_CalculateFlipSchedule(
4088 		double HostVMInefficiencyFactor,
4089 		double UrgentExtraLatency,
4090 		double UrgentLatency,
4091 		unsigned int GPUVMMaxPageTableLevels,
4092 		bool HostVMEnable,
4093 		unsigned int HostVMMaxNonCachedPageTableLevels,
4094 		bool GPUVMEnable,
4095 		double HostVMMinPageSize,
4096 		double PDEAndMetaPTEBytesPerFrame,
4097 		double MetaRowBytes,
4098 		double DPTEBytesPerRow,
4099 		double BandwidthAvailableForImmediateFlip,
4100 		unsigned int TotImmediateFlipBytes,
4101 		enum source_format_class SourcePixelFormat,
4102 		double LineTime,
4103 		double VRatio,
4104 		double VRatioChroma,
4105 		double Tno_bw,
4106 		bool DCCEnable,
4107 		unsigned int dpte_row_height,
4108 		unsigned int meta_row_height,
4109 		unsigned int dpte_row_height_chroma,
4110 		unsigned int meta_row_height_chroma,
4111 		bool    use_one_row_for_frame_flip,
4112 
4113 		/* Output */
4114 		double *DestinationLinesToRequestVMInImmediateFlip,
4115 		double *DestinationLinesToRequestRowInImmediateFlip,
4116 		double *final_flip_bw,
4117 		bool *ImmediateFlipSupportedForPipe)
4118 {
4119 	double min_row_time = 0.0;
4120 	unsigned int HostVMDynamicLevelsTrips;
4121 	double TimeForFetchingMetaPTEImmediateFlip;
4122 	double TimeForFetchingRowInVBlankImmediateFlip;
4123 	double ImmediateFlipBW;
4124 
4125 	if (GPUVMEnable == true && HostVMEnable == true)
4126 		HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4127 	else
4128 		HostVMDynamicLevelsTrips = 0;
4129 
4130 #ifdef __DML_VBA_DEBUG__
4131 	dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4132 	dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4133 #endif
4134 
4135 	if (TotImmediateFlipBytes > 0) {
4136 		if (use_one_row_for_frame_flip) {
4137 			ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4138 					BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4139 		} else {
4140 			ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4141 					BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4142 		}
4143 		if (GPUVMEnable == true) {
4144 			TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4145 					HostVMInefficiencyFactor / ImmediateFlipBW,
4146 					UrgentExtraLatency + UrgentLatency *
4147 					(GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4148 					LineTime / 4.0);
4149 		} else {
4150 			TimeForFetchingMetaPTEImmediateFlip = 0;
4151 		}
4152 		if ((GPUVMEnable == true || DCCEnable == true)) {
4153 			TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4154 					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4155 					UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4156 		} else {
4157 			TimeForFetchingRowInVBlankImmediateFlip = 0;
4158 		}
4159 
4160 		*DestinationLinesToRequestVMInImmediateFlip =
4161 				dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4162 		*DestinationLinesToRequestRowInImmediateFlip =
4163 				dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4164 
4165 		if (GPUVMEnable == true) {
4166 			*final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4167 					(*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4168 					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4169 					(*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4170 		} else if ((GPUVMEnable == true || DCCEnable == true)) {
4171 			*final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4172 					(*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4173 		} else {
4174 			*final_flip_bw = 0;
4175 		}
4176 	} else {
4177 		TimeForFetchingMetaPTEImmediateFlip = 0;
4178 		TimeForFetchingRowInVBlankImmediateFlip = 0;
4179 		*DestinationLinesToRequestVMInImmediateFlip = 0;
4180 		*DestinationLinesToRequestRowInImmediateFlip = 0;
4181 		*final_flip_bw = 0;
4182 	}
4183 
4184 	if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4185 		if (GPUVMEnable == true && DCCEnable != true) {
4186 			min_row_time = dml_min(dpte_row_height *
4187 					LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4188 		} else if (GPUVMEnable != true && DCCEnable == true) {
4189 			min_row_time = dml_min(meta_row_height *
4190 					LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4191 		} else {
4192 			min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4193 					LineTime / VRatio, dpte_row_height_chroma * LineTime /
4194 					VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4195 		}
4196 	} else {
4197 		if (GPUVMEnable == true && DCCEnable != true) {
4198 			min_row_time = dpte_row_height * LineTime / VRatio;
4199 		} else if (GPUVMEnable != true && DCCEnable == true) {
4200 			min_row_time = meta_row_height * LineTime / VRatio;
4201 		} else {
4202 			min_row_time =
4203 				dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4204 		}
4205 	}
4206 
4207 	if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4208 			|| TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4209 					> min_row_time) {
4210 		*ImmediateFlipSupportedForPipe = false;
4211 	} else {
4212 		*ImmediateFlipSupportedForPipe = true;
4213 	}
4214 
4215 #ifdef __DML_VBA_DEBUG__
4216 	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4217 	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4218 	dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4219 			__func__, *DestinationLinesToRequestVMInImmediateFlip);
4220 	dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4221 			__func__, *DestinationLinesToRequestRowInImmediateFlip);
4222 	dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4223 	dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4224 			__func__, TimeForFetchingRowInVBlankImmediateFlip);
4225 	dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4226 	dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4227 #endif
4228 } // CalculateFlipSchedule
4229 
4230 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4231 		struct vba_vars_st *v,
4232 		unsigned int PrefetchMode,
4233 		double DCFCLK,
4234 		double ReturnBW,
4235 		SOCParametersList mmSOCParameters,
4236 		double SOCCLK,
4237 		double DCFClkDeepSleep,
4238 		unsigned int DETBufferSizeY[],
4239 		unsigned int DETBufferSizeC[],
4240 		unsigned int SwathHeightY[],
4241 		unsigned int SwathHeightC[],
4242 		double SwathWidthY[],
4243 		double SwathWidthC[],
4244 		unsigned int DPPPerSurface[],
4245 		double BytePerPixelDETY[],
4246 		double BytePerPixelDETC[],
4247 		double DSTXAfterScaler[],
4248 		double DSTYAfterScaler[],
4249 		bool UnboundedRequestEnabled,
4250 		unsigned int CompressedBufferSizeInkByte,
4251 
4252 		/* Output */
4253 		enum clock_change_support *DRAMClockChangeSupport,
4254 		double MaxActiveDRAMClockChangeLatencySupported[],
4255 		unsigned int SubViewportLinesNeededInMALL[],
4256 		enum dm_fclock_change_support *FCLKChangeSupport,
4257 		double *MinActiveFCLKChangeLatencySupported,
4258 		bool *USRRetrainingSupport,
4259 		double ActiveDRAMClockChangeLatencyMargin[])
4260 {
4261 	unsigned int i, j, k;
4262 	unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
4263 	unsigned int DRAMClockChangeSupportNumber = 0;
4264 	unsigned int LastSurfaceWithoutMargin;
4265 	unsigned int DRAMClockChangeMethod = 0;
4266 	bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4267 	double MinActiveFCLKChangeMargin = 0.;
4268 	double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4269 	double ActiveClockChangeLatencyHidingY;
4270 	double ActiveClockChangeLatencyHidingC;
4271 	double ActiveClockChangeLatencyHiding;
4272 	double EffectiveDETBufferSizeY;
4273 	double     ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
4274 	double     USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
4275 	double TotalPixelBW = 0.0;
4276 	bool    SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
4277 	double     EffectiveLBLatencyHidingY;
4278 	double     EffectiveLBLatencyHidingC;
4279 	double     LinesInDETY[DC__NUM_DPP__MAX];
4280 	double     LinesInDETC[DC__NUM_DPP__MAX];
4281 	unsigned int    LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
4282 	unsigned int    LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
4283 	double     FullDETBufferingTimeY;
4284 	double     FullDETBufferingTimeC;
4285 	double     WritebackDRAMClockChangeLatencyMargin;
4286 	double     WritebackFCLKChangeLatencyMargin;
4287 	double     WritebackLatencyHiding;
4288 	bool    SameTimingForFCLKChange;
4289 
4290 	unsigned int    TotalActiveWriteback = 0;
4291 	unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
4292 	unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
4293 
4294 	v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4295 	v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4296 			+ mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4297 	v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
4298 	v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
4299 	v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4300 			+ 10 / DCFClkDeepSleep;
4301 	v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4302 			+ 10 / DCFClkDeepSleep;
4303 	v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4304 			+ 10 / DCFClkDeepSleep;
4305 	v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4306 			+ mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4307 
4308 #ifdef __DML_VBA_DEBUG__
4309 	dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4310 	dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4311 	dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4312 	dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark);
4313 	dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark);
4314 	dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark);
4315 	dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark);
4316 	dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark);
4317 	dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark);
4318 	dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark);
4319 	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4320 			__func__, v->Watermark.Z8StutterEnterPlusExitWatermark);
4321 #endif
4322 
4323 
4324 	TotalActiveWriteback = 0;
4325 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4326 		if (v->WritebackEnable[k] == true)
4327 			TotalActiveWriteback = TotalActiveWriteback + 1;
4328 	}
4329 
4330 	if (TotalActiveWriteback <= 1) {
4331 		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4332 	} else {
4333 		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4334 				+ v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4335 	}
4336 	if (v->USRRetrainingRequiredFinal)
4337 		v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark
4338 				+ mmSOCParameters.USRRetrainingLatency;
4339 
4340 	if (TotalActiveWriteback <= 1) {
4341 		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4342 				+ mmSOCParameters.WritebackLatency;
4343 		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4344 				+ mmSOCParameters.WritebackLatency;
4345 	} else {
4346 		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4347 				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4348 		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4349 				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK;
4350 	}
4351 
4352 	if (v->USRRetrainingRequiredFinal)
4353 		v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
4354 				+ mmSOCParameters.USRRetrainingLatency;
4355 
4356 	if (v->USRRetrainingRequiredFinal)
4357 		v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
4358 				+ mmSOCParameters.USRRetrainingLatency;
4359 
4360 #ifdef __DML_VBA_DEBUG__
4361 	dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4362 			__func__, v->Watermark.WritebackDRAMClockChangeWatermark);
4363 	dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark);
4364 	dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark);
4365 	dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal);
4366 	dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4367 #endif
4368 
4369 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4370 		TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
4371 				SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
4372 	}
4373 
4374 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4375 
4376 		LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
4377 		LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
4378 
4379 
4380 #ifdef __DML_VBA_DEBUG__
4381 		dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines);
4382 		dml_print("DML::%s: k=%d, v->LineBufferSizeFinal     = %d\n", __func__, k, v->LineBufferSizeFinal);
4383 		dml_print("DML::%s: k=%d, v->LBBitPerPixel      = %d\n", __func__, k, v->LBBitPerPixel[k]);
4384 		dml_print("DML::%s: k=%d, v->HRatio             = %f\n", __func__, k, v->HRatio[k]);
4385 		dml_print("DML::%s: k=%d, v->vtaps              = %d\n", __func__, k, v->vtaps[k]);
4386 #endif
4387 
4388 		EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
4389 		EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
4390 		EffectiveDETBufferSizeY = DETBufferSizeY[k];
4391 
4392 		if (UnboundedRequestEnabled) {
4393 			EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
4394 					+ CompressedBufferSizeInkByte * 1024
4395 							* (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
4396 							/ (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
4397 		}
4398 
4399 		LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4400 		LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
4401 		FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
4402 
4403 		ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
4404 				- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];
4405 
4406 		if (v->NumberOfActiveSurfaces > 1) {
4407 			ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
4408 					- (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
4409 							/ v->PixelClock[k] / v->VRatio[k];
4410 		}
4411 
4412 		if (BytePerPixelDETC[k] > 0) {
4413 			LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4414 			LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
4415 			FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
4416 					/ v->VRatioChroma[k];
4417 			ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
4418 					- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
4419 							/ v->PixelClock[k];
4420 			if (v->NumberOfActiveSurfaces > 1) {
4421 				ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
4422 						- (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
4423 								/ v->PixelClock[k] / v->VRatioChroma[k];
4424 			}
4425 			ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
4426 					ActiveClockChangeLatencyHidingC);
4427 		} else {
4428 			ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
4429 		}
4430 
4431 		ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4432 				- v->Watermark.DRAMClockChangeWatermark;
4433 		ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4434 				- v->Watermark.FCLKChangeWatermark;
4435 		USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;
4436 
4437 		if (v->WritebackEnable[k]) {
4438 			WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
4439 					/ (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4440 							/ (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
4441 			if (v->WritebackPixelFormat[k] == dm_444_64)
4442 				WritebackLatencyHiding = WritebackLatencyHiding / 2;
4443 
4444 			WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
4445 					- v->Watermark.WritebackDRAMClockChangeWatermark;
4446 
4447 			WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
4448 					- v->Watermark.WritebackFCLKChangeWatermark;
4449 
4450 			ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4451 					WritebackFCLKChangeLatencyMargin);
4452 			ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
4453 					WritebackDRAMClockChangeLatencyMargin);
4454 		}
4455 		MaxActiveDRAMClockChangeLatencySupported[k] =
4456 				(v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4457 						0 :
4458 						(ActiveDRAMClockChangeLatencyMargin[k]
4459 								+ mmSOCParameters.DRAMClockChangeLatency);
4460 	}
4461 
4462 	for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
4463 		for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
4464 			if (i == j ||
4465 					(v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
4466 					(v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
4467 					(v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
4468 					(v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
4469 					v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
4470 					v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4471 					(v->DRRDisplay[i] || v->DRRDisplay[j]))) {
4472 				SynchronizedSurfaces[i][j] = true;
4473 			} else {
4474 				SynchronizedSurfaces[i][j] = false;
4475 			}
4476 		}
4477 	}
4478 
4479 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4480 		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4481 				(!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4482 				ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
4483 			FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4484 			MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
4485 			SurfaceWithMinActiveFCLKChangeMargin = k;
4486 		}
4487 	}
4488 
4489 	*MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4490 
4491 	SameTimingForFCLKChange = true;
4492 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4493 		if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
4494 			if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4495 					(SameTimingForFCLKChange ||
4496 					ActiveFCLKChangeLatencyMargin[k] <
4497 					SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4498 				SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
4499 			}
4500 			SameTimingForFCLKChange = false;
4501 		}
4502 	}
4503 
4504 	if (MinActiveFCLKChangeMargin > 0) {
4505 		*FCLKChangeSupport = dm_fclock_change_vactive;
4506 	} else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4507 			(PrefetchMode <= 1)) {
4508 		*FCLKChangeSupport = dm_fclock_change_vblank;
4509 	} else {
4510 		*FCLKChangeSupport = dm_fclock_change_unsupported;
4511 	}
4512 
4513 	*USRRetrainingSupport = true;
4514 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4515 		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4516 				(USRRetrainingLatencyMargin[k] < 0)) {
4517 			*USRRetrainingSupport = false;
4518 		}
4519 	}
4520 
4521 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4522 		if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4523 				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4524 				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4525 				ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4526 			if (PrefetchMode > 0) {
4527 				DRAMClockChangeSupportNumber = 2;
4528 			} else if (DRAMClockChangeSupportNumber == 0) {
4529 				DRAMClockChangeSupportNumber = 1;
4530 				LastSurfaceWithoutMargin = k;
4531 			} else if (DRAMClockChangeSupportNumber == 1 &&
4532 					!SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
4533 				DRAMClockChangeSupportNumber = 2;
4534 			}
4535 		}
4536 	}
4537 
4538 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4539 		if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4540 			DRAMClockChangeMethod = 1;
4541 		else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4542 			DRAMClockChangeMethod = 2;
4543 	}
4544 
4545 	if (DRAMClockChangeMethod == 0) {
4546 		if (DRAMClockChangeSupportNumber == 0)
4547 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4548 		else if (DRAMClockChangeSupportNumber == 1)
4549 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4550 		else
4551 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4552 	} else if (DRAMClockChangeMethod == 1) {
4553 		if (DRAMClockChangeSupportNumber == 0)
4554 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4555 		else if (DRAMClockChangeSupportNumber == 1)
4556 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4557 		else
4558 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4559 	} else {
4560 		if (DRAMClockChangeSupportNumber == 0)
4561 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4562 		else if (DRAMClockChangeSupportNumber == 1)
4563 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4564 		else
4565 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4566 	}
4567 
4568 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4569 		unsigned int dst_y_pstate;
4570 		unsigned int src_y_pstate_l;
4571 		unsigned int src_y_pstate_c;
4572 		unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4573 
4574 		dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1);
4575 		src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]);
4576 		src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
4577 		sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];
4578 
4579 #ifdef __DML_VBA_DEBUG__
4580 dml_print("DML::%s: k=%d, DETBufferSizeY               = %d\n", __func__, k, DETBufferSizeY[k]);
4581 dml_print("DML::%s: k=%d, BytePerPixelDETY             = %f\n", __func__, k, BytePerPixelDETY[k]);
4582 dml_print("DML::%s: k=%d, SwathWidthY                  = %d\n", __func__, k, SwathWidthY[k]);
4583 dml_print("DML::%s: k=%d, SwathHeightY                 = %d\n", __func__, k, SwathHeightY[k]);
4584 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
4585 dml_print("DML::%s: k=%d, dst_y_pstate      = %d\n", __func__, k, dst_y_pstate);
4586 dml_print("DML::%s: k=%d, src_y_pstate_l    = %d\n", __func__, k, src_y_pstate_l);
4587 dml_print("DML::%s: k=%d, src_y_ahead_l     = %d\n", __func__, k, src_y_ahead_l);
4588 dml_print("DML::%s: k=%d, v->meta_row_height   = %d\n", __func__, k, v->meta_row_height[k]);
4589 dml_print("DML::%s: k=%d, sub_vp_lines_l    = %d\n", __func__, k, sub_vp_lines_l);
4590 #endif
4591 		SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4592 
4593 		if (BytePerPixelDETC[k] > 0) {
4594 			src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]);
4595 			src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
4596 			sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
4597 			SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4598 
4599 #ifdef __DML_VBA_DEBUG__
4600 dml_print("DML::%s: k=%d, src_y_pstate_c            = %d\n", __func__, k, src_y_pstate_c);
4601 dml_print("DML::%s: k=%d, src_y_ahead_c             = %d\n", __func__, k, src_y_ahead_c);
4602 dml_print("DML::%s: k=%d, v->meta_row_height_chroma    = %d\n", __func__, k, v->meta_row_height_chroma[k]);
4603 dml_print("DML::%s: k=%d, sub_vp_lines_c            = %d\n", __func__, k, sub_vp_lines_c);
4604 #endif
4605 		}
4606 	}
4607 #ifdef __DML_VBA_DEBUG__
4608 	dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4609 	dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4610 	dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4611 			__func__, *MinActiveFCLKChangeLatencySupported);
4612 	dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4613 #endif
4614 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4615 
4616 double dml32_CalculateWriteBackDISPCLK(
4617 		enum source_format_class WritebackPixelFormat,
4618 		double PixelClock,
4619 		double WritebackHRatio,
4620 		double WritebackVRatio,
4621 		unsigned int WritebackHTaps,
4622 		unsigned int WritebackVTaps,
4623 		unsigned int   WritebackSourceWidth,
4624 		unsigned int   WritebackDestinationWidth,
4625 		unsigned int HTotal,
4626 		unsigned int WritebackLineBufferSize,
4627 		double DISPCLKDPPCLKVCOSpeed)
4628 {
4629 	double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
4630 
4631 	DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
4632 	DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
4633 	DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
4634 			WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
4635 	return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed);
4636 }
4637 
4638 void dml32_CalculateMinAndMaxPrefetchMode(
4639 		enum dm_prefetch_modes   AllowForPStateChangeOrStutterInVBlankFinal,
4640 		unsigned int             *MinPrefetchMode,
4641 		unsigned int             *MaxPrefetchMode)
4642 {
4643 	if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4644 		*MinPrefetchMode = 3;
4645 		*MaxPrefetchMode = 3;
4646 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4647 		*MinPrefetchMode = 2;
4648 		*MaxPrefetchMode = 2;
4649 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4650 		*MinPrefetchMode = 1;
4651 		*MaxPrefetchMode = 1;
4652 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4653 		*MinPrefetchMode = 0;
4654 		*MaxPrefetchMode = 0;
4655 	} else {
4656 		*MinPrefetchMode = 0;
4657 		*MaxPrefetchMode = 3;
4658 	}
4659 } // CalculateMinAndMaxPrefetchMode
4660 
4661 void dml32_CalculatePixelDeliveryTimes(
4662 		unsigned int             NumberOfActiveSurfaces,
4663 		double              VRatio[],
4664 		double              VRatioChroma[],
4665 		double              VRatioPrefetchY[],
4666 		double              VRatioPrefetchC[],
4667 		unsigned int             swath_width_luma_ub[],
4668 		unsigned int             swath_width_chroma_ub[],
4669 		unsigned int             DPPPerSurface[],
4670 		double              HRatio[],
4671 		double              HRatioChroma[],
4672 		double              PixelClock[],
4673 		double              PSCL_THROUGHPUT[],
4674 		double              PSCL_THROUGHPUT_CHROMA[],
4675 		double              Dppclk[],
4676 		unsigned int             BytePerPixelC[],
4677 		enum dm_rotation_angle   SourceRotation[],
4678 		unsigned int             NumberOfCursors[],
4679 		unsigned int             CursorWidth[][DC__NUM_CURSOR__MAX],
4680 		unsigned int             CursorBPP[][DC__NUM_CURSOR__MAX],
4681 		unsigned int             BlockWidth256BytesY[],
4682 		unsigned int             BlockHeight256BytesY[],
4683 		unsigned int             BlockWidth256BytesC[],
4684 		unsigned int             BlockHeight256BytesC[],
4685 
4686 		/* Output */
4687 		double              DisplayPipeLineDeliveryTimeLuma[],
4688 		double              DisplayPipeLineDeliveryTimeChroma[],
4689 		double              DisplayPipeLineDeliveryTimeLumaPrefetch[],
4690 		double              DisplayPipeLineDeliveryTimeChromaPrefetch[],
4691 		double              DisplayPipeRequestDeliveryTimeLuma[],
4692 		double              DisplayPipeRequestDeliveryTimeChroma[],
4693 		double              DisplayPipeRequestDeliveryTimeLumaPrefetch[],
4694 		double              DisplayPipeRequestDeliveryTimeChromaPrefetch[],
4695 		double              CursorRequestDeliveryTime[],
4696 		double              CursorRequestDeliveryTimePrefetch[])
4697 {
4698 	double   req_per_swath_ub;
4699 	unsigned int k;
4700 
4701 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4702 
4703 #ifdef __DML_VBA_DEBUG__
4704 		dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
4705 		dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
4706 		dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
4707 		dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
4708 		dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
4709 		dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
4710 		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
4711 		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
4712 		dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
4713 		dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
4714 		dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
4715 #endif
4716 
4717 		if (VRatio[k] <= 1) {
4718 			DisplayPipeLineDeliveryTimeLuma[k] =
4719 					swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4720 		} else {
4721 			DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4722 		}
4723 
4724 		if (BytePerPixelC[k] == 0) {
4725 			DisplayPipeLineDeliveryTimeChroma[k] = 0;
4726 		} else {
4727 			if (VRatioChroma[k] <= 1) {
4728 				DisplayPipeLineDeliveryTimeChroma[k] =
4729 					swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4730 			} else {
4731 				DisplayPipeLineDeliveryTimeChroma[k] =
4732 					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4733 			}
4734 		}
4735 
4736 		if (VRatioPrefetchY[k] <= 1) {
4737 			DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4738 					swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4739 		} else {
4740 			DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4741 					swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4742 		}
4743 
4744 		if (BytePerPixelC[k] == 0) {
4745 			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
4746 		} else {
4747 			if (VRatioPrefetchC[k] <= 1) {
4748 				DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
4749 						DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4750 			} else {
4751 				DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
4752 						swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4753 			}
4754 		}
4755 #ifdef __DML_VBA_DEBUG__
4756 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
4757 				__func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
4758 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
4759 				__func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
4760 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
4761 				__func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
4762 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
4763 				__func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
4764 #endif
4765 	}
4766 
4767 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4768 		if (!IsVertical(SourceRotation[k]))
4769 			req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
4770 		else
4771 			req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
4772 #ifdef __DML_VBA_DEBUG__
4773 		dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
4774 #endif
4775 
4776 		DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
4777 		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
4778 				DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
4779 		if (BytePerPixelC[k] == 0) {
4780 			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
4781 			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
4782 		} else {
4783 			if (!IsVertical(SourceRotation[k]))
4784 				req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
4785 			else
4786 				req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
4787 #ifdef __DML_VBA_DEBUG__
4788 			dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
4789 #endif
4790 			DisplayPipeRequestDeliveryTimeChroma[k] =
4791 					DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
4792 			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
4793 					DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
4794 		}
4795 #ifdef __DML_VBA_DEBUG__
4796 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
4797 				__func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
4798 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
4799 				__func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
4800 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
4801 				__func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
4802 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
4803 				__func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
4804 #endif
4805 	}
4806 
4807 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4808 		unsigned int cursor_req_per_width;
4809 
4810 		cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
4811 				256.0 / 8.0, 1.0);
4812 		if (NumberOfCursors[k] > 0) {
4813 			if (VRatio[k] <= 1) {
4814 				CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4815 						HRatio[k] / PixelClock[k] / cursor_req_per_width;
4816 			} else {
4817 				CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4818 						PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4819 			}
4820 			if (VRatioPrefetchY[k] <= 1) {
4821 				CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4822 						HRatio[k] / PixelClock[k] / cursor_req_per_width;
4823 			} else {
4824 				CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4825 						PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4826 			}
4827 		} else {
4828 			CursorRequestDeliveryTime[k] = 0;
4829 			CursorRequestDeliveryTimePrefetch[k] = 0;
4830 		}
4831 #ifdef __DML_VBA_DEBUG__
4832 		dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
4833 				__func__, k, NumberOfCursors[k]);
4834 		dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
4835 				__func__, k, CursorRequestDeliveryTime[k]);
4836 		dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
4837 				__func__, k, CursorRequestDeliveryTimePrefetch[k]);
4838 #endif
4839 	}
4840 } // CalculatePixelDeliveryTimes
4841 
4842 void dml32_CalculateMetaAndPTETimes(
4843 		bool use_one_row_for_frame[],
4844 		unsigned int NumberOfActiveSurfaces,
4845 		bool GPUVMEnable,
4846 		unsigned int MetaChunkSize,
4847 		unsigned int MinMetaChunkSizeBytes,
4848 		unsigned int    HTotal[],
4849 		double  VRatio[],
4850 		double  VRatioChroma[],
4851 		double  DestinationLinesToRequestRowInVBlank[],
4852 		double  DestinationLinesToRequestRowInImmediateFlip[],
4853 		bool DCCEnable[],
4854 		double  PixelClock[],
4855 		unsigned int BytePerPixelY[],
4856 		unsigned int BytePerPixelC[],
4857 		enum dm_rotation_angle SourceRotation[],
4858 		unsigned int dpte_row_height[],
4859 		unsigned int dpte_row_height_chroma[],
4860 		unsigned int meta_row_width[],
4861 		unsigned int meta_row_width_chroma[],
4862 		unsigned int meta_row_height[],
4863 		unsigned int meta_row_height_chroma[],
4864 		unsigned int meta_req_width[],
4865 		unsigned int meta_req_width_chroma[],
4866 		unsigned int meta_req_height[],
4867 		unsigned int meta_req_height_chroma[],
4868 		unsigned int dpte_group_bytes[],
4869 		unsigned int    PTERequestSizeY[],
4870 		unsigned int    PTERequestSizeC[],
4871 		unsigned int    PixelPTEReqWidthY[],
4872 		unsigned int    PixelPTEReqHeightY[],
4873 		unsigned int    PixelPTEReqWidthC[],
4874 		unsigned int    PixelPTEReqHeightC[],
4875 		unsigned int    dpte_row_width_luma_ub[],
4876 		unsigned int    dpte_row_width_chroma_ub[],
4877 
4878 		/* Output */
4879 		double DST_Y_PER_PTE_ROW_NOM_L[],
4880 		double DST_Y_PER_PTE_ROW_NOM_C[],
4881 		double DST_Y_PER_META_ROW_NOM_L[],
4882 		double DST_Y_PER_META_ROW_NOM_C[],
4883 		double TimePerMetaChunkNominal[],
4884 		double TimePerChromaMetaChunkNominal[],
4885 		double TimePerMetaChunkVBlank[],
4886 		double TimePerChromaMetaChunkVBlank[],
4887 		double TimePerMetaChunkFlip[],
4888 		double TimePerChromaMetaChunkFlip[],
4889 		double time_per_pte_group_nom_luma[],
4890 		double time_per_pte_group_vblank_luma[],
4891 		double time_per_pte_group_flip_luma[],
4892 		double time_per_pte_group_nom_chroma[],
4893 		double time_per_pte_group_vblank_chroma[],
4894 		double time_per_pte_group_flip_chroma[])
4895 {
4896 	unsigned int   meta_chunk_width;
4897 	unsigned int   min_meta_chunk_width;
4898 	unsigned int   meta_chunk_per_row_int;
4899 	unsigned int   meta_row_remainder;
4900 	unsigned int   meta_chunk_threshold;
4901 	unsigned int   meta_chunks_per_row_ub;
4902 	unsigned int   meta_chunk_width_chroma;
4903 	unsigned int   min_meta_chunk_width_chroma;
4904 	unsigned int   meta_chunk_per_row_int_chroma;
4905 	unsigned int   meta_row_remainder_chroma;
4906 	unsigned int   meta_chunk_threshold_chroma;
4907 	unsigned int   meta_chunks_per_row_ub_chroma;
4908 	unsigned int   dpte_group_width_luma;
4909 	unsigned int   dpte_groups_per_row_luma_ub;
4910 	unsigned int   dpte_group_width_chroma;
4911 	unsigned int   dpte_groups_per_row_chroma_ub;
4912 	unsigned int k;
4913 
4914 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4915 		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4916 		if (BytePerPixelC[k] == 0)
4917 			DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4918 		else
4919 			DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4920 		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4921 		if (BytePerPixelC[k] == 0)
4922 			DST_Y_PER_META_ROW_NOM_C[k] = 0;
4923 		else
4924 			DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4925 	}
4926 
4927 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4928 		if (DCCEnable[k] == true) {
4929 			meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4930 			min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4931 			meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4932 			meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4933 			if (!IsVertical(SourceRotation[k]))
4934 				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4935 			else
4936 				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4937 
4938 			if (meta_row_remainder <= meta_chunk_threshold)
4939 				meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4940 			else
4941 				meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4942 
4943 			TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4944 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4945 			TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4946 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4947 			TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4948 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4949 			if (BytePerPixelC[k] == 0) {
4950 				TimePerChromaMetaChunkNominal[k] = 0;
4951 				TimePerChromaMetaChunkVBlank[k] = 0;
4952 				TimePerChromaMetaChunkFlip[k] = 0;
4953 			} else {
4954 				meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4955 						meta_row_height_chroma[k];
4956 				min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4957 						meta_row_height_chroma[k];
4958 				meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4959 						meta_chunk_width_chroma;
4960 				meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4961 				if (!IsVertical(SourceRotation[k])) {
4962 					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4963 							meta_req_width_chroma[k];
4964 				} else {
4965 					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4966 							meta_req_height_chroma[k];
4967 				}
4968 				if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
4969 					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
4970 				else
4971 					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
4972 
4973 				TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
4974 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4975 				TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4976 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4977 				TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4978 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4979 			}
4980 		} else {
4981 			TimePerMetaChunkNominal[k] = 0;
4982 			TimePerMetaChunkVBlank[k] = 0;
4983 			TimePerMetaChunkFlip[k] = 0;
4984 			TimePerChromaMetaChunkNominal[k] = 0;
4985 			TimePerChromaMetaChunkVBlank[k] = 0;
4986 			TimePerChromaMetaChunkFlip[k] = 0;
4987 		}
4988 	}
4989 
4990 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4991 		if (GPUVMEnable == true) {
4992 			if (!IsVertical(SourceRotation[k])) {
4993 				dpte_group_width_luma = (double) dpte_group_bytes[k] /
4994 						(double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
4995 			} else {
4996 				dpte_group_width_luma = (double) dpte_group_bytes[k] /
4997 						(double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
4998 			}
4999 
5000 			if (use_one_row_for_frame[k]) {
5001 				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5002 						(double) dpte_group_width_luma / 2.0, 1.0);
5003 			} else {
5004 				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5005 						(double) dpte_group_width_luma, 1.0);
5006 			}
5007 #ifdef __DML_VBA_DEBUG__
5008 			dml_print("DML::%s: k=%0d, use_one_row_for_frame        = %d\n",
5009 					__func__, k, use_one_row_for_frame[k]);
5010 			dml_print("DML::%s: k=%0d, dpte_group_bytes             = %d\n",
5011 					__func__, k, dpte_group_bytes[k]);
5012 			dml_print("DML::%s: k=%0d, PTERequestSizeY              = %d\n",
5013 					__func__, k, PTERequestSizeY[k]);
5014 			dml_print("DML::%s: k=%0d, PixelPTEReqWidthY            = %d\n",
5015 					__func__, k, PixelPTEReqWidthY[k]);
5016 			dml_print("DML::%s: k=%0d, PixelPTEReqHeightY           = %d\n",
5017 					__func__, k, PixelPTEReqHeightY[k]);
5018 			dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub       = %d\n",
5019 					__func__, k, dpte_row_width_luma_ub[k]);
5020 			dml_print("DML::%s: k=%0d, dpte_group_width_luma        = %d\n",
5021 					__func__, k, dpte_group_width_luma);
5022 			dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub  = %d\n",
5023 					__func__, k, dpte_groups_per_row_luma_ub);
5024 #endif
5025 
5026 			time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
5027 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5028 			time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
5029 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5030 			time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5031 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5032 			if (BytePerPixelC[k] == 0) {
5033 				time_per_pte_group_nom_chroma[k] = 0;
5034 				time_per_pte_group_vblank_chroma[k] = 0;
5035 				time_per_pte_group_flip_chroma[k] = 0;
5036 			} else {
5037 				if (!IsVertical(SourceRotation[k])) {
5038 					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5039 							(double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5040 				} else {
5041 					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5042 							(double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5043 				}
5044 
5045 				if (use_one_row_for_frame[k]) {
5046 					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5047 							(double) dpte_group_width_chroma / 2.0, 1.0);
5048 				} else {
5049 					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5050 							(double) dpte_group_width_chroma, 1.0);
5051 				}
5052 #ifdef __DML_VBA_DEBUG__
5053 				dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub        = %d\n",
5054 						__func__, k, dpte_row_width_chroma_ub[k]);
5055 				dml_print("DML::%s: k=%0d, dpte_group_width_chroma        = %d\n",
5056 						__func__, k, dpte_group_width_chroma);
5057 				dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub  = %d\n",
5058 						__func__, k, dpte_groups_per_row_chroma_ub);
5059 #endif
5060 				time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5061 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5062 				time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5063 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5064 				time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5065 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5066 			}
5067 		} else {
5068 			time_per_pte_group_nom_luma[k] = 0;
5069 			time_per_pte_group_vblank_luma[k] = 0;
5070 			time_per_pte_group_flip_luma[k] = 0;
5071 			time_per_pte_group_nom_chroma[k] = 0;
5072 			time_per_pte_group_vblank_chroma[k] = 0;
5073 			time_per_pte_group_flip_chroma[k] = 0;
5074 		}
5075 #ifdef __DML_VBA_DEBUG__
5076 		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank         = %f\n",
5077 				__func__, k, DestinationLinesToRequestRowInVBlank[k]);
5078 		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip  = %f\n",
5079 				__func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5080 		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L                      = %f\n",
5081 				__func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5082 		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C                      = %f\n",
5083 				__func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5084 		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L                     = %f\n",
5085 				__func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5086 		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C                     = %f\n",
5087 				__func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5088 		dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal          = %f\n",
5089 				__func__, k, TimePerMetaChunkNominal[k]);
5090 		dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank           = %f\n",
5091 				__func__, k, TimePerMetaChunkVBlank[k]);
5092 		dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip             = %f\n",
5093 				__func__, k, TimePerMetaChunkFlip[k]);
5094 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal    = %f\n",
5095 				__func__, k, TimePerChromaMetaChunkNominal[k]);
5096 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank     = %f\n",
5097 				__func__, k, TimePerChromaMetaChunkVBlank[k]);
5098 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip       = %f\n",
5099 				__func__, k, TimePerChromaMetaChunkFlip[k]);
5100 		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma      = %f\n",
5101 				__func__, k, time_per_pte_group_nom_luma[k]);
5102 		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma   = %f\n",
5103 				__func__, k, time_per_pte_group_vblank_luma[k]);
5104 		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma     = %f\n",
5105 				__func__, k, time_per_pte_group_flip_luma[k]);
5106 		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma    = %f\n",
5107 				__func__, k, time_per_pte_group_nom_chroma[k]);
5108 		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5109 				__func__, k, time_per_pte_group_vblank_chroma[k]);
5110 		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma   = %f\n",
5111 				__func__, k, time_per_pte_group_flip_chroma[k]);
5112 #endif
5113 	}
5114 } // CalculateMetaAndPTETimes
5115 
/*
 * dml32_CalculateVMGroupAndRequestTimes - time available per VM group and per
 * VM request for every active surface, for the VBlank and immediate-flip cases.
 *
 * For each surface k the function counts
 *   - groups:   sum of ceil(bytes / vm_group_bytes[k]) over the contributing
 *               byte counts, plus a fixed extra (see below);
 *   - requests: sum of (bytes / 64) over the same byte counts;
 * and then divides
 *   DestinationLinesToRequestVM...[k] * HTotal[k] / PixelClock[k]
 * by each count. All four results are halved when
 * GPUVMMaxPageTableLevels > 2.
 *
 * Contributing byte counts:
 *   - DCC off:                        dpde0 only
 *   - DCC on, page table levels == 1: meta PTE only
 *   - DCC on, otherwise:              dpde0 and meta PTE, plus one extra
 *                                     group per plane (1 luma-only, 2 with
 *                                     chroma)
 * Chroma byte counts are included only when BytePerPixelC[k] > 0.
 *
 * All four outputs are set to 0 for a surface when GPUVMEnable is false, or
 * when DCC is off and GPUVMMaxPageTableLevels <= 1.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are accepted but not
 * read by this function.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int     NumberOfActiveSurfaces,
		bool     GPUVMEnable,
		unsigned int     GPUVMMaxPageTableLevels,
		unsigned int     HTotal[],
		unsigned int     BytePerPixelC[],
		double      DestinationLinesToRequestVMInVBlank[],
		double      DestinationLinesToRequestVMInImmediateFlip[],
		bool     DCCEnable[],
		double      PixelClock[],
		unsigned int        dpte_row_width_luma_ub[],
		unsigned int        dpte_row_width_chroma_ub[],
		unsigned int     vm_group_bytes[],
		unsigned int     dpde0_bytes_per_frame_ub_l[],
		unsigned int     dpde0_bytes_per_frame_ub_c[],
		unsigned int        meta_pte_bytes_per_frame_ub_l[],
		unsigned int        meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double      TimePerVMGroupVBlank[],
		double      TimePerVMGroupFlip[],
		double      TimePerVMRequestVBlank[],
		double      TimePerVMRequestFlip[])
{
	unsigned int k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		const bool dcc_on = DCCEnable[k];
		const bool has_chroma = BytePerPixelC[k] > 0;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (!GPUVMEnable || (!dcc_on && GPUVMMaxPageTableLevels <= 1)) {
			/* Nothing to budget for this surface. */
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		} else {
			/* Which byte counts feed the group/request totals. */
			const bool count_dpde0 = !dcc_on || GPUVMMaxPageTableLevels != 1;
			const bool count_meta_pte = dcc_on;
			/*
			 * Groups are summed in double, in the order
			 * extra, dpde0_l, dpde0_c, meta_l, meta_c, and only
			 * converted to an integer once at the end.
			 */
			double group_total = 0;
			unsigned int num_group_per_lower_vm_stage;
			unsigned int num_req_per_lower_vm_stage = 0;

			/* One extra group per plane when both kinds are counted. */
			if (count_dpde0 && count_meta_pte)
				group_total = has_chroma ? 2 : 1;

			if (count_dpde0) {
				group_total += dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1.0);
				num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_total += dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_c[k] / 64;
				}
			}

			if (count_meta_pte) {
				group_total += dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1.0);
				num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_total += dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64;
				}
			}

			num_group_per_lower_vm_stage = group_total;

			TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
			TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
			TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
			TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;

			/* More than two page table levels: halve every budget. */
			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] /= 2;
				TimePerVMGroupFlip[k] /= 2;
				TimePerVMRequestVBlank[k] /= 2;
				TimePerVMRequestFlip[k] /= 2;
			}
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
5269 
5270 void dml32_CalculateDCCConfiguration(
5271 		bool             DCCEnabled,
5272 		bool             DCCProgrammingAssumesScanDirectionUnknown,
5273 		enum source_format_class SourcePixelFormat,
5274 		unsigned int             SurfaceWidthLuma,
5275 		unsigned int             SurfaceWidthChroma,
5276 		unsigned int             SurfaceHeightLuma,
5277 		unsigned int             SurfaceHeightChroma,
5278 		unsigned int                nomDETInKByte,
5279 		unsigned int             RequestHeight256ByteLuma,
5280 		unsigned int             RequestHeight256ByteChroma,
5281 		enum dm_swizzle_mode     TilingFormat,
5282 		unsigned int             BytePerPixelY,
5283 		unsigned int             BytePerPixelC,
5284 		double              BytePerPixelDETY,
5285 		double              BytePerPixelDETC,
5286 		enum dm_rotation_angle   SourceRotation,
5287 		/* Output */
5288 		unsigned int        *MaxUncompressedBlockLuma,
5289 		unsigned int        *MaxUncompressedBlockChroma,
5290 		unsigned int        *MaxCompressedBlockLuma,
5291 		unsigned int        *MaxCompressedBlockChroma,
5292 		unsigned int        *IndependentBlockLuma,
5293 		unsigned int        *IndependentBlockChroma)
5294 {
5295 	typedef enum {
5296 		REQ_256Bytes,
5297 		REQ_128BytesNonContiguous,
5298 		REQ_128BytesContiguous,
5299 		REQ_NA
5300 	} RequestType;
5301 
5302 	RequestType   RequestLuma;
5303 	RequestType   RequestChroma;
5304 
5305 	unsigned int   segment_order_horz_contiguous_luma;
5306 	unsigned int   segment_order_horz_contiguous_chroma;
5307 	unsigned int   segment_order_vert_contiguous_luma;
5308 	unsigned int   segment_order_vert_contiguous_chroma;
5309 	unsigned int req128_horz_wc_l;
5310 	unsigned int req128_horz_wc_c;
5311 	unsigned int req128_vert_wc_l;
5312 	unsigned int req128_vert_wc_c;
5313 	unsigned int MAS_vp_horz_limit;
5314 	unsigned int MAS_vp_vert_limit;
5315 	unsigned int max_vp_horz_width;
5316 	unsigned int max_vp_vert_height;
5317 	unsigned int eff_surf_width_l;
5318 	unsigned int eff_surf_width_c;
5319 	unsigned int eff_surf_height_l;
5320 	unsigned int eff_surf_height_c;
5321 	unsigned int full_swath_bytes_horz_wc_l;
5322 	unsigned int full_swath_bytes_horz_wc_c;
5323 	unsigned int full_swath_bytes_vert_wc_l;
5324 	unsigned int full_swath_bytes_vert_wc_c;
5325 	unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5326 
5327 	unsigned int   yuv420;
5328 	unsigned int   horz_div_l;
5329 	unsigned int   horz_div_c;
5330 	unsigned int   vert_div_l;
5331 	unsigned int   vert_div_c;
5332 
5333 	unsigned int     swath_buf_size;
5334 	double   detile_buf_vp_horz_limit;
5335 	double   detile_buf_vp_vert_limit;
5336 
5337 	yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5338 			SourcePixelFormat == dm_420_12) ? 1 : 0);
5339 	horz_div_l = 1;
5340 	horz_div_c = 1;
5341 	vert_div_l = 1;
5342 	vert_div_c = 1;
5343 
5344 	if (BytePerPixelY == 1)
5345 		vert_div_l = 0;
5346 	if (BytePerPixelC == 1)
5347 		vert_div_c = 0;
5348 
5349 	if (BytePerPixelC == 0) {
5350 		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5351 		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5352 				BytePerPixelY / (1 + horz_div_l));
5353 		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5354 				(1 + vert_div_l));
5355 	} else {
5356 		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5357 		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5358 				BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5359 				BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5360 		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5361 				(1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5362 				(1 + vert_div_c) / (1 + yuv420));
5363 	}
5364 
5365 	if (SourcePixelFormat == dm_420_10) {
5366 		detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5367 		detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5368 	}
5369 
5370 	detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5371 	detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5372 
5373 	MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5374 	MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5375 	max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5376 	max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5377 	eff_surf_width_l =  (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5378 	eff_surf_width_c =  eff_surf_width_l / (1 + yuv420);
5379 	eff_surf_height_l =  (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5380 	eff_surf_height_c =  eff_surf_height_l / (1 + yuv420);
5381 
5382 	full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5383 	full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5384 	if (BytePerPixelC > 0) {
5385 		full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5386 		full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5387 	} else {
5388 		full_swath_bytes_horz_wc_c = 0;
5389 		full_swath_bytes_vert_wc_c = 0;
5390 	}
5391 
5392 	if (SourcePixelFormat == dm_420_10) {
5393 		full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5394 		full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5395 		full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5396 		full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5397 	}
5398 
5399 	if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5400 		req128_horz_wc_l = 0;
5401 		req128_horz_wc_c = 0;
5402 	} else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5403 			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5404 		req128_horz_wc_l = 0;
5405 		req128_horz_wc_c = 1;
5406 	} else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5407 			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5408 		req128_horz_wc_l = 1;
5409 		req128_horz_wc_c = 0;
5410 	} else {
5411 		req128_horz_wc_l = 1;
5412 		req128_horz_wc_c = 1;
5413 	}
5414 
5415 	if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5416 		req128_vert_wc_l = 0;
5417 		req128_vert_wc_c = 0;
5418 	} else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5419 			full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5420 		req128_vert_wc_l = 0;
5421 		req128_vert_wc_c = 1;
5422 	} else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5423 			full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5424 		req128_vert_wc_l = 1;
5425 		req128_vert_wc_c = 0;
5426 	} else {
5427 		req128_vert_wc_l = 1;
5428 		req128_vert_wc_c = 1;
5429 	}
5430 
5431 	if (BytePerPixelY == 2) {
5432 		segment_order_horz_contiguous_luma = 0;
5433 		segment_order_vert_contiguous_luma = 1;
5434 	} else {
5435 		segment_order_horz_contiguous_luma = 1;
5436 		segment_order_vert_contiguous_luma = 0;
5437 	}
5438 
5439 	if (BytePerPixelC == 2) {
5440 		segment_order_horz_contiguous_chroma = 0;
5441 		segment_order_vert_contiguous_chroma = 1;
5442 	} else {
5443 		segment_order_horz_contiguous_chroma = 1;
5444 		segment_order_vert_contiguous_chroma = 0;
5445 	}
5446 #ifdef __DML_VBA_DEBUG__
5447 	dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5448 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5449 	dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5450 	dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5451 	dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5452 	dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5453 	dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5454 	dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5455 	dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5456 			__func__, segment_order_horz_contiguous_chroma);
5457 #endif
5458 
5459 	if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5460 		if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5461 			RequestLuma = REQ_256Bytes;
5462 		else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5463 				(req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5464 			RequestLuma = REQ_128BytesNonContiguous;
5465 		else
5466 			RequestLuma = REQ_128BytesContiguous;
5467 
5468 		if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5469 			RequestChroma = REQ_256Bytes;
5470 		else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5471 				(req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5472 			RequestChroma = REQ_128BytesNonContiguous;
5473 		else
5474 			RequestChroma = REQ_128BytesContiguous;
5475 
5476 	} else if (!IsVertical(SourceRotation)) {
5477 		if (req128_horz_wc_l == 0)
5478 			RequestLuma = REQ_256Bytes;
5479 		else if (segment_order_horz_contiguous_luma == 0)
5480 			RequestLuma = REQ_128BytesNonContiguous;
5481 		else
5482 			RequestLuma = REQ_128BytesContiguous;
5483 
5484 		if (req128_horz_wc_c == 0)
5485 			RequestChroma = REQ_256Bytes;
5486 		else if (segment_order_horz_contiguous_chroma == 0)
5487 			RequestChroma = REQ_128BytesNonContiguous;
5488 		else
5489 			RequestChroma = REQ_128BytesContiguous;
5490 
5491 	} else {
5492 		if (req128_vert_wc_l == 0)
5493 			RequestLuma = REQ_256Bytes;
5494 		else if (segment_order_vert_contiguous_luma == 0)
5495 			RequestLuma = REQ_128BytesNonContiguous;
5496 		else
5497 			RequestLuma = REQ_128BytesContiguous;
5498 
5499 		if (req128_vert_wc_c == 0)
5500 			RequestChroma = REQ_256Bytes;
5501 		else if (segment_order_vert_contiguous_chroma == 0)
5502 			RequestChroma = REQ_128BytesNonContiguous;
5503 		else
5504 			RequestChroma = REQ_128BytesContiguous;
5505 	}
5506 
5507 	if (RequestLuma == REQ_256Bytes) {
5508 		*MaxUncompressedBlockLuma = 256;
5509 		*MaxCompressedBlockLuma = 256;
5510 		*IndependentBlockLuma = 0;
5511 	} else if (RequestLuma == REQ_128BytesContiguous) {
5512 		*MaxUncompressedBlockLuma = 256;
5513 		*MaxCompressedBlockLuma = 128;
5514 		*IndependentBlockLuma = 128;
5515 	} else {
5516 		*MaxUncompressedBlockLuma = 256;
5517 		*MaxCompressedBlockLuma = 64;
5518 		*IndependentBlockLuma = 64;
5519 	}
5520 
5521 	if (RequestChroma == REQ_256Bytes) {
5522 		*MaxUncompressedBlockChroma = 256;
5523 		*MaxCompressedBlockChroma = 256;
5524 		*IndependentBlockChroma = 0;
5525 	} else if (RequestChroma == REQ_128BytesContiguous) {
5526 		*MaxUncompressedBlockChroma = 256;
5527 		*MaxCompressedBlockChroma = 128;
5528 		*IndependentBlockChroma = 128;
5529 	} else {
5530 		*MaxUncompressedBlockChroma = 256;
5531 		*MaxCompressedBlockChroma = 64;
5532 		*IndependentBlockChroma = 64;
5533 	}
5534 
5535 	if (DCCEnabled != true || BytePerPixelC == 0) {
5536 		*MaxUncompressedBlockChroma = 0;
5537 		*MaxCompressedBlockChroma = 0;
5538 		*IndependentBlockChroma = 0;
5539 	}
5540 
5541 	if (DCCEnabled != true) {
5542 		*MaxUncompressedBlockLuma = 0;
5543 		*MaxCompressedBlockLuma = 0;
5544 		*IndependentBlockLuma = 0;
5545 	}
5546 
5547 #ifdef __DML_VBA_DEBUG__
5548 	dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5549 	dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5550 	dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5551 	dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5552 	dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5553 	dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5554 #endif
5555 
5556 } // CalculateDCCConfiguration
5557 
5558 void dml32_CalculateStutterEfficiency(
5559 		unsigned int      CompressedBufferSizeInkByte,
5560 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5561 		bool   UnboundedRequestEnabled,
5562 		unsigned int      MetaFIFOSizeInKEntries,
5563 		unsigned int      ZeroSizeBufferEntries,
5564 		unsigned int      PixelChunkSizeInKByte,
5565 		unsigned int   NumberOfActiveSurfaces,
5566 		unsigned int      ROBBufferSizeInKByte,
5567 		double    TotalDataReadBandwidth,
5568 		double    DCFCLK,
5569 		double    ReturnBW,
5570 		unsigned int      CompbufReservedSpace64B,
5571 		unsigned int      CompbufReservedSpaceZs,
5572 		double    SRExitTime,
5573 		double    SRExitZ8Time,
5574 		bool   SynchronizeTimingsFinal,
5575 		unsigned int   BlendingAndTiming[],
5576 		double    StutterEnterPlusExitWatermark,
5577 		double    Z8StutterEnterPlusExitWatermark,
5578 		bool   ProgressiveToInterlaceUnitInOPP,
5579 		bool   Interlace[],
5580 		double    MinTTUVBlank[],
5581 		unsigned int   DPPPerSurface[],
5582 		unsigned int      DETBufferSizeY[],
5583 		unsigned int   BytePerPixelY[],
5584 		double    BytePerPixelDETY[],
5585 		double      SwathWidthY[],
5586 		unsigned int   SwathHeightY[],
5587 		unsigned int   SwathHeightC[],
5588 		double    NetDCCRateLuma[],
5589 		double    NetDCCRateChroma[],
5590 		double    DCCFractionOfZeroSizeRequestsLuma[],
5591 		double    DCCFractionOfZeroSizeRequestsChroma[],
5592 		unsigned int      HTotal[],
5593 		unsigned int      VTotal[],
5594 		double    PixelClock[],
5595 		double    VRatio[],
5596 		enum dm_rotation_angle SourceRotation[],
5597 		unsigned int   BlockHeight256BytesY[],
5598 		unsigned int   BlockWidth256BytesY[],
5599 		unsigned int   BlockHeight256BytesC[],
5600 		unsigned int   BlockWidth256BytesC[],
5601 		unsigned int   DCCYMaxUncompressedBlock[],
5602 		unsigned int   DCCCMaxUncompressedBlock[],
5603 		unsigned int      VActive[],
5604 		bool   DCCEnable[],
5605 		bool   WritebackEnable[],
5606 		double    ReadBandwidthSurfaceLuma[],
5607 		double    ReadBandwidthSurfaceChroma[],
5608 		double    meta_row_bw[],
5609 		double    dpte_row_bw[],
5610 
5611 		/* Output */
5612 		double   *StutterEfficiencyNotIncludingVBlank,
5613 		double   *StutterEfficiency,
5614 		unsigned int     *NumberOfStutterBurstsPerFrame,
5615 		double   *Z8StutterEfficiencyNotIncludingVBlank,
5616 		double   *Z8StutterEfficiency,
5617 		unsigned int     *Z8NumberOfStutterBurstsPerFrame,
5618 		double   *StutterPeriod,
5619 		bool  *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5620 {
5621 
5622 	bool FoundCriticalSurface = false;
5623 	unsigned int SwathSizeCriticalSurface = 0;
5624 	unsigned int LastChunkOfSwathSize;
5625 	unsigned int MissingPartOfLastSwathOfDETSize;
5626 	double LastZ8StutterPeriod = 0.0;
5627 	double LastStutterPeriod = 0.0;
5628 	unsigned int TotalNumberOfActiveOTG = 0;
5629 	double doublePixelClock;
5630 	unsigned int doubleHTotal;
5631 	unsigned int doubleVTotal;
5632 	bool SameTiming = true;
5633 	double DETBufferingTimeY;
5634 	double SwathWidthYCriticalSurface = 0.0;
5635 	double SwathHeightYCriticalSurface = 0.0;
5636 	double VActiveTimeCriticalSurface = 0.0;
5637 	double FrameTimeCriticalSurface = 0.0;
5638 	unsigned int BytePerPixelYCriticalSurface = 0;
5639 	double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5640 	unsigned int DETBufferSizeYCriticalSurface = 0;
5641 	double MinTTUVBlankCriticalSurface = 0.0;
5642 	unsigned int BlockWidth256BytesYCriticalSurface = 0;
5643 	bool doublePlaneCriticalSurface = 0;
5644 	bool doublePipeCriticalSurface = 0;
5645 	double TotalCompressedReadBandwidth;
5646 	double TotalRowReadBandwidth;
5647 	double AverageDCCCompressionRate;
5648 	double EffectiveCompressedBufferSize;
5649 	double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5650 	double StutterBurstTime;
5651 	unsigned int TotalActiveWriteback;
5652 	double LinesInDETY;
5653 	double LinesInDETYRoundedDownToSwath;
5654 	double MaximumEffectiveCompressionLuma;
5655 	double MaximumEffectiveCompressionChroma;
5656 	double TotalZeroSizeRequestReadBandwidth;
5657 	double TotalZeroSizeCompressedReadBandwidth;
5658 	double AverageDCCZeroSizeFraction;
5659 	double AverageZeroSizeCompressionRate;
5660 	unsigned int k;
5661 
5662 	TotalZeroSizeRequestReadBandwidth = 0;
5663 	TotalZeroSizeCompressedReadBandwidth = 0;
5664 	TotalRowReadBandwidth = 0;
5665 	TotalCompressedReadBandwidth = 0;
5666 
5667 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5668 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5669 			if (DCCEnable[k] == true) {
5670 				if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5671 						|| (!IsVertical(SourceRotation[k])
5672 								&& BlockHeight256BytesY[k] > SwathHeightY[k])
5673 						|| DCCYMaxUncompressedBlock[k] < 256) {
5674 					MaximumEffectiveCompressionLuma = 2;
5675 				} else {
5676 					MaximumEffectiveCompressionLuma = 4;
5677 				}
5678 				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5679 						+ ReadBandwidthSurfaceLuma[k]
5680 								/ dml_min(NetDCCRateLuma[k],
5681 										MaximumEffectiveCompressionLuma);
5682 #ifdef __DML_VBA_DEBUG__
5683 				dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5684 						__func__, k, ReadBandwidthSurfaceLuma[k]);
5685 				dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5686 						__func__, k, NetDCCRateLuma[k]);
5687 				dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5688 						__func__, k, MaximumEffectiveCompressionLuma);
5689 #endif
5690 				TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5691 						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5692 				TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5693 						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5694 								/ MaximumEffectiveCompressionLuma;
5695 
5696 				if (ReadBandwidthSurfaceChroma[k] > 0) {
5697 					if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5698 							|| (!IsVertical(SourceRotation[k])
5699 									&& BlockHeight256BytesC[k] > SwathHeightC[k])
5700 							|| DCCCMaxUncompressedBlock[k] < 256) {
5701 						MaximumEffectiveCompressionChroma = 2;
5702 					} else {
5703 						MaximumEffectiveCompressionChroma = 4;
5704 					}
5705 					TotalCompressedReadBandwidth =
5706 							TotalCompressedReadBandwidth
5707 							+ ReadBandwidthSurfaceChroma[k]
5708 							/ dml_min(NetDCCRateChroma[k],
5709 							MaximumEffectiveCompressionChroma);
5710 #ifdef __DML_VBA_DEBUG__
5711 					dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5712 							__func__, k, ReadBandwidthSurfaceChroma[k]);
5713 					dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5714 							__func__, k, NetDCCRateChroma[k]);
5715 					dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5716 							__func__, k, MaximumEffectiveCompressionChroma);
5717 #endif
5718 					TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5719 							+ ReadBandwidthSurfaceChroma[k]
5720 									* DCCFractionOfZeroSizeRequestsChroma[k];
5721 					TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5722 							+ ReadBandwidthSurfaceChroma[k]
5723 									* DCCFractionOfZeroSizeRequestsChroma[k]
5724 									/ MaximumEffectiveCompressionChroma;
5725 				}
5726 			} else {
5727 				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5728 						+ ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5729 			}
5730 			TotalRowReadBandwidth = TotalRowReadBandwidth
5731 					+ DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5732 		}
5733 	}
5734 
5735 	AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5736 	AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5737 
5738 #ifdef __DML_VBA_DEBUG__
5739 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5740 	dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5741 	dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5742 	dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5743 			__func__, TotalZeroSizeCompressedReadBandwidth);
5744 	dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5745 	dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5746 	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5747 	dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5748 	dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5749 	dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5750 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5751 #endif
5752 	if (AverageDCCZeroSizeFraction == 1) {
5753 		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5754 				/ TotalZeroSizeCompressedReadBandwidth;
5755 		EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5756 				* AverageZeroSizeCompressionRate
5757 				+ ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5758 						* AverageZeroSizeCompressionRate;
5759 	} else if (AverageDCCZeroSizeFraction > 0) {
5760 		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5761 				/ TotalZeroSizeCompressedReadBandwidth;
5762 		EffectiveCompressedBufferSize = dml_min(
5763 				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5764 				(double) MetaFIFOSizeInKEntries * 1024 * 64
5765 					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5766 					+ 1 / AverageDCCCompressionRate))
5767 					+ dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5768 					* AverageDCCCompressionRate,
5769 					((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5770 					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5771 
5772 #ifdef __DML_VBA_DEBUG__
5773 		dml_print("DML::%s: min 1 = %f\n", __func__,
5774 				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5775 		dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5776 				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5777 						AverageDCCCompressionRate));
5778 		dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5779 				CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5780 		dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5781 				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5782 #endif
5783 	} else {
5784 		EffectiveCompressedBufferSize = dml_min(
5785 				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5786 				(double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5787 				+ ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5788 						* AverageDCCCompressionRate;
5789 
5790 #ifdef __DML_VBA_DEBUG__
5791 		dml_print("DML::%s: min 1 = %f\n", __func__,
5792 				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5793 		dml_print("DML::%s: min 2 = %f\n", __func__,
5794 				MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5795 #endif
5796 	}
5797 
5798 #ifdef __DML_VBA_DEBUG__
5799 	dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5800 	dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5801 	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5802 #endif
5803 
5804 	*StutterPeriod = 0;
5805 
5806 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5807 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5808 			LinesInDETY = ((double) DETBufferSizeY[k]
5809 					+ (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5810 							* ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5811 					/ BytePerPixelDETY[k] / SwathWidthY[k];
5812 			LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5813 			DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5814 					/ VRatio[k];
5815 #ifdef __DML_VBA_DEBUG__
5816 			dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5817 			dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5818 			dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5819 			dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5820 					__func__, k, ReadBandwidthSurfaceLuma[k]);
5821 			dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5822 			dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5823 			dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5824 					__func__, k, LinesInDETYRoundedDownToSwath);
5825 			dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5826 			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5827 			dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5828 			dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5829 			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5830 #endif
5831 
5832 			if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5833 				bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5834 
5835 				FoundCriticalSurface = true;
5836 				*StutterPeriod = DETBufferingTimeY;
5837 				FrameTimeCriticalSurface = (
5838 						isInterlaceTiming ?
5839 								dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5840 						* (double) HTotal[k] / PixelClock[k];
5841 				VActiveTimeCriticalSurface = (
5842 						isInterlaceTiming ?
5843 								dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5844 						* (double) HTotal[k] / PixelClock[k];
5845 				BytePerPixelYCriticalSurface = BytePerPixelY[k];
5846 				SwathWidthYCriticalSurface = SwathWidthY[k];
5847 				SwathHeightYCriticalSurface = SwathHeightY[k];
5848 				BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5849 				LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5850 						- (LinesInDETY - LinesInDETYRoundedDownToSwath);
5851 				DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5852 				MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5853 				doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5854 				doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5855 
5856 #ifdef __DML_VBA_DEBUG__
5857 				dml_print("DML::%s: k=%0d, FoundCriticalSurface                = %d\n",
5858 						__func__, k, FoundCriticalSurface);
5859 				dml_print("DML::%s: k=%0d, StutterPeriod                       = %f\n",
5860 						__func__, k, *StutterPeriod);
5861 				dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface         = %f\n",
5862 						__func__, k, MinTTUVBlankCriticalSurface);
5863 				dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface            = %f\n",
5864 						__func__, k, FrameTimeCriticalSurface);
5865 				dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface          = %f\n",
5866 						__func__, k, VActiveTimeCriticalSurface);
5867 				dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface        = %d\n",
5868 						__func__, k, BytePerPixelYCriticalSurface);
5869 				dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface          = %f\n",
5870 						__func__, k, SwathWidthYCriticalSurface);
5871 				dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface         = %f\n",
5872 						__func__, k, SwathHeightYCriticalSurface);
5873 				dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface  = %d\n",
5874 						__func__, k, BlockWidth256BytesYCriticalSurface);
5875 				dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface          = %d\n",
5876 						__func__, k, doublePlaneCriticalSurface);
5877 				dml_print("DML::%s: k=%0d, doublePipeCriticalSurface           = %d\n",
5878 						__func__, k, doublePipeCriticalSurface);
5879 				dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5880 						__func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5881 #endif
5882 			}
5883 		}
5884 	}
5885 
5886 	PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5887 			EffectiveCompressedBufferSize);
5888 #ifdef __DML_VBA_DEBUG__
5889 	dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5890 	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5891 	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5892 			__func__, *StutterPeriod * TotalDataReadBandwidth);
5893 	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5894 	dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5895 			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5896 	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5897 	dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5898 	dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5899 	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5900 #endif
5901 
5902 	StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5903 			/ ReturnBW
5904 			+ (*StutterPeriod * TotalDataReadBandwidth
5905 					- PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5906 			+ *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5907 #ifdef __DML_VBA_DEBUG__
5908 	dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5909 			AverageDCCCompressionRate / ReturnBW);
5910 	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5911 			__func__, (*StutterPeriod * TotalDataReadBandwidth));
5912 	dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5913 			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5914 	dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5915 	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5916 #endif
5917 	StutterBurstTime = dml_max(StutterBurstTime,
5918 			LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5919 					* SwathWidthYCriticalSurface / ReturnBW);
5920 
5921 #ifdef __DML_VBA_DEBUG__
5922 	dml_print("DML::%s: Time to finish residue swath=%f\n",
5923 			__func__,
5924 			LinesToFinishSwathTransferStutterCriticalSurface *
5925 			BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5926 #endif
5927 
5928 	TotalActiveWriteback = 0;
5929 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5930 		if (WritebackEnable[k])
5931 			TotalActiveWriteback = TotalActiveWriteback + 1;
5932 	}
5933 
5934 	if (TotalActiveWriteback == 0) {
5935 #ifdef __DML_VBA_DEBUG__
5936 		dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5937 		dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5938 		dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5939 		dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5940 #endif
5941 		*StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5942 				1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5943 		*Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5944 				1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5945 		*NumberOfStutterBurstsPerFrame = (
5946 				*StutterEfficiencyNotIncludingVBlank > 0 ?
5947 						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5948 		*Z8NumberOfStutterBurstsPerFrame = (
5949 				*Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5950 						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5951 	} else {
5952 		*StutterEfficiencyNotIncludingVBlank = 0.;
5953 		*Z8StutterEfficiencyNotIncludingVBlank = 0.;
5954 		*NumberOfStutterBurstsPerFrame = 0;
5955 		*Z8NumberOfStutterBurstsPerFrame = 0;
5956 	}
5957 #ifdef __DML_VBA_DEBUG__
5958 	dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5959 	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5960 			__func__, *StutterEfficiencyNotIncludingVBlank);
5961 	dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5962 			__func__, *Z8StutterEfficiencyNotIncludingVBlank);
5963 	dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5964 	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5965 #endif
5966 
5967 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5968 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5969 			if (BlendingAndTiming[k] == k) {
5970 				if (TotalNumberOfActiveOTG == 0) {
5971 					doublePixelClock = PixelClock[k];
5972 					doubleHTotal = HTotal[k];
5973 					doubleVTotal = VTotal[k];
5974 				} else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
5975 						|| doubleVTotal != VTotal[k]) {
5976 					SameTiming = false;
5977 				}
5978 				TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
5979 			}
5980 		}
5981 	}
5982 
5983 	if (*StutterEfficiencyNotIncludingVBlank > 0) {
5984 		LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5985 
5986 		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
5987 				&& LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
5988 			*StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
5989 						+ StutterBurstTime * VActiveTimeCriticalSurface
5990 						/ *StutterPeriod) / FrameTimeCriticalSurface) * 100;
5991 		} else {
5992 			*StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
5993 		}
5994 	} else {
5995 		*StutterEfficiency = 0;
5996 	}
5997 
5998 	if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
5999 		LastZ8StutterPeriod = VActiveTimeCriticalSurface
6000 				- (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6001 		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
6002 				MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
6003 			*Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
6004 				* VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6005 		} else {
6006 			*Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6007 		}
6008 	} else {
6009 		*Z8StutterEfficiency = 0.;
6010 	}
6011 
6012 #ifdef __DML_VBA_DEBUG__
6013 	dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6014 	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6015 	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6016 	dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6017 	dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6018 	dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6019 	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
6020 			__func__, *StutterEfficiencyNotIncludingVBlank);
6021 	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6022 #endif
6023 
6024 	SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
6025 			* dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
6026 	LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6027 	MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
6028 			- DETBufferSizeYCriticalSurface;
6029 
6030 	*DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6031 			&& doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6032 			&& (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6033 			&& (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6034 
6035 #ifdef __DML_VBA_DEBUG__
6036 	dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6037 	dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6038 	dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6039 	dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6040 #endif
6041 } // CalculateStutterEfficiency
6042 
/*
 * Derive the DET size limits and the minimum compressed buffer size from the
 * return buffer configuration.
 *
 * Inputs:
 *   ConfigReturnBufferSizeInKByte, ROBBufferSizeInKByte - buffer sizes that
 *       are summed (as doubles) to form the space being split
 *   MaxNumDPP                    - divisor used for the nominal per-DPP DET
 *   nomDETInKByteOverrideEnable  - when true, *nomDETInKByte is replaced by
 *                                  nomDETInKByteOverrideValue
 *
 * Outputs:
 *   *MaxTotalDETInKByte             - dml_ceil(4/5 of the summed size, 64)
 *   *nomDETInKByte                  - dml_floor(*MaxTotalDETInKByte / MaxNumDPP, 64),
 *                                     unless overridden
 *   *MinCompressedBufferSizeInKByte - ConfigReturnBufferSizeInKByte minus
 *                                     *MaxTotalDETInKByte (unsigned arithmetic)
 *
 * NOTE(review): dml_ceil/dml_floor presumably round to a multiple of their
 * second argument (64 here) - confirm against dml_inline_defs.h.
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int    ConfigReturnBufferSizeInKByte,
		unsigned int    ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue,  // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	/* Both sizes are widened to double before they are added. */
	double return_plus_rob_kbyte = (double) ConfigReturnBufferSizeInKByte
			+ (double) ROBBufferSizeInKByte;

	/* The total DET budget is 4/5 of the combined size. */
	*MaxTotalDETInKByte = dml_ceil(return_plus_rob_kbyte * 4.0 / 5.0, 64);

	/* Nominal DET: the budget divided by the DPP count. */
	*nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);

	/* Whatever part of the return buffer the DET budget does not cover. */
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* Without an override the computed nominal value stands. */
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} // CalculateMaxDETAndMinCompressedBufferSize
6079 
/*
 * Decide whether the summed per-surface read bandwidth fits within ReturnBW.
 *
 * For each of the first NumberOfActiveSurfaces entries the following terms
 * are accumulated:
 *   ReadBandwidthLuma   * UrgentBurstFactorLuma
 *   ReadBandwidthChroma * UrgentBurstFactorChroma
 *   cursor_bw           * UrgentBurstFactorCursor
 *   NumberOfDPP         * meta_row_bandwidth
 *   NumberOfDPP         * dpte_row_bandwidth
 *
 * Returns true only when that total is <= ReturnBW and no surface has its
 * NotUrgentLatencyHiding flag set.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	bool latency_hiding_failed = false;
	bool bandwidth_supported = false;
	double total_active_bw = 0;
	unsigned int surf;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		/* One offending surface is enough to reject the configuration. */
		if (NotUrgentLatencyHiding[surf])
			latency_hiding_failed = true;

		/*
		 * Kept as a single left-to-right sum so the floating-point
		 * rounding sequence is unchanged.
		 */
		total_active_bw = total_active_bw
				+ ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf]
				+ NumberOfDPP[surf] * meta_row_bandwidth[surf]
				+ NumberOfDPP[surf] * dpte_row_bandwidth[surf];
	}

	bandwidth_supported = !latency_hiding_failed && (total_active_bw <= ReturnBW);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding        = %d\n", __func__, latency_hiding_failed);
	dml_print("DML::%s: VActiveBandwith                     = %f\n", __func__, total_active_bw);
	dml_print("DML::%s: ReturnBW                            = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, bandwidth_supported);
#endif
	return bandwidth_supported;
}
6118 
/*
 * Accumulate the per-surface bandwidth demand during prefetch and compare it
 * against ReturnBW.
 *
 * Each surface contributes the largest (via dml_max3) of three candidates:
 *   - NumberOfDPP * prefetch_vmrow_bw
 *   - the luma/chroma/cursor read bandwidth scaled by their urgent burst
 *     factors, plus NumberOfDPP * (meta_row_bandwidth + dpte_row_bandwidth)
 *   - NumberOfDPP * (luma + chroma prefetch bandwidth scaled by their "Pre"
 *     burst factors), plus cursor_bw_pre * UrgentBurstFactorCursorPre
 *
 * Outputs:
 *   *PrefetchBandwidth         - sum of the per-surface maxima
 *   *PrefetchBandwidthSupport  - true when the sum is <= ReturnBW and no
 *                                surface has NotUrgentLatencyHiding set
 *   *FractionOfUrgentBandwidth - *PrefetchBandwidth / ReturnBW
 */
void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		double cursor_bw_pre[],
		double prefetch_vmrow_bw[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[],

		/* output */
		double  *PrefetchBandwidth,
		double  *FractionOfUrgentBandwidth,
		bool *PrefetchBandwidthSupport)
{
	bool latency_hiding_failed = false;
	unsigned int surf;

	*PrefetchBandwidth = 0;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		/* Candidate 1: VM/row prefetch traffic across all DPPs. */
		double vmrow_bw = NumberOfDPP[surf] * prefetch_vmrow_bw[surf];
		/* Candidate 2: active read traffic plus per-DPP row traffic. */
		double active_bw = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf]
				+ NumberOfDPP[surf] * (meta_row_bandwidth[surf] + dpte_row_bandwidth[surf]);
		/* Candidate 3: pixel prefetch traffic plus cursor prefetch. */
		double pixel_prefetch_bw = NumberOfDPP[surf]
				* (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
					+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
				+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		if (NotUrgentLatencyHiding[surf])
			latency_hiding_failed = true;

		*PrefetchBandwidth = *PrefetchBandwidth
				+ dml_max3(vmrow_bw, active_bw, pixel_prefetch_bw);
	}

	*PrefetchBandwidthSupport = !latency_hiding_failed && (*PrefetchBandwidth <= ReturnBW);
	*FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW;
}
6162 
/*
 * Compute the bandwidth left over from ReturnBW once every active surface's
 * demand has been subtracted.
 *
 * Per surface, the amount subtracted is the larger (via dml_max) of:
 *   - luma/chroma/cursor read bandwidth scaled by their urgent burst factors
 *   - NumberOfDPP * (luma + chroma prefetch bandwidth scaled by their "Pre"
 *     burst factors), plus cursor_bw_pre * UrgentBurstFactorCursorPre
 *
 * Returns the remainder; nothing in this function clamps it, so it can be
 * negative when the demand exceeds ReturnBW.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	double remaining_bw = ReturnBW;
	unsigned int surf;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		/* Demand while the surface is being displayed. */
		double active_bw = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf];
		/* Demand while the surface is being prefetched. */
		double pixel_prefetch_bw = NumberOfDPP[surf]
				* (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
					+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
				+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		remaining_bw -= dml_max(active_bw, pixel_prefetch_bw);
	}

	return remaining_bw;
}
6189 
6190 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6191 		double ReturnBW,
6192 		enum immediate_flip_requirement ImmediateFlipRequirement[],
6193 		double final_flip_bw[],
6194 		double ReadBandwidthLuma[],
6195 		double ReadBandwidthChroma[],
6196 		double PrefetchBandwidthLuma[],
6197 		double PrefetchBandwidthChroma[],
6198 		double cursor_bw[],
6199 		double meta_row_bandwidth[],
6200 		double dpte_row_bandwidth[],
6201 		double cursor_bw_pre[],
6202 		double prefetch_vmrow_bw[],
6203 		unsigned int NumberOfDPP[],
6204 		double UrgentBurstFactorLuma[],
6205 		double UrgentBurstFactorChroma[],
6206 		double UrgentBurstFactorCursor[],
6207 		double UrgentBurstFactorLumaPre[],
6208 		double UrgentBurstFactorChromaPre[],
6209 		double UrgentBurstFactorCursorPre[],
6210 
6211 		/* output */
6212 		double  *TotalBandwidth,
6213 		double  *FractionOfUrgentBandwidth,
6214 		bool *ImmediateFlipBandwidthSupport)
6215 {
6216 	unsigned int k;
6217 	*TotalBandwidth = 0;
6218 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6219 		if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6220 			*TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6221 					NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6222 					NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6223 		} else {
6224 			*TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6225 					NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6226 					NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6227 		}
6228 	}
6229 	*ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6230 	*FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6231 }
6232