1 /*
2  * Copyright 2022 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: AMD
23  *
24  */
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
29 
30 unsigned int dml32_dscceComputeDelay(
31 		unsigned int bpc,
32 		double BPP,
33 		unsigned int sliceWidth,
34 		unsigned int numSlices,
35 		enum output_format_class pixelFormat,
36 		enum output_encoder_class Output)
37 {
38 	// valid bpc         = source bits per component in the set of {8, 10, 12}
39 	// valid bpp         = increments of 1/16 of a bit
40 	//                    min = 6/7/8 in N420/N422/444, respectively
41 	//                    max = such that compression is 1:1
42 	//valid sliceWidth  = number of pixels per slice line,
43 	//	must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
44 	//valid numSlices   = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
45 	//valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
46 
47 	// fixed value
48 	unsigned int rcModelSize = 8192;
49 
50 	// N422/N420 operate at 2 pixels per clock
51 	unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
52 	Delay, pixels;
53 
54 	if (pixelFormat == dm_420)
55 		pixelsPerClock = 2;
56 	else if (pixelFormat == dm_n422)
57 		pixelsPerClock = 2;
58 	// #all other modes operate at 1 pixel per clock
59 	else
60 		pixelsPerClock = 1;
61 
62 	//initial transmit delay as per PPS
63 	initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
64 
65 	//compute ssm delay
66 	if (bpc == 8)
67 		D = 81;
68 	else if (bpc == 10)
69 		D = 89;
70 	else
71 		D = 113;
72 
73 	//divide by pixel per cycle to compute slice width as seen by DSC
74 	w = sliceWidth / pixelsPerClock;
75 
76 	//422 mode has an additional cycle of delay
77 	if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
78 		s = 0;
79 	else
80 		s = 1;
81 
82 	//main calculation for the dscce
83 	ix = initalXmitDelay + 45;
84 	wx = (w + 2) / 3;
85 	p = 3 * wx - w;
86 	l0 = ix / w;
87 	a = ix + p * l0;
88 	ax = (a + 2) / 3 + D + 6 + 1;
89 	L = (ax + wx - 1) / wx;
90 	if ((ix % w) == 0 && p != 0)
91 		lstall = 1;
92 	else
93 		lstall = 0;
94 	Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
95 
96 	//dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
97 	pixels = Delay * 3 * pixelsPerClock;
98 
99 #ifdef __DML_VBA_DEBUG__
100 	dml_print("DML::%s: bpc: %d\n", __func__, bpc);
101 	dml_print("DML::%s: BPP: %f\n", __func__, BPP);
102 	dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
103 	dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
104 	dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
105 	dml_print("DML::%s: Output: %d\n", __func__, Output);
106 	dml_print("DML::%s: pixels: %d\n", __func__, pixels);
107 #endif
108 
109 	return pixels;
110 }
111 
112 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
113 {
114 	unsigned int Delay = 0;
115 
116 	if (pixelFormat == dm_420) {
117 		//   sfr
118 		Delay = Delay + 2;
119 		//   dsccif
120 		Delay = Delay + 0;
121 		//   dscc - input deserializer
122 		Delay = Delay + 3;
123 		//   dscc gets pixels every other cycle
124 		Delay = Delay + 2;
125 		//   dscc - input cdc fifo
126 		Delay = Delay + 12;
127 		//   dscc gets pixels every other cycle
128 		Delay = Delay + 13;
129 		//   dscc - cdc uncertainty
130 		Delay = Delay + 2;
131 		//   dscc - output cdc fifo
132 		Delay = Delay + 7;
133 		//   dscc gets pixels every other cycle
134 		Delay = Delay + 3;
135 		//   dscc - cdc uncertainty
136 		Delay = Delay + 2;
137 		//   dscc - output serializer
138 		Delay = Delay + 1;
139 		//   sft
140 		Delay = Delay + 1;
141 	} else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
142 		//   sfr
143 		Delay = Delay + 2;
144 		//   dsccif
145 		Delay = Delay + 1;
146 		//   dscc - input deserializer
147 		Delay = Delay + 5;
148 		//  dscc - input cdc fifo
149 		Delay = Delay + 25;
150 		//   dscc - cdc uncertainty
151 		Delay = Delay + 2;
152 		//   dscc - output cdc fifo
153 		Delay = Delay + 10;
154 		//   dscc - cdc uncertainty
155 		Delay = Delay + 2;
156 		//   dscc - output serializer
157 		Delay = Delay + 1;
158 		//   sft
159 		Delay = Delay + 1;
160 	} else {
161 		//   sfr
162 		Delay = Delay + 2;
163 		//   dsccif
164 		Delay = Delay + 0;
165 		//   dscc - input deserializer
166 		Delay = Delay + 3;
167 		//   dscc - input cdc fifo
168 		Delay = Delay + 12;
169 		//   dscc - cdc uncertainty
170 		Delay = Delay + 2;
171 		//   dscc - output cdc fifo
172 		Delay = Delay + 7;
173 		//   dscc - output serializer
174 		Delay = Delay + 1;
175 		//   dscc - cdc uncertainty
176 		Delay = Delay + 2;
177 		//   sft
178 		Delay = Delay + 1;
179 	}
180 
181 	return Delay;
182 }
183 
184 
185 bool IsVertical(enum dm_rotation_angle Scan)
186 {
187 	bool is_vert = false;
188 
189 	if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
190 		is_vert = true;
191 	else
192 		is_vert = false;
193 	return is_vert;
194 }
195 
196 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
197 		double HRatio,
198 		double HRatioChroma,
199 		double VRatio,
200 		double VRatioChroma,
201 		double MaxDCHUBToPSCLThroughput,
202 		double MaxPSCLToLBThroughput,
203 		double PixelClock,
204 		enum source_format_class SourcePixelFormat,
205 		unsigned int HTaps,
206 		unsigned int HTapsChroma,
207 		unsigned int VTaps,
208 		unsigned int VTapsChroma,
209 
210 		/* output */
211 		double *PSCL_THROUGHPUT,
212 		double *PSCL_THROUGHPUT_CHROMA,
213 		double *DPPCLKUsingSingleDPP)
214 {
215 	double DPPCLKUsingSingleDPPLuma;
216 	double DPPCLKUsingSingleDPPChroma;
217 
218 	if (HRatio > 1) {
219 		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
220 				dml_ceil((double) HTaps / 6.0, 1.0));
221 	} else {
222 		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
223 	}
224 
225 	DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
226 			*PSCL_THROUGHPUT, 1);
227 
228 	if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
229 		DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
230 
231 	if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
232 			SourcePixelFormat != dm_rgbe_alpha)) {
233 		*PSCL_THROUGHPUT_CHROMA = 0;
234 		*DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
235 	} else {
236 		if (HRatioChroma > 1) {
237 			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
238 					HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
239 		} else {
240 			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
241 		}
242 		DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
243 				HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
244 		if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
245 			DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
246 		*DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
247 	}
248 }
249 
250 void dml32_CalculateBytePerPixelAndBlockSizes(
251 		enum source_format_class SourcePixelFormat,
252 		enum dm_swizzle_mode SurfaceTiling,
253 
254 		/* Output */
255 		unsigned int *BytePerPixelY,
256 		unsigned int *BytePerPixelC,
257 		double  *BytePerPixelDETY,
258 		double  *BytePerPixelDETC,
259 		unsigned int *BlockHeight256BytesY,
260 		unsigned int *BlockHeight256BytesC,
261 		unsigned int *BlockWidth256BytesY,
262 		unsigned int *BlockWidth256BytesC,
263 		unsigned int *MacroTileHeightY,
264 		unsigned int *MacroTileHeightC,
265 		unsigned int *MacroTileWidthY,
266 		unsigned int *MacroTileWidthC)
267 {
268 	if (SourcePixelFormat == dm_444_64) {
269 		*BytePerPixelDETY = 8;
270 		*BytePerPixelDETC = 0;
271 		*BytePerPixelY = 8;
272 		*BytePerPixelC = 0;
273 	} else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
274 		*BytePerPixelDETY = 4;
275 		*BytePerPixelDETC = 0;
276 		*BytePerPixelY = 4;
277 		*BytePerPixelC = 0;
278 	} else if (SourcePixelFormat == dm_444_16) {
279 		*BytePerPixelDETY = 2;
280 		*BytePerPixelDETC = 0;
281 		*BytePerPixelY = 2;
282 		*BytePerPixelC = 0;
283 	} else if (SourcePixelFormat == dm_444_8) {
284 		*BytePerPixelDETY = 1;
285 		*BytePerPixelDETC = 0;
286 		*BytePerPixelY = 1;
287 		*BytePerPixelC = 0;
288 	} else if (SourcePixelFormat == dm_rgbe_alpha) {
289 		*BytePerPixelDETY = 4;
290 		*BytePerPixelDETC = 1;
291 		*BytePerPixelY = 4;
292 		*BytePerPixelC = 1;
293 	} else if (SourcePixelFormat == dm_420_8) {
294 		*BytePerPixelDETY = 1;
295 		*BytePerPixelDETC = 2;
296 		*BytePerPixelY = 1;
297 		*BytePerPixelC = 2;
298 	} else if (SourcePixelFormat == dm_420_12) {
299 		*BytePerPixelDETY = 2;
300 		*BytePerPixelDETC = 4;
301 		*BytePerPixelY = 2;
302 		*BytePerPixelC = 4;
303 	} else {
304 		*BytePerPixelDETY = 4.0 / 3;
305 		*BytePerPixelDETC = 8.0 / 3;
306 		*BytePerPixelY = 2;
307 		*BytePerPixelC = 4;
308 	}
309 #ifdef __DML_VBA_DEBUG__
310 	dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
311 	dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
312 	dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
313 	dml_print("DML::%s: BytePerPixelY    = %d\n", __func__, *BytePerPixelY);
314 	dml_print("DML::%s: BytePerPixelC    = %d\n", __func__, *BytePerPixelC);
315 #endif
316 	if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
317 			|| SourcePixelFormat == dm_444_16
318 			|| SourcePixelFormat == dm_444_8
319 			|| SourcePixelFormat == dm_mono_16
320 			|| SourcePixelFormat == dm_mono_8
321 			|| SourcePixelFormat == dm_rgbe)) {
322 		if (SurfaceTiling == dm_sw_linear)
323 			*BlockHeight256BytesY = 1;
324 		else if (SourcePixelFormat == dm_444_64)
325 			*BlockHeight256BytesY = 4;
326 		else if (SourcePixelFormat == dm_444_8)
327 			*BlockHeight256BytesY = 16;
328 		else
329 			*BlockHeight256BytesY = 8;
330 
331 		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
332 		*BlockHeight256BytesC = 0;
333 		*BlockWidth256BytesC = 0;
334 	} else {
335 		if (SurfaceTiling == dm_sw_linear) {
336 			*BlockHeight256BytesY = 1;
337 			*BlockHeight256BytesC = 1;
338 		} else if (SourcePixelFormat == dm_rgbe_alpha) {
339 			*BlockHeight256BytesY = 8;
340 			*BlockHeight256BytesC = 16;
341 		} else if (SourcePixelFormat == dm_420_8) {
342 			*BlockHeight256BytesY = 16;
343 			*BlockHeight256BytesC = 8;
344 		} else {
345 			*BlockHeight256BytesY = 8;
346 			*BlockHeight256BytesC = 8;
347 		}
348 		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
349 		*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
350 	}
351 #ifdef __DML_VBA_DEBUG__
352 	dml_print("DML::%s: BlockWidth256BytesY  = %d\n", __func__, *BlockWidth256BytesY);
353 	dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
354 	dml_print("DML::%s: BlockWidth256BytesC  = %d\n", __func__, *BlockWidth256BytesC);
355 	dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
356 #endif
357 
358 	if (SurfaceTiling == dm_sw_linear) {
359 		*MacroTileHeightY = *BlockHeight256BytesY;
360 		*MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
361 		*MacroTileHeightC = *BlockHeight256BytesC;
362 		if (*MacroTileHeightC == 0)
363 			*MacroTileWidthC = 0;
364 		else
365 			*MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
366 	} else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
367 			SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
368 		*MacroTileHeightY = 16 * *BlockHeight256BytesY;
369 		*MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
370 		*MacroTileHeightC = 16 * *BlockHeight256BytesC;
371 		if (*MacroTileHeightC == 0)
372 			*MacroTileWidthC = 0;
373 		else
374 			*MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
375 	} else {
376 		*MacroTileHeightY = 32 * *BlockHeight256BytesY;
377 		*MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
378 		*MacroTileHeightC = 32 * *BlockHeight256BytesC;
379 		if (*MacroTileHeightC == 0)
380 			*MacroTileWidthC = 0;
381 		else
382 			*MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
383 	}
384 
385 #ifdef __DML_VBA_DEBUG__
386 	dml_print("DML::%s: MacroTileWidthY  = %d\n", __func__, *MacroTileWidthY);
387 	dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
388 	dml_print("DML::%s: MacroTileWidthC  = %d\n", __func__, *MacroTileWidthC);
389 	dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
390 #endif
391 } // CalculateBytePerPixelAndBlockSizes
392 
393 void dml32_CalculateSwathAndDETConfiguration(
394 		unsigned int DETSizeOverride[],
395 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
396 		unsigned int ConfigReturnBufferSizeInKByte,
397 		unsigned int MaxTotalDETInKByte,
398 		unsigned int MinCompressedBufferSizeInKByte,
399 		double ForceSingleDPP,
400 		unsigned int NumberOfActiveSurfaces,
401 		unsigned int nomDETInKByte,
402 		enum unbounded_requesting_policy UseUnboundedRequestingFinal,
403 		bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
404 		unsigned int PixelChunkSizeKBytes,
405 		unsigned int ROBSizeKBytes,
406 		unsigned int CompressedBufferSegmentSizeInkByteFinal,
407 		enum output_encoder_class Output[],
408 		double ReadBandwidthLuma[],
409 		double ReadBandwidthChroma[],
410 		double MaximumSwathWidthLuma[],
411 		double MaximumSwathWidthChroma[],
412 		enum dm_rotation_angle SourceRotation[],
413 		bool ViewportStationary[],
414 		enum source_format_class SourcePixelFormat[],
415 		enum dm_swizzle_mode SurfaceTiling[],
416 		unsigned int ViewportWidth[],
417 		unsigned int ViewportHeight[],
418 		unsigned int ViewportXStart[],
419 		unsigned int ViewportYStart[],
420 		unsigned int ViewportXStartC[],
421 		unsigned int ViewportYStartC[],
422 		unsigned int SurfaceWidthY[],
423 		unsigned int SurfaceWidthC[],
424 		unsigned int SurfaceHeightY[],
425 		unsigned int SurfaceHeightC[],
426 		unsigned int Read256BytesBlockHeightY[],
427 		unsigned int Read256BytesBlockHeightC[],
428 		unsigned int Read256BytesBlockWidthY[],
429 		unsigned int Read256BytesBlockWidthC[],
430 		enum odm_combine_mode ODMMode[],
431 		unsigned int BlendingAndTiming[],
432 		unsigned int BytePerPixY[],
433 		unsigned int BytePerPixC[],
434 		double BytePerPixDETY[],
435 		double BytePerPixDETC[],
436 		unsigned int HActive[],
437 		double HRatio[],
438 		double HRatioChroma[],
439 		unsigned int DPPPerSurface[],
440 
441 		/* Output */
442 		unsigned int swath_width_luma_ub[],
443 		unsigned int swath_width_chroma_ub[],
444 		double SwathWidth[],
445 		double SwathWidthChroma[],
446 		unsigned int SwathHeightY[],
447 		unsigned int SwathHeightC[],
448 		unsigned int DETBufferSizeInKByte[],
449 		unsigned int DETBufferSizeY[],
450 		unsigned int DETBufferSizeC[],
451 		bool *UnboundedRequestEnabled,
452 		unsigned int *CompressedBufferSizeInkByte,
453 		unsigned int *CompBufReservedSpaceKBytes,
454 		bool *CompBufReservedSpaceNeedAdjustment,
455 		bool ViewportSizeSupportPerSurface[],
456 		bool *ViewportSizeSupport)
457 {
458 	unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
459 	unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
460 	unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
461 	unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
462 	unsigned int RoundedUpSwathSizeBytesY;
463 	unsigned int RoundedUpSwathSizeBytesC;
464 	double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
465 	double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
466 	unsigned int k;
467 	unsigned int TotalActiveDPP = 0;
468 	bool NoChromaSurfaces = true;
469 	unsigned int DETBufferSizeInKByteForSwathCalculation;
470 
471 #ifdef __DML_VBA_DEBUG__
472 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
473 	dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
474 	dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
475 #endif
476 	dml32_CalculateSwathWidth(ForceSingleDPP,
477 			NumberOfActiveSurfaces,
478 			SourcePixelFormat,
479 			SourceRotation,
480 			ViewportStationary,
481 			ViewportWidth,
482 			ViewportHeight,
483 			ViewportXStart,
484 			ViewportYStart,
485 			ViewportXStartC,
486 			ViewportYStartC,
487 			SurfaceWidthY,
488 			SurfaceWidthC,
489 			SurfaceHeightY,
490 			SurfaceHeightC,
491 			ODMMode,
492 			BytePerPixY,
493 			BytePerPixC,
494 			Read256BytesBlockHeightY,
495 			Read256BytesBlockHeightC,
496 			Read256BytesBlockWidthY,
497 			Read256BytesBlockWidthC,
498 			BlendingAndTiming,
499 			HActive,
500 			HRatio,
501 			DPPPerSurface,
502 
503 			/* Output */
504 			SwathWidthdoubleDPP,
505 			SwathWidthdoubleDPPChroma,
506 			SwathWidth,
507 			SwathWidthChroma,
508 			MaximumSwathHeightY,
509 			MaximumSwathHeightC,
510 			swath_width_luma_ub,
511 			swath_width_chroma_ub);
512 
513 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
514 		RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
515 		RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
516 #ifdef __DML_VBA_DEBUG__
517 		dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
518 		dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
519 		dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
520 		dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
521 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
522 				RoundedUpMaxSwathSizeBytesY[k]);
523 		dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
524 		dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
525 		dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
526 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
527 				RoundedUpMaxSwathSizeBytesC[k]);
528 #endif
529 
530 		if (SourcePixelFormat[k] == dm_420_10) {
531 			RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
532 			RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
533 		}
534 	}
535 
536 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
537 		TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
538 		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
539 				SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
540 			NoChromaSurfaces = false;
541 		}
542 	}
543 
544 	// By default, just set the reserved space to 2 pixel chunks size
545 	*CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
546 
547 	// if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
548 	// - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
549 	// - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
550 	*CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
551 
552 	if (*CompBufReservedSpaceNeedAdjustment == 1) {
553 		*CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
554 	}
555 
556 	#ifdef __DML_VBA_DEBUG__
557 		dml_print("DML::%s: CompBufReservedSpaceKBytes          = %d\n",  __func__, *CompBufReservedSpaceKBytes);
558 		dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, *CompBufReservedSpaceNeedAdjustment);
559 	#endif
560 
561 	*UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
562 
563 	dml32_CalculateDETBufferSize(DETSizeOverride,
564 			UseMALLForPStateChange,
565 			ForceSingleDPP,
566 			NumberOfActiveSurfaces,
567 			*UnboundedRequestEnabled,
568 			nomDETInKByte,
569 			MaxTotalDETInKByte,
570 			ConfigReturnBufferSizeInKByte,
571 			MinCompressedBufferSizeInKByte,
572 			CompressedBufferSegmentSizeInkByteFinal,
573 			SourcePixelFormat,
574 			ReadBandwidthLuma,
575 			ReadBandwidthChroma,
576 			RoundedUpMaxSwathSizeBytesY,
577 			RoundedUpMaxSwathSizeBytesC,
578 			DPPPerSurface,
579 
580 			/* Output */
581 			DETBufferSizeInKByte,    // per hubp pipe
582 			CompressedBufferSizeInkByte);
583 
584 #ifdef __DML_VBA_DEBUG__
585 	dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
586 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
587 	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
588 	dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
589 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
590 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
591 #endif
592 
593 	*ViewportSizeSupport = true;
594 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
595 
596 		DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
597 				dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
598 #ifdef __DML_VBA_DEBUG__
599 		dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
600 				DETBufferSizeInKByteForSwathCalculation);
601 #endif
602 
603 		if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
604 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
605 			SwathHeightY[k] = MaximumSwathHeightY[k];
606 			SwathHeightC[k] = MaximumSwathHeightC[k];
607 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
608 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
609 		} else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
610 				RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
611 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
612 			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
613 			SwathHeightC[k] = MaximumSwathHeightC[k];
614 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
615 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
616 		} else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
617 				RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
618 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
619 			SwathHeightY[k] = MaximumSwathHeightY[k];
620 			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
621 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
622 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
623 		} else {
624 			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
625 			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
626 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
627 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
628 		}
629 
630 		if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
631 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
632 				|| SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
633 						SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
634 			*ViewportSizeSupport = false;
635 			ViewportSizeSupportPerSurface[k] = false;
636 		} else {
637 			ViewportSizeSupportPerSurface[k] = true;
638 		}
639 
640 		if (SwathHeightC[k] == 0) {
641 #ifdef __DML_VBA_DEBUG__
642 			dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
643 #endif
644 			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
645 			DETBufferSizeC[k] = 0;
646 		} else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
647 #ifdef __DML_VBA_DEBUG__
648 			dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
649 #endif
650 			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
651 			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
652 		} else {
653 #ifdef __DML_VBA_DEBUG__
654 			dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
655 #endif
656 			DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
657 			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
658 		}
659 
660 #ifdef __DML_VBA_DEBUG__
661 		dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
662 		dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
663 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
664 				k, RoundedUpMaxSwathSizeBytesY[k]);
665 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
666 				k, RoundedUpMaxSwathSizeBytesC[k]);
667 		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
668 		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
669 		dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
670 		dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
671 		dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
672 		dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
673 				ViewportSizeSupportPerSurface[k]);
674 #endif
675 
676 	}
677 } // CalculateSwathAndDETConfiguration
678 
679 void dml32_CalculateSwathWidth(
680 		bool				ForceSingleDPP,
681 		unsigned int			NumberOfActiveSurfaces,
682 		enum source_format_class	SourcePixelFormat[],
683 		enum dm_rotation_angle		SourceRotation[],
684 		bool				ViewportStationary[],
685 		unsigned int			ViewportWidth[],
686 		unsigned int			ViewportHeight[],
687 		unsigned int			ViewportXStart[],
688 		unsigned int			ViewportYStart[],
689 		unsigned int			ViewportXStartC[],
690 		unsigned int			ViewportYStartC[],
691 		unsigned int			SurfaceWidthY[],
692 		unsigned int			SurfaceWidthC[],
693 		unsigned int			SurfaceHeightY[],
694 		unsigned int			SurfaceHeightC[],
695 		enum odm_combine_mode		ODMMode[],
696 		unsigned int			BytePerPixY[],
697 		unsigned int			BytePerPixC[],
698 		unsigned int			Read256BytesBlockHeightY[],
699 		unsigned int			Read256BytesBlockHeightC[],
700 		unsigned int			Read256BytesBlockWidthY[],
701 		unsigned int			Read256BytesBlockWidthC[],
702 		unsigned int			BlendingAndTiming[],
703 		unsigned int			HActive[],
704 		double				HRatio[],
705 		unsigned int			DPPPerSurface[],
706 
707 		/* Output */
708 		double				SwathWidthdoubleDPPY[],
709 		double				SwathWidthdoubleDPPC[],
710 		double				SwathWidthY[], // per-pipe
711 		double				SwathWidthC[], // per-pipe
712 		unsigned int			MaximumSwathHeightY[],
713 		unsigned int			MaximumSwathHeightC[],
714 		unsigned int			swath_width_luma_ub[], // per-pipe
715 		unsigned int			swath_width_chroma_ub[]) // per-pipe
716 {
717 	unsigned int k, j;
718 	enum odm_combine_mode MainSurfaceODMMode;
719 
720 	unsigned int surface_width_ub_l;
721 	unsigned int surface_height_ub_l;
722 	unsigned int surface_width_ub_c;
723 	unsigned int surface_height_ub_c;
724 
725 #ifdef __DML_VBA_DEBUG__
726 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
727 	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
728 #endif
729 
730 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
731 		if (!IsVertical(SourceRotation[k]))
732 			SwathWidthdoubleDPPY[k] = ViewportWidth[k];
733 		else
734 			SwathWidthdoubleDPPY[k] = ViewportHeight[k];
735 
736 #ifdef __DML_VBA_DEBUG__
737 		dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
738 		dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
739 #endif
740 
741 		MainSurfaceODMMode = ODMMode[k];
742 		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
743 			if (BlendingAndTiming[k] == j)
744 				MainSurfaceODMMode = ODMMode[j];
745 		}
746 
747 		if (ForceSingleDPP) {
748 			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
749 		} else {
750 			if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
751 				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
752 						dml_round(HActive[k] / 4.0 * HRatio[k]));
753 			} else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
754 				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
755 						dml_round(HActive[k] / 2.0 * HRatio[k]));
756 			} else if (DPPPerSurface[k] == 2) {
757 				SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
758 			} else {
759 				SwathWidthY[k] = SwathWidthdoubleDPPY[k];
760 			}
761 		}
762 
763 #ifdef __DML_VBA_DEBUG__
764 		dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
765 		dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
766 		dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
767 		dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
768 		dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
769 #endif
770 
771 		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
772 				SourcePixelFormat[k] == dm_420_12) {
773 			SwathWidthC[k] = SwathWidthY[k] / 2;
774 			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
775 		} else {
776 			SwathWidthC[k] = SwathWidthY[k];
777 			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
778 		}
779 
780 		if (ForceSingleDPP == true) {
781 			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
782 			SwathWidthC[k] = SwathWidthdoubleDPPC[k];
783 		}
784 
785 		surface_width_ub_l  = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
786 		surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
787 		surface_width_ub_c  = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
788 		surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
789 
790 #ifdef __DML_VBA_DEBUG__
791 		dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
792 		dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
793 		dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
794 		dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
795 		dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
796 		dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
797 		dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
798 		dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
799 		dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
800 		dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
801 #endif
802 
803 		if (!IsVertical(SourceRotation[k])) {
804 			MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
805 			MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
806 			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
807 				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
808 						dml_floor(ViewportXStart[k] +
809 								SwathWidthY[k] +
810 								Read256BytesBlockWidthY[k] - 1,
811 								Read256BytesBlockWidthY[k]) -
812 								dml_floor(ViewportXStart[k],
813 								Read256BytesBlockWidthY[k]));
814 			} else {
815 				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
816 						dml_ceil(SwathWidthY[k] - 1,
817 								Read256BytesBlockWidthY[k]) +
818 								Read256BytesBlockWidthY[k]);
819 			}
820 			if (BytePerPixC[k] > 0) {
821 				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
822 					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
823 							dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
824 									Read256BytesBlockWidthC[k] - 1,
825 									Read256BytesBlockWidthC[k]) -
826 									dml_floor(ViewportXStartC[k],
827 									Read256BytesBlockWidthC[k]));
828 				} else {
829 					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
830 							dml_ceil(SwathWidthC[k] - 1,
831 								Read256BytesBlockWidthC[k]) +
832 								Read256BytesBlockWidthC[k]);
833 				}
834 			} else {
835 				swath_width_chroma_ub[k] = 0;
836 			}
837 		} else {
838 			MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
839 			MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
840 
841 			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
842 				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
843 						SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
844 						Read256BytesBlockHeightY[k]) -
845 						dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
846 			} else {
847 				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
848 						Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
849 			}
850 			if (BytePerPixC[k] > 0) {
851 				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
852 					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
853 							dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
854 									Read256BytesBlockHeightC[k] - 1,
855 									Read256BytesBlockHeightC[k]) -
856 									dml_floor(ViewportYStartC[k],
857 											Read256BytesBlockHeightC[k]));
858 				} else {
859 					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
860 							dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
861 							Read256BytesBlockHeightC[k]);
862 				}
863 			} else {
864 				swath_width_chroma_ub[k] = 0;
865 			}
866 		}
867 
868 #ifdef __DML_VBA_DEBUG__
869 		dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
870 		dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
871 		dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
872 		dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
873 #endif
874 
875 	}
876 } // CalculateSwathWidth
877 
878 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
879 			unsigned int TotalNumberOfActiveDPP,
880 			bool NoChroma,
881 			enum output_encoder_class Output,
882 			enum dm_swizzle_mode SurfaceTiling,
883 			bool CompBufReservedSpaceNeedAdjustment,
884 			bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
885 {
886 	bool ret_val = false;
887 
888 	ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
889 			TotalNumberOfActiveDPP == 1 && NoChroma);
890 	if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
891 		ret_val = false;
892 
893 	if (SurfaceTiling == dm_sw_linear)
894 		ret_val = false;
895 
896 	if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
897 		ret_val = false;
898 
899 #ifdef __DML_VBA_DEBUG__
900 	dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, CompBufReservedSpaceNeedAdjustment);
901 	dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
902 	dml_print("DML::%s: ret_val = %d\n",  __func__, ret_val);
903 #endif
904 
905 	return (ret_val);
906 }
907 
908 void dml32_CalculateDETBufferSize(
909 		unsigned int DETSizeOverride[],
910 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
911 		bool ForceSingleDPP,
912 		unsigned int NumberOfActiveSurfaces,
913 		bool UnboundedRequestEnabled,
914 		unsigned int nomDETInKByte,
915 		unsigned int MaxTotalDETInKByte,
916 		unsigned int ConfigReturnBufferSizeInKByte,
917 		unsigned int MinCompressedBufferSizeInKByte,
918 		unsigned int CompressedBufferSegmentSizeInkByteFinal,
919 		enum source_format_class SourcePixelFormat[],
920 		double ReadBandwidthLuma[],
921 		double ReadBandwidthChroma[],
922 		unsigned int RoundedUpMaxSwathSizeBytesY[],
923 		unsigned int RoundedUpMaxSwathSizeBytesC[],
924 		unsigned int DPPPerSurface[],
925 		/* Output */
926 		unsigned int DETBufferSizeInKByte[],
927 		unsigned int *CompressedBufferSizeInkByte)
928 {
929 	unsigned int DETBufferSizePoolInKByte;
930 	unsigned int NextDETBufferPieceInKByte;
931 	bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
932 	bool NextPotentialSurfaceToAssignDETPieceFound;
933 	unsigned int NextSurfaceToAssignDETPiece;
934 	double TotalBandwidth;
935 	double BandwidthOfSurfacesNotAssignedDETPiece;
936 	unsigned int max_minDET;
937 	unsigned int minDET;
938 	unsigned int minDET_pipe;
939 	unsigned int j, k;
940 
941 #ifdef __DML_VBA_DEBUG__
942 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
943 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
944 	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
945 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
946 	dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
947 	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
948 	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
949 	dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
950 			CompressedBufferSegmentSizeInkByteFinal);
951 #endif
952 
953 	// Note: Will use default det size if that fits 2 swaths
954 	if (UnboundedRequestEnabled) {
955 		if (DETSizeOverride[0] > 0) {
956 			DETBufferSizeInKByte[0] = DETSizeOverride[0];
957 		} else {
958 			DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
959 					((double) RoundedUpMaxSwathSizeBytesY[0] +
960 							(double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
961 		}
962 		*CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
963 	} else {
964 		DETBufferSizePoolInKByte = MaxTotalDETInKByte;
965 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
966 			DETBufferSizeInKByte[k] = nomDETInKByte;
967 			if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
968 					SourcePixelFormat[k] == dm_420_12) {
969 				max_minDET = nomDETInKByte - 64;
970 			} else {
971 				max_minDET = nomDETInKByte;
972 			}
973 			minDET = 128;
974 			minDET_pipe = 0;
975 
976 			// add DET resource until can hold 2 full swaths
977 			while (minDET <= max_minDET && minDET_pipe == 0) {
978 				if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
979 						(double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
980 					minDET_pipe = minDET;
981 				minDET = minDET + 64;
982 			}
983 
984 #ifdef __DML_VBA_DEBUG__
985 			dml_print("DML::%s: k=%0d minDET        = %d\n", __func__, k, minDET);
986 			dml_print("DML::%s: k=%0d max_minDET    = %d\n", __func__, k, max_minDET);
987 			dml_print("DML::%s: k=%0d minDET_pipe   = %d\n", __func__, k, minDET_pipe);
988 			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
989 					RoundedUpMaxSwathSizeBytesY[k]);
990 			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
991 					RoundedUpMaxSwathSizeBytesC[k]);
992 #endif
993 
994 			if (minDET_pipe == 0) {
995 				minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
996 						(double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
997 #ifdef __DML_VBA_DEBUG__
998 				dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
999 						__func__, k, minDET_pipe);
1000 #endif
1001 			}
1002 
1003 			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1004 				DETBufferSizeInKByte[k] = 0;
1005 			} else if (DETSizeOverride[k] > 0) {
1006 				DETBufferSizeInKByte[k] = DETSizeOverride[k];
1007 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1008 						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1009 			} else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1010 				DETBufferSizeInKByte[k] = minDET_pipe;
1011 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1012 						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1013 			}
1014 
1015 #ifdef __DML_VBA_DEBUG__
1016 			dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1017 			dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1018 			dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1019 			dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
1020 #endif
1021 		}
1022 
1023 		TotalBandwidth = 0;
1024 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1025 			if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1026 				TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1027 		}
1028 #ifdef __DML_VBA_DEBUG__
1029 		dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1030 		for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1031 			dml_print("DML::%s: k=%d DETBufferSizeInKByte   = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1032 		dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1033 		dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
1034 #endif
1035 		BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1036 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1037 
1038 			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1039 				DETPieceAssignedToThisSurfaceAlready[k] = true;
1040 			} else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1041 					(double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1042 					((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1043 				DETPieceAssignedToThisSurfaceAlready[k] = true;
1044 				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1045 						ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1046 			} else {
1047 				DETPieceAssignedToThisSurfaceAlready[k] = false;
1048 			}
1049 #ifdef __DML_VBA_DEBUG__
1050 			dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1051 					DETPieceAssignedToThisSurfaceAlready[k]);
1052 			dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1053 					BandwidthOfSurfacesNotAssignedDETPiece);
1054 #endif
1055 		}
1056 
1057 		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1058 			NextPotentialSurfaceToAssignDETPieceFound = false;
1059 			NextSurfaceToAssignDETPiece = 0;
1060 
1061 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1062 #ifdef __DML_VBA_DEBUG__
1063 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1064 						ReadBandwidthLuma[k]);
1065 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1066 						ReadBandwidthChroma[k]);
1067 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1068 						ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1069 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1070 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1071 				dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1072 						NextSurfaceToAssignDETPiece);
1073 #endif
1074 				if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1075 						(!NextPotentialSurfaceToAssignDETPieceFound ||
1076 						ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1077 						ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1078 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1079 					NextSurfaceToAssignDETPiece = k;
1080 					NextPotentialSurfaceToAssignDETPieceFound = true;
1081 				}
1082 #ifdef __DML_VBA_DEBUG__
1083 				dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1084 						__func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1085 				dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1086 						__func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1087 #endif
1088 			}
1089 
1090 			if (NextPotentialSurfaceToAssignDETPieceFound) {
1091 				// Note: To show the banker's rounding behavior in VBA and also the fact
1092 				// that the DET buffer size varies due to precision issue
1093 				//
1094 				//double tmp1 =  ((double) DETBufferSizePoolInKByte *
1095 				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1096 				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1097 				// BandwidthOfSurfacesNotAssignedDETPiece /
1098 				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1099 				//double tmp2 =  dml_round((double) DETBufferSizePoolInKByte *
1100 				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1101 				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1102 				 //BandwidthOfSurfacesNotAssignedDETPiece /
1103 				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1104 				//
1105 				//dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1106 				//dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1107 
1108 				NextDETBufferPieceInKByte = dml_min(
1109 					dml_round((double) DETBufferSizePoolInKByte *
1110 						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1111 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1112 						BandwidthOfSurfacesNotAssignedDETPiece /
1113 						((ForceSingleDPP ? 1 :
1114 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1115 						(ForceSingleDPP ? 1 :
1116 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1117 						dml_floor((double) DETBufferSizePoolInKByte,
1118 						(ForceSingleDPP ? 1 :
1119 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1120 
1121 				// Above calculation can assign the entire DET buffer allocation to a single pipe.
1122 				// We should limit the per-pipe DET size to the nominal / max per pipe.
1123 				if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1124 					if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1125 							nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1126 						NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1127 								DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1128 					} else {
1129 						// Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1130 						// already has the max per-pipe value
1131 						NextDETBufferPieceInKByte = 0;
1132 					}
1133 				}
1134 
1135 #ifdef __DML_VBA_DEBUG__
1136 				dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1137 					DETBufferSizePoolInKByte);
1138 				dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1139 					NextSurfaceToAssignDETPiece);
1140 				dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1141 					NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1142 				dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1143 					NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1144 				dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1145 					__func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1146 				dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1147 					NextDETBufferPieceInKByte);
1148 				dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1149 					__func__, j, NextSurfaceToAssignDETPiece,
1150 					DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1151 #endif
1152 
1153 				DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1154 						DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1155 						+ NextDETBufferPieceInKByte
1156 						/ (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1157 #ifdef __DML_VBA_DEBUG__
1158 				dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1159 #endif
1160 
1161 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1162 				DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1163 				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1164 						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1165 								ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1166 			}
1167 		}
1168 		*CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1169 	}
1170 	*CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1171 
1172 #ifdef __DML_VBA_DEBUG__
1173 	dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1174 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1175 	for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1176 		dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1177 				__func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1178 	}
1179 #endif
1180 } // CalculateDETBufferSize
1181 
1182 void dml32_CalculateODMMode(
1183 		unsigned int MaximumPixelsPerLinePerDSCUnit,
1184 		unsigned int HActive,
1185 		enum output_encoder_class Output,
1186 		enum odm_combine_policy ODMUse,
1187 		double StateDispclk,
1188 		double MaxDispclk,
1189 		bool DSCEnable,
1190 		unsigned int TotalNumberOfActiveDPP,
1191 		unsigned int MaxNumDPP,
1192 		double PixelClock,
1193 		double DISPCLKDPPCLKDSCCLKDownSpreading,
1194 		double DISPCLKRampingMargin,
1195 		double DISPCLKDPPCLKVCOSpeed,
1196 
1197 		/* Output */
1198 		bool *TotalAvailablePipesSupport,
1199 		unsigned int *NumberOfDPP,
1200 		enum odm_combine_mode *ODMMode,
1201 		double *RequiredDISPCLKPerSurface)
1202 {
1203 
1204 	double SurfaceRequiredDISPCLKWithoutODMCombine;
1205 	double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1206 	double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1207 
1208 	SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1209 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1210 			MaxDispclk);
1211 	SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1212 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1213 			MaxDispclk);
1214 	SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1215 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1216 			MaxDispclk);
1217 	*TotalAvailablePipesSupport = true;
1218 	*ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1219 
1220 	if (ODMUse == dm_odm_combine_policy_none)
1221 		*ODMMode = dm_odm_combine_mode_disabled;
1222 
1223 	*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1224 	*NumberOfDPP = 0;
1225 
1226 	// FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1227 	// (ODMUse == "" || ODMUse == "CombineAsNeeded")
1228 
1229 	if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1230 			((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1231 					(DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit)))))) {
1232 		if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1233 			*ODMMode = dm_odm_combine_mode_4to1;
1234 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1235 			*NumberOfDPP = 4;
1236 		} else {
1237 			*TotalAvailablePipesSupport = false;
1238 		}
1239 	} else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1240 			(((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1241 					SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1242 					(DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit)))))) {
1243 		if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1244 			*ODMMode = dm_odm_combine_mode_2to1;
1245 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1246 			*NumberOfDPP = 2;
1247 		} else {
1248 			*TotalAvailablePipesSupport = false;
1249 		}
1250 	} else {
1251 		if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1252 			*NumberOfDPP = 1;
1253 		else
1254 			*TotalAvailablePipesSupport = false;
1255 	}
1256 }
1257 
1258 double dml32_CalculateRequiredDispclk(
1259 		enum odm_combine_mode ODMMode,
1260 		double PixelClock,
1261 		double DISPCLKDPPCLKDSCCLKDownSpreading,
1262 		double DISPCLKRampingMargin,
1263 		double DISPCLKDPPCLKVCOSpeed,
1264 		double MaxDispclk)
1265 {
1266 	double RequiredDispclk = 0.;
1267 	double PixelClockAfterODM;
1268 	double DISPCLKWithRampingRoundedToDFSGranularity;
1269 	double DISPCLKWithoutRampingRoundedToDFSGranularity;
1270 	double MaxDispclkRoundedDownToDFSGranularity;
1271 
1272 	if (ODMMode == dm_odm_combine_mode_4to1)
1273 		PixelClockAfterODM = PixelClock / 4;
1274 	else if (ODMMode == dm_odm_combine_mode_2to1)
1275 		PixelClockAfterODM = PixelClock / 2;
1276 	else
1277 		PixelClockAfterODM = PixelClock;
1278 
1279 
1280 	DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1281 			PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1282 					* (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1283 
1284 	DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1285 			PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1286 
1287 	MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1288 
1289 	if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1290 		RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1291 	else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1292 		RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1293 	else
1294 		RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1295 
1296 	return RequiredDispclk;
1297 }
1298 
/*
 * Round a clock to a value of the form (4 * VCOSpeed) / N.
 *
 * The divider (4 * VCOSpeed) / Clock is passed through dml_floor(..., 1.0)
 * when round_up is set (giving a clock that is not lower than the input) or
 * through dml_ceil(..., 1.0) otherwise (giving a clock that is not higher).
 *
 * Returns 0.0 for a non-positive Clock.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double vco_times_four;
	double divider;

	if (Clock <= 0.0)
		return 0.0;

	vco_times_four = VCOSpeed * 4.0;
	divider = vco_times_four / Clock;

	/* A smaller divider yields a higher clock, hence floor for round-up. */
	if (round_up)
		divider = dml_floor(divider, 1.0);
	else
		divider = dml_ceil(divider, 1.0);

	return vco_times_four / divider;
}
1309 
1310 void dml32_CalculateOutputLink(
1311 		double PHYCLKPerState,
1312 		double PHYCLKD18PerState,
1313 		double PHYCLKD32PerState,
1314 		double Downspreading,
1315 		bool IsMainSurfaceUsingTheIndicatedTiming,
1316 		enum output_encoder_class Output,
1317 		enum output_format_class OutputFormat,
1318 		unsigned int HTotal,
1319 		unsigned int HActive,
1320 		double PixelClockBackEnd,
1321 		double ForcedOutputLinkBPP,
1322 		unsigned int DSCInputBitPerComponent,
1323 		unsigned int NumberOfDSCSlices,
1324 		double AudioSampleRate,
1325 		unsigned int AudioSampleLayout,
1326 		enum odm_combine_mode ODMModeNoDSC,
1327 		enum odm_combine_mode ODMModeDSC,
1328 		bool DSCEnable,
1329 		unsigned int OutputLinkDPLanes,
1330 		enum dm_output_link_dp_rate OutputLinkDPRate,
1331 
1332 		/* Output */
1333 		bool *RequiresDSC,
1334 		double *RequiresFEC,
1335 		double  *OutBpp,
1336 		enum dm_output_type *OutputType,
1337 		enum dm_output_rate *OutputRate,
1338 		unsigned int *RequiredSlots)
1339 {
1340 	bool LinkDSCEnable;
1341 	unsigned int dummy;
1342 	*RequiresDSC = false;
1343 	*RequiresFEC = false;
1344 	*OutBpp = 0;
1345 	*OutputType = dm_output_type_unknown;
1346 	*OutputRate = dm_output_rate_unknown;
1347 
1348 	if (IsMainSurfaceUsingTheIndicatedTiming) {
1349 		if (Output == dm_hdmi) {
1350 			*RequiresDSC = false;
1351 			*RequiresFEC = false;
1352 			*OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1353 					PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1354 					DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1355 					ODMModeNoDSC, ODMModeDSC, &dummy);
1356 			//OutputTypeAndRate = "HDMI";
1357 			*OutputType = dm_output_type_hdmi;
1358 
1359 		} else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1360 			if (DSCEnable == true) {
1361 				*RequiresDSC = true;
1362 				LinkDSCEnable = true;
1363 				if (Output == dm_dp || Output == dm_dp2p0)
1364 					*RequiresFEC = true;
1365 				else
1366 					*RequiresFEC = false;
1367 			} else {
1368 				*RequiresDSC = false;
1369 				LinkDSCEnable = false;
1370 				if (Output == dm_dp2p0)
1371 					*RequiresFEC = true;
1372 				else
1373 					*RequiresFEC = false;
1374 			}
1375 			if (Output == dm_dp2p0) {
1376 				*OutBpp = 0;
1377 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1378 						PHYCLKD32PerState >= 10000 / 32) {
1379 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1380 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1381 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1382 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1383 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1384 					if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
1385 							ForcedOutputLinkBPP == 0) {
1386 						*RequiresDSC = true;
1387 						LinkDSCEnable = true;
1388 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1389 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1390 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1391 								OutputFormat, DSCInputBitPerComponent,
1392 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1393 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1394 					}
1395 					//OutputTypeAndRate = Output & " UHBR10";
1396 					*OutputType = dm_output_type_dp2p0;
1397 					*OutputRate = dm_output_rate_dp_rate_uhbr10;
1398 				}
1399 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1400 						*OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
1401 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1402 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1403 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1404 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1405 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1406 
1407 					if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1408 							ForcedOutputLinkBPP == 0) {
1409 						*RequiresDSC = true;
1410 						LinkDSCEnable = true;
1411 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1412 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1413 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1414 								OutputFormat, DSCInputBitPerComponent,
1415 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1416 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1417 					}
1418 					//OutputTypeAndRate = Output & " UHBR13p5";
1419 					*OutputType = dm_output_type_dp2p0;
1420 					*OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1421 				}
1422 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1423 						*OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1424 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1425 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1426 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1427 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1428 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1429 					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1430 						*RequiresDSC = true;
1431 						LinkDSCEnable = true;
1432 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1433 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1434 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1435 								OutputFormat, DSCInputBitPerComponent,
1436 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1437 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1438 					}
1439 					//OutputTypeAndRate = Output & " UHBR20";
1440 					*OutputType = dm_output_type_dp2p0;
1441 					*OutputRate = dm_output_rate_dp_rate_uhbr20;
1442 				}
1443 			} else {
1444 				*OutBpp = 0;
1445 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1446 						PHYCLKPerState >= 270) {
1447 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1448 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1449 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1450 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1451 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1452 					if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1453 							ForcedOutputLinkBPP == 0) {
1454 						*RequiresDSC = true;
1455 						LinkDSCEnable = true;
1456 						if (Output == dm_dp)
1457 							*RequiresFEC = true;
1458 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1459 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1460 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1461 								OutputFormat, DSCInputBitPerComponent,
1462 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1463 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1464 					}
1465 					//OutputTypeAndRate = Output & " HBR";
1466 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1467 					*OutputRate = dm_output_rate_dp_rate_hbr;
1468 				}
1469 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1470 						*OutBpp == 0 && PHYCLKPerState >= 540) {
1471 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1472 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1473 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1474 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1475 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1476 
1477 					if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1478 							ForcedOutputLinkBPP == 0) {
1479 						*RequiresDSC = true;
1480 						LinkDSCEnable = true;
1481 						if (Output == dm_dp)
1482 							*RequiresFEC = true;
1483 
1484 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1485 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1486 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1487 								OutputFormat, DSCInputBitPerComponent,
1488 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1489 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1490 					}
1491 					//OutputTypeAndRate = Output & " HBR2";
1492 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1493 					*OutputRate = dm_output_rate_dp_rate_hbr2;
1494 				}
1495 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1496 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1497 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1498 							ForcedOutputLinkBPP, LinkDSCEnable, Output,
1499 							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1500 							AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1501 							RequiredSlots);
1502 
1503 					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1504 						*RequiresDSC = true;
1505 						LinkDSCEnable = true;
1506 						if (Output == dm_dp)
1507 							*RequiresFEC = true;
1508 
1509 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1510 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1511 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1512 								OutputFormat, DSCInputBitPerComponent,
1513 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1514 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1515 					}
1516 					//OutputTypeAndRate = Output & " HBR3";
1517 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1518 					*OutputRate = dm_output_rate_dp_rate_hbr3;
1519 				}
1520 			}
1521 		}
1522 	}
1523 }
1524 
/*
 * Compute the DPP clock of every active surface together with the global DPP
 * clock.
 *
 * Each surface first gets DPPCLKUsingSingleDPP[k] / DPPPerSurface[k], scaled
 * up by the down-spreading percentage.  The largest of those values is passed
 * through dml32_RoundToDFSGranularity() and stored in *GlobalDPPCLK.  Every
 * per-surface clock is then re-expressed as a whole number (dml_ceil with a
 * granularity of 1) of 1/255 steps of the global clock.
 *
 * Outputs: *GlobalDPPCLK and Dppclk[0 .. NumberOfActiveSurfaces - 1].
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	const double SpreadFactor = 1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100;
	double MaxDppclk = 0;
	double Step;
	unsigned int i;

	/* Pass 1: raw per-surface clock and the running maximum. */
	for (i = 0; i < NumberOfActiveSurfaces; i++) {
		Dppclk[i] = DPPCLKUsingSingleDPP[i] / DPPPerSurface[i] * SpreadFactor;
		MaxDppclk = dml_max(MaxDppclk, Dppclk[i]);
	}

	*GlobalDPPCLK = dml32_RoundToDFSGranularity(MaxDppclk, 1, DISPCLKDPPCLKVCOSpeed);

	/* Pass 2: snap each surface clock up to a multiple of GlobalDPPCLK / 255. */
	Step = *GlobalDPPCLK / 255;
	for (i = 0; i < NumberOfActiveSurfaces; i++)
		Dppclk[i] = Step * dml_ceil(Dppclk[i] * 255.0 / *GlobalDPPCLK, 1.0);
}
1546 
1547 double dml32_TruncToValidBPP(
1548 		double LinkBitRate,
1549 		unsigned int Lanes,
1550 		unsigned int HTotal,
1551 		unsigned int HActive,
1552 		double PixelClock,
1553 		double DesiredBPP,
1554 		bool DSCEnable,
1555 		enum output_encoder_class Output,
1556 		enum output_format_class Format,
1557 		unsigned int DSCInputBitPerComponent,
1558 		unsigned int DSCSlices,
1559 		unsigned int AudioRate,
1560 		unsigned int AudioLayout,
1561 		enum odm_combine_mode ODMModeNoDSC,
1562 		enum odm_combine_mode ODMModeDSC,
1563 		/* Output */
1564 		unsigned int *RequiredSlots)
1565 {
1566 	double    MaxLinkBPP;
1567 	unsigned int   MinDSCBPP;
1568 	double    MaxDSCBPP;
1569 	unsigned int   NonDSCBPP0;
1570 	unsigned int   NonDSCBPP1;
1571 	unsigned int   NonDSCBPP2;
1572 	unsigned int   NonDSCBPP3;
1573 
1574 	if (Format == dm_420) {
1575 		NonDSCBPP0 = 12;
1576 		NonDSCBPP1 = 15;
1577 		NonDSCBPP2 = 18;
1578 		MinDSCBPP = 6;
1579 		MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
1580 	} else if (Format == dm_444) {
1581 		NonDSCBPP0 = 18;
1582 		NonDSCBPP1 = 24;
1583 		NonDSCBPP2 = 30;
1584 		NonDSCBPP3 = 36;
1585 		MinDSCBPP = 8;
1586 		MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1587 	} else {
1588 		if (Output == dm_hdmi) {
1589 			NonDSCBPP0 = 24;
1590 			NonDSCBPP1 = 24;
1591 			NonDSCBPP2 = 24;
1592 		} else {
1593 			NonDSCBPP0 = 16;
1594 			NonDSCBPP1 = 20;
1595 			NonDSCBPP2 = 24;
1596 		}
1597 		if (Format == dm_n422) {
1598 			MinDSCBPP = 7;
1599 			MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1600 		} else {
1601 			MinDSCBPP = 8;
1602 			MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1603 		}
1604 	}
1605 	if (Output == dm_dp2p0) {
1606 		MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1607 	} else if (DSCEnable && Output == dm_dp) {
1608 		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1609 	} else {
1610 		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1611 	}
1612 
1613 	if (DSCEnable) {
1614 		if (ODMModeDSC == dm_odm_combine_mode_4to1)
1615 			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1616 		else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1617 			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1618 		else if (ODMModeDSC == dm_odm_split_mode_1to2)
1619 			MaxLinkBPP = 2 * MaxLinkBPP;
1620 	} else {
1621 		if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1622 			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1623 		else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1624 			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1625 		else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1626 			MaxLinkBPP = 2 * MaxLinkBPP;
1627 	}
1628 
1629 	if (DesiredBPP == 0) {
1630 		if (DSCEnable) {
1631 			if (MaxLinkBPP < MinDSCBPP)
1632 				return BPP_INVALID;
1633 			else if (MaxLinkBPP >= MaxDSCBPP)
1634 				return MaxDSCBPP;
1635 			else
1636 				return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1637 		} else {
1638 			if (MaxLinkBPP >= NonDSCBPP3)
1639 				return NonDSCBPP3;
1640 			else if (MaxLinkBPP >= NonDSCBPP2)
1641 				return NonDSCBPP2;
1642 			else if (MaxLinkBPP >= NonDSCBPP1)
1643 				return NonDSCBPP1;
1644 			else if (MaxLinkBPP >= NonDSCBPP0)
1645 				return 16.0;
1646 			else
1647 				return BPP_INVALID;
1648 		}
1649 	} else {
1650 		if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1651 				DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) ||
1652 				(DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1653 			return BPP_INVALID;
1654 		else
1655 			return DesiredBPP;
1656 	}
1657 
1658 	*RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1659 
1660 	return BPP_INVALID;
1661 } // TruncToValidBPP
1662 
1663 double dml32_RequiredDTBCLK(
1664 		bool              DSCEnable,
1665 		double               PixelClock,
1666 		enum output_format_class  OutputFormat,
1667 		double               OutputBpp,
1668 		unsigned int              DSCSlices,
1669 		unsigned int                 HTotal,
1670 		unsigned int                 HActive,
1671 		unsigned int              AudioRate,
1672 		unsigned int              AudioLayout)
1673 {
1674 	double PixelWordRate;
1675 	double HCActive;
1676 	double HCBlank;
1677 	double AverageTribyteRate;
1678 	double HActiveTribyteRate;
1679 
1680 	if (DSCEnable != true)
1681 		return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1682 
1683 	PixelWordRate = PixelClock /  (OutputFormat == dm_444 ? 1 : 2);
1684 	HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1685 			dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1686 	HCBlank = 64 + 32 *
1687 			dml_ceil(AudioRate *  (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1688 	AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1689 	HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1690 	return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
1691 }
1692 
1693 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1694 		enum odm_combine_mode ODMMode,
1695 		unsigned int DSCInputBitPerComponent,
1696 		double OutputBpp,
1697 		unsigned int HActive,
1698 		unsigned int HTotal,
1699 		unsigned int NumberOfDSCSlices,
1700 		enum output_format_class  OutputFormat,
1701 		enum output_encoder_class Output,
1702 		double PixelClock,
1703 		double PixelClockBackEnd)
1704 {
1705 	unsigned int DSCDelayRequirement_val;
1706 
1707 	if (DSCEnabled == true && OutputBpp != 0) {
1708 		if (ODMMode == dm_odm_combine_mode_4to1) {
1709 			DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1710 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1711 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1712 		} else if (ODMMode == dm_odm_combine_mode_2to1) {
1713 			DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1714 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1715 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1716 		} else {
1717 			DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1718 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1719 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
1720 		}
1721 
1722 		DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1723 				dml_ceil(DSCDelayRequirement_val / HActive, 1);
1724 
1725 		DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1726 
1727 	} else {
1728 		DSCDelayRequirement_val = 0;
1729 	}
1730 
1731 #ifdef __DML_VBA_DEBUG__
1732 	dml_print("DML::%s: DSCEnabled              = %d\n", __func__, DSCEnabled);
1733 	dml_print("DML::%s: OutputBpp               = %f\n", __func__, OutputBpp);
1734 	dml_print("DML::%s: HActive                 = %d\n", __func__, HActive);
1735 	dml_print("DML::%s: OutputFormat            = %d\n", __func__, OutputFormat);
1736 	dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1737 	dml_print("DML::%s: NumberOfDSCSlices       = %d\n", __func__, NumberOfDSCSlices);
1738 	dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1739 #endif
1740 
1741 	return DSCDelayRequirement_val;
1742 }
1743 
1744 void dml32_CalculateSurfaceSizeInMall(
1745 		unsigned int NumberOfActiveSurfaces,
1746 		unsigned int MALLAllocatedForDCN,
1747 		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1748 		bool DCCEnable[],
1749 		bool ViewportStationary[],
1750 		unsigned int ViewportXStartY[],
1751 		unsigned int ViewportYStartY[],
1752 		unsigned int ViewportXStartC[],
1753 		unsigned int ViewportYStartC[],
1754 		unsigned int ViewportWidthY[],
1755 		unsigned int ViewportHeightY[],
1756 		unsigned int BytesPerPixelY[],
1757 		unsigned int ViewportWidthC[],
1758 		unsigned int ViewportHeightC[],
1759 		unsigned int BytesPerPixelC[],
1760 		unsigned int SurfaceWidthY[],
1761 		unsigned int SurfaceWidthC[],
1762 		unsigned int SurfaceHeightY[],
1763 		unsigned int SurfaceHeightC[],
1764 		unsigned int Read256BytesBlockWidthY[],
1765 		unsigned int Read256BytesBlockWidthC[],
1766 		unsigned int Read256BytesBlockHeightY[],
1767 		unsigned int Read256BytesBlockHeightC[],
1768 		unsigned int ReadBlockWidthY[],
1769 		unsigned int ReadBlockWidthC[],
1770 		unsigned int ReadBlockHeightY[],
1771 		unsigned int ReadBlockHeightC[],
1772 
1773 		/* Output */
1774 		unsigned int    SurfaceSizeInMALL[],
1775 		bool *ExceededMALLSize)
1776 {
1777 	unsigned int TotalSurfaceSizeInMALL  = 0;
1778 	unsigned int k;
1779 
1780 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1781 		if (ViewportStationary[k]) {
1782 			SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1783 					dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1784 						ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1785 						ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1786 						ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1787 						ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1788 						dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
1789 
1790 			if (ReadBlockWidthC[k] > 0) {
1791 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1792 						dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1793 							dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1794 							ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1795 							dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1796 							dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1797 							dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1798 							ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1799 							dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
1800 							BytesPerPixelC[k];
1801 			}
1802 			if (DCCEnable[k] == true) {
1803 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1804 						dml_min(dml_ceil(SurfaceWidthY[k], 8 * Read256BytesBlockWidthY[k]),
1805 							dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1806 							Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1807 							- dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1808 							* dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1809 							Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1810 							ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1811 							Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8
1812 							* Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256;
1813 				if (Read256BytesBlockWidthC[k] > 0) {
1814 					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1815 							dml_min(dml_ceil(SurfaceWidthC[k], 8 *
1816 								Read256BytesBlockWidthC[k]),
1817 								dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1818 								* Read256BytesBlockWidthC[k] - 1, 8 *
1819 								Read256BytesBlockWidthC[k]) -
1820 								dml_floor(ViewportXStartC[k], 8 *
1821 								Read256BytesBlockWidthC[k])) *
1822 								dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1823 								Read256BytesBlockHeightC[k]),
1824 								dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1825 								8 * Read256BytesBlockHeightC[k] - 1, 8 *
1826 								Read256BytesBlockHeightC[k]) -
1827 								dml_floor(ViewportYStartC[k], 8 *
1828 								Read256BytesBlockHeightC[k])) *
1829 								BytesPerPixelC[k] / 256;
1830 				}
1831 			}
1832 		} else {
1833 			SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1834 					ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1835 					dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1836 							ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1837 							BytesPerPixelY[k];
1838 			if (ReadBlockWidthC[k] > 0) {
1839 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1840 						dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1841 								ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1842 						dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1843 								ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
1844 								BytesPerPixelC[k];
1845 			}
1846 			if (DCCEnable[k] == true) {
1847 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1848 						dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] + 8 *
1849 								Read256BytesBlockWidthY[k] - 1), 8 *
1850 								Read256BytesBlockWidthY[k]) *
1851 						dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1852 								Read256BytesBlockHeightY[k] - 1), 8 *
1853 								Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256;
1854 
1855 				if (Read256BytesBlockWidthC[k] > 0) {
1856 					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1857 							dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] + 8 *
1858 									Read256BytesBlockWidthC[k] - 1), 8 *
1859 									Read256BytesBlockWidthC[k]) *
1860 							dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1861 									Read256BytesBlockHeightC[k] - 1), 8 *
1862 									Read256BytesBlockHeightC[k]) *
1863 									BytesPerPixelC[k] / 256;
1864 				}
1865 			}
1866 		}
1867 	}
1868 
1869 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1870 		if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1871 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
1872 	}
1873 	*ExceededMALLSize =  (TotalSurfaceSizeInMALL <= MALLAllocatedForDCN * 1024 * 1024 ? false : true);
1874 } // CalculateSurfaceSizeInMall
1875 
1876 void dml32_CalculateVMRowAndSwath(
1877 		unsigned int NumberOfActiveSurfaces,
1878 		DmlPipe myPipe[],
1879 		unsigned int SurfaceSizeInMALL[],
1880 		unsigned int PTEBufferSizeInRequestsLuma,
1881 		unsigned int PTEBufferSizeInRequestsChroma,
1882 		unsigned int DCCMetaBufferSizeBytes,
1883 		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1884 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1885 		unsigned int MALLAllocatedForDCN,
1886 		double SwathWidthY[],
1887 		double SwathWidthC[],
1888 		bool GPUVMEnable,
1889 		bool HostVMEnable,
1890 		unsigned int HostVMMaxNonCachedPageTableLevels,
1891 		unsigned int GPUVMMaxPageTableLevels,
1892 		unsigned int GPUVMMinPageSizeKBytes[],
1893 		unsigned int HostVMMinPageSize,
1894 
1895 		/* Output */
1896 		bool PTEBufferSizeNotExceeded[],
1897 		bool DCCMetaBufferSizeNotExceeded[],
1898 		unsigned int dpte_row_width_luma_ub[],
1899 		unsigned int dpte_row_width_chroma_ub[],
1900 		unsigned int dpte_row_height_luma[],
1901 		unsigned int dpte_row_height_chroma[],
1902 		unsigned int dpte_row_height_linear_luma[],     // VBA_DELTA
1903 		unsigned int dpte_row_height_linear_chroma[],   // VBA_DELTA
1904 		unsigned int meta_req_width[],
1905 		unsigned int meta_req_width_chroma[],
1906 		unsigned int meta_req_height[],
1907 		unsigned int meta_req_height_chroma[],
1908 		unsigned int meta_row_width[],
1909 		unsigned int meta_row_width_chroma[],
1910 		unsigned int meta_row_height[],
1911 		unsigned int meta_row_height_chroma[],
1912 		unsigned int vm_group_bytes[],
1913 		unsigned int dpte_group_bytes[],
1914 		unsigned int PixelPTEReqWidthY[],
1915 		unsigned int PixelPTEReqHeightY[],
1916 		unsigned int PTERequestSizeY[],
1917 		unsigned int PixelPTEReqWidthC[],
1918 		unsigned int PixelPTEReqHeightC[],
1919 		unsigned int PTERequestSizeC[],
1920 		unsigned int dpde0_bytes_per_frame_ub_l[],
1921 		unsigned int meta_pte_bytes_per_frame_ub_l[],
1922 		unsigned int dpde0_bytes_per_frame_ub_c[],
1923 		unsigned int meta_pte_bytes_per_frame_ub_c[],
1924 		double PrefetchSourceLinesY[],
1925 		double PrefetchSourceLinesC[],
1926 		double VInitPreFillY[],
1927 		double VInitPreFillC[],
1928 		unsigned int MaxNumSwathY[],
1929 		unsigned int MaxNumSwathC[],
1930 		double meta_row_bw[],
1931 		double dpte_row_bw[],
1932 		double PixelPTEBytesPerRow[],
1933 		double PDEAndMetaPTEBytesFrame[],
1934 		double MetaRowByte[],
1935 		bool use_one_row_for_frame[],
1936 		bool use_one_row_for_frame_flip[],
1937 		bool UsesMALLForStaticScreen[],
1938 		bool PTE_BUFFER_MODE[],
1939 		unsigned int BIGK_FRAGMENT_SIZE[])
1940 {
1941 	unsigned int k;
1942 	unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
1943 	unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
1944 	unsigned int PDEAndMetaPTEBytesFrameY;
1945 	unsigned int PDEAndMetaPTEBytesFrameC;
1946 	unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
1947 	unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
1948 	unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
1949 	unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
1950 	unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
1951 	unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
1952 	unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1953 	unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
1954 	unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1955 	unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
1956 	bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
1957 
1958 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1959 		if (HostVMEnable == true) {
1960 			vm_group_bytes[k] = 512;
1961 			dpte_group_bytes[k] = 512;
1962 		} else if (GPUVMEnable == true) {
1963 			vm_group_bytes[k] = 2048;
1964 			if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1965 				dpte_group_bytes[k] = 512;
1966 			else
1967 				dpte_group_bytes[k] = 2048;
1968 		} else {
1969 			vm_group_bytes[k] = 0;
1970 			dpte_group_bytes[k] = 0;
1971 		}
1972 
1973 		if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
1974 				myPipe[k].SourcePixelFormat == dm_420_12 ||
1975 				myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
1976 			if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
1977 					!IsVertical(myPipe[k].SourceRotation)) {
1978 				PTEBufferSizeInRequestsForLuma[k] =
1979 						(PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
1980 				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
1981 			} else {
1982 				PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
1983 				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
1984 			}
1985 
1986 			PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
1987 					myPipe[k].ViewportStationary,
1988 					myPipe[k].DCCEnable,
1989 					myPipe[k].DPPPerSurface,
1990 					myPipe[k].BlockHeight256BytesC,
1991 					myPipe[k].BlockWidth256BytesC,
1992 					myPipe[k].SourcePixelFormat,
1993 					myPipe[k].SurfaceTiling,
1994 					myPipe[k].BytePerPixelC,
1995 					myPipe[k].SourceRotation,
1996 					SwathWidthC[k],
1997 					myPipe[k].ViewportHeightChroma,
1998 					myPipe[k].ViewportXStartC,
1999 					myPipe[k].ViewportYStartC,
2000 					GPUVMEnable,
2001 					HostVMEnable,
2002 					HostVMMaxNonCachedPageTableLevels,
2003 					GPUVMMaxPageTableLevels,
2004 					GPUVMMinPageSizeKBytes[k],
2005 					HostVMMinPageSize,
2006 					PTEBufferSizeInRequestsForChroma[k],
2007 					myPipe[k].PitchC,
2008 					myPipe[k].DCCMetaPitchC,
2009 					myPipe[k].BlockWidthC,
2010 					myPipe[k].BlockHeightC,
2011 
2012 					/* Output */
2013 					&MetaRowByteC[k],
2014 					&PixelPTEBytesPerRowC[k],
2015 					&dpte_row_width_chroma_ub[k],
2016 					&dpte_row_height_chroma[k],
2017 					&dpte_row_height_linear_chroma[k],
2018 					&PixelPTEBytesPerRowC_one_row_per_frame[k],
2019 					&dpte_row_width_chroma_ub_one_row_per_frame[k],
2020 					&dpte_row_height_chroma_one_row_per_frame[k],
2021 					&meta_req_width_chroma[k],
2022 					&meta_req_height_chroma[k],
2023 					&meta_row_width_chroma[k],
2024 					&meta_row_height_chroma[k],
2025 					&PixelPTEReqWidthC[k],
2026 					&PixelPTEReqHeightC[k],
2027 					&PTERequestSizeC[k],
2028 					&dpde0_bytes_per_frame_ub_c[k],
2029 					&meta_pte_bytes_per_frame_ub_c[k]);
2030 
2031 			PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2032 					myPipe[k].VRatioChroma,
2033 					myPipe[k].VTapsChroma,
2034 					myPipe[k].InterlaceEnable,
2035 					myPipe[k].ProgressiveToInterlaceUnitInOPP,
2036 					myPipe[k].SwathHeightC,
2037 					myPipe[k].SourceRotation,
2038 					myPipe[k].ViewportStationary,
2039 					SwathWidthC[k],
2040 					myPipe[k].ViewportHeightChroma,
2041 					myPipe[k].ViewportXStartC,
2042 					myPipe[k].ViewportYStartC,
2043 
2044 					/* Output */
2045 					&VInitPreFillC[k],
2046 					&MaxNumSwathC[k]);
2047 		} else {
2048 			PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2049 			PTEBufferSizeInRequestsForChroma[k] = 0;
2050 			PixelPTEBytesPerRowC[k] = 0;
2051 			PDEAndMetaPTEBytesFrameC = 0;
2052 			MetaRowByteC[k] = 0;
2053 			MaxNumSwathC[k] = 0;
2054 			PrefetchSourceLinesC[k] = 0;
2055 			dpte_row_height_chroma_one_row_per_frame[k] = 0;
2056 			dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2057 			PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2058 		}
2059 
2060 		PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2061 				myPipe[k].ViewportStationary,
2062 				myPipe[k].DCCEnable,
2063 				myPipe[k].DPPPerSurface,
2064 				myPipe[k].BlockHeight256BytesY,
2065 				myPipe[k].BlockWidth256BytesY,
2066 				myPipe[k].SourcePixelFormat,
2067 				myPipe[k].SurfaceTiling,
2068 				myPipe[k].BytePerPixelY,
2069 				myPipe[k].SourceRotation,
2070 				SwathWidthY[k],
2071 				myPipe[k].ViewportHeight,
2072 				myPipe[k].ViewportXStart,
2073 				myPipe[k].ViewportYStart,
2074 				GPUVMEnable,
2075 				HostVMEnable,
2076 				HostVMMaxNonCachedPageTableLevels,
2077 				GPUVMMaxPageTableLevels,
2078 				GPUVMMinPageSizeKBytes[k],
2079 				HostVMMinPageSize,
2080 				PTEBufferSizeInRequestsForLuma[k],
2081 				myPipe[k].PitchY,
2082 				myPipe[k].DCCMetaPitchY,
2083 				myPipe[k].BlockWidthY,
2084 				myPipe[k].BlockHeightY,
2085 
2086 				/* Output */
2087 				&MetaRowByteY[k],
2088 				&PixelPTEBytesPerRowY[k],
2089 				&dpte_row_width_luma_ub[k],
2090 				&dpte_row_height_luma[k],
2091 				&dpte_row_height_linear_luma[k],
2092 				&PixelPTEBytesPerRowY_one_row_per_frame[k],
2093 				&dpte_row_width_luma_ub_one_row_per_frame[k],
2094 				&dpte_row_height_luma_one_row_per_frame[k],
2095 				&meta_req_width[k],
2096 				&meta_req_height[k],
2097 				&meta_row_width[k],
2098 				&meta_row_height[k],
2099 				&PixelPTEReqWidthY[k],
2100 				&PixelPTEReqHeightY[k],
2101 				&PTERequestSizeY[k],
2102 				&dpde0_bytes_per_frame_ub_l[k],
2103 				&meta_pte_bytes_per_frame_ub_l[k]);
2104 
2105 		PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2106 				myPipe[k].VRatio,
2107 				myPipe[k].VTaps,
2108 				myPipe[k].InterlaceEnable,
2109 				myPipe[k].ProgressiveToInterlaceUnitInOPP,
2110 				myPipe[k].SwathHeightY,
2111 				myPipe[k].SourceRotation,
2112 				myPipe[k].ViewportStationary,
2113 				SwathWidthY[k],
2114 				myPipe[k].ViewportHeight,
2115 				myPipe[k].ViewportXStart,
2116 				myPipe[k].ViewportYStart,
2117 
2118 				/* Output */
2119 				&VInitPreFillY[k],
2120 				&MaxNumSwathY[k]);
2121 
2122 		PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2123 		MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
2124 
2125 		if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
2126 				PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
2127 			PTEBufferSizeNotExceeded[k] = true;
2128 		} else {
2129 			PTEBufferSizeNotExceeded[k] = false;
2130 		}
2131 
2132 		one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2133 			PTEBufferSizeInRequestsForLuma[k] &&
2134 			PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
2135 	}
2136 
2137 	dml32_CalculateMALLUseForStaticScreen(
2138 			NumberOfActiveSurfaces,
2139 			MALLAllocatedForDCN,
2140 			UseMALLForStaticScreen,   // mode
2141 			SurfaceSizeInMALL,
2142 			one_row_per_frame_fits_in_buffer,
2143 			/* Output */
2144 			UsesMALLForStaticScreen); // boolen
2145 
2146 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2147 		PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2148 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2149 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2150 				(GPUVMMinPageSizeKBytes[k] > 64);
2151 		BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
2152 	}
2153 
2154 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2155 #ifdef __DML_VBA_DEBUG__
2156 		dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n",  __func__, k, SurfaceSizeInMALL[k]);
2157 		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2158 #endif
2159 		use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2160 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2161 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2162 				(GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
2163 
2164 		use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2165 				!(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
2166 
2167 		if (use_one_row_for_frame[k]) {
2168 			dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
2169 			dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
2170 			PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
2171 			dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
2172 			dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
2173 			PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
2174 			PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
2175 		}
2176 
2177 		if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2178 			DCCMetaBufferSizeNotExceeded[k] = true;
2179 		else
2180 			DCCMetaBufferSizeNotExceeded[k] = false;
2181 
2182 		PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
2183 		if (use_one_row_for_frame[k])
2184 			PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
2185 
2186 		dml32_CalculateRowBandwidth(
2187 				GPUVMEnable,
2188 				myPipe[k].SourcePixelFormat,
2189 				myPipe[k].VRatio,
2190 				myPipe[k].VRatioChroma,
2191 				myPipe[k].DCCEnable,
2192 				myPipe[k].HTotal / myPipe[k].PixelClock,
2193 				MetaRowByteY[k], MetaRowByteC[k],
2194 				meta_row_height[k],
2195 				meta_row_height_chroma[k],
2196 				PixelPTEBytesPerRowY[k],
2197 				PixelPTEBytesPerRowC[k],
2198 				dpte_row_height_luma[k],
2199 				dpte_row_height_chroma[k],
2200 
2201 				/* Output */
2202 				&meta_row_bw[k],
2203 				&dpte_row_bw[k]);
2204 #ifdef __DML_VBA_DEBUG__
2205 		dml_print("DML::%s: k=%d, use_one_row_for_frame        = %d\n",  __func__, k, use_one_row_for_frame[k]);
2206 		dml_print("DML::%s: k=%d, use_one_row_for_frame_flip   = %d\n",
2207 				__func__, k, use_one_row_for_frame_flip[k]);
2208 		dml_print("DML::%s: k=%d, UseMALLForPStateChange       = %d\n",
2209 				__func__, k, UseMALLForPStateChange[k]);
2210 		dml_print("DML::%s: k=%d, dpte_row_height_luma         = %d\n",  __func__, k, dpte_row_height_luma[k]);
2211 		dml_print("DML::%s: k=%d, dpte_row_width_luma_ub       = %d\n",
2212 				__func__, k, dpte_row_width_luma_ub[k]);
2213 		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY         = %d\n",  __func__, k, PixelPTEBytesPerRowY[k]);
2214 		dml_print("DML::%s: k=%d, dpte_row_height_chroma       = %d\n",
2215 				__func__, k, dpte_row_height_chroma[k]);
2216 		dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub     = %d\n",
2217 				__func__, k, dpte_row_width_chroma_ub[k]);
2218 		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC         = %d\n",  __func__, k, PixelPTEBytesPerRowC[k]);
2219 		dml_print("DML::%s: k=%d, PixelPTEBytesPerRow          = %d\n",  __func__, k, PixelPTEBytesPerRow[k]);
2220 		dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded     = %d\n",
2221 				__func__, k, PTEBufferSizeNotExceeded[k]);
2222 		dml_print("DML::%s: k=%d, PTE_BUFFER_MODE              = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2223 		dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE           = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2224 #endif
2225 	}
2226 } // CalculateVMRowAndSwath
2227 
2228 unsigned int dml32_CalculateVMAndRowBytes(
2229 		bool ViewportStationary,
2230 		bool DCCEnable,
2231 		unsigned int NumberOfDPPs,
2232 		unsigned int BlockHeight256Bytes,
2233 		unsigned int BlockWidth256Bytes,
2234 		enum source_format_class SourcePixelFormat,
2235 		unsigned int SurfaceTiling,
2236 		unsigned int BytePerPixel,
2237 		enum dm_rotation_angle SourceRotation,
2238 		double SwathWidth,
2239 		unsigned int ViewportHeight,
2240 		unsigned int    ViewportXStart,
2241 		unsigned int    ViewportYStart,
2242 		bool GPUVMEnable,
2243 		bool HostVMEnable,
2244 		unsigned int HostVMMaxNonCachedPageTableLevels,
2245 		unsigned int GPUVMMaxPageTableLevels,
2246 		unsigned int GPUVMMinPageSizeKBytes,
2247 		unsigned int HostVMMinPageSize,
2248 		unsigned int PTEBufferSizeInRequests,
2249 		unsigned int Pitch,
2250 		unsigned int DCCMetaPitch,
2251 		unsigned int MacroTileWidth,
2252 		unsigned int MacroTileHeight,
2253 
2254 		/* Output */
2255 		unsigned int *MetaRowByte,
2256 		unsigned int *PixelPTEBytesPerRow,
2257 		unsigned int    *dpte_row_width_ub,
2258 		unsigned int *dpte_row_height,
2259 		unsigned int *dpte_row_height_linear,
2260 		unsigned int    *PixelPTEBytesPerRow_one_row_per_frame,
2261 		unsigned int    *dpte_row_width_ub_one_row_per_frame,
2262 		unsigned int    *dpte_row_height_one_row_per_frame,
2263 		unsigned int *MetaRequestWidth,
2264 		unsigned int *MetaRequestHeight,
2265 		unsigned int *meta_row_width,
2266 		unsigned int *meta_row_height,
2267 		unsigned int *PixelPTEReqWidth,
2268 		unsigned int *PixelPTEReqHeight,
2269 		unsigned int *PTERequestSize,
2270 		unsigned int    *DPDE0BytesFrame,
2271 		unsigned int    *MetaPTEBytesFrame)
2272 {
2273 	unsigned int MPDEBytesFrame;
2274 	unsigned int DCCMetaSurfaceBytes;
2275 	unsigned int ExtraDPDEBytesFrame;
2276 	unsigned int PDEAndMetaPTEBytesFrame;
2277 	unsigned int HostVMDynamicLevels = 0;
2278 	unsigned int    MacroTileSizeBytes;
2279 	unsigned int    vp_height_meta_ub;
2280 	unsigned int    vp_height_dpte_ub;
2281 	unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2282 
2283 	if (GPUVMEnable == true && HostVMEnable == true) {
2284 		if (HostVMMinPageSize < 2048)
2285 			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2286 		else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2287 			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2288 		else
2289 			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2290 	}
2291 
2292 	*MetaRequestHeight = 8 * BlockHeight256Bytes;
2293 	*MetaRequestWidth = 8 * BlockWidth256Bytes;
2294 	if (SurfaceTiling == dm_sw_linear) {
2295 		*meta_row_height = 32;
2296 		*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2297 				- dml_floor(ViewportXStart, *MetaRequestWidth);
2298 	} else if (!IsVertical(SourceRotation)) {
2299 		*meta_row_height = *MetaRequestHeight;
2300 		if (ViewportStationary && NumberOfDPPs == 1) {
2301 			*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2302 					*MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2303 		} else {
2304 			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2305 		}
2306 		*MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2307 	} else {
2308 		*meta_row_height = *MetaRequestWidth;
2309 		if (ViewportStationary && NumberOfDPPs == 1) {
2310 			*meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2311 					*MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2312 		} else {
2313 			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2314 		}
2315 		*MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2316 	}
2317 
2318 	if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2319 		vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2320 				64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2321 	} else if (!IsVertical(SourceRotation)) {
2322 		vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2323 	} else {
2324 		vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2325 	}
2326 
2327 	DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2328 
2329 	if (GPUVMEnable == true) {
2330 		*MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2331 				(8 * 4.0 * 1024), 1) + 1) * 64;
2332 		MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2333 	} else {
2334 		*MetaPTEBytesFrame = 0;
2335 		MPDEBytesFrame = 0;
2336 	}
2337 
2338 	if (DCCEnable != true) {
2339 		*MetaPTEBytesFrame = 0;
2340 		MPDEBytesFrame = 0;
2341 		*MetaRowByte = 0;
2342 	}
2343 
2344 	MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2345 
2346 	if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2347 		if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2348 			vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2349 					MacroTileHeight - 1, MacroTileHeight) -
2350 					dml_floor(ViewportYStart, MacroTileHeight);
2351 		} else if (!IsVertical(SourceRotation)) {
2352 			vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2353 		} else {
2354 			vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2355 		}
2356 		*DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2357 				(8 * 2097152), 1) + 1);
2358 		ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2359 	} else {
2360 		*DPDE0BytesFrame = 0;
2361 		ExtraDPDEBytesFrame = 0;
2362 		vp_height_dpte_ub = 0;
2363 	}
2364 
2365 	PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2366 
2367 #ifdef __DML_VBA_DEBUG__
2368 	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2369 	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2370 	dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2371 	dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2372 	dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2373 	dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2374 	dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2375 	dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2376 	dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2377 	dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2378 	dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2379 	dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2380 	dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2381 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2382 	dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2383 	dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2384 	dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2385 #endif
2386 
2387 	if (HostVMEnable == true)
2388 		PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2389 
2390 	if (SurfaceTiling == dm_sw_linear) {
2391 		*PixelPTEReqHeight = 1;
2392 		*PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2393 		PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2394 		*PTERequestSize = 64;
2395 	} else if (GPUVMMinPageSizeKBytes == 4) {
2396 		*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2397 		*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2398 		*PTERequestSize = 128;
2399 	} else {
2400 		*PixelPTEReqHeight = MacroTileHeight;
2401 		*PixelPTEReqWidth = 8 *  1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2402 		*PTERequestSize = 64;
2403 	}
2404 #ifdef __DML_VBA_DEBUG__
2405 	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2406 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2407 	dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2408 	dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2409 	dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2410 	dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2411 	dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2412 #endif
2413 
2414 	*dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2415 	*dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2416 			(double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2417 					(double) *PixelPTEReqWidth;
2418 	*PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2419 			*PTERequestSize;
2420 
2421 	if (SurfaceTiling == dm_sw_linear) {
2422 		*dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2423 				*PixelPTEReqWidth / Pitch), 1));
2424 #ifdef __DML_VBA_DEBUG__
2425 		dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2426 				PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2427 		dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2428 				dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2429 		dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2430 				dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2431 		dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2432 				1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2433 						*PixelPTEReqWidth / Pitch), 1));
2434 		dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2435 #endif
2436 		*dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2437 				(double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2438 		*PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2439 
2440 		// VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2441 		*dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2442 				PixelPTEReqWidth_linear / Pitch), 1);
2443 		if (*dpte_row_height_linear > 128)
2444 			*dpte_row_height_linear = 128;
2445 
2446 	} else if (!IsVertical(SourceRotation)) {
2447 		*dpte_row_height = *PixelPTEReqHeight;
2448 
2449 		if (GPUVMMinPageSizeKBytes > 64) {
2450 			*dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2451 					*PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2452 		} else if (ViewportStationary && (NumberOfDPPs == 1)) {
2453 			*dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2454 					*PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2455 					dml_floor(ViewportXStart, *PixelPTEReqWidth);
2456 		} else {
2457 			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2458 					*PixelPTEReqWidth;
2459 		}
2460 
2461 		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2462 	} else {
2463 		*dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2464 
2465 		if (ViewportStationary && (NumberOfDPPs == 1)) {
2466 			*dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2467 					*PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2468 		} else {
2469 			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2470 					* *PixelPTEReqHeight;
2471 		}
2472 
2473 		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2474 	}
2475 
2476 	if (GPUVMEnable != true)
2477 		*PixelPTEBytesPerRow = 0;
2478 	if (HostVMEnable == true)
2479 		*PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2480 
2481 #ifdef __DML_VBA_DEBUG__
2482 	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2483 	dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2484 	dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2485 	dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2486 	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2487 	dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2488 	dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2489 	dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2490 			__func__, *dpte_row_width_ub_one_row_per_frame);
2491 	dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2492 			__func__, *PixelPTEBytesPerRow_one_row_per_frame);
2493 	dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2494 			*MetaPTEBytesFrame);
2495 #endif
2496 
2497 	return PDEAndMetaPTEBytesFrame;
2498 } // CalculateVMAndRowBytes
2499 
2500 double dml32_CalculatePrefetchSourceLines(
2501 		double VRatio,
2502 		unsigned int VTaps,
2503 		bool Interlace,
2504 		bool ProgressiveToInterlaceUnitInOPP,
2505 		unsigned int SwathHeight,
2506 		enum dm_rotation_angle SourceRotation,
2507 		bool ViewportStationary,
2508 		double SwathWidth,
2509 		unsigned int ViewportHeight,
2510 		unsigned int ViewportXStart,
2511 		unsigned int ViewportYStart,
2512 
2513 		/* Output */
2514 		double *VInitPreFill,
2515 		unsigned int *MaxNumSwath)
2516 {
2517 
2518 	unsigned int vp_start_rot;
2519 	unsigned int sw0_tmp;
2520 	unsigned int MaxPartialSwath;
2521 	double numLines;
2522 
2523 #ifdef __DML_VBA_DEBUG__
2524 	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2525 	dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2526 	dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2527 	dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2528 	dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2529 	dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2530 #endif
2531 	if (ProgressiveToInterlaceUnitInOPP)
2532 		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2533 	else
2534 		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2535 
2536 	if (ViewportStationary) {
2537 		if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2538 			vp_start_rot = SwathHeight -
2539 					(((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2540 		} else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2541 			vp_start_rot = ViewportXStart;
2542 		} else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2543 			vp_start_rot = SwathHeight -
2544 					(((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2545 		} else {
2546 			vp_start_rot = ViewportYStart;
2547 		}
2548 		sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2549 		if (sw0_tmp < *VInitPreFill)
2550 			*MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2551 		else
2552 			*MaxNumSwath = 1;
2553 		MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2554 	} else {
2555 		*MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2556 		if (*VInitPreFill > 1)
2557 			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2558 		else
2559 			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2560 	}
2561 	numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2562 
2563 #ifdef __DML_VBA_DEBUG__
2564 	dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2565 	dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2566 	dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2567 	dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2568 	dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2569 #endif
2570 	return numLines;
2571 
2572 } // CalculatePrefetchSourceLines
2573 
2574 void dml32_CalculateMALLUseForStaticScreen(
2575 		unsigned int NumberOfActiveSurfaces,
2576 		unsigned int MALLAllocatedForDCNFinal,
2577 		enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2578 		unsigned int SurfaceSizeInMALL[],
2579 		bool one_row_per_frame_fits_in_buffer[],
2580 
2581 		/* output */
2582 		bool UsesMALLForStaticScreen[])
2583 {
2584 	unsigned int k;
2585 	unsigned int SurfaceToAddToMALL;
2586 	bool CanAddAnotherSurfaceToMALL;
2587 	unsigned int TotalSurfaceSizeInMALL;
2588 
2589 	TotalSurfaceSizeInMALL = 0;
2590 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2591 		UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2592 		if (UsesMALLForStaticScreen[k])
2593 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2594 #ifdef __DML_VBA_DEBUG__
2595 		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2596 		dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n",  __func__, k, TotalSurfaceSizeInMALL);
2597 #endif
2598 	}
2599 
2600 	SurfaceToAddToMALL = 0;
2601 	CanAddAnotherSurfaceToMALL = true;
2602 	while (CanAddAnotherSurfaceToMALL) {
2603 		CanAddAnotherSurfaceToMALL = false;
2604 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2605 			if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2606 					!UsesMALLForStaticScreen[k] &&
2607 					UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2608 					one_row_per_frame_fits_in_buffer[k] &&
2609 					(!CanAddAnotherSurfaceToMALL ||
2610 					SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2611 				CanAddAnotherSurfaceToMALL = true;
2612 				SurfaceToAddToMALL = k;
2613 #ifdef __DML_VBA_DEBUG__
2614 				dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2615 						__func__, k, UseMALLForStaticScreen[k]);
2616 #endif
2617 			}
2618 		}
2619 		if (CanAddAnotherSurfaceToMALL) {
2620 			UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2621 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2622 
2623 #ifdef __DML_VBA_DEBUG__
2624 			dml_print("DML::%s: SurfaceToAddToMALL       = %d\n",  __func__, SurfaceToAddToMALL);
2625 			dml_print("DML::%s: TotalSurfaceSizeInMALL   = %d\n",  __func__, TotalSurfaceSizeInMALL);
2626 #endif
2627 
2628 		}
2629 	}
2630 }
2631 
2632 void dml32_CalculateRowBandwidth(
2633 		bool GPUVMEnable,
2634 		enum source_format_class SourcePixelFormat,
2635 		double VRatio,
2636 		double VRatioChroma,
2637 		bool DCCEnable,
2638 		double LineTime,
2639 		unsigned int MetaRowByteLuma,
2640 		unsigned int MetaRowByteChroma,
2641 		unsigned int meta_row_height_luma,
2642 		unsigned int meta_row_height_chroma,
2643 		unsigned int PixelPTEBytesPerRowLuma,
2644 		unsigned int PixelPTEBytesPerRowChroma,
2645 		unsigned int dpte_row_height_luma,
2646 		unsigned int dpte_row_height_chroma,
2647 		/* Output */
2648 		double *meta_row_bw,
2649 		double *dpte_row_bw)
2650 {
2651 	if (DCCEnable != true) {
2652 		*meta_row_bw = 0;
2653 	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2654 			SourcePixelFormat == dm_rgbe_alpha) {
2655 		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2656 				MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2657 	} else {
2658 		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2659 	}
2660 
2661 	if (GPUVMEnable != true) {
2662 		*dpte_row_bw = 0;
2663 	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2664 			SourcePixelFormat == dm_rgbe_alpha) {
2665 		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2666 				VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2667 	} else {
2668 		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2669 	}
2670 }
2671 
/*
 * dml32_CalculateUrgentLatency - urgent latency used by the mode checks.
 *
 * Takes the largest of the three supplied latencies (via dml_max3).  When
 * DoUrgentLatencyAdjustment is set, adds
 * UrgentLatencyAdjustmentFabricClockComponent *
 * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool   DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double worst_latency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return worst_latency;

	/* Fabric-clock dependent correction on top of the worst-case latency. */
	return worst_latency + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2690 
2691 void dml32_CalculateUrgentBurstFactor(
2692 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2693 		unsigned int    swath_width_luma_ub,
2694 		unsigned int    swath_width_chroma_ub,
2695 		unsigned int SwathHeightY,
2696 		unsigned int SwathHeightC,
2697 		double  LineTime,
2698 		double  UrgentLatency,
2699 		double  CursorBufferSize,
2700 		unsigned int CursorWidth,
2701 		unsigned int CursorBPP,
2702 		double  VRatio,
2703 		double  VRatioC,
2704 		double  BytePerPixelInDETY,
2705 		double  BytePerPixelInDETC,
2706 		unsigned int    DETBufferSizeY,
2707 		unsigned int    DETBufferSizeC,
2708 		/* Output */
2709 		double *UrgentBurstFactorCursor,
2710 		double *UrgentBurstFactorLuma,
2711 		double *UrgentBurstFactorChroma,
2712 		bool   *NotEnoughUrgentLatencyHiding)
2713 {
2714 	double       LinesInDETLuma;
2715 	double       LinesInDETChroma;
2716 	unsigned int LinesInCursorBuffer;
2717 	double       CursorBufferSizeInTime;
2718 	double       DETBufferSizeInTimeLuma;
2719 	double       DETBufferSizeInTimeChroma;
2720 
2721 	*NotEnoughUrgentLatencyHiding = 0;
2722 
2723 	if (CursorWidth > 0) {
2724 		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2725 				(CursorWidth * CursorBPP / 8.0)), 1.0);
2726 		if (VRatio > 0) {
2727 			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2728 			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2729 				*NotEnoughUrgentLatencyHiding = 1;
2730 				*UrgentBurstFactorCursor = 0;
2731 			} else {
2732 				*UrgentBurstFactorCursor = CursorBufferSizeInTime /
2733 						(CursorBufferSizeInTime - UrgentLatency);
2734 			}
2735 		} else {
2736 			*UrgentBurstFactorCursor = 1;
2737 		}
2738 	}
2739 
2740 	LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2741 			DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2742 
2743 	if (VRatio > 0) {
2744 		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2745 		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2746 			*NotEnoughUrgentLatencyHiding = 1;
2747 			*UrgentBurstFactorLuma = 0;
2748 		} else {
2749 			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2750 		}
2751 	} else {
2752 		*UrgentBurstFactorLuma = 1;
2753 	}
2754 
2755 	if (BytePerPixelInDETC > 0) {
2756 		LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2757 					1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2758 					/ swath_width_chroma_ub;
2759 
2760 		if (VRatio > 0) {
2761 			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2762 			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2763 				*NotEnoughUrgentLatencyHiding = 1;
2764 				*UrgentBurstFactorChroma = 0;
2765 			} else {
2766 				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2767 						/ (DETBufferSizeInTimeChroma - UrgentLatency);
2768 			}
2769 		} else {
2770 			*UrgentBurstFactorChroma = 1;
2771 		}
2772 	}
2773 } // CalculateUrgentBurstFactor
2774 
2775 void dml32_CalculateDCFCLKDeepSleep(
2776 		unsigned int NumberOfActiveSurfaces,
2777 		unsigned int BytePerPixelY[],
2778 		unsigned int BytePerPixelC[],
2779 		double VRatio[],
2780 		double VRatioChroma[],
2781 		double SwathWidthY[],
2782 		double SwathWidthC[],
2783 		unsigned int DPPPerSurface[],
2784 		double HRatio[],
2785 		double HRatioChroma[],
2786 		double PixelClock[],
2787 		double PSCL_THROUGHPUT[],
2788 		double PSCL_THROUGHPUT_CHROMA[],
2789 		double Dppclk[],
2790 		double ReadBandwidthLuma[],
2791 		double ReadBandwidthChroma[],
2792 		unsigned int ReturnBusWidth,
2793 
2794 		/* Output */
2795 		double *DCFClkDeepSleep)
2796 {
2797 	unsigned int k;
2798 	double   DisplayPipeLineDeliveryTimeLuma;
2799 	double   DisplayPipeLineDeliveryTimeChroma;
2800 	double   DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2801 	double ReadBandwidth = 0.0;
2802 
2803 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2804 
2805 		if (VRatio[k] <= 1) {
2806 			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2807 					/ PixelClock[k];
2808 		} else {
2809 			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2810 		}
2811 		if (BytePerPixelC[k] == 0) {
2812 			DisplayPipeLineDeliveryTimeChroma = 0;
2813 		} else {
2814 			if (VRatioChroma[k] <= 1) {
2815 				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2816 						DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2817 			} else {
2818 				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2819 						/ Dppclk[k];
2820 			}
2821 		}
2822 
2823 		if (BytePerPixelC[k] > 0) {
2824 			DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2825 					BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2826 					__DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2827 					32.0 / DisplayPipeLineDeliveryTimeChroma);
2828 		} else {
2829 			DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2830 					64.0 / DisplayPipeLineDeliveryTimeLuma;
2831 		}
2832 		DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2833 
2834 #ifdef __DML_VBA_DEBUG__
2835 		dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2836 		dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2837 #endif
2838 	}
2839 
2840 	for (k = 0; k < NumberOfActiveSurfaces; ++k)
2841 		ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2842 
2843 	*DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2844 
2845 #ifdef __DML_VBA_DEBUG__
2846 	dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2847 	dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2848 	dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2849 	dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2850 #endif
2851 
2852 	for (k = 0; k < NumberOfActiveSurfaces; ++k)
2853 		*DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2854 #ifdef __DML_VBA_DEBUG__
2855 	dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2856 #endif
2857 } // CalculateDCFCLKDeepSleep
2858 
2859 double dml32_CalculateWriteBackDelay(
2860 		enum source_format_class WritebackPixelFormat,
2861 		double WritebackHRatio,
2862 		double WritebackVRatio,
2863 		unsigned int WritebackVTaps,
2864 		unsigned int         WritebackDestinationWidth,
2865 		unsigned int         WritebackDestinationHeight,
2866 		unsigned int         WritebackSourceHeight,
2867 		unsigned int HTotal)
2868 {
2869 	double CalculateWriteBackDelay;
2870 	double Line_length;
2871 	double Output_lines_last_notclamped;
2872 	double WritebackVInit;
2873 
2874 	WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
2875 	Line_length = dml_max((double) WritebackDestinationWidth,
2876 			dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
2877 	Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
2878 			dml_ceil(((double)WritebackSourceHeight -
2879 					(double) WritebackVInit) / (double)WritebackVRatio, 1.0);
2880 	if (Output_lines_last_notclamped < 0) {
2881 		CalculateWriteBackDelay = 0;
2882 	} else {
2883 		CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
2884 				(HTotal - WritebackDestinationWidth) + 80;
2885 	}
2886 	return CalculateWriteBackDelay;
2887 }
2888 
2889 void dml32_UseMinimumDCFCLK(
2890 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2891 		bool DRRDisplay[],
2892 		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2893 		unsigned int MaxInterDCNTileRepeaters,
2894 		unsigned int MaxPrefetchMode,
2895 		double DRAMClockChangeLatencyFinal,
2896 		double FCLKChangeLatency,
2897 		double SREnterPlusExitTime,
2898 		unsigned int ReturnBusWidth,
2899 		unsigned int RoundTripPingLatencyCycles,
2900 		unsigned int ReorderingBytes,
2901 		unsigned int PixelChunkSizeInKByte,
2902 		unsigned int MetaChunkSize,
2903 		bool GPUVMEnable,
2904 		unsigned int GPUVMMaxPageTableLevels,
2905 		bool HostVMEnable,
2906 		unsigned int NumberOfActiveSurfaces,
2907 		double HostVMMinPageSize,
2908 		unsigned int HostVMMaxNonCachedPageTableLevels,
2909 		bool DynamicMetadataVMEnabled,
2910 		bool ImmediateFlipRequirement,
2911 		bool ProgressiveToInterlaceUnitInOPP,
2912 		double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2913 		double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2914 		unsigned int VTotal[],
2915 		unsigned int VActive[],
2916 		unsigned int DynamicMetadataTransmittedBytes[],
2917 		unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2918 		bool Interlace[],
2919 		double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2920 		double RequiredDISPCLK[][2],
2921 		double UrgLatency[],
2922 		unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2923 		double ProjectedDCFClkDeepSleep[][2],
2924 		double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2925 		unsigned int TotalNumberOfActiveDPP[][2],
2926 		unsigned int TotalNumberOfDCCActiveDPP[][2],
2927 		unsigned int dpte_group_bytes[],
2928 		double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2929 		double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2930 		unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2931 		unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2932 		unsigned int BytePerPixelY[],
2933 		unsigned int BytePerPixelC[],
2934 		unsigned int HTotal[],
2935 		double PixelClock[],
2936 		double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2937 		double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2938 		double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2939 		bool DynamicMetadataEnable[],
2940 		double ReadBandwidthLuma[],
2941 		double ReadBandwidthChroma[],
2942 		double DCFCLKPerState[],
2943 		/* Output */
2944 		double DCFCLKState[][2])
2945 {
2946 	unsigned int i, j, k;
2947 	unsigned int     dummy1;
2948 	double dummy2, dummy3;
2949 	double   NormalEfficiency;
2950 	double   TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2951 
2952 	NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2953 	for  (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2954 		for  (j = 0; j <= 1; ++j) {
2955 			double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2956 			double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2957 			double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2958 			double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2959 			double MinimumTWait = 0.0;
2960 			double DPTEBandwidth;
2961 			double DCFCLKRequiredForAverageBandwidth;
2962 			unsigned int ExtraLatencyBytes;
2963 			double ExtraLatencyCycles;
2964 			double DCFCLKRequiredForPeakBandwidth;
2965 			unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2966 			double MinimumTvmPlus2Tr0;
2967 
2968 			TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
2969 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2970 				TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
2971 						+ NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
2972 								/ (15.75 * HTotal[k] / PixelClock[k]);
2973 			}
2974 
2975 			for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
2976 				NoOfDPPState[k] = NoOfDPP[i][j][k];
2977 
2978 			DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
2979 			DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
2980 
2981 			ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
2982 					TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
2983 					TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
2984 					NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
2985 					HostVMMaxNonCachedPageTableLevels);
2986 			ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
2987 					+ ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
2988 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2989 				double DCFCLKCyclesRequiredInPrefetch;
2990 				double PrefetchTime;
2991 
2992 				PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
2993 						* swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
2994 						+ PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
2995 								* BytePerPixelC[k]) / NormalEfficiency
2996 						/ ReturnBusWidth;
2997 				DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
2998 						+ PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
2999 								/ NormalEfficiency / ReturnBusWidth
3000 								* (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
3001 						+ 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
3002 								/ ReturnBusWidth
3003 						+ 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
3004 						+ PixelDCFCLKCyclesRequiredInPrefetch[k];
3005 				PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
3006 						* HTotal[k] / PixelClock[k];
3007 				DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
3008 						DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
3009 						UrgLatency[i] * GPUVMMaxPageTableLevels *
3010 						(HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
3011 
3012 				MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
3013 						UseMALLForPStateChange[k],
3014 						SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3015 						DRRDisplay[k],
3016 						DRAMClockChangeLatencyFinal,
3017 						FCLKChangeLatency,
3018 						UrgLatency[i],
3019 						SREnterPlusExitTime);
3020 
3021 				PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3022 						MinimumTWait - UrgLatency[i] *
3023 						((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3024 						GPUVMMaxPageTableLevels - 2) *  (HostVMEnable == true ?
3025 						HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3026 						DynamicMetadataVMExtraLatency[k];
3027 
3028 				if (PrefetchTime > 0) {
3029 					double ExpectedVRatioPrefetch;
3030 
3031 					ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3032 							PixelDCFCLKCyclesRequiredInPrefetch[k] /
3033 							DCFCLKCyclesRequiredInPrefetch);
3034 					DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3035 							PixelDCFCLKCyclesRequiredInPrefetch[k] /
3036 							PrefetchPixelLinesTime[k] *
3037 							dml_max(1.0, ExpectedVRatioPrefetch) *
3038 							dml_max(1.0, ExpectedVRatioPrefetch / 4);
3039 					if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3040 						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3041 								DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3042 								NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3043 								NormalEfficiency / ReturnBusWidth;
3044 					}
3045 				} else {
3046 					DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3047 				}
3048 				if (DynamicMetadataEnable[k] == true) {
3049 					double TSetupPipe;
3050 					double TdmbfPipe;
3051 					double TdmsksPipe;
3052 					double TdmecPipe;
3053 					double AllowedTimeForUrgentExtraLatency;
3054 
3055 					dml32_CalculateVUpdateAndDynamicMetadataParameters(
3056 							MaxInterDCNTileRepeaters,
3057 							RequiredDPPCLKPerSurface[i][j][k],
3058 							RequiredDISPCLK[i][j],
3059 							ProjectedDCFClkDeepSleep[i][j],
3060 							PixelClock[k],
3061 							HTotal[k],
3062 							VTotal[k] - VActive[k],
3063 							DynamicMetadataTransmittedBytes[k],
3064 							DynamicMetadataLinesBeforeActiveRequired[k],
3065 							Interlace[k],
3066 							ProgressiveToInterlaceUnitInOPP,
3067 
3068 							/* output */
3069 							&TSetupPipe,
3070 							&TdmbfPipe,
3071 							&TdmecPipe,
3072 							&TdmsksPipe,
3073 							&dummy1,
3074 							&dummy2,
3075 							&dummy3);
3076 					AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3077 							PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3078 							TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3079 					if (AllowedTimeForUrgentExtraLatency > 0)
3080 						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3081 								dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3082 								ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3083 					else
3084 						DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3085 				}
3086 			}
3087 			DCFCLKRequiredForPeakBandwidth = 0;
3088 			for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3089 				DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3090 						DCFCLKRequiredForPeakBandwidthPerSurface[k];
3091 			}
3092 			MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3093 					(HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3094 					(HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3095 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3096 				double MaximumTvmPlus2Tr0PlusTsw;
3097 
3098 				MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3099 						PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3100 				if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3101 					DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3102 				} else {
3103 					DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3104 							2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3105 								MinimumTvmPlus2Tr0 -
3106 								PrefetchPixelLinesTime[k] / 4),
3107 							(2 * ExtraLatencyCycles +
3108 								PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3109 								(MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3110 				}
3111 			}
3112 			DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3113 					dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3114 		}
3115 	}
3116 }
3117 
/*
 * dml32_CalculateExtraLatencyBytes - total the bytes that contribute to the
 * extra return latency.
 *
 * The result is the sum of:
 *   - ReorderingBytes,
 *   - (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
 *      TotalNumberOfDCCActiveDPP * MetaChunkSize) scaled by 1024,
 *   - when GPUVMEnable is set, for every surface k in
 *     [0, NumberOfActiveSurfaces):
 *       NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) *
 *       HostVMInefficiencyFactor
 *
 * "levels" is zero unless both GPUVMEnable and HostVMEnable are set.  In that
 * case it is HostVMMaxNonCachedPageTableLevels, reduced by one when
 * HostVMMinPageSize is in [2048, 1048576) and by two when it is larger still,
 * with the reduced value clamped at zero through dml_max().
 *
 * The sum is accumulated as a double and converted to unsigned int on return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int host_vm_levels = 0;
	unsigned int chunk_kbytes;
	unsigned int group_multiplier;
	unsigned int surf;
	double total_bytes;

	/* Host VM page-table levels only matter when both VM layers are on. */
	if (GPUVMEnable && HostVMEnable) {
		if (HostVMMinPageSize < 2048)
			host_vm_levels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			host_vm_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			host_vm_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	/* Chunk sizes are summed in integer arithmetic, then scaled by 1024. */
	chunk_kbytes = TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize;
	total_bytes = ReorderingBytes + chunk_kbytes * 1024.0;

	if (!GPUVMEnable)
		return total_bytes;

	/* Per-surface page-table group traffic, only present with GPUVM on. */
	group_multiplier = 1 + 8 * host_vm_levels;
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf)
		total_bytes += NumberOfDPP[surf] * dpte_group_bytes[surf] *
				group_multiplier * HostVMInefficiencyFactor;

	return total_bytes;
}
3158 
/*
 * dml32_CalculateVUpdateAndDynamicMetadataParameters - compute the VUpdate /
 * VReady pixel offsets and the dynamic-metadata timing terms.
 *
 * Inputs:
 *   MaxInterDCNTileRepeaters - multiplies the per-repeater delay
 *                              (2 / Dppclk + 3 / Dispclk).
 *   Dppclk, Dispclk, DCFClkDeepSleep, PixelClock
 *                            - clock values; each is used as a divisor.
 *   HTotal                   - horizontal total, in pixels.
 *   VBlank                   - vertical blank size, in lines.
 *   DynamicMetadataTransmittedBytes
 *                            - metadata size used for *Tdmbf.
 *   DynamicMetadataLinesBeforeActiveRequired
 *                            - when zero, *Tdmsks is half of the VBlank
 *                              duration; otherwise it is this many line times.
 *   InterlaceEnable, ProgressiveToInterlaceUnitInOPP
 *                            - when InterlaceEnable is 1 and the
 *                              progressive-to-interlace unit is not in OPP,
 *                              *Tdmsks is halved.
 *
 * Outputs (all written unconditionally):
 *   *VUpdateWidthPix, *VReadyOffsetPix - rounded up to whole pixels.
 *   *VUpdateOffsetPix                  - ceil(HTotal / 4).
 *   *TSetup  - the three pixel quantities above, summed and divided by
 *              PixelClock.
 *   *Tdmbf   - DynamicMetadataTransmittedBytes / 4 / Dispclk.
 *   *Tdmec   - one line time (HTotal / PixelClock).
 *   *Tdmsks  - see DynamicMetadataLinesBeforeActiveRequired above.
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix  =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix  = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/*
	 * VUpdateWidthPix and VReadyOffsetPix are doubles, so they must be
	 * printed with %f; passing a double to %d is a format/argument mismatch.
	 */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3213 
3214 double dml32_CalculateTWait(
3215 		unsigned int PrefetchMode,
3216 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3217 		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3218 		bool DRRDisplay,
3219 		double DRAMClockChangeLatency,
3220 		double FCLKChangeLatency,
3221 		double UrgentLatency,
3222 		double SREnterPlusExitTime)
3223 {
3224 	double TWait = 0.0;
3225 
3226 	if (PrefetchMode == 0 &&
3227 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3228 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3229 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3230 			!(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3231 		TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3232 	} else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3233 		TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3234 	} else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3235 		TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3236 	} else {
3237 		TWait = UrgentLatency;
3238 	}
3239 
3240 #ifdef __DML_VBA_DEBUG__
3241 	dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3242 	dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3243 #endif
3244 	return TWait;
3245 } // CalculateTWait
3246 
3247 // Function: get_return_bw_mbps
3248 // Megabyte per second
3249 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3250 		const int VoltageLevel,
3251 		const bool HostVMEnable,
3252 		const double DCFCLK,
3253 		const double FabricClock,
3254 		const double DRAMSpeed)
3255 {
3256 	double ReturnBW = 0.;
3257 	double IdealSDPPortBandwidth    = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3258 	double IdealFabricBandwidth     = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3259 	double IdealDRAMBandwidth       = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3260 	double PixelDataOnlyReturnBW    = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3261 			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3262 			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe  :
3263 					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3264 	double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3265 			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3266 			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3267 					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3268 
3269 	if (HostVMEnable != true)
3270 		ReturnBW = PixelDataOnlyReturnBW;
3271 	else
3272 		ReturnBW = PixelMixedWithVMDataReturnBW;
3273 
3274 #ifdef __DML_VBA_DEBUG__
3275 	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3276 	dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3277 	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3278 	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3279 	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3280 	dml_print("DML::%s: IdealSDPPortBandwidth        = %f\n", __func__, IdealSDPPortBandwidth);
3281 	dml_print("DML::%s: IdealFabricBandwidth         = %f\n", __func__, IdealFabricBandwidth);
3282 	dml_print("DML::%s: IdealDRAMBandwidth           = %f\n", __func__, IdealDRAMBandwidth);
3283 	dml_print("DML::%s: PixelDataOnlyReturnBW        = %f\n", __func__, PixelDataOnlyReturnBW);
3284 	dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3285 	dml_print("DML::%s: ReturnBW                     = %f MBps\n", __func__, ReturnBW);
3286 #endif
3287 	return ReturnBW;
3288 }
3289 
3290 // Function: get_return_bw_mbps_vm_only
3291 // Megabyte per second
3292 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3293 		const int VoltageLevel,
3294 		const double DCFCLK,
3295 		const double FabricClock,
3296 		const double DRAMSpeed)
3297 {
3298 	double VMDataOnlyReturnBW = dml_min3(
3299 			soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3300 			FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3301 					* soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3302 			DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3303 					* (VoltageLevel < 2 ?
3304 							soc->pct_ideal_dram_bw_after_urgent_strobe :
3305 							soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3306 #ifdef __DML_VBA_DEBUG__
3307 	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3308 	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3309 	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3310 	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3311 	dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3312 #endif
3313 	return VMDataOnlyReturnBW;
3314 }
3315 
3316 double dml32_CalculateExtraLatency(
3317 		unsigned int RoundTripPingLatencyCycles,
3318 		unsigned int ReorderingBytes,
3319 		double DCFCLK,
3320 		unsigned int TotalNumberOfActiveDPP,
3321 		unsigned int PixelChunkSizeInKByte,
3322 		unsigned int TotalNumberOfDCCActiveDPP,
3323 		unsigned int MetaChunkSize,
3324 		double ReturnBW,
3325 		bool GPUVMEnable,
3326 		bool HostVMEnable,
3327 		unsigned int NumberOfActiveSurfaces,
3328 		unsigned int NumberOfDPP[],
3329 		unsigned int dpte_group_bytes[],
3330 		double HostVMInefficiencyFactor,
3331 		double HostVMMinPageSize,
3332 		unsigned int HostVMMaxNonCachedPageTableLevels)
3333 {
3334 	double ExtraLatencyBytes;
3335 	double ExtraLatency;
3336 
3337 	ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3338 			ReorderingBytes,
3339 			TotalNumberOfActiveDPP,
3340 			PixelChunkSizeInKByte,
3341 			TotalNumberOfDCCActiveDPP,
3342 			MetaChunkSize,
3343 			GPUVMEnable,
3344 			HostVMEnable,
3345 			NumberOfActiveSurfaces,
3346 			NumberOfDPP,
3347 			dpte_group_bytes,
3348 			HostVMInefficiencyFactor,
3349 			HostVMMinPageSize,
3350 			HostVMMaxNonCachedPageTableLevels);
3351 
3352 	ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3353 
3354 #ifdef __DML_VBA_DEBUG__
3355 	dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3356 	dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3357 	dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3358 	dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3359 	dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3360 #endif
3361 
3362 	return ExtraLatency;
3363 } // CalculateExtraLatency
3364 
3365 bool dml32_CalculatePrefetchSchedule(
3366 		double HostVMInefficiencyFactor,
3367 		DmlPipe *myPipe,
3368 		unsigned int DSCDelay,
3369 		double DPPCLKDelaySubtotalPlusCNVCFormater,
3370 		double DPPCLKDelaySCL,
3371 		double DPPCLKDelaySCLLBOnly,
3372 		double DPPCLKDelayCNVCCursor,
3373 		double DISPCLKDelaySubtotal,
3374 		unsigned int DPP_RECOUT_WIDTH,
3375 		enum output_format_class OutputFormat,
3376 		unsigned int MaxInterDCNTileRepeaters,
3377 		unsigned int VStartup,
3378 		unsigned int MaxVStartup,
3379 		unsigned int GPUVMPageTableLevels,
3380 		bool GPUVMEnable,
3381 		bool HostVMEnable,
3382 		unsigned int HostVMMaxNonCachedPageTableLevels,
3383 		double HostVMMinPageSize,
3384 		bool DynamicMetadataEnable,
3385 		bool DynamicMetadataVMEnabled,
3386 		int DynamicMetadataLinesBeforeActiveRequired,
3387 		unsigned int DynamicMetadataTransmittedBytes,
3388 		double UrgentLatency,
3389 		double UrgentExtraLatency,
3390 		double TCalc,
3391 		unsigned int PDEAndMetaPTEBytesFrame,
3392 		unsigned int MetaRowByte,
3393 		unsigned int PixelPTEBytesPerRow,
3394 		double PrefetchSourceLinesY,
3395 		unsigned int SwathWidthY,
3396 		unsigned int VInitPreFillY,
3397 		unsigned int MaxNumSwathY,
3398 		double PrefetchSourceLinesC,
3399 		unsigned int SwathWidthC,
3400 		unsigned int VInitPreFillC,
3401 		unsigned int MaxNumSwathC,
3402 		unsigned int swath_width_luma_ub,
3403 		unsigned int swath_width_chroma_ub,
3404 		unsigned int SwathHeightY,
3405 		unsigned int SwathHeightC,
3406 		double TWait,
3407 		/* Output */
3408 		double   *DSTXAfterScaler,
3409 		double   *DSTYAfterScaler,
3410 		double *DestinationLinesForPrefetch,
3411 		double *PrefetchBandwidth,
3412 		double *DestinationLinesToRequestVMInVBlank,
3413 		double *DestinationLinesToRequestRowInVBlank,
3414 		double *VRatioPrefetchY,
3415 		double *VRatioPrefetchC,
3416 		double *RequiredPrefetchPixDataBWLuma,
3417 		double *RequiredPrefetchPixDataBWChroma,
3418 		bool   *NotEnoughTimeForDynamicMetadata,
3419 		double *Tno_bw,
3420 		double *prefetch_vmrow_bw,
3421 		double *Tdmdl_vm,
3422 		double *Tdmdl,
3423 		double *TSetup,
3424 		unsigned int   *VUpdateOffsetPix,
3425 		double   *VUpdateWidthPix,
3426 		double   *VReadyOffsetPix)
3427 {
3428 	bool MyError = false;
3429 	unsigned int DPPCycles, DISPCLKCycles;
3430 	double DSTTotalPixelsAfterScaler;
3431 	double LineTime;
3432 	double dst_y_prefetch_equ;
3433 	double prefetch_bw_oto;
3434 	double Tvm_oto;
3435 	double Tr0_oto;
3436 	double Tvm_oto_lines;
3437 	double Tr0_oto_lines;
3438 	double dst_y_prefetch_oto;
3439 	double TimeForFetchingMetaPTE = 0;
3440 	double TimeForFetchingRowInVBlank = 0;
3441 	double LinesToRequestPrefetchPixelData = 0;
3442 	unsigned int HostVMDynamicLevelsTrips;
3443 	double  trip_to_mem;
3444 	double  Tvm_trips;
3445 	double  Tr0_trips;
3446 	double  Tvm_trips_rounded;
3447 	double  Tr0_trips_rounded;
3448 	double  Lsw_oto;
3449 	double  Tpre_rounded;
3450 	double  prefetch_bw_equ;
3451 	double  Tvm_equ;
3452 	double  Tr0_equ;
3453 	double  Tdmbf;
3454 	double  Tdmec;
3455 	double  Tdmsks;
3456 	double  prefetch_sw_bytes;
3457 	double  bytes_pp;
3458 	double  dep_bytes;
3459 	unsigned int max_vratio_pre = __DML_MAX_VRATIO_PRE__;
3460 	double  min_Lsw;
3461 	double  Tsw_est1 = 0;
3462 	double  Tsw_est3 = 0;
3463 
3464 	if (GPUVMEnable == true && HostVMEnable == true)
3465 		HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
3466 	else
3467 		HostVMDynamicLevelsTrips = 0;
3468 #ifdef __DML_VBA_DEBUG__
3469 	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
3470 	dml_print("DML::%s: GPUVMPageTableLevels = %d\n", __func__, GPUVMPageTableLevels);
3471 	dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3472 	dml_print("DML::%s: HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3473 			__func__, HostVMEnable, HostVMInefficiencyFactor);
3474 #endif
3475 	dml32_CalculateVUpdateAndDynamicMetadataParameters(
3476 			MaxInterDCNTileRepeaters,
3477 			myPipe->Dppclk,
3478 			myPipe->Dispclk,
3479 			myPipe->DCFClkDeepSleep,
3480 			myPipe->PixelClock,
3481 			myPipe->HTotal,
3482 			myPipe->VBlank,
3483 			DynamicMetadataTransmittedBytes,
3484 			DynamicMetadataLinesBeforeActiveRequired,
3485 			myPipe->InterlaceEnable,
3486 			myPipe->ProgressiveToInterlaceUnitInOPP,
3487 			TSetup,
3488 
3489 			/* output */
3490 			&Tdmbf,
3491 			&Tdmec,
3492 			&Tdmsks,
3493 			VUpdateOffsetPix,
3494 			VUpdateWidthPix,
3495 			VReadyOffsetPix);
3496 
3497 	LineTime = myPipe->HTotal / myPipe->PixelClock;
3498 	trip_to_mem = UrgentLatency;
3499 	Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
3500 
3501 	if (DynamicMetadataVMEnabled == true)
3502 		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
3503 	else
3504 		*Tdmdl = TWait + UrgentExtraLatency;
3505 
3506 #ifdef __DML_VBA_ALLOW_DELTA__
3507 	if (DynamicMetadataEnable == false)
3508 		*Tdmdl = 0.0;
3509 #endif
3510 
3511 	if (DynamicMetadataEnable == true) {
3512 		if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
3513 			*NotEnoughTimeForDynamicMetadata = true;
3514 #ifdef __DML_VBA_DEBUG__
3515 			dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3516 			dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3517 					__func__, Tdmbf);
3518 			dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3519 			dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3520 					__func__, Tdmsks);
3521 			dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3522 					__func__, *Tdmdl);
3523 #endif
3524 		} else {
3525 			*NotEnoughTimeForDynamicMetadata = false;
3526 		}
3527 	} else {
3528 		*NotEnoughTimeForDynamicMetadata = false;
3529 	}
3530 
3531 	*Tdmdl_vm =  (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true &&
3532 			GPUVMEnable == true ? TWait + Tvm_trips : 0);
3533 
3534 	if (myPipe->ScalerEnabled)
3535 		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
3536 	else
3537 		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
3538 
3539 	DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
3540 
3541 	DISPCLKCycles = DISPCLKDelaySubtotal;
3542 
3543 	if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3544 		return true;
3545 
3546 	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
3547 			myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3548 
3549 	*DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3550 			+ (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3551 			+ ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3552 					myPipe->HActive / 2 : 0)
3553 			+ ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3554 
3555 #ifdef __DML_VBA_DEBUG__
3556 	dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
3557 	dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3558 	dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3559 	dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
3560 	dml_print("DML::%s: DISPCLK: %f\n", __func__,  myPipe->Dispclk);
3561 	dml_print("DML::%s: DSCDelay: %d\n", __func__,  DSCDelay);
3562 	dml_print("DML::%s: ODMMode: %d\n", __func__,  myPipe->ODMMode);
3563 	dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3564 	dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__,  *DSTXAfterScaler);
3565 #endif
3566 
3567 	if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3568 		*DSTYAfterScaler = 1;
3569 	else
3570 		*DSTYAfterScaler = 0;
3571 
3572 	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3573 	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3574 	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3575 #ifdef __DML_VBA_DEBUG__
3576 	dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__,  *DSTXAfterScaler);
3577 	dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3578 #endif
3579 
3580 	MyError = false;
3581 
3582 	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
3583 
3584 	if (GPUVMEnable == true) {
3585 		Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
3586 		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3587 		if (GPUVMPageTableLevels >= 3) {
3588 			*Tno_bw = UrgentExtraLatency + trip_to_mem *
3589 					(double) ((GPUVMPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
3590 		} else if (GPUVMPageTableLevels == 1 && myPipe->DCCEnable != true) {
3591 			Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
3592 					4.0 * LineTime; // VBA_ERROR
3593 			*Tno_bw = UrgentExtraLatency;
3594 		} else {
3595 			*Tno_bw = 0;
3596 		}
3597 	} else if (myPipe->DCCEnable == true) {
3598 		Tvm_trips_rounded = LineTime / 4.0;
3599 		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3600 		*Tno_bw = 0;
3601 	} else {
3602 		Tvm_trips_rounded = LineTime / 4.0;
3603 		Tr0_trips_rounded = LineTime / 2.0;
3604 		*Tno_bw = 0;
3605 	}
3606 	Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
3607 	Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
3608 
3609 	if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3610 			|| myPipe->SourcePixelFormat == dm_420_12) {
3611 		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3612 	} else {
3613 		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3614 	}
3615 
3616 	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3617 			+ PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3618 	prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3619 			prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
3620 
3621 	min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
3622 	min_Lsw = dml_max(min_Lsw, 1.0);
3623 	Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
3624 
3625 	if (GPUVMEnable == true) {
3626 		Tvm_oto = dml_max3(
3627 				Tvm_trips,
3628 				*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
3629 				LineTime / 4.0);
3630 	} else
3631 		Tvm_oto = LineTime / 4.0;
3632 
3633 	if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
3634 		Tr0_oto = dml_max4(
3635 				Tr0_trips,
3636 				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
3637 				(LineTime - Tvm_oto)/2.0,
3638 				LineTime / 4.0);
3639 #ifdef __DML_VBA_DEBUG__
3640 		dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3641 				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
3642 		dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
3643 		dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
3644 		dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
3645 #endif
3646 	} else
3647 		Tr0_oto = (LineTime - Tvm_oto) / 2.0;
3648 
3649 	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
3650 	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
3651 	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
3652 
3653 	dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
3654 			(*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3655 
3656 #ifdef __DML_VBA_DEBUG__
3657 	dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3658 	dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
3659 	dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3660 	dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3661 	dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
3662 	dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3663 	dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3664 	dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3665 	dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3666 	dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3667 	dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3668 	dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
3669 	dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
3670 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3671 	dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3672 	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3673 	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3674 	dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
3675 	dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
3676 	dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
3677 	dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
3678 	dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
3679 	dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
3680 	dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
3681 	dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
3682 	dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
3683 	dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
3684 #endif
3685 
3686 	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
3687 	Tpre_rounded = dst_y_prefetch_equ * LineTime;
3688 #ifdef __DML_VBA_DEBUG__
3689 	dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
3690 	dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
3691 	dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3692 	dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3693 			__func__, VStartup * LineTime);
3694 	dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3695 	dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3696 	dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
3697 	dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3698 	dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3699 	dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3700 	dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3701 			__func__, *DSTYAfterScaler);
3702 #endif
3703 	dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3704 			MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3705 
3706 	if (prefetch_sw_bytes < dep_bytes)
3707 		prefetch_sw_bytes = 2 * dep_bytes;
3708 
3709 	*PrefetchBandwidth = 0;
3710 	*DestinationLinesToRequestVMInVBlank = 0;
3711 	*DestinationLinesToRequestRowInVBlank = 0;
3712 	*VRatioPrefetchY = 0;
3713 	*VRatioPrefetchC = 0;
3714 	*RequiredPrefetchPixDataBWLuma = 0;
3715 	if (dst_y_prefetch_equ > 1) {
3716 		double PrefetchBandwidth1;
3717 		double PrefetchBandwidth2;
3718 		double PrefetchBandwidth3;
3719 		double PrefetchBandwidth4;
3720 
3721 		if (Tpre_rounded - *Tno_bw > 0) {
3722 			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3723 					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3724 					+ prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
3725 			Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
3726 		} else
3727 			PrefetchBandwidth1 = 0;
3728 
3729 		if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
3730 				&& Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
3731 			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3732 					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3733 					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
3734 		}
3735 
3736 		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
3737 			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
3738 			(Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
3739 		else
3740 			PrefetchBandwidth2 = 0;
3741 
3742 		if (Tpre_rounded - Tvm_trips_rounded > 0) {
3743 			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3744 					+ prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
3745 			Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
3746 		} else
3747 			PrefetchBandwidth3 = 0;
3748 
3749 
3750 		if (VStartup == MaxVStartup &&
3751 				(Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
3752 				LineTime - Tvm_trips_rounded > 0) {
3753 			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3754 					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
3755 		}
3756 
3757 		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
3758 			PrefetchBandwidth4 = prefetch_sw_bytes /
3759 					(Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
3760 		} else {
3761 			PrefetchBandwidth4 = 0;
3762 		}
3763 
3764 #ifdef __DML_VBA_DEBUG__
3765 		dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
3766 		dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3767 		dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
3768 		dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
3769 		dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
3770 		dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3771 		dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3772 		dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3773 		dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3774 #endif
3775 		{
3776 			bool Case1OK;
3777 			bool Case2OK;
3778 			bool Case3OK;
3779 
3780 			if (PrefetchBandwidth1 > 0) {
3781 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3782 						>= Tvm_trips_rounded
3783 						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3784 								/ PrefetchBandwidth1 >= Tr0_trips_rounded) {
3785 					Case1OK = true;
3786 				} else {
3787 					Case1OK = false;
3788 				}
3789 			} else {
3790 				Case1OK = false;
3791 			}
3792 
3793 			if (PrefetchBandwidth2 > 0) {
3794 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3795 						>= Tvm_trips_rounded
3796 						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3797 						/ PrefetchBandwidth2 < Tr0_trips_rounded) {
3798 					Case2OK = true;
3799 				} else {
3800 					Case2OK = false;
3801 				}
3802 			} else {
3803 				Case2OK = false;
3804 			}
3805 
3806 			if (PrefetchBandwidth3 > 0) {
3807 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3808 						Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3809 								HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3810 								Tr0_trips_rounded) {
3811 					Case3OK = true;
3812 				} else {
3813 					Case3OK = false;
3814 				}
3815 			} else {
3816 				Case3OK = false;
3817 			}
3818 
3819 			if (Case1OK)
3820 				prefetch_bw_equ = PrefetchBandwidth1;
3821 			else if (Case2OK)
3822 				prefetch_bw_equ = PrefetchBandwidth2;
3823 			else if (Case3OK)
3824 				prefetch_bw_equ = PrefetchBandwidth3;
3825 			else
3826 				prefetch_bw_equ = PrefetchBandwidth4;
3827 
3828 #ifdef __DML_VBA_DEBUG__
3829 			dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3830 			dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3831 			dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3832 			dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
3833 #endif
3834 
3835 			if (prefetch_bw_equ > 0) {
3836 				if (GPUVMEnable == true) {
3837 					Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3838 							HostVMInefficiencyFactor / prefetch_bw_equ,
3839 							Tvm_trips, LineTime / 4);
3840 				} else {
3841 					Tvm_equ = LineTime / 4;
3842 				}
3843 
3844 				if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
3845 					Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3846 							HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
3847 							(LineTime - Tvm_equ) / 2, LineTime / 4);
3848 				} else {
3849 					Tr0_equ = (LineTime - Tvm_equ) / 2;
3850 				}
3851 			} else {
3852 				Tvm_equ = 0;
3853 				Tr0_equ = 0;
3854 #ifdef __DML_VBA_DEBUG__
3855 				dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3856 #endif
3857 			}
3858 		}
3859 
3860 		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
3861 			*DestinationLinesForPrefetch = dst_y_prefetch_oto;
3862 			TimeForFetchingMetaPTE = Tvm_oto;
3863 			TimeForFetchingRowInVBlank = Tr0_oto;
3864 			*PrefetchBandwidth = prefetch_bw_oto;
3865 		} else {
3866 			*DestinationLinesForPrefetch = dst_y_prefetch_equ;
3867 			TimeForFetchingMetaPTE = Tvm_equ;
3868 			TimeForFetchingRowInVBlank = Tr0_equ;
3869 			*PrefetchBandwidth = prefetch_bw_equ;
3870 		}
3871 
3872 		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
3873 
3874 		*DestinationLinesToRequestRowInVBlank =
3875 				dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
3876 
3877 		LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
3878 				*DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3879 
3880 #ifdef __DML_VBA_DEBUG__
3881 		dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3882 		dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3883 				__func__, *DestinationLinesToRequestVMInVBlank);
3884 		dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
3885 		dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3886 		dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3887 				__func__, *DestinationLinesToRequestRowInVBlank);
3888 		dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3889 		dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
3890 #endif
3891 
3892 		if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
3893 			*VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
3894 			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3895 #ifdef __DML_VBA_DEBUG__
3896 			dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3897 			dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3898 			dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3899 #endif
3900 			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3901 				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3902 					*VRatioPrefetchY =
3903 							dml_max((double) PrefetchSourceLinesY /
3904 									LinesToRequestPrefetchPixelData,
3905 									(double) MaxNumSwathY * SwathHeightY /
3906 									(LinesToRequestPrefetchPixelData -
3907 									(VInitPreFillY - 3.0) / 2.0));
3908 					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3909 				} else {
3910 					MyError = true;
3911 					*VRatioPrefetchY = 0;
3912 				}
3913 #ifdef __DML_VBA_DEBUG__
3914 				dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3915 				dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3916 				dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3917 #endif
3918 			}
3919 
3920 			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
3921 			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3922 
3923 #ifdef __DML_VBA_DEBUG__
3924 			dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3925 			dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3926 			dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3927 #endif
3928 			if ((SwathHeightC > 4)) {
3929 				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3930 					*VRatioPrefetchC =
3931 						dml_max(*VRatioPrefetchC,
3932 							(double) MaxNumSwathC * SwathHeightC /
3933 							(LinesToRequestPrefetchPixelData -
3934 							(VInitPreFillC - 3.0) / 2.0));
3935 					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3936 				} else {
3937 					MyError = true;
3938 					*VRatioPrefetchC = 0;
3939 				}
3940 #ifdef __DML_VBA_DEBUG__
3941 				dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3942 				dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3943 				dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3944 #endif
3945 			}
3946 
3947 			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
3948 					/ LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
3949 					/ LineTime;
3950 
3951 #ifdef __DML_VBA_DEBUG__
3952 			dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3953 			dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3954 			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3955 			dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
3956 					__func__, *RequiredPrefetchPixDataBWLuma);
3957 #endif
3958 			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
3959 					LinesToRequestPrefetchPixelData
3960 					* myPipe->BytePerPixelC
3961 					* swath_width_chroma_ub / LineTime;
3962 		} else {
3963 			MyError = true;
3964 #ifdef __DML_VBA_DEBUG__
3965 			dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
3966 					__func__, LinesToRequestPrefetchPixelData);
3967 #endif
3968 			*VRatioPrefetchY = 0;
3969 			*VRatioPrefetchC = 0;
3970 			*RequiredPrefetchPixDataBWLuma = 0;
3971 			*RequiredPrefetchPixDataBWChroma = 0;
3972 		}
3973 #ifdef __DML_VBA_DEBUG__
3974 		dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
3975 			(double)LinesToRequestPrefetchPixelData * LineTime +
3976 			2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
3977 		dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
3978 		dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
3979 			(*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
3980 		dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
3981 		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
3982 			TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
3983 			((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
3984 		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
3985 				PixelPTEBytesPerRow);
3986 #endif
3987 	} else {
3988 		MyError = true;
3989 #ifdef __DML_VBA_DEBUG__
3990 		dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
3991 				__func__, dst_y_prefetch_equ);
3992 #endif
3993 	}
3994 
3995 	{
3996 		double prefetch_vm_bw;
3997 		double prefetch_row_bw;
3998 
3999 		if (PDEAndMetaPTEBytesFrame == 0) {
4000 			prefetch_vm_bw = 0;
4001 		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
4002 #ifdef __DML_VBA_DEBUG__
4003 			dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
4004 			dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
4005 			dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
4006 					__func__, *DestinationLinesToRequestVMInVBlank);
4007 			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
4008 #endif
4009 			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
4010 					(*DestinationLinesToRequestVMInVBlank * LineTime);
4011 #ifdef __DML_VBA_DEBUG__
4012 			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
4013 #endif
4014 		} else {
4015 			prefetch_vm_bw = 0;
4016 			MyError = true;
4017 #ifdef __DML_VBA_DEBUG__
4018 			dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
4019 					__func__, *DestinationLinesToRequestVMInVBlank);
4020 #endif
4021 		}
4022 
4023 		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
4024 			prefetch_row_bw = 0;
4025 		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
4026 			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
4027 					(*DestinationLinesToRequestRowInVBlank * LineTime);
4028 
4029 #ifdef __DML_VBA_DEBUG__
4030 			dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
4031 			dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
4032 			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
4033 					__func__, *DestinationLinesToRequestRowInVBlank);
4034 			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
4035 #endif
4036 		} else {
4037 			prefetch_row_bw = 0;
4038 			MyError = true;
4039 #ifdef __DML_VBA_DEBUG__
4040 			dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
4041 					__func__, *DestinationLinesToRequestRowInVBlank);
4042 #endif
4043 		}
4044 
4045 		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
4046 	}
4047 
4048 	if (MyError) {
4049 		*PrefetchBandwidth = 0;
4050 		TimeForFetchingMetaPTE = 0;
4051 		TimeForFetchingRowInVBlank = 0;
4052 		*DestinationLinesToRequestVMInVBlank = 0;
4053 		*DestinationLinesToRequestRowInVBlank = 0;
4054 		*DestinationLinesForPrefetch = 0;
4055 		LinesToRequestPrefetchPixelData = 0;
4056 		*VRatioPrefetchY = 0;
4057 		*VRatioPrefetchC = 0;
4058 		*RequiredPrefetchPixDataBWLuma = 0;
4059 		*RequiredPrefetchPixDataBWChroma = 0;
4060 	}
4061 
4062 	return MyError;
4063 } // CalculatePrefetchSchedule
4064 
4065 void dml32_CalculateFlipSchedule(
4066 		double HostVMInefficiencyFactor,
4067 		double UrgentExtraLatency,
4068 		double UrgentLatency,
4069 		unsigned int GPUVMMaxPageTableLevels,
4070 		bool HostVMEnable,
4071 		unsigned int HostVMMaxNonCachedPageTableLevels,
4072 		bool GPUVMEnable,
4073 		double HostVMMinPageSize,
4074 		double PDEAndMetaPTEBytesPerFrame,
4075 		double MetaRowBytes,
4076 		double DPTEBytesPerRow,
4077 		double BandwidthAvailableForImmediateFlip,
4078 		unsigned int TotImmediateFlipBytes,
4079 		enum source_format_class SourcePixelFormat,
4080 		double LineTime,
4081 		double VRatio,
4082 		double VRatioChroma,
4083 		double Tno_bw,
4084 		bool DCCEnable,
4085 		unsigned int dpte_row_height,
4086 		unsigned int meta_row_height,
4087 		unsigned int dpte_row_height_chroma,
4088 		unsigned int meta_row_height_chroma,
4089 		bool    use_one_row_for_frame_flip,
4090 
4091 		/* Output */
4092 		double *DestinationLinesToRequestVMInImmediateFlip,
4093 		double *DestinationLinesToRequestRowInImmediateFlip,
4094 		double *final_flip_bw,
4095 		bool *ImmediateFlipSupportedForPipe)
4096 {
4097 	double min_row_time = 0.0;
4098 	unsigned int HostVMDynamicLevelsTrips;
4099 	double TimeForFetchingMetaPTEImmediateFlip;
4100 	double TimeForFetchingRowInVBlankImmediateFlip;
4101 	double ImmediateFlipBW;
4102 
4103 	if (GPUVMEnable == true && HostVMEnable == true)
4104 		HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4105 	else
4106 		HostVMDynamicLevelsTrips = 0;
4107 
4108 #ifdef __DML_VBA_DEBUG__
4109 	dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4110 	dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4111 #endif
4112 
4113 	if (TotImmediateFlipBytes > 0) {
4114 		if (use_one_row_for_frame_flip) {
4115 			ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4116 					BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4117 		} else {
4118 			ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4119 					BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4120 		}
4121 		if (GPUVMEnable == true) {
4122 			TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4123 					HostVMInefficiencyFactor / ImmediateFlipBW,
4124 					UrgentExtraLatency + UrgentLatency *
4125 					(GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4126 					LineTime / 4.0);
4127 		} else {
4128 			TimeForFetchingMetaPTEImmediateFlip = 0;
4129 		}
4130 		if ((GPUVMEnable == true || DCCEnable == true)) {
4131 			TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4132 					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4133 					UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4134 		} else {
4135 			TimeForFetchingRowInVBlankImmediateFlip = 0;
4136 		}
4137 
4138 		*DestinationLinesToRequestVMInImmediateFlip =
4139 				dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4140 		*DestinationLinesToRequestRowInImmediateFlip =
4141 				dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4142 
4143 		if (GPUVMEnable == true) {
4144 			*final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4145 					(*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4146 					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4147 					(*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4148 		} else if ((GPUVMEnable == true || DCCEnable == true)) {
4149 			*final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4150 					(*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4151 		} else {
4152 			*final_flip_bw = 0;
4153 		}
4154 	} else {
4155 		TimeForFetchingMetaPTEImmediateFlip = 0;
4156 		TimeForFetchingRowInVBlankImmediateFlip = 0;
4157 		*DestinationLinesToRequestVMInImmediateFlip = 0;
4158 		*DestinationLinesToRequestRowInImmediateFlip = 0;
4159 		*final_flip_bw = 0;
4160 	}
4161 
4162 	if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4163 		if (GPUVMEnable == true && DCCEnable != true) {
4164 			min_row_time = dml_min(dpte_row_height *
4165 					LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4166 		} else if (GPUVMEnable != true && DCCEnable == true) {
4167 			min_row_time = dml_min(meta_row_height *
4168 					LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4169 		} else {
4170 			min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4171 					LineTime / VRatio, dpte_row_height_chroma * LineTime /
4172 					VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4173 		}
4174 	} else {
4175 		if (GPUVMEnable == true && DCCEnable != true) {
4176 			min_row_time = dpte_row_height * LineTime / VRatio;
4177 		} else if (GPUVMEnable != true && DCCEnable == true) {
4178 			min_row_time = meta_row_height * LineTime / VRatio;
4179 		} else {
4180 			min_row_time =
4181 				dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4182 		}
4183 	}
4184 
4185 	if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4186 			|| TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4187 					> min_row_time) {
4188 		*ImmediateFlipSupportedForPipe = false;
4189 	} else {
4190 		*ImmediateFlipSupportedForPipe = true;
4191 	}
4192 
4193 #ifdef __DML_VBA_DEBUG__
4194 	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4195 	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4196 	dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4197 			__func__, *DestinationLinesToRequestVMInImmediateFlip);
4198 	dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4199 			__func__, *DestinationLinesToRequestRowInImmediateFlip);
4200 	dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4201 	dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4202 			__func__, TimeForFetchingRowInVBlankImmediateFlip);
4203 	dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4204 	dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4205 #endif
4206 } // CalculateFlipSchedule
4207 
4208 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4209 		bool USRRetrainingRequiredFinal,
4210 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
4211 		unsigned int PrefetchMode,
4212 		unsigned int NumberOfActiveSurfaces,
4213 		unsigned int MaxLineBufferLines,
4214 		unsigned int LineBufferSize,
4215 		unsigned int WritebackInterfaceBufferSize,
4216 		double DCFCLK,
4217 		double ReturnBW,
4218 		bool SynchronizeTimingsFinal,
4219 		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
4220 		bool DRRDisplay[],
4221 		unsigned int dpte_group_bytes[],
4222 		unsigned int meta_row_height[],
4223 		unsigned int meta_row_height_chroma[],
4224 		SOCParametersList mmSOCParameters,
4225 		unsigned int WritebackChunkSize,
4226 		double SOCCLK,
4227 		double DCFClkDeepSleep,
4228 		unsigned int DETBufferSizeY[],
4229 		unsigned int DETBufferSizeC[],
4230 		unsigned int SwathHeightY[],
4231 		unsigned int SwathHeightC[],
4232 		unsigned int LBBitPerPixel[],
4233 		double SwathWidthY[],
4234 		double SwathWidthC[],
4235 		double HRatio[],
4236 		double HRatioChroma[],
4237 		unsigned int VTaps[],
4238 		unsigned int VTapsChroma[],
4239 		double VRatio[],
4240 		double VRatioChroma[],
4241 		unsigned int HTotal[],
4242 		unsigned int VTotal[],
4243 		unsigned int VActive[],
4244 		double PixelClock[],
4245 		unsigned int BlendingAndTiming[],
4246 		unsigned int DPPPerSurface[],
4247 		double BytePerPixelDETY[],
4248 		double BytePerPixelDETC[],
4249 		double DSTXAfterScaler[],
4250 		double DSTYAfterScaler[],
4251 		bool WritebackEnable[],
4252 		enum source_format_class WritebackPixelFormat[],
4253 		double WritebackDestinationWidth[],
4254 		double WritebackDestinationHeight[],
4255 		double WritebackSourceHeight[],
4256 		bool UnboundedRequestEnabled,
4257 		unsigned int CompressedBufferSizeInkByte,
4258 
4259 		/* Output */
4260 		Watermarks *Watermark,
4261 		enum clock_change_support *DRAMClockChangeSupport,
4262 		double MaxActiveDRAMClockChangeLatencySupported[],
4263 		unsigned int SubViewportLinesNeededInMALL[],
4264 		enum dm_fclock_change_support *FCLKChangeSupport,
4265 		double *MinActiveFCLKChangeLatencySupported,
4266 		bool *USRRetrainingSupport,
4267 		double ActiveDRAMClockChangeLatencyMargin[])
4268 {
4269 	unsigned int i, j, k;
4270 	unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
4271 	unsigned int DRAMClockChangeSupportNumber = 0;
4272 	unsigned int LastSurfaceWithoutMargin;
4273 	unsigned int DRAMClockChangeMethod = 0;
4274 	bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4275 	double MinActiveFCLKChangeMargin = 0.;
4276 	double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4277 	double ActiveClockChangeLatencyHidingY;
4278 	double ActiveClockChangeLatencyHidingC;
4279 	double ActiveClockChangeLatencyHiding;
4280     double EffectiveDETBufferSizeY;
4281 	double     ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
4282 	double     USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
4283 	double TotalPixelBW = 0.0;
4284 	bool    SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
4285 	double     EffectiveLBLatencyHidingY;
4286 	double     EffectiveLBLatencyHidingC;
4287 	double     LinesInDETY[DC__NUM_DPP__MAX];
4288 	double     LinesInDETC[DC__NUM_DPP__MAX];
4289 	unsigned int    LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
4290 	unsigned int    LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
4291 	double     FullDETBufferingTimeY;
4292 	double     FullDETBufferingTimeC;
4293 	double     WritebackDRAMClockChangeLatencyMargin;
4294 	double     WritebackFCLKChangeLatencyMargin;
4295 	double     WritebackLatencyHiding;
4296 	bool    SameTimingForFCLKChange;
4297 
4298 	unsigned int    TotalActiveWriteback = 0;
4299 	unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
4300 	unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
4301 
4302 	Watermark->UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4303 	Watermark->USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4304 			+ mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4305 	Watermark->DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + Watermark->UrgentWatermark;
4306 	Watermark->FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + Watermark->UrgentWatermark;
4307 	Watermark->StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4308 			+ 10 / DCFClkDeepSleep;
4309 	Watermark->StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4310 			+ 10 / DCFClkDeepSleep;
4311 	Watermark->Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4312 			+ 10 / DCFClkDeepSleep;
4313 	Watermark->Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4314 			+ mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4315 
4316 #ifdef __DML_VBA_DEBUG__
4317 	dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4318 	dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4319 	dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4320 	dml_print("DML::%s: UrgentWatermark = %f\n", __func__, Watermark->UrgentWatermark);
4321 	dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, Watermark->USRRetrainingWatermark);
4322 	dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, Watermark->DRAMClockChangeWatermark);
4323 	dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, Watermark->FCLKChangeWatermark);
4324 	dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, Watermark->StutterExitWatermark);
4325 	dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, Watermark->StutterEnterPlusExitWatermark);
4326 	dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, Watermark->Z8StutterExitWatermark);
4327 	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4328 			__func__, Watermark->Z8StutterEnterPlusExitWatermark);
4329 #endif
4330 
4331 
4332 	TotalActiveWriteback = 0;
4333 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4334 		if (WritebackEnable[k] == true)
4335 			TotalActiveWriteback = TotalActiveWriteback + 1;
4336 	}
4337 
4338 	if (TotalActiveWriteback <= 1) {
4339 		Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4340 	} else {
4341 		Watermark->WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4342 				+ WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4343 	}
4344 	if (USRRetrainingRequiredFinal)
4345 		Watermark->WritebackUrgentWatermark = Watermark->WritebackUrgentWatermark
4346 				+ mmSOCParameters.USRRetrainingLatency;
4347 
4348 	if (TotalActiveWriteback <= 1) {
4349 		Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4350 				+ mmSOCParameters.WritebackLatency;
4351 		Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4352 				+ mmSOCParameters.WritebackLatency;
4353 	} else {
4354 		Watermark->WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4355 				+ mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4356 		Watermark->WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4357 				+ mmSOCParameters.WritebackLatency + WritebackChunkSize * 1024 / 32 / SOCCLK;
4358 	}
4359 
4360 	if (USRRetrainingRequiredFinal)
4361 		Watermark->WritebackDRAMClockChangeWatermark = Watermark->WritebackDRAMClockChangeWatermark
4362 				+ mmSOCParameters.USRRetrainingLatency;
4363 
4364 	if (USRRetrainingRequiredFinal)
4365 		Watermark->WritebackFCLKChangeWatermark = Watermark->WritebackFCLKChangeWatermark
4366 				+ mmSOCParameters.USRRetrainingLatency;
4367 
4368 #ifdef __DML_VBA_DEBUG__
4369 	dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4370 			__func__, Watermark->WritebackDRAMClockChangeWatermark);
4371 	dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, Watermark->WritebackFCLKChangeWatermark);
4372 	dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, Watermark->WritebackUrgentWatermark);
4373 	dml_print("DML::%s: USRRetrainingRequiredFinal = %d\n", __func__, USRRetrainingRequiredFinal);
4374 	dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4375 #endif
4376 
4377 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4378 		TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] +
4379 				SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) / (HTotal[k] / PixelClock[k]);
4380 	}
4381 
4382 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4383 
4384 		LBLatencyHidingSourceLinesY[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (VTaps[k] - 1);
4385 		LBLatencyHidingSourceLinesC[k] = dml_min((double) MaxLineBufferLines, dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTapsChroma[k] - 1);
4386 
4387 
4388 #ifdef __DML_VBA_DEBUG__
4389 		dml_print("DML::%s: k=%d, MaxLineBufferLines = %d\n", __func__, k, MaxLineBufferLines);
4390 		dml_print("DML::%s: k=%d, LineBufferSize     = %d\n", __func__, k, LineBufferSize);
4391 		dml_print("DML::%s: k=%d, LBBitPerPixel      = %d\n", __func__, k, LBBitPerPixel[k]);
4392 		dml_print("DML::%s: k=%d, HRatio             = %f\n", __func__, k, HRatio[k]);
4393 		dml_print("DML::%s: k=%d, VTaps              = %d\n", __func__, k, VTaps[k]);
4394 #endif
4395 
4396 		EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / VRatio[k] * (HTotal[k] / PixelClock[k]);
4397 		EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / VRatioChroma[k] * (HTotal[k] / PixelClock[k]);
4398 		EffectiveDETBufferSizeY = DETBufferSizeY[k];
4399 
4400 		if (UnboundedRequestEnabled) {
4401 			EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
4402 					+ CompressedBufferSizeInkByte * 1024
4403 							* (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k])
4404 							/ (HTotal[k] / PixelClock[k]) / TotalPixelBW;
4405 		}
4406 
4407 		LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4408 		LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
4409 		FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k];
4410 
4411 		ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
4412 				- (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k];
4413 
4414 		if (NumberOfActiveSurfaces > 1) {
4415 			ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
4416 					- (1 - 1 / NumberOfActiveSurfaces) * SwathHeightY[k] * HTotal[k]
4417 							/ PixelClock[k] / VRatio[k];
4418 		}
4419 
4420 		if (BytePerPixelDETC[k] > 0) {
4421 			LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4422 			LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
4423 			FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k])
4424 					/ VRatioChroma[k];
4425 			ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
4426 					- (DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k]
4427 							/ PixelClock[k];
4428 			if (NumberOfActiveSurfaces > 1) {
4429 				ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
4430 						- (1 - 1 / NumberOfActiveSurfaces) * SwathHeightC[k] * HTotal[k]
4431 								/ PixelClock[k] / VRatioChroma[k];
4432 			}
4433 			ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
4434 					ActiveClockChangeLatencyHidingC);
4435 		} else {
4436 			ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
4437 		}
4438 
4439 		ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
4440 				- Watermark->DRAMClockChangeWatermark;
4441 		ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->UrgentWatermark
4442 				- Watermark->FCLKChangeWatermark;
4443 		USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - Watermark->USRRetrainingWatermark;
4444 
4445 		if (WritebackEnable[k]) {
4446 			WritebackLatencyHiding = WritebackInterfaceBufferSize * 1024
4447 					/ (WritebackDestinationWidth[k] * WritebackDestinationHeight[k]
4448 							/ (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4);
4449 			if (WritebackPixelFormat[k] == dm_444_64)
4450 				WritebackLatencyHiding = WritebackLatencyHiding / 2;
4451 
4452 			WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
4453 					- Watermark->WritebackDRAMClockChangeWatermark;
4454 
4455 			WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
4456 					- Watermark->WritebackFCLKChangeWatermark;
4457 
4458 			ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4459 					WritebackFCLKChangeLatencyMargin);
4460 			ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
4461 					WritebackDRAMClockChangeLatencyMargin);
4462 		}
4463 		MaxActiveDRAMClockChangeLatencySupported[k] =
4464 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4465 						0 :
4466 						(ActiveDRAMClockChangeLatencyMargin[k]
4467 								+ mmSOCParameters.DRAMClockChangeLatency);
4468 	}
4469 
4470 	for (i = 0; i < NumberOfActiveSurfaces; ++i) {
4471 		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
4472 			if (i == j ||
4473 					(BlendingAndTiming[i] == i && BlendingAndTiming[j] == i) ||
4474 					(BlendingAndTiming[j] == j && BlendingAndTiming[i] == j) ||
4475 					(BlendingAndTiming[i] == BlendingAndTiming[j] && BlendingAndTiming[i] != i) ||
4476 					(SynchronizeTimingsFinal && PixelClock[i] == PixelClock[j] &&
4477 					HTotal[i] == HTotal[j] && VTotal[i] == VTotal[j] &&
4478 					VActive[i] == VActive[j]) || (SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4479 					(DRRDisplay[i] || DRRDisplay[j]))) {
4480 				SynchronizedSurfaces[i][j] = true;
4481 			} else {
4482 				SynchronizedSurfaces[i][j] = false;
4483 			}
4484 		}
4485 	}
4486 
4487 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4488 		if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4489 				(!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4490 				ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
4491 			FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4492 			MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
4493 			SurfaceWithMinActiveFCLKChangeMargin = k;
4494 		}
4495 	}
4496 
4497 	*MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4498 
4499 	SameTimingForFCLKChange = true;
4500 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4501 		if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
4502 			if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4503 					(SameTimingForFCLKChange ||
4504 					ActiveFCLKChangeLatencyMargin[k] <
4505 					SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4506 				SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
4507 			}
4508 			SameTimingForFCLKChange = false;
4509 		}
4510 	}
4511 
4512 	if (MinActiveFCLKChangeMargin > 0) {
4513 		*FCLKChangeSupport = dm_fclock_change_vactive;
4514 	} else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4515 			(PrefetchMode <= 1)) {
4516 		*FCLKChangeSupport = dm_fclock_change_vblank;
4517 	} else {
4518 		*FCLKChangeSupport = dm_fclock_change_unsupported;
4519 	}
4520 
4521 	*USRRetrainingSupport = true;
4522 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4523 		if ((UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4524 				(USRRetrainingLatencyMargin[k] < 0)) {
4525 			*USRRetrainingSupport = false;
4526 		}
4527 	}
4528 
4529 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4530 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4531 				UseMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4532 				UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4533 				ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4534 			if (PrefetchMode > 0) {
4535 				DRAMClockChangeSupportNumber = 2;
4536 			} else if (DRAMClockChangeSupportNumber == 0) {
4537 				DRAMClockChangeSupportNumber = 1;
4538 				LastSurfaceWithoutMargin = k;
4539 			} else if (DRAMClockChangeSupportNumber == 1 &&
4540 					!SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
4541 				DRAMClockChangeSupportNumber = 2;
4542 			}
4543 		}
4544 	}
4545 
4546 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4547 		if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4548 			DRAMClockChangeMethod = 1;
4549 		else if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4550 			DRAMClockChangeMethod = 2;
4551 	}
4552 
4553 	if (DRAMClockChangeMethod == 0) {
4554 		if (DRAMClockChangeSupportNumber == 0)
4555 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4556 		else if (DRAMClockChangeSupportNumber == 1)
4557 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4558 		else
4559 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4560 	} else if (DRAMClockChangeMethod == 1) {
4561 		if (DRAMClockChangeSupportNumber == 0)
4562 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4563 		else if (DRAMClockChangeSupportNumber == 1)
4564 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4565 		else
4566 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4567 	} else {
4568 		if (DRAMClockChangeSupportNumber == 0)
4569 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4570 		else if (DRAMClockChangeSupportNumber == 1)
4571 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4572 		else
4573 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4574 	}
4575 
4576 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4577 		unsigned int dst_y_pstate;
4578 		unsigned int src_y_pstate_l;
4579 		unsigned int src_y_pstate_c;
4580 		unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4581 
4582 		dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (HTotal[k] / PixelClock[k]), 1);
4583 		src_y_pstate_l = dml_ceil(dst_y_pstate * VRatio[k], SwathHeightY[k]);
4584 		src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
4585 		sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + meta_row_height[k];
4586 
4587 #ifdef __DML_VBA_DEBUG__
4588 dml_print("DML::%s: k=%d, DETBufferSizeY               = %d\n", __func__, k, DETBufferSizeY[k]);
4589 dml_print("DML::%s: k=%d, BytePerPixelDETY             = %f\n", __func__, k, BytePerPixelDETY[k]);
4590 dml_print("DML::%s: k=%d, SwathWidthY                  = %d\n", __func__, k, SwathWidthY[k]);
4591 dml_print("DML::%s: k=%d, SwathHeightY                 = %d\n", __func__, k, SwathHeightY[k]);
4592 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
4593 dml_print("DML::%s: k=%d, dst_y_pstate      = %d\n", __func__, k, dst_y_pstate);
4594 dml_print("DML::%s: k=%d, src_y_pstate_l    = %d\n", __func__, k, src_y_pstate_l);
4595 dml_print("DML::%s: k=%d, src_y_ahead_l     = %d\n", __func__, k, src_y_ahead_l);
4596 dml_print("DML::%s: k=%d, meta_row_height   = %d\n", __func__, k, meta_row_height[k]);
4597 dml_print("DML::%s: k=%d, sub_vp_lines_l    = %d\n", __func__, k, sub_vp_lines_l);
4598 #endif
4599 		SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4600 
4601 		if (BytePerPixelDETC[k] > 0) {
4602 			src_y_pstate_c = dml_ceil(dst_y_pstate * VRatioChroma[k], SwathHeightC[k]);
4603 			src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
4604 			sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + meta_row_height_chroma[k];
4605 			SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4606 
4607 #ifdef __DML_VBA_DEBUG__
4608 dml_print("DML::%s: k=%d, src_y_pstate_c            = %d\n", __func__, k, src_y_pstate_c);
4609 dml_print("DML::%s: k=%d, src_y_ahead_c             = %d\n", __func__, k, src_y_ahead_c);
4610 dml_print("DML::%s: k=%d, meta_row_height_chroma    = %d\n", __func__, k, meta_row_height_chroma[k]);
4611 dml_print("DML::%s: k=%d, sub_vp_lines_c            = %d\n", __func__, k, sub_vp_lines_c);
4612 #endif
4613 		}
4614 	}
4615 #ifdef __DML_VBA_DEBUG__
4616 	dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4617 	dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4618 	dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4619 			__func__, *MinActiveFCLKChangeLatencySupported);
4620 	dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4621 #endif
4622 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4623 
4624 double dml32_CalculateWriteBackDISPCLK(
4625 		enum source_format_class WritebackPixelFormat,
4626 		double PixelClock,
4627 		double WritebackHRatio,
4628 		double WritebackVRatio,
4629 		unsigned int WritebackHTaps,
4630 		unsigned int WritebackVTaps,
4631 		unsigned int   WritebackSourceWidth,
4632 		unsigned int   WritebackDestinationWidth,
4633 		unsigned int HTotal,
4634 		unsigned int WritebackLineBufferSize,
4635 		double DISPCLKDPPCLKVCOSpeed)
4636 {
4637 	double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
4638 
4639 	DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
4640 	DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
4641 	DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
4642 			WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
4643 	return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed);
4644 }
4645 
4646 void dml32_CalculateMinAndMaxPrefetchMode(
4647 		enum dm_prefetch_modes   AllowForPStateChangeOrStutterInVBlankFinal,
4648 		unsigned int             *MinPrefetchMode,
4649 		unsigned int             *MaxPrefetchMode)
4650 {
4651 	if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4652 		*MinPrefetchMode = 3;
4653 		*MaxPrefetchMode = 3;
4654 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4655 		*MinPrefetchMode = 2;
4656 		*MaxPrefetchMode = 2;
4657 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4658 		*MinPrefetchMode = 1;
4659 		*MaxPrefetchMode = 1;
4660 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4661 		*MinPrefetchMode = 0;
4662 		*MaxPrefetchMode = 0;
4663 	} else if (AllowForPStateChangeOrStutterInVBlankFinal ==
4664 			dm_prefetch_support_uclk_fclk_and_stutter_if_possible) {
4665 		*MinPrefetchMode = 0;
4666 		*MaxPrefetchMode = 3;
4667 	} else {
4668 		*MinPrefetchMode = 0;
4669 		*MaxPrefetchMode = 3;
4670 	}
4671 } // CalculateMinAndMaxPrefetchMode
4672 
4673 void dml32_CalculatePixelDeliveryTimes(
4674 		unsigned int             NumberOfActiveSurfaces,
4675 		double              VRatio[],
4676 		double              VRatioChroma[],
4677 		double              VRatioPrefetchY[],
4678 		double              VRatioPrefetchC[],
4679 		unsigned int             swath_width_luma_ub[],
4680 		unsigned int             swath_width_chroma_ub[],
4681 		unsigned int             DPPPerSurface[],
4682 		double              HRatio[],
4683 		double              HRatioChroma[],
4684 		double              PixelClock[],
4685 		double              PSCL_THROUGHPUT[],
4686 		double              PSCL_THROUGHPUT_CHROMA[],
4687 		double              Dppclk[],
4688 		unsigned int             BytePerPixelC[],
4689 		enum dm_rotation_angle   SourceRotation[],
4690 		unsigned int             NumberOfCursors[],
4691 		unsigned int             CursorWidth[][DC__NUM_CURSOR__MAX],
4692 		unsigned int             CursorBPP[][DC__NUM_CURSOR__MAX],
4693 		unsigned int             BlockWidth256BytesY[],
4694 		unsigned int             BlockHeight256BytesY[],
4695 		unsigned int             BlockWidth256BytesC[],
4696 		unsigned int             BlockHeight256BytesC[],
4697 
4698 		/* Output */
4699 		double              DisplayPipeLineDeliveryTimeLuma[],
4700 		double              DisplayPipeLineDeliveryTimeChroma[],
4701 		double              DisplayPipeLineDeliveryTimeLumaPrefetch[],
4702 		double              DisplayPipeLineDeliveryTimeChromaPrefetch[],
4703 		double              DisplayPipeRequestDeliveryTimeLuma[],
4704 		double              DisplayPipeRequestDeliveryTimeChroma[],
4705 		double              DisplayPipeRequestDeliveryTimeLumaPrefetch[],
4706 		double              DisplayPipeRequestDeliveryTimeChromaPrefetch[],
4707 		double              CursorRequestDeliveryTime[],
4708 		double              CursorRequestDeliveryTimePrefetch[])
4709 {
4710 	double   req_per_swath_ub;
4711 	unsigned int k;
4712 
4713 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4714 
4715 #ifdef __DML_VBA_DEBUG__
4716 		dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
4717 		dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
4718 		dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
4719 		dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
4720 		dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
4721 		dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
4722 		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
4723 		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
4724 		dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
4725 		dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
4726 		dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
4727 #endif
4728 
4729 		if (VRatio[k] <= 1) {
4730 			DisplayPipeLineDeliveryTimeLuma[k] =
4731 					swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4732 		} else {
4733 			DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4734 		}
4735 
4736 		if (BytePerPixelC[k] == 0) {
4737 			DisplayPipeLineDeliveryTimeChroma[k] = 0;
4738 		} else {
4739 			if (VRatioChroma[k] <= 1) {
4740 				DisplayPipeLineDeliveryTimeChroma[k] =
4741 					swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4742 			} else {
4743 				DisplayPipeLineDeliveryTimeChroma[k] =
4744 					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4745 			}
4746 		}
4747 
4748 		if (VRatioPrefetchY[k] <= 1) {
4749 			DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4750 					swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4751 		} else {
4752 			DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4753 					swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4754 		}
4755 
4756 		if (BytePerPixelC[k] == 0) {
4757 			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
4758 		} else {
4759 			if (VRatioPrefetchC[k] <= 1) {
4760 				DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
4761 						DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4762 			} else {
4763 				DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
4764 						swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4765 			}
4766 		}
4767 #ifdef __DML_VBA_DEBUG__
4768 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
4769 				__func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
4770 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
4771 				__func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
4772 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
4773 				__func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
4774 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
4775 				__func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
4776 #endif
4777 	}
4778 
4779 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4780 		if (!IsVertical(SourceRotation[k]))
4781 			req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
4782 		else
4783 			req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
4784 #ifdef __DML_VBA_DEBUG__
4785 		dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
4786 #endif
4787 
4788 		DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
4789 		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
4790 				DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
4791 		if (BytePerPixelC[k] == 0) {
4792 			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
4793 			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
4794 		} else {
4795 			if (!IsVertical(SourceRotation[k]))
4796 				req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
4797 			else
4798 				req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
4799 #ifdef __DML_VBA_DEBUG__
4800 			dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
4801 #endif
4802 			DisplayPipeRequestDeliveryTimeChroma[k] =
4803 					DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
4804 			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
4805 					DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
4806 		}
4807 #ifdef __DML_VBA_DEBUG__
4808 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
4809 				__func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
4810 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
4811 				__func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
4812 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
4813 				__func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
4814 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
4815 				__func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
4816 #endif
4817 	}
4818 
4819 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4820 		unsigned int cursor_req_per_width;
4821 
4822 		cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
4823 				256.0 / 8.0, 1.0);
4824 		if (NumberOfCursors[k] > 0) {
4825 			if (VRatio[k] <= 1) {
4826 				CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4827 						HRatio[k] / PixelClock[k] / cursor_req_per_width;
4828 			} else {
4829 				CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4830 						PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4831 			}
4832 			if (VRatioPrefetchY[k] <= 1) {
4833 				CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4834 						HRatio[k] / PixelClock[k] / cursor_req_per_width;
4835 			} else {
4836 				CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4837 						PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4838 			}
4839 		} else {
4840 			CursorRequestDeliveryTime[k] = 0;
4841 			CursorRequestDeliveryTimePrefetch[k] = 0;
4842 		}
4843 #ifdef __DML_VBA_DEBUG__
4844 		dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
4845 				__func__, k, NumberOfCursors[k]);
4846 		dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
4847 				__func__, k, CursorRequestDeliveryTime[k]);
4848 		dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
4849 				__func__, k, CursorRequestDeliveryTimePrefetch[k]);
4850 #endif
4851 	}
4852 } // CalculatePixelDeliveryTimes
4853 
4854 void dml32_CalculateMetaAndPTETimes(
4855 		bool use_one_row_for_frame[],
4856 		unsigned int NumberOfActiveSurfaces,
4857 		bool GPUVMEnable,
4858 		unsigned int MetaChunkSize,
4859 		unsigned int MinMetaChunkSizeBytes,
4860 		unsigned int    HTotal[],
4861 		double  VRatio[],
4862 		double  VRatioChroma[],
4863 		double  DestinationLinesToRequestRowInVBlank[],
4864 		double  DestinationLinesToRequestRowInImmediateFlip[],
4865 		bool DCCEnable[],
4866 		double  PixelClock[],
4867 		unsigned int BytePerPixelY[],
4868 		unsigned int BytePerPixelC[],
4869 		enum dm_rotation_angle SourceRotation[],
4870 		unsigned int dpte_row_height[],
4871 		unsigned int dpte_row_height_chroma[],
4872 		unsigned int meta_row_width[],
4873 		unsigned int meta_row_width_chroma[],
4874 		unsigned int meta_row_height[],
4875 		unsigned int meta_row_height_chroma[],
4876 		unsigned int meta_req_width[],
4877 		unsigned int meta_req_width_chroma[],
4878 		unsigned int meta_req_height[],
4879 		unsigned int meta_req_height_chroma[],
4880 		unsigned int dpte_group_bytes[],
4881 		unsigned int    PTERequestSizeY[],
4882 		unsigned int    PTERequestSizeC[],
4883 		unsigned int    PixelPTEReqWidthY[],
4884 		unsigned int    PixelPTEReqHeightY[],
4885 		unsigned int    PixelPTEReqWidthC[],
4886 		unsigned int    PixelPTEReqHeightC[],
4887 		unsigned int    dpte_row_width_luma_ub[],
4888 		unsigned int    dpte_row_width_chroma_ub[],
4889 
4890 		/* Output */
4891 		double DST_Y_PER_PTE_ROW_NOM_L[],
4892 		double DST_Y_PER_PTE_ROW_NOM_C[],
4893 		double DST_Y_PER_META_ROW_NOM_L[],
4894 		double DST_Y_PER_META_ROW_NOM_C[],
4895 		double TimePerMetaChunkNominal[],
4896 		double TimePerChromaMetaChunkNominal[],
4897 		double TimePerMetaChunkVBlank[],
4898 		double TimePerChromaMetaChunkVBlank[],
4899 		double TimePerMetaChunkFlip[],
4900 		double TimePerChromaMetaChunkFlip[],
4901 		double time_per_pte_group_nom_luma[],
4902 		double time_per_pte_group_vblank_luma[],
4903 		double time_per_pte_group_flip_luma[],
4904 		double time_per_pte_group_nom_chroma[],
4905 		double time_per_pte_group_vblank_chroma[],
4906 		double time_per_pte_group_flip_chroma[])
4907 {
4908 	unsigned int   meta_chunk_width;
4909 	unsigned int   min_meta_chunk_width;
4910 	unsigned int   meta_chunk_per_row_int;
4911 	unsigned int   meta_row_remainder;
4912 	unsigned int   meta_chunk_threshold;
4913 	unsigned int   meta_chunks_per_row_ub;
4914 	unsigned int   meta_chunk_width_chroma;
4915 	unsigned int   min_meta_chunk_width_chroma;
4916 	unsigned int   meta_chunk_per_row_int_chroma;
4917 	unsigned int   meta_row_remainder_chroma;
4918 	unsigned int   meta_chunk_threshold_chroma;
4919 	unsigned int   meta_chunks_per_row_ub_chroma;
4920 	unsigned int   dpte_group_width_luma;
4921 	unsigned int   dpte_groups_per_row_luma_ub;
4922 	unsigned int   dpte_group_width_chroma;
4923 	unsigned int   dpte_groups_per_row_chroma_ub;
4924 	unsigned int k;
4925 
4926 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4927 		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4928 		if (BytePerPixelC[k] == 0)
4929 			DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4930 		else
4931 			DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4932 		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4933 		if (BytePerPixelC[k] == 0)
4934 			DST_Y_PER_META_ROW_NOM_C[k] = 0;
4935 		else
4936 			DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4937 	}
4938 
4939 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4940 		if (DCCEnable[k] == true) {
4941 			meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4942 			min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4943 			meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4944 			meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4945 			if (!IsVertical(SourceRotation[k]))
4946 				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4947 			else
4948 				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4949 
4950 			if (meta_row_remainder <= meta_chunk_threshold)
4951 				meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4952 			else
4953 				meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4954 
4955 			TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4956 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4957 			TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4958 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4959 			TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4960 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4961 			if (BytePerPixelC[k] == 0) {
4962 				TimePerChromaMetaChunkNominal[k] = 0;
4963 				TimePerChromaMetaChunkVBlank[k] = 0;
4964 				TimePerChromaMetaChunkFlip[k] = 0;
4965 			} else {
4966 				meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4967 						meta_row_height_chroma[k];
4968 				min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4969 						meta_row_height_chroma[k];
4970 				meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4971 						meta_chunk_width_chroma;
4972 				meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4973 				if (!IsVertical(SourceRotation[k])) {
4974 					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4975 							meta_req_width_chroma[k];
4976 				} else {
4977 					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4978 							meta_req_height_chroma[k];
4979 				}
4980 				if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
4981 					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
4982 				else
4983 					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
4984 
4985 				TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
4986 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4987 				TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4988 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4989 				TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4990 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4991 			}
4992 		} else {
4993 			TimePerMetaChunkNominal[k] = 0;
4994 			TimePerMetaChunkVBlank[k] = 0;
4995 			TimePerMetaChunkFlip[k] = 0;
4996 			TimePerChromaMetaChunkNominal[k] = 0;
4997 			TimePerChromaMetaChunkVBlank[k] = 0;
4998 			TimePerChromaMetaChunkFlip[k] = 0;
4999 		}
5000 	}
5001 
5002 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5003 		if (GPUVMEnable == true) {
5004 			if (!IsVertical(SourceRotation[k])) {
5005 				dpte_group_width_luma = (double) dpte_group_bytes[k] /
5006 						(double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5007 			} else {
5008 				dpte_group_width_luma = (double) dpte_group_bytes[k] /
5009 						(double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
5010 			}
5011 
5012 			if (use_one_row_for_frame[k]) {
5013 				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5014 						(double) dpte_group_width_luma / 2.0, 1.0);
5015 			} else {
5016 				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5017 						(double) dpte_group_width_luma, 1.0);
5018 			}
5019 #ifdef __DML_VBA_DEBUG__
5020 			dml_print("DML::%s: k=%0d, use_one_row_for_frame        = %d\n",
5021 					__func__, k, use_one_row_for_frame[k]);
5022 			dml_print("DML::%s: k=%0d, dpte_group_bytes             = %d\n",
5023 					__func__, k, dpte_group_bytes[k]);
5024 			dml_print("DML::%s: k=%0d, PTERequestSizeY              = %d\n",
5025 					__func__, k, PTERequestSizeY[k]);
5026 			dml_print("DML::%s: k=%0d, PixelPTEReqWidthY            = %d\n",
5027 					__func__, k, PixelPTEReqWidthY[k]);
5028 			dml_print("DML::%s: k=%0d, PixelPTEReqHeightY           = %d\n",
5029 					__func__, k, PixelPTEReqHeightY[k]);
5030 			dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub       = %d\n",
5031 					__func__, k, dpte_row_width_luma_ub[k]);
5032 			dml_print("DML::%s: k=%0d, dpte_group_width_luma        = %d\n",
5033 					__func__, k, dpte_group_width_luma);
5034 			dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub  = %d\n",
5035 					__func__, k, dpte_groups_per_row_luma_ub);
5036 #endif
5037 
5038 			time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
5039 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5040 			time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
5041 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5042 			time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5043 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5044 			if (BytePerPixelC[k] == 0) {
5045 				time_per_pte_group_nom_chroma[k] = 0;
5046 				time_per_pte_group_vblank_chroma[k] = 0;
5047 				time_per_pte_group_flip_chroma[k] = 0;
5048 			} else {
5049 				if (!IsVertical(SourceRotation[k])) {
5050 					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5051 							(double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5052 				} else {
5053 					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5054 							(double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5055 				}
5056 
5057 				if (use_one_row_for_frame[k]) {
5058 					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5059 							(double) dpte_group_width_chroma / 2.0, 1.0);
5060 				} else {
5061 					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5062 							(double) dpte_group_width_chroma, 1.0);
5063 				}
5064 #ifdef __DML_VBA_DEBUG__
5065 				dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub        = %d\n",
5066 						__func__, k, dpte_row_width_chroma_ub[k]);
5067 				dml_print("DML::%s: k=%0d, dpte_group_width_chroma        = %d\n",
5068 						__func__, k, dpte_group_width_chroma);
5069 				dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub  = %d\n",
5070 						__func__, k, dpte_groups_per_row_chroma_ub);
5071 #endif
5072 				time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5073 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5074 				time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5075 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5076 				time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5077 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5078 			}
5079 		} else {
5080 			time_per_pte_group_nom_luma[k] = 0;
5081 			time_per_pte_group_vblank_luma[k] = 0;
5082 			time_per_pte_group_flip_luma[k] = 0;
5083 			time_per_pte_group_nom_chroma[k] = 0;
5084 			time_per_pte_group_vblank_chroma[k] = 0;
5085 			time_per_pte_group_flip_chroma[k] = 0;
5086 		}
5087 #ifdef __DML_VBA_DEBUG__
5088 		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank         = %f\n",
5089 				__func__, k, DestinationLinesToRequestRowInVBlank[k]);
5090 		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip  = %f\n",
5091 				__func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5092 		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L                      = %f\n",
5093 				__func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5094 		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C                      = %f\n",
5095 				__func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5096 		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L                     = %f\n",
5097 				__func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5098 		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C                     = %f\n",
5099 				__func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5100 		dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal          = %f\n",
5101 				__func__, k, TimePerMetaChunkNominal[k]);
5102 		dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank           = %f\n",
5103 				__func__, k, TimePerMetaChunkVBlank[k]);
5104 		dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip             = %f\n",
5105 				__func__, k, TimePerMetaChunkFlip[k]);
5106 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal    = %f\n",
5107 				__func__, k, TimePerChromaMetaChunkNominal[k]);
5108 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank     = %f\n",
5109 				__func__, k, TimePerChromaMetaChunkVBlank[k]);
5110 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip       = %f\n",
5111 				__func__, k, TimePerChromaMetaChunkFlip[k]);
5112 		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma      = %f\n",
5113 				__func__, k, time_per_pte_group_nom_luma[k]);
5114 		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma   = %f\n",
5115 				__func__, k, time_per_pte_group_vblank_luma[k]);
5116 		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma     = %f\n",
5117 				__func__, k, time_per_pte_group_flip_luma[k]);
5118 		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma    = %f\n",
5119 				__func__, k, time_per_pte_group_nom_chroma[k]);
5120 		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5121 				__func__, k, time_per_pte_group_vblank_chroma[k]);
5122 		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma   = %f\n",
5123 				__func__, k, time_per_pte_group_flip_chroma[k]);
5124 #endif
5125 	}
5126 } // CalculateMetaAndPTETimes
5127 
/*
 * dml32_CalculateVMGroupAndRequestTimes() - fill the per-surface VM group and
 * VM request time outputs.
 *
 * For every surface k < NumberOfActiveSurfaces:
 *  - If GPUVMEnable is set and either DCCEnable[k] is set or
 *    GPUVMMaxPageTableLevels > 1, two counts are built:
 *      groups   = sum over the contributing byte counts of
 *                 dml_ceil(bytes / vm_group_bytes[k], 1), plus a constant
 *      requests = sum over the contributing byte counts of bytes / 64
 *                 (unsigned integer division)
 *    and each output is
 *      DestinationLinesToRequestVM<VBlank|ImmediateFlip>[k] * HTotal[k] /
 *      PixelClock[k] / <groups|requests>.
 *    All four results are then halved when GPUVMMaxPageTableLevels > 2.
 *  - Otherwise the four outputs for surface k are set to 0.
 *
 * Contributing byte counts:
 *  - DCCEnable[k] false:                 dpde0_bytes_per_frame_ub_* only
 *  - DCCEnable[k] true, levels == 1:     meta_pte_bytes_per_frame_ub_* only
 *  - DCCEnable[k] true, levels != 1:     both tables, and the group count
 *                                        starts at 1 (2 with chroma)
 * The chroma (_c) byte counts are only included when BytePerPixelC[k] > 0.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are accepted but never
 * read by this function. No guard is applied against a zero group or request
 * count.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int     NumberOfActiveSurfaces,
		bool     GPUVMEnable,
		unsigned int     GPUVMMaxPageTableLevels,
		unsigned int     HTotal[],
		unsigned int     BytePerPixelC[],
		double      DestinationLinesToRequestVMInVBlank[],
		double      DestinationLinesToRequestVMInImmediateFlip[],
		bool     DCCEnable[],
		double      PixelClock[],
		unsigned int        dpte_row_width_luma_ub[],
		unsigned int        dpte_row_width_chroma_ub[],
		unsigned int     vm_group_bytes[],
		unsigned int     dpde0_bytes_per_frame_ub_l[],
		unsigned int     dpde0_bytes_per_frame_ub_c[],
		unsigned int        meta_pte_bytes_per_frame_ub_l[],
		unsigned int        meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double      TimePerVMGroupVBlank[],
		double      TimePerVMGroupFlip[],
		double      TimePerVMRequestVBlank[],
		double      TimePerVMRequestFlip[])
{
	unsigned int k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		bool has_chroma;
		bool count_dpde0;
		bool count_meta_pte;
		double group_sum;
		unsigned int group_count;
		unsigned int request_count;
		double vblank_time;
		double flip_time;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (!(GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1))) {
			/* Nothing to budget for this surface. */
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		} else {
			has_chroma = BytePerPixelC[k] > 0;
			count_meta_pte = DCCEnable[k] == true;
			count_dpde0 = !count_meta_pte || GPUVMMaxPageTableLevels != 1;

			/*
			 * The group sum is accumulated in double, in the order
			 * constant, dpde0 luma, dpde0 chroma, meta luma, meta chroma,
			 * and converted to an unsigned count once at the end.
			 */
			group_sum = 0;
			request_count = 0;
			if (count_dpde0 && count_meta_pte)
				group_sum = has_chroma ? 2 : 1;

			if (count_dpde0) {
				group_sum += dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1.0);
				request_count += dpde0_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_sum += dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					request_count += dpde0_bytes_per_frame_ub_c[k] / 64;
				}
			}

			if (count_meta_pte) {
				group_sum += dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1.0);
				request_count += meta_pte_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_sum += dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					request_count += meta_pte_bytes_per_frame_ub_c[k] / 64;
				}
			}

			group_count = group_sum;

			/* Lines * HTotal / PixelClock, shared by the group and request results. */
			vblank_time = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k];
			flip_time = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k];

			TimePerVMGroupVBlank[k] = vblank_time / group_count;
			TimePerVMGroupFlip[k] = flip_time / group_count;
			TimePerVMRequestVBlank[k] = vblank_time / request_count;
			TimePerVMRequestFlip[k] = flip_time / request_count;

			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] /= 2;
				TimePerVMGroupFlip[k] /= 2;
				TimePerVMRequestVBlank[k] /= 2;
				TimePerVMRequestFlip[k] /= 2;
			}
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
5281 
5282 void dml32_CalculateDCCConfiguration(
5283 		bool             DCCEnabled,
5284 		bool             DCCProgrammingAssumesScanDirectionUnknown,
5285 		enum source_format_class SourcePixelFormat,
5286 		unsigned int             SurfaceWidthLuma,
5287 		unsigned int             SurfaceWidthChroma,
5288 		unsigned int             SurfaceHeightLuma,
5289 		unsigned int             SurfaceHeightChroma,
5290 		unsigned int                nomDETInKByte,
5291 		unsigned int             RequestHeight256ByteLuma,
5292 		unsigned int             RequestHeight256ByteChroma,
5293 		enum dm_swizzle_mode     TilingFormat,
5294 		unsigned int             BytePerPixelY,
5295 		unsigned int             BytePerPixelC,
5296 		double              BytePerPixelDETY,
5297 		double              BytePerPixelDETC,
5298 		enum dm_rotation_angle   SourceRotation,
5299 		/* Output */
5300 		unsigned int        *MaxUncompressedBlockLuma,
5301 		unsigned int        *MaxUncompressedBlockChroma,
5302 		unsigned int        *MaxCompressedBlockLuma,
5303 		unsigned int        *MaxCompressedBlockChroma,
5304 		unsigned int        *IndependentBlockLuma,
5305 		unsigned int        *IndependentBlockChroma)
5306 {
5307 	typedef enum {
5308 		REQ_256Bytes,
5309 		REQ_128BytesNonContiguous,
5310 		REQ_128BytesContiguous,
5311 		REQ_NA
5312 	} RequestType;
5313 
5314 	RequestType   RequestLuma;
5315 	RequestType   RequestChroma;
5316 
5317 	unsigned int   segment_order_horz_contiguous_luma;
5318 	unsigned int   segment_order_horz_contiguous_chroma;
5319 	unsigned int   segment_order_vert_contiguous_luma;
5320 	unsigned int   segment_order_vert_contiguous_chroma;
5321 	unsigned int req128_horz_wc_l;
5322 	unsigned int req128_horz_wc_c;
5323 	unsigned int req128_vert_wc_l;
5324 	unsigned int req128_vert_wc_c;
5325 	unsigned int MAS_vp_horz_limit;
5326 	unsigned int MAS_vp_vert_limit;
5327 	unsigned int max_vp_horz_width;
5328 	unsigned int max_vp_vert_height;
5329 	unsigned int eff_surf_width_l;
5330 	unsigned int eff_surf_width_c;
5331 	unsigned int eff_surf_height_l;
5332 	unsigned int eff_surf_height_c;
5333 	unsigned int full_swath_bytes_horz_wc_l;
5334 	unsigned int full_swath_bytes_horz_wc_c;
5335 	unsigned int full_swath_bytes_vert_wc_l;
5336 	unsigned int full_swath_bytes_vert_wc_c;
5337 	unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5338 
5339 	unsigned int   yuv420;
5340 	unsigned int   horz_div_l;
5341 	unsigned int   horz_div_c;
5342 	unsigned int   vert_div_l;
5343 	unsigned int   vert_div_c;
5344 
5345 	unsigned int     swath_buf_size;
5346 	double   detile_buf_vp_horz_limit;
5347 	double   detile_buf_vp_vert_limit;
5348 
5349 	yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5350 			SourcePixelFormat == dm_420_12) ? 1 : 0);
5351 	horz_div_l = 1;
5352 	horz_div_c = 1;
5353 	vert_div_l = 1;
5354 	vert_div_c = 1;
5355 
5356 	if (BytePerPixelY == 1)
5357 		vert_div_l = 0;
5358 	if (BytePerPixelC == 1)
5359 		vert_div_c = 0;
5360 
5361 	if (BytePerPixelC == 0) {
5362 		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5363 		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5364 				BytePerPixelY / (1 + horz_div_l));
5365 		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5366 				(1 + vert_div_l));
5367 	} else {
5368 		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5369 		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5370 				BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5371 				BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5372 		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5373 				(1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5374 				(1 + vert_div_c) / (1 + yuv420));
5375 	}
5376 
5377 	if (SourcePixelFormat == dm_420_10) {
5378 		detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5379 		detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5380 	}
5381 
5382 	detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5383 	detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5384 
5385 	MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5386 	MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5387 	max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5388 	max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5389 	eff_surf_width_l =  (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5390 	eff_surf_width_c =  eff_surf_width_l / (1 + yuv420);
5391 	eff_surf_height_l =  (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5392 	eff_surf_height_c =  eff_surf_height_l / (1 + yuv420);
5393 
5394 	full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5395 	full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5396 	if (BytePerPixelC > 0) {
5397 		full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5398 		full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5399 	} else {
5400 		full_swath_bytes_horz_wc_c = 0;
5401 		full_swath_bytes_vert_wc_c = 0;
5402 	}
5403 
5404 	if (SourcePixelFormat == dm_420_10) {
5405 		full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5406 		full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5407 		full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5408 		full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5409 	}
5410 
5411 	if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5412 		req128_horz_wc_l = 0;
5413 		req128_horz_wc_c = 0;
5414 	} else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5415 			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5416 		req128_horz_wc_l = 0;
5417 		req128_horz_wc_c = 1;
5418 	} else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5419 			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5420 		req128_horz_wc_l = 1;
5421 		req128_horz_wc_c = 0;
5422 	} else {
5423 		req128_horz_wc_l = 1;
5424 		req128_horz_wc_c = 1;
5425 	}
5426 
5427 	if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5428 		req128_vert_wc_l = 0;
5429 		req128_vert_wc_c = 0;
5430 	} else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5431 			full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5432 		req128_vert_wc_l = 0;
5433 		req128_vert_wc_c = 1;
5434 	} else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5435 			full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5436 		req128_vert_wc_l = 1;
5437 		req128_vert_wc_c = 0;
5438 	} else {
5439 		req128_vert_wc_l = 1;
5440 		req128_vert_wc_c = 1;
5441 	}
5442 
5443 	if (BytePerPixelY == 2) {
5444 		segment_order_horz_contiguous_luma = 0;
5445 		segment_order_vert_contiguous_luma = 1;
5446 	} else {
5447 		segment_order_horz_contiguous_luma = 1;
5448 		segment_order_vert_contiguous_luma = 0;
5449 	}
5450 
5451 	if (BytePerPixelC == 2) {
5452 		segment_order_horz_contiguous_chroma = 0;
5453 		segment_order_vert_contiguous_chroma = 1;
5454 	} else {
5455 		segment_order_horz_contiguous_chroma = 1;
5456 		segment_order_vert_contiguous_chroma = 0;
5457 	}
5458 #ifdef __DML_VBA_DEBUG__
5459 	dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5460 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5461 	dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5462 	dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5463 	dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5464 	dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5465 	dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5466 	dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5467 	dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5468 			__func__, segment_order_horz_contiguous_chroma);
5469 #endif
5470 
5471 	if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5472 		if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5473 			RequestLuma = REQ_256Bytes;
5474 		else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5475 				(req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5476 			RequestLuma = REQ_128BytesNonContiguous;
5477 		else
5478 			RequestLuma = REQ_128BytesContiguous;
5479 
5480 		if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5481 			RequestChroma = REQ_256Bytes;
5482 		else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5483 				(req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5484 			RequestChroma = REQ_128BytesNonContiguous;
5485 		else
5486 			RequestChroma = REQ_128BytesContiguous;
5487 
5488 	} else if (!IsVertical(SourceRotation)) {
5489 		if (req128_horz_wc_l == 0)
5490 			RequestLuma = REQ_256Bytes;
5491 		else if (segment_order_horz_contiguous_luma == 0)
5492 			RequestLuma = REQ_128BytesNonContiguous;
5493 		else
5494 			RequestLuma = REQ_128BytesContiguous;
5495 
5496 		if (req128_horz_wc_c == 0)
5497 			RequestChroma = REQ_256Bytes;
5498 		else if (segment_order_horz_contiguous_chroma == 0)
5499 			RequestChroma = REQ_128BytesNonContiguous;
5500 		else
5501 			RequestChroma = REQ_128BytesContiguous;
5502 
5503 	} else {
5504 		if (req128_vert_wc_l == 0)
5505 			RequestLuma = REQ_256Bytes;
5506 		else if (segment_order_vert_contiguous_luma == 0)
5507 			RequestLuma = REQ_128BytesNonContiguous;
5508 		else
5509 			RequestLuma = REQ_128BytesContiguous;
5510 
5511 		if (req128_vert_wc_c == 0)
5512 			RequestChroma = REQ_256Bytes;
5513 		else if (segment_order_vert_contiguous_chroma == 0)
5514 			RequestChroma = REQ_128BytesNonContiguous;
5515 		else
5516 			RequestChroma = REQ_128BytesContiguous;
5517 	}
5518 
5519 	if (RequestLuma == REQ_256Bytes) {
5520 		*MaxUncompressedBlockLuma = 256;
5521 		*MaxCompressedBlockLuma = 256;
5522 		*IndependentBlockLuma = 0;
5523 	} else if (RequestLuma == REQ_128BytesContiguous) {
5524 		*MaxUncompressedBlockLuma = 256;
5525 		*MaxCompressedBlockLuma = 128;
5526 		*IndependentBlockLuma = 128;
5527 	} else {
5528 		*MaxUncompressedBlockLuma = 256;
5529 		*MaxCompressedBlockLuma = 64;
5530 		*IndependentBlockLuma = 64;
5531 	}
5532 
5533 	if (RequestChroma == REQ_256Bytes) {
5534 		*MaxUncompressedBlockChroma = 256;
5535 		*MaxCompressedBlockChroma = 256;
5536 		*IndependentBlockChroma = 0;
5537 	} else if (RequestChroma == REQ_128BytesContiguous) {
5538 		*MaxUncompressedBlockChroma = 256;
5539 		*MaxCompressedBlockChroma = 128;
5540 		*IndependentBlockChroma = 128;
5541 	} else {
5542 		*MaxUncompressedBlockChroma = 256;
5543 		*MaxCompressedBlockChroma = 64;
5544 		*IndependentBlockChroma = 64;
5545 	}
5546 
5547 	if (DCCEnabled != true || BytePerPixelC == 0) {
5548 		*MaxUncompressedBlockChroma = 0;
5549 		*MaxCompressedBlockChroma = 0;
5550 		*IndependentBlockChroma = 0;
5551 	}
5552 
5553 	if (DCCEnabled != true) {
5554 		*MaxUncompressedBlockLuma = 0;
5555 		*MaxCompressedBlockLuma = 0;
5556 		*IndependentBlockLuma = 0;
5557 	}
5558 
5559 #ifdef __DML_VBA_DEBUG__
5560 	dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5561 	dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5562 	dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5563 	dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5564 	dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5565 	dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5566 #endif
5567 
5568 } // CalculateDCCConfiguration
5569 
5570 void dml32_CalculateStutterEfficiency(
5571 		unsigned int      CompressedBufferSizeInkByte,
5572 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5573 		bool   UnboundedRequestEnabled,
5574 		unsigned int      MetaFIFOSizeInKEntries,
5575 		unsigned int      ZeroSizeBufferEntries,
5576 		unsigned int      PixelChunkSizeInKByte,
5577 		unsigned int   NumberOfActiveSurfaces,
5578 		unsigned int      ROBBufferSizeInKByte,
5579 		double    TotalDataReadBandwidth,
5580 		double    DCFCLK,
5581 		double    ReturnBW,
5582 		unsigned int      CompbufReservedSpace64B,
5583 		unsigned int      CompbufReservedSpaceZs,
5584 		double    SRExitTime,
5585 		double    SRExitZ8Time,
5586 		bool   SynchronizeTimingsFinal,
5587 		unsigned int   BlendingAndTiming[],
5588 		double    StutterEnterPlusExitWatermark,
5589 		double    Z8StutterEnterPlusExitWatermark,
5590 		bool   ProgressiveToInterlaceUnitInOPP,
5591 		bool   Interlace[],
5592 		double    MinTTUVBlank[],
5593 		unsigned int   DPPPerSurface[],
5594 		unsigned int      DETBufferSizeY[],
5595 		unsigned int   BytePerPixelY[],
5596 		double    BytePerPixelDETY[],
5597 		double      SwathWidthY[],
5598 		unsigned int   SwathHeightY[],
5599 		unsigned int   SwathHeightC[],
5600 		double    NetDCCRateLuma[],
5601 		double    NetDCCRateChroma[],
5602 		double    DCCFractionOfZeroSizeRequestsLuma[],
5603 		double    DCCFractionOfZeroSizeRequestsChroma[],
5604 		unsigned int      HTotal[],
5605 		unsigned int      VTotal[],
5606 		double    PixelClock[],
5607 		double    VRatio[],
5608 		enum dm_rotation_angle SourceRotation[],
5609 		unsigned int   BlockHeight256BytesY[],
5610 		unsigned int   BlockWidth256BytesY[],
5611 		unsigned int   BlockHeight256BytesC[],
5612 		unsigned int   BlockWidth256BytesC[],
5613 		unsigned int   DCCYMaxUncompressedBlock[],
5614 		unsigned int   DCCCMaxUncompressedBlock[],
5615 		unsigned int      VActive[],
5616 		bool   DCCEnable[],
5617 		bool   WritebackEnable[],
5618 		double    ReadBandwidthSurfaceLuma[],
5619 		double    ReadBandwidthSurfaceChroma[],
5620 		double    meta_row_bw[],
5621 		double    dpte_row_bw[],
5622 
5623 		/* Output */
5624 		double   *StutterEfficiencyNotIncludingVBlank,
5625 		double   *StutterEfficiency,
5626 		unsigned int     *NumberOfStutterBurstsPerFrame,
5627 		double   *Z8StutterEfficiencyNotIncludingVBlank,
5628 		double   *Z8StutterEfficiency,
5629 		unsigned int     *Z8NumberOfStutterBurstsPerFrame,
5630 		double   *StutterPeriod,
5631 		bool  *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5632 {
5633 
5634 	bool FoundCriticalSurface = false;
5635 	unsigned int SwathSizeCriticalSurface = 0;
5636 	unsigned int LastChunkOfSwathSize;
5637 	unsigned int MissingPartOfLastSwathOfDETSize;
5638 	double LastZ8StutterPeriod = 0.0;
5639 	double LastStutterPeriod = 0.0;
5640 	unsigned int TotalNumberOfActiveOTG = 0;
5641 	double doublePixelClock;
5642 	unsigned int doubleHTotal;
5643 	unsigned int doubleVTotal;
5644 	bool SameTiming = true;
5645 	double DETBufferingTimeY;
5646 	double SwathWidthYCriticalSurface = 0.0;
5647 	double SwathHeightYCriticalSurface = 0.0;
5648 	double VActiveTimeCriticalSurface = 0.0;
5649 	double FrameTimeCriticalSurface = 0.0;
5650 	unsigned int BytePerPixelYCriticalSurface = 0;
5651 	double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5652 	unsigned int DETBufferSizeYCriticalSurface = 0;
5653 	double MinTTUVBlankCriticalSurface = 0.0;
5654 	unsigned int BlockWidth256BytesYCriticalSurface = 0;
5655 	bool doublePlaneCriticalSurface = 0;
5656 	bool doublePipeCriticalSurface = 0;
5657 	double TotalCompressedReadBandwidth;
5658 	double TotalRowReadBandwidth;
5659 	double AverageDCCCompressionRate;
5660 	double EffectiveCompressedBufferSize;
5661 	double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5662 	double StutterBurstTime;
5663 	unsigned int TotalActiveWriteback;
5664 	double LinesInDETY;
5665 	double LinesInDETYRoundedDownToSwath;
5666 	double MaximumEffectiveCompressionLuma;
5667 	double MaximumEffectiveCompressionChroma;
5668 	double TotalZeroSizeRequestReadBandwidth;
5669 	double TotalZeroSizeCompressedReadBandwidth;
5670 	double AverageDCCZeroSizeFraction;
5671 	double AverageZeroSizeCompressionRate;
5672 	unsigned int k;
5673 
5674 	TotalZeroSizeRequestReadBandwidth = 0;
5675 	TotalZeroSizeCompressedReadBandwidth = 0;
5676 	TotalRowReadBandwidth = 0;
5677 	TotalCompressedReadBandwidth = 0;
5678 
5679 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5680 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5681 			if (DCCEnable[k] == true) {
5682 				if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5683 						|| (!IsVertical(SourceRotation[k])
5684 								&& BlockHeight256BytesY[k] > SwathHeightY[k])
5685 						|| DCCYMaxUncompressedBlock[k] < 256) {
5686 					MaximumEffectiveCompressionLuma = 2;
5687 				} else {
5688 					MaximumEffectiveCompressionLuma = 4;
5689 				}
5690 				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5691 						+ ReadBandwidthSurfaceLuma[k]
5692 								/ dml_min(NetDCCRateLuma[k],
5693 										MaximumEffectiveCompressionLuma);
5694 #ifdef __DML_VBA_DEBUG__
5695 				dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5696 						__func__, k, ReadBandwidthSurfaceLuma[k]);
5697 				dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5698 						__func__, k, NetDCCRateLuma[k]);
5699 				dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5700 						__func__, k, MaximumEffectiveCompressionLuma);
5701 #endif
5702 				TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5703 						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5704 				TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5705 						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5706 								/ MaximumEffectiveCompressionLuma;
5707 
5708 				if (ReadBandwidthSurfaceChroma[k] > 0) {
5709 					if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5710 							|| (!IsVertical(SourceRotation[k])
5711 									&& BlockHeight256BytesC[k] > SwathHeightC[k])
5712 							|| DCCCMaxUncompressedBlock[k] < 256) {
5713 						MaximumEffectiveCompressionChroma = 2;
5714 					} else {
5715 						MaximumEffectiveCompressionChroma = 4;
5716 					}
5717 					TotalCompressedReadBandwidth =
5718 							TotalCompressedReadBandwidth
5719 							+ ReadBandwidthSurfaceChroma[k]
5720 							/ dml_min(NetDCCRateChroma[k],
5721 							MaximumEffectiveCompressionChroma);
5722 #ifdef __DML_VBA_DEBUG__
5723 					dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5724 							__func__, k, ReadBandwidthSurfaceChroma[k]);
5725 					dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5726 							__func__, k, NetDCCRateChroma[k]);
5727 					dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5728 							__func__, k, MaximumEffectiveCompressionChroma);
5729 #endif
5730 					TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5731 							+ ReadBandwidthSurfaceChroma[k]
5732 									* DCCFractionOfZeroSizeRequestsChroma[k];
5733 					TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5734 							+ ReadBandwidthSurfaceChroma[k]
5735 									* DCCFractionOfZeroSizeRequestsChroma[k]
5736 									/ MaximumEffectiveCompressionChroma;
5737 				}
5738 			} else {
5739 				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5740 						+ ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5741 			}
5742 			TotalRowReadBandwidth = TotalRowReadBandwidth
5743 					+ DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5744 		}
5745 	}
5746 
5747 	AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5748 	AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5749 
5750 #ifdef __DML_VBA_DEBUG__
5751 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5752 	dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5753 	dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5754 	dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5755 			__func__, TotalZeroSizeCompressedReadBandwidth);
5756 	dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5757 	dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5758 	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5759 	dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5760 	dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5761 	dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5762 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5763 #endif
5764 	if (AverageDCCZeroSizeFraction == 1) {
5765 		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5766 				/ TotalZeroSizeCompressedReadBandwidth;
5767 		EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5768 				* AverageZeroSizeCompressionRate
5769 				+ ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5770 						* AverageZeroSizeCompressionRate;
5771 	} else if (AverageDCCZeroSizeFraction > 0) {
5772 		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5773 				/ TotalZeroSizeCompressedReadBandwidth;
5774 		EffectiveCompressedBufferSize = dml_min(
5775 				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5776 				(double) MetaFIFOSizeInKEntries * 1024 * 64
5777 					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5778 					+ 1 / AverageDCCCompressionRate))
5779 					+ dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5780 					* AverageDCCCompressionRate,
5781 					((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5782 					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5783 
5784 #ifdef __DML_VBA_DEBUG__
5785 		dml_print("DML::%s: min 1 = %f\n", __func__,
5786 				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5787 		dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5788 				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5789 						AverageDCCCompressionRate));
5790 		dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5791 				CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5792 		dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5793 				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5794 #endif
5795 	} else {
5796 		EffectiveCompressedBufferSize = dml_min(
5797 				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5798 				(double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5799 				+ ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5800 						* AverageDCCCompressionRate;
5801 
5802 #ifdef __DML_VBA_DEBUG__
5803 		dml_print("DML::%s: min 1 = %f\n", __func__,
5804 				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5805 		dml_print("DML::%s: min 2 = %f\n", __func__,
5806 				MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5807 #endif
5808 	}
5809 
5810 #ifdef __DML_VBA_DEBUG__
5811 	dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5812 	dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5813 	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5814 #endif
5815 
5816 	*StutterPeriod = 0;
5817 
5818 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5819 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5820 			LinesInDETY = ((double) DETBufferSizeY[k]
5821 					+ (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5822 							* ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5823 					/ BytePerPixelDETY[k] / SwathWidthY[k];
5824 			LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5825 			DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5826 					/ VRatio[k];
5827 #ifdef __DML_VBA_DEBUG__
5828 			dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5829 			dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5830 			dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5831 			dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5832 					__func__, k, ReadBandwidthSurfaceLuma[k]);
5833 			dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5834 			dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5835 			dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5836 					__func__, k, LinesInDETYRoundedDownToSwath);
5837 			dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5838 			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5839 			dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5840 			dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5841 			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5842 #endif
5843 
5844 			if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5845 				bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5846 
5847 				FoundCriticalSurface = true;
5848 				*StutterPeriod = DETBufferingTimeY;
5849 				FrameTimeCriticalSurface = (
5850 						isInterlaceTiming ?
5851 								dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5852 						* (double) HTotal[k] / PixelClock[k];
5853 				VActiveTimeCriticalSurface = (
5854 						isInterlaceTiming ?
5855 								dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5856 						* (double) HTotal[k] / PixelClock[k];
5857 				BytePerPixelYCriticalSurface = BytePerPixelY[k];
5858 				SwathWidthYCriticalSurface = SwathWidthY[k];
5859 				SwathHeightYCriticalSurface = SwathHeightY[k];
5860 				BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5861 				LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5862 						- (LinesInDETY - LinesInDETYRoundedDownToSwath);
5863 				DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5864 				MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5865 				doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5866 				doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5867 
5868 #ifdef __DML_VBA_DEBUG__
5869 				dml_print("DML::%s: k=%0d, FoundCriticalSurface                = %d\n",
5870 						__func__, k, FoundCriticalSurface);
5871 				dml_print("DML::%s: k=%0d, StutterPeriod                       = %f\n",
5872 						__func__, k, *StutterPeriod);
5873 				dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface         = %f\n",
5874 						__func__, k, MinTTUVBlankCriticalSurface);
5875 				dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface            = %f\n",
5876 						__func__, k, FrameTimeCriticalSurface);
5877 				dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface          = %f\n",
5878 						__func__, k, VActiveTimeCriticalSurface);
5879 				dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface        = %d\n",
5880 						__func__, k, BytePerPixelYCriticalSurface);
5881 				dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface          = %f\n",
5882 						__func__, k, SwathWidthYCriticalSurface);
5883 				dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface         = %f\n",
5884 						__func__, k, SwathHeightYCriticalSurface);
5885 				dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface  = %d\n",
5886 						__func__, k, BlockWidth256BytesYCriticalSurface);
5887 				dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface          = %d\n",
5888 						__func__, k, doublePlaneCriticalSurface);
5889 				dml_print("DML::%s: k=%0d, doublePipeCriticalSurface           = %d\n",
5890 						__func__, k, doublePipeCriticalSurface);
5891 				dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5892 						__func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5893 #endif
5894 			}
5895 		}
5896 	}
5897 
5898 	PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5899 			EffectiveCompressedBufferSize);
5900 #ifdef __DML_VBA_DEBUG__
5901 	dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5902 	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5903 	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5904 			__func__, *StutterPeriod * TotalDataReadBandwidth);
5905 	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5906 	dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5907 			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5908 	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5909 	dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5910 	dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5911 	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5912 #endif
5913 
5914 	StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5915 			/ ReturnBW
5916 			+ (*StutterPeriod * TotalDataReadBandwidth
5917 					- PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5918 			+ *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5919 #ifdef __DML_VBA_DEBUG__
5920 	dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5921 			AverageDCCCompressionRate / ReturnBW);
5922 	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5923 			__func__, (*StutterPeriod * TotalDataReadBandwidth));
5924 	dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5925 			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5926 	dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5927 	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5928 #endif
5929 	StutterBurstTime = dml_max(StutterBurstTime,
5930 			LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5931 					* SwathWidthYCriticalSurface / ReturnBW);
5932 
5933 #ifdef __DML_VBA_DEBUG__
5934 	dml_print("DML::%s: Time to finish residue swath=%f\n",
5935 			__func__,
5936 			LinesToFinishSwathTransferStutterCriticalSurface *
5937 			BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5938 #endif
5939 
5940 	TotalActiveWriteback = 0;
5941 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5942 		if (WritebackEnable[k])
5943 			TotalActiveWriteback = TotalActiveWriteback + 1;
5944 	}
5945 
5946 	if (TotalActiveWriteback == 0) {
5947 #ifdef __DML_VBA_DEBUG__
5948 		dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5949 		dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5950 		dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5951 		dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5952 #endif
5953 		*StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5954 				1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5955 		*Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5956 				1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5957 		*NumberOfStutterBurstsPerFrame = (
5958 				*StutterEfficiencyNotIncludingVBlank > 0 ?
5959 						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5960 		*Z8NumberOfStutterBurstsPerFrame = (
5961 				*Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5962 						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5963 	} else {
5964 		*StutterEfficiencyNotIncludingVBlank = 0.;
5965 		*Z8StutterEfficiencyNotIncludingVBlank = 0.;
5966 		*NumberOfStutterBurstsPerFrame = 0;
5967 		*Z8NumberOfStutterBurstsPerFrame = 0;
5968 	}
5969 #ifdef __DML_VBA_DEBUG__
5970 	dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5971 	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5972 			__func__, *StutterEfficiencyNotIncludingVBlank);
5973 	dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5974 			__func__, *Z8StutterEfficiencyNotIncludingVBlank);
5975 	dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5976 	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5977 #endif
5978 
5979 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5980 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5981 			if (BlendingAndTiming[k] == k) {
5982 				if (TotalNumberOfActiveOTG == 0) {
5983 					doublePixelClock = PixelClock[k];
5984 					doubleHTotal = HTotal[k];
5985 					doubleVTotal = VTotal[k];
5986 				} else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
5987 						|| doubleVTotal != VTotal[k]) {
5988 					SameTiming = false;
5989 				}
5990 				TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
5991 			}
5992 		}
5993 	}
5994 
5995 	if (*StutterEfficiencyNotIncludingVBlank > 0) {
5996 		LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5997 
5998 		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
5999 				&& LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
6000 			*StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
6001 						+ StutterBurstTime * VActiveTimeCriticalSurface
6002 						/ *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6003 		} else {
6004 			*StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6005 		}
6006 	} else {
6007 		*StutterEfficiency = 0;
6008 	}
6009 
6010 	if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6011 		LastZ8StutterPeriod = VActiveTimeCriticalSurface
6012 				- (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6013 		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
6014 				MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
6015 			*Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
6016 				* VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6017 		} else {
6018 			*Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6019 		}
6020 	} else {
6021 		*Z8StutterEfficiency = 0.;
6022 	}
6023 
6024 #ifdef __DML_VBA_DEBUG__
6025 	dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6026 	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6027 	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6028 	dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6029 	dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6030 	dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6031 	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
6032 			__func__, *StutterEfficiencyNotIncludingVBlank);
6033 	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6034 #endif
6035 
6036 	SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
6037 			* dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
6038 	LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6039 	MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
6040 			- DETBufferSizeYCriticalSurface;
6041 
6042 	*DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6043 			&& doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6044 			&& (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6045 			&& (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6046 
6047 #ifdef __DML_VBA_DEBUG__
6048 	dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6049 	dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6050 	dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6051 	dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6052 #endif
6053 } // CalculateStutterEfficiency
6054 
/*
 * Derive the total DET budget, the nominal per-DPP DET size and the minimum
 * compressed buffer size from the return-buffer configuration.
 *
 * Outputs:
 *   *MaxTotalDETInKByte             - 4/5 of (ConfigReturnBufferSizeInKByte +
 *                                     ROBBufferSizeInKByte), passed through
 *                                     dml_ceil(..., 64).
 *   *nomDETInKByte                  - *MaxTotalDETInKByte / MaxNumDPP passed
 *                                     through dml_floor(..., 64), unless the
 *                                     override is enabled, in which case it is
 *                                     nomDETInKByteOverrideValue.
 *   *MinCompressedBufferSizeInKByte - ConfigReturnBufferSizeInKByte minus
 *                                     *MaxTotalDETInKByte.
 *
 * NOTE(review): the subtraction is done in unsigned arithmetic and MaxNumDPP
 * is used as a divisor; neither is range-checked here, so callers presumably
 * guarantee sane values - confirm.
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int    ConfigReturnBufferSizeInKByte,
		unsigned int    ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue,  // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	double return_plus_rob_kbytes = (double) ConfigReturnBufferSizeInKByte
			+ (double) ROBBufferSizeInKByte;

	*MaxTotalDETInKByte = dml_ceil(return_plus_rob_kbytes * 4.0 / 5.0, 64);
	*nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* Nothing more to do unless the caller asked for an explicit DET size. */
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} // CalculateMaxDETAndMinCompressedBufferSize
6091 
/*
 * Check whether the summed bandwidth demand of all surfaces fits in ReturnBW.
 *
 * For each of the NumberOfActiveSurfaces entries the demand is
 *   ReadBandwidthLuma * UrgentBurstFactorLuma
 * + ReadBandwidthChroma * UrgentBurstFactorChroma
 * + cursor_bw * UrgentBurstFactorCursor
 * + NumberOfDPP * meta_row_bandwidth
 * + NumberOfDPP * dpte_row_bandwidth
 *
 * Returns true only when the accumulated demand is <= ReturnBW and no entry of
 * NotUrgentLatencyHiding[] is set.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int surf;
	bool hiding_shortfall = false;
	bool fits;
	double demand = 0;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		if (NotUrgentLatencyHiding[surf])
			hiding_shortfall = true;

		/* Terms are added strictly left to right onto the running total. */
		demand = demand
				+ ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf]
				+ NumberOfDPP[surf] * meta_row_bandwidth[surf]
				+ NumberOfDPP[surf] * dpte_row_bandwidth[surf];
	}

	fits = (demand <= ReturnBW) && !hiding_shortfall;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding        = %d\n", __func__, hiding_shortfall);
	dml_print("DML::%s: VActiveBandwith                     = %f\n", __func__, demand);
	dml_print("DML::%s: ReturnBW                            = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, fits);
#endif
	return fits;
}
6130 
/*
 * Accumulate the per-surface bandwidth demand for the prefetch case and
 * compare it with ReturnBW.
 *
 * For every surface the contribution is dml_max3() of:
 *   - NumberOfDPP * prefetch_vmrow_bw
 *   - the active demand: luma, chroma and cursor read bandwidth scaled by
 *     their urgent burst factors, plus NumberOfDPP * (meta + dpte row bandwidth)
 *   - the prefetch demand: NumberOfDPP * (luma + chroma prefetch bandwidth
 *     scaled by their "Pre" burst factors) plus the scaled cursor_bw_pre
 *
 * Outputs:
 *   *PrefetchBandwidth         - sum of the per-surface contributions
 *   *PrefetchBandwidthSupport  - sum <= ReturnBW and no NotUrgentLatencyHiding[]
 *                                entry set
 *   *FractionOfUrgentBandwidth - sum / ReturnBW
 */
void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		double cursor_bw_pre[],
		double prefetch_vmrow_bw[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[],

		/* output */
		double  *PrefetchBandwidth,
		double  *FractionOfUrgentBandwidth,
		bool *PrefetchBandwidthSupport)
{
	unsigned int surf;
	bool hiding_shortfall = false;
	double vm_row_demand;
	double active_demand;
	double prefetch_demand;

	*PrefetchBandwidth = 0;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		if (NotUrgentLatencyHiding[surf])
			hiding_shortfall = true;

		vm_row_demand = NumberOfDPP[surf] * prefetch_vmrow_bw[surf];

		active_demand = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf]
				+ NumberOfDPP[surf] * (meta_row_bandwidth[surf] + dpte_row_bandwidth[surf]);

		prefetch_demand = NumberOfDPP[surf]
				* (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
						+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
				+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		*PrefetchBandwidth += dml_max3(vm_row_demand, active_demand, prefetch_demand);
	}

	*PrefetchBandwidthSupport = (*PrefetchBandwidth <= ReturnBW) && !hiding_shortfall;
	*FractionOfUrgentBandwidth = *PrefetchBandwidth / ReturnBW;
}
6174 
/*
 * Return what is left of ReturnBW after subtracting, for every surface, the
 * dml_max() of:
 *   - the active demand: luma, chroma and cursor read bandwidth, each scaled
 *     by its urgent burst factor
 *   - the prefetch demand: NumberOfDPP * (luma + chroma prefetch bandwidth
 *     scaled by their "Pre" burst factors) plus the scaled cursor_bw_pre
 *
 * The result is not clamped by this function.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	unsigned int surf;
	double remaining_bw = ReturnBW;
	double active_demand;
	double prefetch_demand;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		active_demand = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf];

		prefetch_demand = NumberOfDPP[surf]
				* (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
						+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
				+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		remaining_bw -= dml_max(active_demand, prefetch_demand);
	}

	return remaining_bw;
}
6201 
6202 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6203 		double ReturnBW,
6204 		enum immediate_flip_requirement ImmediateFlipRequirement[],
6205 		double final_flip_bw[],
6206 		double ReadBandwidthLuma[],
6207 		double ReadBandwidthChroma[],
6208 		double PrefetchBandwidthLuma[],
6209 		double PrefetchBandwidthChroma[],
6210 		double cursor_bw[],
6211 		double meta_row_bandwidth[],
6212 		double dpte_row_bandwidth[],
6213 		double cursor_bw_pre[],
6214 		double prefetch_vmrow_bw[],
6215 		unsigned int NumberOfDPP[],
6216 		double UrgentBurstFactorLuma[],
6217 		double UrgentBurstFactorChroma[],
6218 		double UrgentBurstFactorCursor[],
6219 		double UrgentBurstFactorLumaPre[],
6220 		double UrgentBurstFactorChromaPre[],
6221 		double UrgentBurstFactorCursorPre[],
6222 
6223 		/* output */
6224 		double  *TotalBandwidth,
6225 		double  *FractionOfUrgentBandwidth,
6226 		bool *ImmediateFlipBandwidthSupport)
6227 {
6228 	unsigned int k;
6229 	*TotalBandwidth = 0;
6230 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6231 		if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6232 			*TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6233 					NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6234 					NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6235 		} else {
6236 			*TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6237 					NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6238 					NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6239 		}
6240 	}
6241 	*ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6242 	*FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6243 }
6244