1 /*
2  * Copyright 2022 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: AMD
23  *
24  */
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
29 
30 #define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096
31 
32 unsigned int dml32_dscceComputeDelay(
33 		unsigned int bpc,
34 		double BPP,
35 		unsigned int sliceWidth,
36 		unsigned int numSlices,
37 		enum output_format_class pixelFormat,
38 		enum output_encoder_class Output)
39 {
40 	// valid bpc         = source bits per component in the set of {8, 10, 12}
41 	// valid bpp         = increments of 1/16 of a bit
42 	//                    min = 6/7/8 in N420/N422/444, respectively
43 	//                    max = such that compression is 1:1
44 	//valid sliceWidth  = number of pixels per slice line,
45 	//	must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
46 	//valid numSlices   = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
47 	//valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
48 
49 	// fixed value
50 	unsigned int rcModelSize = 8192;
51 
52 	// N422/N420 operate at 2 pixels per clock
53 	unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
54 	Delay, pixels;
55 
56 	if (pixelFormat == dm_420)
57 		pixelsPerClock = 2;
58 	else if (pixelFormat == dm_n422)
59 		pixelsPerClock = 2;
60 	// #all other modes operate at 1 pixel per clock
61 	else
62 		pixelsPerClock = 1;
63 
64 	//initial transmit delay as per PPS
65 	initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
66 
67 	//compute ssm delay
68 	if (bpc == 8)
69 		D = 81;
70 	else if (bpc == 10)
71 		D = 89;
72 	else
73 		D = 113;
74 
75 	//divide by pixel per cycle to compute slice width as seen by DSC
76 	w = sliceWidth / pixelsPerClock;
77 
78 	//422 mode has an additional cycle of delay
79 	if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
80 		s = 0;
81 	else
82 		s = 1;
83 
84 	//main calculation for the dscce
85 	ix = initalXmitDelay + 45;
86 	wx = (w + 2) / 3;
87 	p = 3 * wx - w;
88 	l0 = ix / w;
89 	a = ix + p * l0;
90 	ax = (a + 2) / 3 + D + 6 + 1;
91 	L = (ax + wx - 1) / wx;
92 	if ((ix % w) == 0 && p != 0)
93 		lstall = 1;
94 	else
95 		lstall = 0;
96 	Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
97 
98 	//dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
99 	pixels = Delay * 3 * pixelsPerClock;
100 
101 #ifdef __DML_VBA_DEBUG__
102 	dml_print("DML::%s: bpc: %d\n", __func__, bpc);
103 	dml_print("DML::%s: BPP: %f\n", __func__, BPP);
104 	dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
105 	dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
106 	dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
107 	dml_print("DML::%s: Output: %d\n", __func__, Output);
108 	dml_print("DML::%s: pixels: %d\n", __func__, pixels);
109 #endif
110 
111 	return pixels;
112 }
113 
114 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
115 {
116 	unsigned int Delay = 0;
117 
118 	if (pixelFormat == dm_420) {
119 		//   sfr
120 		Delay = Delay + 2;
121 		//   dsccif
122 		Delay = Delay + 0;
123 		//   dscc - input deserializer
124 		Delay = Delay + 3;
125 		//   dscc gets pixels every other cycle
126 		Delay = Delay + 2;
127 		//   dscc - input cdc fifo
128 		Delay = Delay + 12;
129 		//   dscc gets pixels every other cycle
130 		Delay = Delay + 13;
131 		//   dscc - cdc uncertainty
132 		Delay = Delay + 2;
133 		//   dscc - output cdc fifo
134 		Delay = Delay + 7;
135 		//   dscc gets pixels every other cycle
136 		Delay = Delay + 3;
137 		//   dscc - cdc uncertainty
138 		Delay = Delay + 2;
139 		//   dscc - output serializer
140 		Delay = Delay + 1;
141 		//   sft
142 		Delay = Delay + 1;
143 	} else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
144 		//   sfr
145 		Delay = Delay + 2;
146 		//   dsccif
147 		Delay = Delay + 1;
148 		//   dscc - input deserializer
149 		Delay = Delay + 5;
150 		//  dscc - input cdc fifo
151 		Delay = Delay + 25;
152 		//   dscc - cdc uncertainty
153 		Delay = Delay + 2;
154 		//   dscc - output cdc fifo
155 		Delay = Delay + 10;
156 		//   dscc - cdc uncertainty
157 		Delay = Delay + 2;
158 		//   dscc - output serializer
159 		Delay = Delay + 1;
160 		//   sft
161 		Delay = Delay + 1;
162 	} else {
163 		//   sfr
164 		Delay = Delay + 2;
165 		//   dsccif
166 		Delay = Delay + 0;
167 		//   dscc - input deserializer
168 		Delay = Delay + 3;
169 		//   dscc - input cdc fifo
170 		Delay = Delay + 12;
171 		//   dscc - cdc uncertainty
172 		Delay = Delay + 2;
173 		//   dscc - output cdc fifo
174 		Delay = Delay + 7;
175 		//   dscc - output serializer
176 		Delay = Delay + 1;
177 		//   dscc - cdc uncertainty
178 		Delay = Delay + 2;
179 		//   sft
180 		Delay = Delay + 1;
181 	}
182 
183 	return Delay;
184 }
185 
186 
187 bool IsVertical(enum dm_rotation_angle Scan)
188 {
189 	bool is_vert = false;
190 
191 	if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
192 		is_vert = true;
193 	else
194 		is_vert = false;
195 	return is_vert;
196 }
197 
198 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
199 		double HRatio,
200 		double HRatioChroma,
201 		double VRatio,
202 		double VRatioChroma,
203 		double MaxDCHUBToPSCLThroughput,
204 		double MaxPSCLToLBThroughput,
205 		double PixelClock,
206 		enum source_format_class SourcePixelFormat,
207 		unsigned int HTaps,
208 		unsigned int HTapsChroma,
209 		unsigned int VTaps,
210 		unsigned int VTapsChroma,
211 
212 		/* output */
213 		double *PSCL_THROUGHPUT,
214 		double *PSCL_THROUGHPUT_CHROMA,
215 		double *DPPCLKUsingSingleDPP)
216 {
217 	double DPPCLKUsingSingleDPPLuma;
218 	double DPPCLKUsingSingleDPPChroma;
219 
220 	if (HRatio > 1) {
221 		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
222 				dml_ceil((double) HTaps / 6.0, 1.0));
223 	} else {
224 		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
225 	}
226 
227 	DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
228 			*PSCL_THROUGHPUT, 1);
229 
230 	if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
231 		DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
232 
233 	if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
234 			SourcePixelFormat != dm_rgbe_alpha)) {
235 		*PSCL_THROUGHPUT_CHROMA = 0;
236 		*DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
237 	} else {
238 		if (HRatioChroma > 1) {
239 			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
240 					HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
241 		} else {
242 			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
243 		}
244 		DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
245 				HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
246 		if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
247 			DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
248 		*DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
249 	}
250 }
251 
252 void dml32_CalculateBytePerPixelAndBlockSizes(
253 		enum source_format_class SourcePixelFormat,
254 		enum dm_swizzle_mode SurfaceTiling,
255 
256 		/* Output */
257 		unsigned int *BytePerPixelY,
258 		unsigned int *BytePerPixelC,
259 		double  *BytePerPixelDETY,
260 		double  *BytePerPixelDETC,
261 		unsigned int *BlockHeight256BytesY,
262 		unsigned int *BlockHeight256BytesC,
263 		unsigned int *BlockWidth256BytesY,
264 		unsigned int *BlockWidth256BytesC,
265 		unsigned int *MacroTileHeightY,
266 		unsigned int *MacroTileHeightC,
267 		unsigned int *MacroTileWidthY,
268 		unsigned int *MacroTileWidthC)
269 {
270 	if (SourcePixelFormat == dm_444_64) {
271 		*BytePerPixelDETY = 8;
272 		*BytePerPixelDETC = 0;
273 		*BytePerPixelY = 8;
274 		*BytePerPixelC = 0;
275 	} else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
276 		*BytePerPixelDETY = 4;
277 		*BytePerPixelDETC = 0;
278 		*BytePerPixelY = 4;
279 		*BytePerPixelC = 0;
280 	} else if (SourcePixelFormat == dm_444_16) {
281 		*BytePerPixelDETY = 2;
282 		*BytePerPixelDETC = 0;
283 		*BytePerPixelY = 2;
284 		*BytePerPixelC = 0;
285 	} else if (SourcePixelFormat == dm_444_8) {
286 		*BytePerPixelDETY = 1;
287 		*BytePerPixelDETC = 0;
288 		*BytePerPixelY = 1;
289 		*BytePerPixelC = 0;
290 	} else if (SourcePixelFormat == dm_rgbe_alpha) {
291 		*BytePerPixelDETY = 4;
292 		*BytePerPixelDETC = 1;
293 		*BytePerPixelY = 4;
294 		*BytePerPixelC = 1;
295 	} else if (SourcePixelFormat == dm_420_8) {
296 		*BytePerPixelDETY = 1;
297 		*BytePerPixelDETC = 2;
298 		*BytePerPixelY = 1;
299 		*BytePerPixelC = 2;
300 	} else if (SourcePixelFormat == dm_420_12) {
301 		*BytePerPixelDETY = 2;
302 		*BytePerPixelDETC = 4;
303 		*BytePerPixelY = 2;
304 		*BytePerPixelC = 4;
305 	} else {
306 		*BytePerPixelDETY = 4.0 / 3;
307 		*BytePerPixelDETC = 8.0 / 3;
308 		*BytePerPixelY = 2;
309 		*BytePerPixelC = 4;
310 	}
311 #ifdef __DML_VBA_DEBUG__
312 	dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
313 	dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
314 	dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
315 	dml_print("DML::%s: BytePerPixelY    = %d\n", __func__, *BytePerPixelY);
316 	dml_print("DML::%s: BytePerPixelC    = %d\n", __func__, *BytePerPixelC);
317 #endif
318 	if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
319 			|| SourcePixelFormat == dm_444_16
320 			|| SourcePixelFormat == dm_444_8
321 			|| SourcePixelFormat == dm_mono_16
322 			|| SourcePixelFormat == dm_mono_8
323 			|| SourcePixelFormat == dm_rgbe)) {
324 		if (SurfaceTiling == dm_sw_linear)
325 			*BlockHeight256BytesY = 1;
326 		else if (SourcePixelFormat == dm_444_64)
327 			*BlockHeight256BytesY = 4;
328 		else if (SourcePixelFormat == dm_444_8)
329 			*BlockHeight256BytesY = 16;
330 		else
331 			*BlockHeight256BytesY = 8;
332 
333 		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
334 		*BlockHeight256BytesC = 0;
335 		*BlockWidth256BytesC = 0;
336 	} else {
337 		if (SurfaceTiling == dm_sw_linear) {
338 			*BlockHeight256BytesY = 1;
339 			*BlockHeight256BytesC = 1;
340 		} else if (SourcePixelFormat == dm_rgbe_alpha) {
341 			*BlockHeight256BytesY = 8;
342 			*BlockHeight256BytesC = 16;
343 		} else if (SourcePixelFormat == dm_420_8) {
344 			*BlockHeight256BytesY = 16;
345 			*BlockHeight256BytesC = 8;
346 		} else {
347 			*BlockHeight256BytesY = 8;
348 			*BlockHeight256BytesC = 8;
349 		}
350 		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
351 		*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
352 	}
353 #ifdef __DML_VBA_DEBUG__
354 	dml_print("DML::%s: BlockWidth256BytesY  = %d\n", __func__, *BlockWidth256BytesY);
355 	dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
356 	dml_print("DML::%s: BlockWidth256BytesC  = %d\n", __func__, *BlockWidth256BytesC);
357 	dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
358 #endif
359 
360 	if (SurfaceTiling == dm_sw_linear) {
361 		*MacroTileHeightY = *BlockHeight256BytesY;
362 		*MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
363 		*MacroTileHeightC = *BlockHeight256BytesC;
364 		if (*MacroTileHeightC == 0)
365 			*MacroTileWidthC = 0;
366 		else
367 			*MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
368 	} else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
369 			SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
370 		*MacroTileHeightY = 16 * *BlockHeight256BytesY;
371 		*MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
372 		*MacroTileHeightC = 16 * *BlockHeight256BytesC;
373 		if (*MacroTileHeightC == 0)
374 			*MacroTileWidthC = 0;
375 		else
376 			*MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
377 	} else {
378 		*MacroTileHeightY = 32 * *BlockHeight256BytesY;
379 		*MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
380 		*MacroTileHeightC = 32 * *BlockHeight256BytesC;
381 		if (*MacroTileHeightC == 0)
382 			*MacroTileWidthC = 0;
383 		else
384 			*MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385 	}
386 
387 #ifdef __DML_VBA_DEBUG__
388 	dml_print("DML::%s: MacroTileWidthY  = %d\n", __func__, *MacroTileWidthY);
389 	dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
390 	dml_print("DML::%s: MacroTileWidthC  = %d\n", __func__, *MacroTileWidthC);
391 	dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
392 #endif
393 } // CalculateBytePerPixelAndBlockSizes
394 
395 void dml32_CalculateSwathAndDETConfiguration(
396 		unsigned int DETSizeOverride[],
397 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
398 		unsigned int ConfigReturnBufferSizeInKByte,
399 		unsigned int MaxTotalDETInKByte,
400 		unsigned int MinCompressedBufferSizeInKByte,
401 		double ForceSingleDPP,
402 		unsigned int NumberOfActiveSurfaces,
403 		unsigned int nomDETInKByte,
404 		enum unbounded_requesting_policy UseUnboundedRequestingFinal,
405 		bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
406 		unsigned int PixelChunkSizeKBytes,
407 		unsigned int ROBSizeKBytes,
408 		unsigned int CompressedBufferSegmentSizeInkByteFinal,
409 		enum output_encoder_class Output[],
410 		double ReadBandwidthLuma[],
411 		double ReadBandwidthChroma[],
412 		double MaximumSwathWidthLuma[],
413 		double MaximumSwathWidthChroma[],
414 		enum dm_rotation_angle SourceRotation[],
415 		bool ViewportStationary[],
416 		enum source_format_class SourcePixelFormat[],
417 		enum dm_swizzle_mode SurfaceTiling[],
418 		unsigned int ViewportWidth[],
419 		unsigned int ViewportHeight[],
420 		unsigned int ViewportXStart[],
421 		unsigned int ViewportYStart[],
422 		unsigned int ViewportXStartC[],
423 		unsigned int ViewportYStartC[],
424 		unsigned int SurfaceWidthY[],
425 		unsigned int SurfaceWidthC[],
426 		unsigned int SurfaceHeightY[],
427 		unsigned int SurfaceHeightC[],
428 		unsigned int Read256BytesBlockHeightY[],
429 		unsigned int Read256BytesBlockHeightC[],
430 		unsigned int Read256BytesBlockWidthY[],
431 		unsigned int Read256BytesBlockWidthC[],
432 		enum odm_combine_mode ODMMode[],
433 		unsigned int BlendingAndTiming[],
434 		unsigned int BytePerPixY[],
435 		unsigned int BytePerPixC[],
436 		double BytePerPixDETY[],
437 		double BytePerPixDETC[],
438 		unsigned int HActive[],
439 		double HRatio[],
440 		double HRatioChroma[],
441 		unsigned int DPPPerSurface[],
442 
443 		/* Output */
444 		unsigned int swath_width_luma_ub[],
445 		unsigned int swath_width_chroma_ub[],
446 		double SwathWidth[],
447 		double SwathWidthChroma[],
448 		unsigned int SwathHeightY[],
449 		unsigned int SwathHeightC[],
450 		unsigned int DETBufferSizeInKByte[],
451 		unsigned int DETBufferSizeY[],
452 		unsigned int DETBufferSizeC[],
453 		bool *UnboundedRequestEnabled,
454 		unsigned int *CompressedBufferSizeInkByte,
455 		unsigned int *CompBufReservedSpaceKBytes,
456 		bool *CompBufReservedSpaceNeedAdjustment,
457 		bool ViewportSizeSupportPerSurface[],
458 		bool *ViewportSizeSupport)
459 {
460 	unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
461 	unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
462 	unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
463 	unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
464 	unsigned int RoundedUpSwathSizeBytesY;
465 	unsigned int RoundedUpSwathSizeBytesC;
466 	double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
467 	double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
468 	unsigned int k;
469 	unsigned int TotalActiveDPP = 0;
470 	bool NoChromaSurfaces = true;
471 	unsigned int DETBufferSizeInKByteForSwathCalculation;
472 
473 #ifdef __DML_VBA_DEBUG__
474 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
475 	dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
476 	dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
477 #endif
478 	dml32_CalculateSwathWidth(ForceSingleDPP,
479 			NumberOfActiveSurfaces,
480 			SourcePixelFormat,
481 			SourceRotation,
482 			ViewportStationary,
483 			ViewportWidth,
484 			ViewportHeight,
485 			ViewportXStart,
486 			ViewportYStart,
487 			ViewportXStartC,
488 			ViewportYStartC,
489 			SurfaceWidthY,
490 			SurfaceWidthC,
491 			SurfaceHeightY,
492 			SurfaceHeightC,
493 			ODMMode,
494 			BytePerPixY,
495 			BytePerPixC,
496 			Read256BytesBlockHeightY,
497 			Read256BytesBlockHeightC,
498 			Read256BytesBlockWidthY,
499 			Read256BytesBlockWidthC,
500 			BlendingAndTiming,
501 			HActive,
502 			HRatio,
503 			DPPPerSurface,
504 
505 			/* Output */
506 			SwathWidthdoubleDPP,
507 			SwathWidthdoubleDPPChroma,
508 			SwathWidth,
509 			SwathWidthChroma,
510 			MaximumSwathHeightY,
511 			MaximumSwathHeightC,
512 			swath_width_luma_ub,
513 			swath_width_chroma_ub);
514 
515 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
516 		RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
517 		RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
518 #ifdef __DML_VBA_DEBUG__
519 		dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
520 		dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
521 		dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
522 		dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
523 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
524 				RoundedUpMaxSwathSizeBytesY[k]);
525 		dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
526 		dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
527 		dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
528 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
529 				RoundedUpMaxSwathSizeBytesC[k]);
530 #endif
531 
532 		if (SourcePixelFormat[k] == dm_420_10) {
533 			RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
534 			RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
535 		}
536 	}
537 
538 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
539 		TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
540 		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
541 				SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
542 			NoChromaSurfaces = false;
543 		}
544 	}
545 
546 	// By default, just set the reserved space to 2 pixel chunks size
547 	*CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
548 
549 	// if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
550 	// - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
551 	// - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
552 	*CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
553 
554 	if (*CompBufReservedSpaceNeedAdjustment == 1) {
555 		*CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
556 	}
557 
558 	#ifdef __DML_VBA_DEBUG__
559 		dml_print("DML::%s: CompBufReservedSpaceKBytes          = %d\n",  __func__, *CompBufReservedSpaceKBytes);
560 		dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, *CompBufReservedSpaceNeedAdjustment);
561 	#endif
562 
563 	*UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
564 
565 	dml32_CalculateDETBufferSize(DETSizeOverride,
566 			UseMALLForPStateChange,
567 			ForceSingleDPP,
568 			NumberOfActiveSurfaces,
569 			*UnboundedRequestEnabled,
570 			nomDETInKByte,
571 			MaxTotalDETInKByte,
572 			ConfigReturnBufferSizeInKByte,
573 			MinCompressedBufferSizeInKByte,
574 			CompressedBufferSegmentSizeInkByteFinal,
575 			SourcePixelFormat,
576 			ReadBandwidthLuma,
577 			ReadBandwidthChroma,
578 			RoundedUpMaxSwathSizeBytesY,
579 			RoundedUpMaxSwathSizeBytesC,
580 			DPPPerSurface,
581 
582 			/* Output */
583 			DETBufferSizeInKByte,    // per hubp pipe
584 			CompressedBufferSizeInkByte);
585 
586 #ifdef __DML_VBA_DEBUG__
587 	dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
588 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
589 	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
590 	dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
591 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
592 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
593 #endif
594 
595 	*ViewportSizeSupport = true;
596 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
597 
598 		DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
599 				dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
600 #ifdef __DML_VBA_DEBUG__
601 		dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
602 				DETBufferSizeInKByteForSwathCalculation);
603 #endif
604 
605 		if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
606 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
607 			SwathHeightY[k] = MaximumSwathHeightY[k];
608 			SwathHeightC[k] = MaximumSwathHeightC[k];
609 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
610 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
611 		} else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
612 				RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
613 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
614 			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
615 			SwathHeightC[k] = MaximumSwathHeightC[k];
616 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
617 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
618 		} else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
619 				RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
620 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
621 			SwathHeightY[k] = MaximumSwathHeightY[k];
622 			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
623 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
624 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
625 		} else {
626 			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
627 			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
628 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
629 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
630 		}
631 
632 		if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
633 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
634 				|| SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
635 						SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
636 			*ViewportSizeSupport = false;
637 			ViewportSizeSupportPerSurface[k] = false;
638 		} else {
639 			ViewportSizeSupportPerSurface[k] = true;
640 		}
641 
642 		if (SwathHeightC[k] == 0) {
643 #ifdef __DML_VBA_DEBUG__
644 			dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
645 #endif
646 			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
647 			DETBufferSizeC[k] = 0;
648 		} else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
649 #ifdef __DML_VBA_DEBUG__
650 			dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
651 #endif
652 			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
653 			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
654 		} else {
655 #ifdef __DML_VBA_DEBUG__
656 			dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
657 #endif
658 			DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
659 			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
660 		}
661 
662 #ifdef __DML_VBA_DEBUG__
663 		dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
664 		dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
665 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
666 				k, RoundedUpMaxSwathSizeBytesY[k]);
667 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
668 				k, RoundedUpMaxSwathSizeBytesC[k]);
669 		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
670 		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
671 		dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
672 		dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
673 		dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
674 		dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
675 				ViewportSizeSupportPerSurface[k]);
676 #endif
677 
678 	}
679 } // CalculateSwathAndDETConfiguration
680 
681 void dml32_CalculateSwathWidth(
682 		bool				ForceSingleDPP,
683 		unsigned int			NumberOfActiveSurfaces,
684 		enum source_format_class	SourcePixelFormat[],
685 		enum dm_rotation_angle		SourceRotation[],
686 		bool				ViewportStationary[],
687 		unsigned int			ViewportWidth[],
688 		unsigned int			ViewportHeight[],
689 		unsigned int			ViewportXStart[],
690 		unsigned int			ViewportYStart[],
691 		unsigned int			ViewportXStartC[],
692 		unsigned int			ViewportYStartC[],
693 		unsigned int			SurfaceWidthY[],
694 		unsigned int			SurfaceWidthC[],
695 		unsigned int			SurfaceHeightY[],
696 		unsigned int			SurfaceHeightC[],
697 		enum odm_combine_mode		ODMMode[],
698 		unsigned int			BytePerPixY[],
699 		unsigned int			BytePerPixC[],
700 		unsigned int			Read256BytesBlockHeightY[],
701 		unsigned int			Read256BytesBlockHeightC[],
702 		unsigned int			Read256BytesBlockWidthY[],
703 		unsigned int			Read256BytesBlockWidthC[],
704 		unsigned int			BlendingAndTiming[],
705 		unsigned int			HActive[],
706 		double				HRatio[],
707 		unsigned int			DPPPerSurface[],
708 
709 		/* Output */
710 		double				SwathWidthdoubleDPPY[],
711 		double				SwathWidthdoubleDPPC[],
712 		double				SwathWidthY[], // per-pipe
713 		double				SwathWidthC[], // per-pipe
714 		unsigned int			MaximumSwathHeightY[],
715 		unsigned int			MaximumSwathHeightC[],
716 		unsigned int			swath_width_luma_ub[], // per-pipe
717 		unsigned int			swath_width_chroma_ub[]) // per-pipe
718 {
719 	unsigned int k, j;
720 	enum odm_combine_mode MainSurfaceODMMode;
721 
722 	unsigned int surface_width_ub_l;
723 	unsigned int surface_height_ub_l;
724 	unsigned int surface_width_ub_c = 0;
725 	unsigned int surface_height_ub_c = 0;
726 
727 #ifdef __DML_VBA_DEBUG__
728 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
729 	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
730 #endif
731 
732 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
733 		if (!IsVertical(SourceRotation[k]))
734 			SwathWidthdoubleDPPY[k] = ViewportWidth[k];
735 		else
736 			SwathWidthdoubleDPPY[k] = ViewportHeight[k];
737 
738 #ifdef __DML_VBA_DEBUG__
739 		dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
740 		dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
741 #endif
742 
743 		MainSurfaceODMMode = ODMMode[k];
744 		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
745 			if (BlendingAndTiming[k] == j)
746 				MainSurfaceODMMode = ODMMode[j];
747 		}
748 
749 		if (ForceSingleDPP) {
750 			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
751 		} else {
752 			if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
753 				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
754 						dml_round(HActive[k] / 4.0 * HRatio[k]));
755 			} else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
756 				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
757 						dml_round(HActive[k] / 2.0 * HRatio[k]));
758 			} else if (DPPPerSurface[k] == 2) {
759 				SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
760 			} else {
761 				SwathWidthY[k] = SwathWidthdoubleDPPY[k];
762 			}
763 		}
764 
765 #ifdef __DML_VBA_DEBUG__
766 		dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
767 		dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
768 		dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
769 		dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
770 		dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
771 #endif
772 
773 		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
774 				SourcePixelFormat[k] == dm_420_12) {
775 			SwathWidthC[k] = SwathWidthY[k] / 2;
776 			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
777 		} else {
778 			SwathWidthC[k] = SwathWidthY[k];
779 			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
780 		}
781 
782 		if (ForceSingleDPP == true) {
783 			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
784 			SwathWidthC[k] = SwathWidthdoubleDPPC[k];
785 		}
786 
787 		surface_width_ub_l  = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
788 		surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
789 
790 		if (!IsVertical(SourceRotation[k])) {
791 			MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
792 			MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
793 			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
794 				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
795 						dml_floor(ViewportXStart[k] +
796 								SwathWidthY[k] +
797 								Read256BytesBlockWidthY[k] - 1,
798 								Read256BytesBlockWidthY[k]) -
799 								dml_floor(ViewportXStart[k],
800 								Read256BytesBlockWidthY[k]));
801 			} else {
802 				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
803 						dml_ceil(SwathWidthY[k] - 1,
804 								Read256BytesBlockWidthY[k]) +
805 								Read256BytesBlockWidthY[k]);
806 			}
807 			if (BytePerPixC[k] > 0) {
808 				surface_width_ub_c  = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
809 				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
810 					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
811 							dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
812 									Read256BytesBlockWidthC[k] - 1,
813 									Read256BytesBlockWidthC[k]) -
814 									dml_floor(ViewportXStartC[k],
815 									Read256BytesBlockWidthC[k]));
816 				} else {
817 					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
818 							dml_ceil(SwathWidthC[k] - 1,
819 								Read256BytesBlockWidthC[k]) +
820 								Read256BytesBlockWidthC[k]);
821 				}
822 			} else {
823 				swath_width_chroma_ub[k] = 0;
824 			}
825 		} else {
826 			MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
827 			MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
828 
829 			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
830 				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
831 						SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
832 						Read256BytesBlockHeightY[k]) -
833 						dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
834 			} else {
835 				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
836 						Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
837 			}
838 			if (BytePerPixC[k] > 0) {
839 				surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
840 				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
841 					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
842 							dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
843 									Read256BytesBlockHeightC[k] - 1,
844 									Read256BytesBlockHeightC[k]) -
845 									dml_floor(ViewportYStartC[k],
846 											Read256BytesBlockHeightC[k]));
847 				} else {
848 					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
849 							dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
850 							Read256BytesBlockHeightC[k]);
851 				}
852 			} else {
853 				swath_width_chroma_ub[k] = 0;
854 			}
855 		}
856 
857 #ifdef __DML_VBA_DEBUG__
858 		dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
859 		dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
860 		dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
861 		dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
862 		dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
863 		dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
864 		dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
865 		dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
866 		dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
867 		dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
868 		dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
869 		dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
870 		dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
871 		dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
872 #endif
873 
874 	}
875 } // CalculateSwathWidth
876 
877 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
878 			unsigned int TotalNumberOfActiveDPP,
879 			bool NoChroma,
880 			enum output_encoder_class Output,
881 			enum dm_swizzle_mode SurfaceTiling,
882 			bool CompBufReservedSpaceNeedAdjustment,
883 			bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
884 {
885 	bool ret_val = false;
886 
887 	ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
888 			TotalNumberOfActiveDPP == 1 && NoChroma);
889 	if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
890 		ret_val = false;
891 
892 	if (SurfaceTiling == dm_sw_linear)
893 		ret_val = false;
894 
895 	if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
896 		ret_val = false;
897 
898 #ifdef __DML_VBA_DEBUG__
899 	dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, CompBufReservedSpaceNeedAdjustment);
900 	dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
901 	dml_print("DML::%s: ret_val = %d\n",  __func__, ret_val);
902 #endif
903 
904 	return (ret_val);
905 }
906 
907 void dml32_CalculateDETBufferSize(
908 		unsigned int DETSizeOverride[],
909 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
910 		bool ForceSingleDPP,
911 		unsigned int NumberOfActiveSurfaces,
912 		bool UnboundedRequestEnabled,
913 		unsigned int nomDETInKByte,
914 		unsigned int MaxTotalDETInKByte,
915 		unsigned int ConfigReturnBufferSizeInKByte,
916 		unsigned int MinCompressedBufferSizeInKByte,
917 		unsigned int CompressedBufferSegmentSizeInkByteFinal,
918 		enum source_format_class SourcePixelFormat[],
919 		double ReadBandwidthLuma[],
920 		double ReadBandwidthChroma[],
921 		unsigned int RoundedUpMaxSwathSizeBytesY[],
922 		unsigned int RoundedUpMaxSwathSizeBytesC[],
923 		unsigned int DPPPerSurface[],
924 		/* Output */
925 		unsigned int DETBufferSizeInKByte[],
926 		unsigned int *CompressedBufferSizeInkByte)
927 {
928 	unsigned int DETBufferSizePoolInKByte;
929 	unsigned int NextDETBufferPieceInKByte;
930 	bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
931 	bool NextPotentialSurfaceToAssignDETPieceFound;
932 	unsigned int NextSurfaceToAssignDETPiece;
933 	double TotalBandwidth;
934 	double BandwidthOfSurfacesNotAssignedDETPiece;
935 	unsigned int max_minDET;
936 	unsigned int minDET;
937 	unsigned int minDET_pipe;
938 	unsigned int j, k;
939 
940 #ifdef __DML_VBA_DEBUG__
941 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
942 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
943 	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
944 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
945 	dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
946 	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
947 	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
948 	dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
949 			CompressedBufferSegmentSizeInkByteFinal);
950 #endif
951 
952 	// Note: Will use default det size if that fits 2 swaths
953 	if (UnboundedRequestEnabled) {
954 		if (DETSizeOverride[0] > 0) {
955 			DETBufferSizeInKByte[0] = DETSizeOverride[0];
956 		} else {
957 			DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
958 					((double) RoundedUpMaxSwathSizeBytesY[0] +
959 							(double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
960 		}
961 		*CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
962 	} else {
963 		DETBufferSizePoolInKByte = MaxTotalDETInKByte;
964 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
965 			DETBufferSizeInKByte[k] = nomDETInKByte;
966 			if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
967 					SourcePixelFormat[k] == dm_420_12) {
968 				max_minDET = nomDETInKByte - 64;
969 			} else {
970 				max_minDET = nomDETInKByte;
971 			}
972 			minDET = 128;
973 			minDET_pipe = 0;
974 
975 			// add DET resource until can hold 2 full swaths
976 			while (minDET <= max_minDET && minDET_pipe == 0) {
977 				if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
978 						(double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
979 					minDET_pipe = minDET;
980 				minDET = minDET + 64;
981 			}
982 
983 #ifdef __DML_VBA_DEBUG__
984 			dml_print("DML::%s: k=%0d minDET        = %d\n", __func__, k, minDET);
985 			dml_print("DML::%s: k=%0d max_minDET    = %d\n", __func__, k, max_minDET);
986 			dml_print("DML::%s: k=%0d minDET_pipe   = %d\n", __func__, k, minDET_pipe);
987 			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
988 					RoundedUpMaxSwathSizeBytesY[k]);
989 			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
990 					RoundedUpMaxSwathSizeBytesC[k]);
991 #endif
992 
993 			if (minDET_pipe == 0) {
994 				minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
995 						(double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
996 #ifdef __DML_VBA_DEBUG__
997 				dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
998 						__func__, k, minDET_pipe);
999 #endif
1000 			}
1001 
1002 			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1003 				DETBufferSizeInKByte[k] = 0;
1004 			} else if (DETSizeOverride[k] > 0) {
1005 				DETBufferSizeInKByte[k] = DETSizeOverride[k];
1006 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1007 						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1008 			} else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1009 				DETBufferSizeInKByte[k] = minDET_pipe;
1010 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1011 						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1012 			}
1013 
1014 #ifdef __DML_VBA_DEBUG__
1015 			dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1016 			dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1017 			dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1018 			dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
1019 #endif
1020 		}
1021 
1022 		TotalBandwidth = 0;
1023 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1024 			if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1025 				TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1026 		}
1027 #ifdef __DML_VBA_DEBUG__
1028 		dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1029 		for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1030 			dml_print("DML::%s: k=%d DETBufferSizeInKByte   = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1031 		dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1032 		dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
1033 #endif
1034 		BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1035 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1036 
1037 			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1038 				DETPieceAssignedToThisSurfaceAlready[k] = true;
1039 			} else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1040 					(double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1041 					((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1042 				DETPieceAssignedToThisSurfaceAlready[k] = true;
1043 				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1044 						ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1045 			} else {
1046 				DETPieceAssignedToThisSurfaceAlready[k] = false;
1047 			}
1048 #ifdef __DML_VBA_DEBUG__
1049 			dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1050 					DETPieceAssignedToThisSurfaceAlready[k]);
1051 			dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1052 					BandwidthOfSurfacesNotAssignedDETPiece);
1053 #endif
1054 		}
1055 
1056 		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1057 			NextPotentialSurfaceToAssignDETPieceFound = false;
1058 			NextSurfaceToAssignDETPiece = 0;
1059 
1060 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1061 #ifdef __DML_VBA_DEBUG__
1062 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1063 						ReadBandwidthLuma[k]);
1064 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1065 						ReadBandwidthChroma[k]);
1066 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1067 						ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1068 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1069 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1070 				dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1071 						NextSurfaceToAssignDETPiece);
1072 #endif
1073 				if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1074 						(!NextPotentialSurfaceToAssignDETPieceFound ||
1075 						ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1076 						ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1077 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1078 					NextSurfaceToAssignDETPiece = k;
1079 					NextPotentialSurfaceToAssignDETPieceFound = true;
1080 				}
1081 #ifdef __DML_VBA_DEBUG__
1082 				dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1083 						__func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1084 				dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1085 						__func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1086 #endif
1087 			}
1088 
1089 			if (NextPotentialSurfaceToAssignDETPieceFound) {
1090 				// Note: To show the banker's rounding behavior in VBA and also the fact
1091 				// that the DET buffer size varies due to precision issue
1092 				//
1093 				//double tmp1 =  ((double) DETBufferSizePoolInKByte *
1094 				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1095 				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1096 				// BandwidthOfSurfacesNotAssignedDETPiece /
1097 				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1098 				//double tmp2 =  dml_round((double) DETBufferSizePoolInKByte *
1099 				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1100 				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1101 				 //BandwidthOfSurfacesNotAssignedDETPiece /
1102 				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1103 				//
1104 				//dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1105 				//dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1106 
1107 				NextDETBufferPieceInKByte = dml_min(
1108 					dml_round((double) DETBufferSizePoolInKByte *
1109 						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1110 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1111 						BandwidthOfSurfacesNotAssignedDETPiece /
1112 						((ForceSingleDPP ? 1 :
1113 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1114 						(ForceSingleDPP ? 1 :
1115 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1116 						dml_floor((double) DETBufferSizePoolInKByte,
1117 						(ForceSingleDPP ? 1 :
1118 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1119 
1120 				// Above calculation can assign the entire DET buffer allocation to a single pipe.
1121 				// We should limit the per-pipe DET size to the nominal / max per pipe.
1122 				if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1123 					if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1124 							nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1125 						NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1126 								DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1127 					} else {
1128 						// Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1129 						// already has the max per-pipe value
1130 						NextDETBufferPieceInKByte = 0;
1131 					}
1132 				}
1133 
1134 #ifdef __DML_VBA_DEBUG__
1135 				dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1136 					DETBufferSizePoolInKByte);
1137 				dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1138 					NextSurfaceToAssignDETPiece);
1139 				dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1140 					NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1141 				dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1142 					NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1143 				dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1144 					__func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1145 				dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1146 					NextDETBufferPieceInKByte);
1147 				dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1148 					__func__, j, NextSurfaceToAssignDETPiece,
1149 					DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1150 #endif
1151 
1152 				DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1153 						DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1154 						+ NextDETBufferPieceInKByte
1155 						/ (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1156 #ifdef __DML_VBA_DEBUG__
1157 				dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1158 #endif
1159 
1160 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1161 				DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1162 				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1163 						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1164 								ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1165 			}
1166 		}
1167 		*CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1168 	}
1169 	*CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1170 
1171 #ifdef __DML_VBA_DEBUG__
1172 	dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1173 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1174 	for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1175 		dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1176 				__func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1177 	}
1178 #endif
1179 } // CalculateDETBufferSize
1180 
1181 void dml32_CalculateODMMode(
1182 		unsigned int MaximumPixelsPerLinePerDSCUnit,
1183 		unsigned int HActive,
1184 		enum output_format_class OutFormat,
1185 		enum output_encoder_class Output,
1186 		enum odm_combine_policy ODMUse,
1187 		double StateDispclk,
1188 		double MaxDispclk,
1189 		bool DSCEnable,
1190 		unsigned int TotalNumberOfActiveDPP,
1191 		unsigned int MaxNumDPP,
1192 		double PixelClock,
1193 		double DISPCLKDPPCLKDSCCLKDownSpreading,
1194 		double DISPCLKRampingMargin,
1195 		double DISPCLKDPPCLKVCOSpeed,
1196 		unsigned int NumberOfDSCSlices,
1197 
1198 		/* Output */
1199 		bool *TotalAvailablePipesSupport,
1200 		unsigned int *NumberOfDPP,
1201 		enum odm_combine_mode *ODMMode,
1202 		double *RequiredDISPCLKPerSurface)
1203 {
1204 
1205 	double SurfaceRequiredDISPCLKWithoutODMCombine;
1206 	double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1207 	double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1208 
1209 	SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1210 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1211 			MaxDispclk);
1212 	SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1213 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1214 			MaxDispclk);
1215 	SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1216 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1217 			MaxDispclk);
1218 	*TotalAvailablePipesSupport = true;
1219 	*ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1220 
1221 	if (ODMUse == dm_odm_combine_policy_none)
1222 		*ODMMode = dm_odm_combine_mode_disabled;
1223 
1224 	*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1225 	*NumberOfDPP = 0;
1226 
1227 	// FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1228 	// (ODMUse == "" || ODMUse == "CombineAsNeeded")
1229 
1230 	if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1231 			((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1232 					(DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
1233 					|| NumberOfDSCSlices > 8)))) {
1234 		if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1235 			*ODMMode = dm_odm_combine_mode_4to1;
1236 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1237 			*NumberOfDPP = 4;
1238 		} else {
1239 			*TotalAvailablePipesSupport = false;
1240 		}
1241 	} else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1242 			(((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1243 					SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1244 					(DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
1245 					|| (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
1246 		if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1247 			*ODMMode = dm_odm_combine_mode_2to1;
1248 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1249 			*NumberOfDPP = 2;
1250 		} else {
1251 			*TotalAvailablePipesSupport = false;
1252 		}
1253 	} else {
1254 		if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1255 			*NumberOfDPP = 1;
1256 		else
1257 			*TotalAvailablePipesSupport = false;
1258 	}
1259 	if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH &&
1260 			ODMUse != dm_odm_combine_policy_4to1) {
1261 		if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) {
1262 			*ODMMode = dm_odm_combine_mode_disabled;
1263 			*NumberOfDPP = 0;
1264 			*TotalAvailablePipesSupport = false;
1265 		} else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 ||
1266 				*ODMMode == dm_odm_combine_mode_4to1) {
1267 			*ODMMode = dm_odm_combine_mode_4to1;
1268 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1269 			*NumberOfDPP = 4;
1270 		} else {
1271 			*ODMMode = dm_odm_combine_mode_2to1;
1272 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1273 			*NumberOfDPP = 2;
1274 		}
1275 	}
1276 	if (Output == dm_hdmi && OutFormat == dm_420 &&
1277 			HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) {
1278 		*ODMMode = dm_odm_combine_mode_disabled;
1279 		*NumberOfDPP = 0;
1280 		*TotalAvailablePipesSupport = false;
1281 	}
1282 }
1283 
1284 double dml32_CalculateRequiredDispclk(
1285 		enum odm_combine_mode ODMMode,
1286 		double PixelClock,
1287 		double DISPCLKDPPCLKDSCCLKDownSpreading,
1288 		double DISPCLKRampingMargin,
1289 		double DISPCLKDPPCLKVCOSpeed,
1290 		double MaxDispclk)
1291 {
1292 	double RequiredDispclk = 0.;
1293 	double PixelClockAfterODM;
1294 	double DISPCLKWithRampingRoundedToDFSGranularity;
1295 	double DISPCLKWithoutRampingRoundedToDFSGranularity;
1296 	double MaxDispclkRoundedDownToDFSGranularity;
1297 
1298 	if (ODMMode == dm_odm_combine_mode_4to1)
1299 		PixelClockAfterODM = PixelClock / 4;
1300 	else if (ODMMode == dm_odm_combine_mode_2to1)
1301 		PixelClockAfterODM = PixelClock / 2;
1302 	else
1303 		PixelClockAfterODM = PixelClock;
1304 
1305 
1306 	DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1307 			PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1308 					* (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1309 
1310 	DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1311 			PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1312 
1313 	MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1314 
1315 	if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1316 		RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1317 	else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1318 		RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1319 	else
1320 		RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1321 
1322 	return RequiredDispclk;
1323 }
1324 
/*
 * Round a clock to a value of the form (VCOSpeed * 4) / N, N being a whole
 * number.
 *
 * N is the ratio VCOSpeed * 4 / Clock, rounded down to a whole number when
 * round_up is set and rounded up otherwise.
 *
 * Returns 0.0 when Clock is not positive.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double WholeDivider;

	if (Clock <= 0.0)
		return 0.0;

	// A smaller divider yields a larger clock, hence floor for round-up
	WholeDivider = round_up ? dml_floor(VCOSpeed * 4.0 / Clock, 1.0) :
			dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);

	return VCOSpeed * 4.0 / WholeDivider;
}
1335 
1336 void dml32_CalculateOutputLink(
1337 		double PHYCLKPerState,
1338 		double PHYCLKD18PerState,
1339 		double PHYCLKD32PerState,
1340 		double Downspreading,
1341 		bool IsMainSurfaceUsingTheIndicatedTiming,
1342 		enum output_encoder_class Output,
1343 		enum output_format_class OutputFormat,
1344 		unsigned int HTotal,
1345 		unsigned int HActive,
1346 		double PixelClockBackEnd,
1347 		double ForcedOutputLinkBPP,
1348 		unsigned int DSCInputBitPerComponent,
1349 		unsigned int NumberOfDSCSlices,
1350 		double AudioSampleRate,
1351 		unsigned int AudioSampleLayout,
1352 		enum odm_combine_mode ODMModeNoDSC,
1353 		enum odm_combine_mode ODMModeDSC,
1354 		bool DSCEnable,
1355 		unsigned int OutputLinkDPLanes,
1356 		enum dm_output_link_dp_rate OutputLinkDPRate,
1357 
1358 		/* Output */
1359 		bool *RequiresDSC,
1360 		double *RequiresFEC,
1361 		double  *OutBpp,
1362 		enum dm_output_type *OutputType,
1363 		enum dm_output_rate *OutputRate,
1364 		unsigned int *RequiredSlots)
1365 {
1366 	bool LinkDSCEnable;
1367 	unsigned int dummy;
1368 	*RequiresDSC = false;
1369 	*RequiresFEC = false;
1370 	*OutBpp = 0;
1371 	*OutputType = dm_output_type_unknown;
1372 	*OutputRate = dm_output_rate_unknown;
1373 
1374 	if (IsMainSurfaceUsingTheIndicatedTiming) {
1375 		if (Output == dm_hdmi) {
1376 			*RequiresDSC = false;
1377 			*RequiresFEC = false;
1378 			*OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1379 					PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1380 					DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1381 					ODMModeNoDSC, ODMModeDSC, &dummy);
1382 			//OutputTypeAndRate = "HDMI";
1383 			*OutputType = dm_output_type_hdmi;
1384 
1385 		} else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1386 			if (DSCEnable == true) {
1387 				*RequiresDSC = true;
1388 				LinkDSCEnable = true;
1389 				if (Output == dm_dp || Output == dm_dp2p0)
1390 					*RequiresFEC = true;
1391 				else
1392 					*RequiresFEC = false;
1393 			} else {
1394 				*RequiresDSC = false;
1395 				LinkDSCEnable = false;
1396 				if (Output == dm_dp2p0)
1397 					*RequiresFEC = true;
1398 				else
1399 					*RequiresFEC = false;
1400 			}
1401 			if (Output == dm_dp2p0) {
1402 				*OutBpp = 0;
1403 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1404 						PHYCLKD32PerState >= 10000 / 32) {
1405 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1406 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1407 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1408 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1409 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1410 					if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
1411 							ForcedOutputLinkBPP == 0) {
1412 						*RequiresDSC = true;
1413 						LinkDSCEnable = true;
1414 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1415 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1416 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1417 								OutputFormat, DSCInputBitPerComponent,
1418 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1419 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1420 					}
1421 					//OutputTypeAndRate = Output & " UHBR10";
1422 					*OutputType = dm_output_type_dp2p0;
1423 					*OutputRate = dm_output_rate_dp_rate_uhbr10;
1424 				}
1425 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1426 						*OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
1427 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1428 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1429 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1430 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1431 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1432 
1433 					if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1434 							ForcedOutputLinkBPP == 0) {
1435 						*RequiresDSC = true;
1436 						LinkDSCEnable = true;
1437 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1438 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1439 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1440 								OutputFormat, DSCInputBitPerComponent,
1441 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1442 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1443 					}
1444 					//OutputTypeAndRate = Output & " UHBR13p5";
1445 					*OutputType = dm_output_type_dp2p0;
1446 					*OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1447 				}
1448 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1449 						*OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1450 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1451 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1452 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1453 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1454 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1455 					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1456 						*RequiresDSC = true;
1457 						LinkDSCEnable = true;
1458 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1459 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1460 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1461 								OutputFormat, DSCInputBitPerComponent,
1462 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1463 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1464 					}
1465 					//OutputTypeAndRate = Output & " UHBR20";
1466 					*OutputType = dm_output_type_dp2p0;
1467 					*OutputRate = dm_output_rate_dp_rate_uhbr20;
1468 				}
1469 			} else {
1470 				*OutBpp = 0;
1471 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1472 						PHYCLKPerState >= 270) {
1473 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1474 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1475 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1476 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1477 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1478 					if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1479 							ForcedOutputLinkBPP == 0) {
1480 						*RequiresDSC = true;
1481 						LinkDSCEnable = true;
1482 						if (Output == dm_dp)
1483 							*RequiresFEC = true;
1484 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1485 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1486 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1487 								OutputFormat, DSCInputBitPerComponent,
1488 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1489 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1490 					}
1491 					//OutputTypeAndRate = Output & " HBR";
1492 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1493 					*OutputRate = dm_output_rate_dp_rate_hbr;
1494 				}
1495 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1496 						*OutBpp == 0 && PHYCLKPerState >= 540) {
1497 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1498 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1499 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1500 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1501 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1502 
1503 					if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1504 							ForcedOutputLinkBPP == 0) {
1505 						*RequiresDSC = true;
1506 						LinkDSCEnable = true;
1507 						if (Output == dm_dp)
1508 							*RequiresFEC = true;
1509 
1510 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1511 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1512 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1513 								OutputFormat, DSCInputBitPerComponent,
1514 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1515 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1516 					}
1517 					//OutputTypeAndRate = Output & " HBR2";
1518 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1519 					*OutputRate = dm_output_rate_dp_rate_hbr2;
1520 				}
1521 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1522 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1523 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1524 							ForcedOutputLinkBPP, LinkDSCEnable, Output,
1525 							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1526 							AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1527 							RequiredSlots);
1528 
1529 					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1530 						*RequiresDSC = true;
1531 						LinkDSCEnable = true;
1532 						if (Output == dm_dp)
1533 							*RequiresFEC = true;
1534 
1535 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1536 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1537 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1538 								OutputFormat, DSCInputBitPerComponent,
1539 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1540 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1541 					}
1542 					//OutputTypeAndRate = Output & " HBR3";
1543 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1544 					*OutputRate = dm_output_rate_dp_rate_hbr3;
1545 				}
1546 			}
1547 		}
1548 	}
1549 }
1550 
/*
 * Derive the per-surface DPPCLK values and the global DPPCLK.
 *
 * Each surface clock starts as its single-DPP clock divided by the number of
 * DPPs on that surface, scaled by (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100).
 * The global clock is the largest of those values after being passed through
 * dml32_RoundToDFSGranularity(). Every surface clock is then re-expressed as a
 * whole number of 1/255 steps of the global clock, with the step count
 * rounded up.
 *
 * Outputs: *GlobalDPPCLK and Dppclk[0 .. NumberOfActiveSurfaces - 1].
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	unsigned int surf = 0;

	*GlobalDPPCLK = 0;

	/* First pass: raw per-surface clock and running maximum. */
	while (surf < NumberOfActiveSurfaces) {
		double per_dpp_clk = DPPCLKUsingSingleDPP[surf] / DPPPerSurface[surf];

		Dppclk[surf] = per_dpp_clk * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100);
		*GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[surf]);
		surf++;
	}

	*GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);

	/* Second pass: snap each surface clock up to a 1/255 step of the global clock. */
	for (surf = 0; surf < NumberOfActiveSurfaces; surf++) {
		double steps = dml_ceil(Dppclk[surf] * 255.0 / *GlobalDPPCLK, 1.0);

		Dppclk[surf] = *GlobalDPPCLK / 255 * steps;
	}
}
1572 
1573 double dml32_TruncToValidBPP(
1574 		double LinkBitRate,
1575 		unsigned int Lanes,
1576 		unsigned int HTotal,
1577 		unsigned int HActive,
1578 		double PixelClock,
1579 		double DesiredBPP,
1580 		bool DSCEnable,
1581 		enum output_encoder_class Output,
1582 		enum output_format_class Format,
1583 		unsigned int DSCInputBitPerComponent,
1584 		unsigned int DSCSlices,
1585 		unsigned int AudioRate,
1586 		unsigned int AudioLayout,
1587 		enum odm_combine_mode ODMModeNoDSC,
1588 		enum odm_combine_mode ODMModeDSC,
1589 		/* Output */
1590 		unsigned int *RequiredSlots)
1591 {
1592 	double    MaxLinkBPP;
1593 	unsigned int   MinDSCBPP;
1594 	double    MaxDSCBPP;
1595 	unsigned int   NonDSCBPP0;
1596 	unsigned int   NonDSCBPP1;
1597 	unsigned int   NonDSCBPP2;
1598 	unsigned int   NonDSCBPP3;
1599 
1600 	if (Format == dm_420) {
1601 		NonDSCBPP0 = 12;
1602 		NonDSCBPP1 = 15;
1603 		NonDSCBPP2 = 18;
1604 		MinDSCBPP = 6;
1605 		MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
1606 	} else if (Format == dm_444) {
1607 		NonDSCBPP0 = 18;
1608 		NonDSCBPP1 = 24;
1609 		NonDSCBPP2 = 30;
1610 		NonDSCBPP3 = 36;
1611 		MinDSCBPP = 8;
1612 		MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1613 	} else {
1614 		if (Output == dm_hdmi) {
1615 			NonDSCBPP0 = 24;
1616 			NonDSCBPP1 = 24;
1617 			NonDSCBPP2 = 24;
1618 		} else {
1619 			NonDSCBPP0 = 16;
1620 			NonDSCBPP1 = 20;
1621 			NonDSCBPP2 = 24;
1622 		}
1623 		if (Format == dm_n422) {
1624 			MinDSCBPP = 7;
1625 			MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1626 		} else {
1627 			MinDSCBPP = 8;
1628 			MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1629 		}
1630 	}
1631 	if (Output == dm_dp2p0) {
1632 		MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1633 	} else if (DSCEnable && Output == dm_dp) {
1634 		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1635 	} else {
1636 		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1637 	}
1638 
1639 	if (DSCEnable) {
1640 		if (ODMModeDSC == dm_odm_combine_mode_4to1)
1641 			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1642 		else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1643 			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1644 		else if (ODMModeDSC == dm_odm_split_mode_1to2)
1645 			MaxLinkBPP = 2 * MaxLinkBPP;
1646 	} else {
1647 		if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1648 			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1649 		else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1650 			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1651 		else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1652 			MaxLinkBPP = 2 * MaxLinkBPP;
1653 	}
1654 
1655 	if (DesiredBPP == 0) {
1656 		if (DSCEnable) {
1657 			if (MaxLinkBPP < MinDSCBPP)
1658 				return BPP_INVALID;
1659 			else if (MaxLinkBPP >= MaxDSCBPP)
1660 				return MaxDSCBPP;
1661 			else
1662 				return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1663 		} else {
1664 			if (MaxLinkBPP >= NonDSCBPP3)
1665 				return NonDSCBPP3;
1666 			else if (MaxLinkBPP >= NonDSCBPP2)
1667 				return NonDSCBPP2;
1668 			else if (MaxLinkBPP >= NonDSCBPP1)
1669 				return NonDSCBPP1;
1670 			else if (MaxLinkBPP >= NonDSCBPP0)
1671 				return 16.0;
1672 			else
1673 				return BPP_INVALID;
1674 		}
1675 	} else {
1676 		if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1677 				DesiredBPP == NonDSCBPP0 || DesiredBPP == NonDSCBPP3)) ||
1678 				(DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1679 			return BPP_INVALID;
1680 		else
1681 			return DesiredBPP;
1682 	}
1683 
1684 	*RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1685 
1686 	return BPP_INVALID;
1687 } // TruncToValidBPP
1688 
1689 double dml32_RequiredDTBCLK(
1690 		bool              DSCEnable,
1691 		double               PixelClock,
1692 		enum output_format_class  OutputFormat,
1693 		double               OutputBpp,
1694 		unsigned int              DSCSlices,
1695 		unsigned int                 HTotal,
1696 		unsigned int                 HActive,
1697 		unsigned int              AudioRate,
1698 		unsigned int              AudioLayout)
1699 {
1700 	double PixelWordRate;
1701 	double HCActive;
1702 	double HCBlank;
1703 	double AverageTribyteRate;
1704 	double HActiveTribyteRate;
1705 
1706 	if (DSCEnable != true)
1707 		return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1708 
1709 	PixelWordRate = PixelClock /  (OutputFormat == dm_444 ? 1 : 2);
1710 	HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1711 			dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1712 	HCBlank = 64 + 32 *
1713 			dml_ceil(AudioRate *  (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1714 	AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1715 	HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1716 	return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
1717 }
1718 
1719 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1720 		enum odm_combine_mode ODMMode,
1721 		unsigned int DSCInputBitPerComponent,
1722 		double OutputBpp,
1723 		unsigned int HActive,
1724 		unsigned int HTotal,
1725 		unsigned int NumberOfDSCSlices,
1726 		enum output_format_class  OutputFormat,
1727 		enum output_encoder_class Output,
1728 		double PixelClock,
1729 		double PixelClockBackEnd,
1730 		double dsc_delay_factor_wa)
1731 {
1732 	unsigned int DSCDelayRequirement_val;
1733 
1734 	if (DSCEnabled == true && OutputBpp != 0) {
1735 		if (ODMMode == dm_odm_combine_mode_4to1) {
1736 			DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1737 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1738 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1739 		} else if (ODMMode == dm_odm_combine_mode_2to1) {
1740 			DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1741 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1742 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1743 		} else {
1744 			DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1745 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1746 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
1747 		}
1748 
1749 		DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1750 				dml_ceil((double)DSCDelayRequirement_val / HActive, 1);
1751 
1752 		DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1753 
1754 	} else {
1755 		DSCDelayRequirement_val = 0;
1756 	}
1757 
1758 #ifdef __DML_VBA_DEBUG__
1759 	dml_print("DML::%s: DSCEnabled              = %d\n", __func__, DSCEnabled);
1760 	dml_print("DML::%s: OutputBpp               = %f\n", __func__, OutputBpp);
1761 	dml_print("DML::%s: HActive                 = %d\n", __func__, HActive);
1762 	dml_print("DML::%s: OutputFormat            = %d\n", __func__, OutputFormat);
1763 	dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1764 	dml_print("DML::%s: NumberOfDSCSlices       = %d\n", __func__, NumberOfDSCSlices);
1765 	dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1766 #endif
1767 
1768 	return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1);
1769 }
1770 
1771 void dml32_CalculateSurfaceSizeInMall(
1772 		unsigned int NumberOfActiveSurfaces,
1773 		unsigned int MALLAllocatedForDCN,
1774 		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1775 		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
1776 		bool DCCEnable[],
1777 		bool ViewportStationary[],
1778 		unsigned int ViewportXStartY[],
1779 		unsigned int ViewportYStartY[],
1780 		unsigned int ViewportXStartC[],
1781 		unsigned int ViewportYStartC[],
1782 		unsigned int ViewportWidthY[],
1783 		unsigned int ViewportHeightY[],
1784 		unsigned int BytesPerPixelY[],
1785 		unsigned int ViewportWidthC[],
1786 		unsigned int ViewportHeightC[],
1787 		unsigned int BytesPerPixelC[],
1788 		unsigned int SurfaceWidthY[],
1789 		unsigned int SurfaceWidthC[],
1790 		unsigned int SurfaceHeightY[],
1791 		unsigned int SurfaceHeightC[],
1792 		unsigned int Read256BytesBlockWidthY[],
1793 		unsigned int Read256BytesBlockWidthC[],
1794 		unsigned int Read256BytesBlockHeightY[],
1795 		unsigned int Read256BytesBlockHeightC[],
1796 		unsigned int ReadBlockWidthY[],
1797 		unsigned int ReadBlockWidthC[],
1798 		unsigned int ReadBlockHeightY[],
1799 		unsigned int ReadBlockHeightC[],
1800 		unsigned int DCCMetaPitchY[],
1801 		unsigned int DCCMetaPitchC[],
1802 
1803 		/* Output */
1804 		unsigned int    SurfaceSizeInMALL[],
1805 		bool *ExceededMALLSize)
1806 {
1807 	unsigned int k;
1808 	unsigned int TotalSurfaceSizeInMALLForSS = 0;
1809 	unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
1810 	unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;
1811 
1812 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1813 		if (ViewportStationary[k]) {
1814 			SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1815 					dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1816 						ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1817 						ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1818 						ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1819 						ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1820 						dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
1821 
1822 			if (ReadBlockWidthC[k] > 0) {
1823 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1824 						dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1825 							dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1826 							ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1827 							dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1828 							dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1829 							dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1830 							ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1831 							dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
1832 							BytesPerPixelC[k];
1833 			}
1834 			if (DCCEnable[k] == true) {
1835 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1836 						(dml_min(dml_ceil(DCCMetaPitchY[k], 8 * Read256BytesBlockWidthY[k]),
1837 							dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1838 							Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1839 							- dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1840 							* dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1841 							Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1842 							ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1843 							Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 *
1844 							Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256) + (64 * 1024);
1845 				if (Read256BytesBlockWidthC[k] > 0) {
1846 					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1847 							dml_min(dml_ceil(DCCMetaPitchC[k], 8 *
1848 								Read256BytesBlockWidthC[k]),
1849 								dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1850 								* Read256BytesBlockWidthC[k] - 1, 8 *
1851 								Read256BytesBlockWidthC[k]) -
1852 								dml_floor(ViewportXStartC[k], 8 *
1853 								Read256BytesBlockWidthC[k])) *
1854 								dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1855 								Read256BytesBlockHeightC[k]),
1856 								dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1857 								8 * Read256BytesBlockHeightC[k] - 1, 8 *
1858 								Read256BytesBlockHeightC[k]) -
1859 								dml_floor(ViewportYStartC[k], 8 *
1860 								Read256BytesBlockHeightC[k])) *
1861 								BytesPerPixelC[k] / 256;
1862 				}
1863 			}
1864 		} else {
1865 			SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1866 					ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1867 					dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1868 							ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1869 							BytesPerPixelY[k];
1870 			if (ReadBlockWidthC[k] > 0) {
1871 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1872 						dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1873 								ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1874 						dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1875 								ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
1876 								BytesPerPixelC[k];
1877 			}
1878 			if (DCCEnable[k] == true) {
1879 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1880 						(dml_ceil(dml_min(DCCMetaPitchY[k], ViewportWidthY[k] + 8 *
1881 								Read256BytesBlockWidthY[k] - 1), 8 *
1882 								Read256BytesBlockWidthY[k]) *
1883 						dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1884 								Read256BytesBlockHeightY[k] - 1), 8 *
1885 								Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024);
1886 
1887 				if (Read256BytesBlockWidthC[k] > 0) {
1888 					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1889 							dml_ceil(dml_min(DCCMetaPitchC[k], ViewportWidthC[k] + 8 *
1890 									Read256BytesBlockWidthC[k] - 1), 8 *
1891 									Read256BytesBlockWidthC[k]) *
1892 							dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1893 									Read256BytesBlockHeightC[k] - 1), 8 *
1894 									Read256BytesBlockHeightC[k]) *
1895 									BytesPerPixelC[k] / 256;
1896 				}
1897 			}
1898 		}
1899 	}
1900 
1901 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1902 		/* SS and Subvp counted separate as they are never used at the same time */
1903 		if (UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)
1904 			TotalSurfaceSizeInMALLForSubVP = TotalSurfaceSizeInMALLForSubVP + SurfaceSizeInMALL[k];
1905 		else if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1906 			TotalSurfaceSizeInMALLForSS = TotalSurfaceSizeInMALLForSS + SurfaceSizeInMALL[k];
1907 	}
1908 	*ExceededMALLSize =  (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
1909 							(TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
1910 } // CalculateSurfaceSizeInMall
1911 
1912 void dml32_CalculateVMRowAndSwath(
1913 		unsigned int NumberOfActiveSurfaces,
1914 		DmlPipe myPipe[],
1915 		unsigned int SurfaceSizeInMALL[],
1916 		unsigned int PTEBufferSizeInRequestsLuma,
1917 		unsigned int PTEBufferSizeInRequestsChroma,
1918 		unsigned int DCCMetaBufferSizeBytes,
1919 		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1920 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1921 		unsigned int MALLAllocatedForDCN,
1922 		double SwathWidthY[],
1923 		double SwathWidthC[],
1924 		bool GPUVMEnable,
1925 		bool HostVMEnable,
1926 		unsigned int HostVMMaxNonCachedPageTableLevels,
1927 		unsigned int GPUVMMaxPageTableLevels,
1928 		unsigned int GPUVMMinPageSizeKBytes[],
1929 		unsigned int HostVMMinPageSize,
1930 
1931 		/* Output */
1932 		bool PTEBufferSizeNotExceeded[],
1933 		bool DCCMetaBufferSizeNotExceeded[],
1934 		unsigned int dpte_row_width_luma_ub[],
1935 		unsigned int dpte_row_width_chroma_ub[],
1936 		unsigned int dpte_row_height_luma[],
1937 		unsigned int dpte_row_height_chroma[],
1938 		unsigned int dpte_row_height_linear_luma[],     // VBA_DELTA
1939 		unsigned int dpte_row_height_linear_chroma[],   // VBA_DELTA
1940 		unsigned int meta_req_width[],
1941 		unsigned int meta_req_width_chroma[],
1942 		unsigned int meta_req_height[],
1943 		unsigned int meta_req_height_chroma[],
1944 		unsigned int meta_row_width[],
1945 		unsigned int meta_row_width_chroma[],
1946 		unsigned int meta_row_height[],
1947 		unsigned int meta_row_height_chroma[],
1948 		unsigned int vm_group_bytes[],
1949 		unsigned int dpte_group_bytes[],
1950 		unsigned int PixelPTEReqWidthY[],
1951 		unsigned int PixelPTEReqHeightY[],
1952 		unsigned int PTERequestSizeY[],
1953 		unsigned int PixelPTEReqWidthC[],
1954 		unsigned int PixelPTEReqHeightC[],
1955 		unsigned int PTERequestSizeC[],
1956 		unsigned int dpde0_bytes_per_frame_ub_l[],
1957 		unsigned int meta_pte_bytes_per_frame_ub_l[],
1958 		unsigned int dpde0_bytes_per_frame_ub_c[],
1959 		unsigned int meta_pte_bytes_per_frame_ub_c[],
1960 		double PrefetchSourceLinesY[],
1961 		double PrefetchSourceLinesC[],
1962 		double VInitPreFillY[],
1963 		double VInitPreFillC[],
1964 		unsigned int MaxNumSwathY[],
1965 		unsigned int MaxNumSwathC[],
1966 		double meta_row_bw[],
1967 		double dpte_row_bw[],
1968 		double PixelPTEBytesPerRow[],
1969 		double PDEAndMetaPTEBytesFrame[],
1970 		double MetaRowByte[],
1971 		bool use_one_row_for_frame[],
1972 		bool use_one_row_for_frame_flip[],
1973 		bool UsesMALLForStaticScreen[],
1974 		bool PTE_BUFFER_MODE[],
1975 		unsigned int BIGK_FRAGMENT_SIZE[])
1976 {
1977 	unsigned int k;
1978 	unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
1979 	unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
1980 	unsigned int PDEAndMetaPTEBytesFrameY;
1981 	unsigned int PDEAndMetaPTEBytesFrameC;
1982 	unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
1983 	unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
1984 	unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
1985 	unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
1986 	unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
1987 	unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
1988 	unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1989 	unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
1990 	unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1991 	unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
1992 	bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
1993 
1994 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1995 		if (HostVMEnable == true) {
1996 			vm_group_bytes[k] = 512;
1997 			dpte_group_bytes[k] = 512;
1998 		} else if (GPUVMEnable == true) {
1999 			vm_group_bytes[k] = 2048;
2000 			if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
2001 				dpte_group_bytes[k] = 512;
2002 			else
2003 				dpte_group_bytes[k] = 2048;
2004 		} else {
2005 			vm_group_bytes[k] = 0;
2006 			dpte_group_bytes[k] = 0;
2007 		}
2008 
2009 		if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
2010 				myPipe[k].SourcePixelFormat == dm_420_12 ||
2011 				myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
2012 			if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
2013 					!IsVertical(myPipe[k].SourceRotation)) {
2014 				PTEBufferSizeInRequestsForLuma[k] =
2015 						(PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
2016 				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
2017 			} else {
2018 				PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
2019 				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
2020 			}
2021 
2022 			PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
2023 					myPipe[k].ViewportStationary,
2024 					myPipe[k].DCCEnable,
2025 					myPipe[k].DPPPerSurface,
2026 					myPipe[k].BlockHeight256BytesC,
2027 					myPipe[k].BlockWidth256BytesC,
2028 					myPipe[k].SourcePixelFormat,
2029 					myPipe[k].SurfaceTiling,
2030 					myPipe[k].BytePerPixelC,
2031 					myPipe[k].SourceRotation,
2032 					SwathWidthC[k],
2033 					myPipe[k].ViewportHeightChroma,
2034 					myPipe[k].ViewportXStartC,
2035 					myPipe[k].ViewportYStartC,
2036 					GPUVMEnable,
2037 					HostVMEnable,
2038 					HostVMMaxNonCachedPageTableLevels,
2039 					GPUVMMaxPageTableLevels,
2040 					GPUVMMinPageSizeKBytes[k],
2041 					HostVMMinPageSize,
2042 					PTEBufferSizeInRequestsForChroma[k],
2043 					myPipe[k].PitchC,
2044 					myPipe[k].DCCMetaPitchC,
2045 					myPipe[k].BlockWidthC,
2046 					myPipe[k].BlockHeightC,
2047 
2048 					/* Output */
2049 					&MetaRowByteC[k],
2050 					&PixelPTEBytesPerRowC[k],
2051 					&dpte_row_width_chroma_ub[k],
2052 					&dpte_row_height_chroma[k],
2053 					&dpte_row_height_linear_chroma[k],
2054 					&PixelPTEBytesPerRowC_one_row_per_frame[k],
2055 					&dpte_row_width_chroma_ub_one_row_per_frame[k],
2056 					&dpte_row_height_chroma_one_row_per_frame[k],
2057 					&meta_req_width_chroma[k],
2058 					&meta_req_height_chroma[k],
2059 					&meta_row_width_chroma[k],
2060 					&meta_row_height_chroma[k],
2061 					&PixelPTEReqWidthC[k],
2062 					&PixelPTEReqHeightC[k],
2063 					&PTERequestSizeC[k],
2064 					&dpde0_bytes_per_frame_ub_c[k],
2065 					&meta_pte_bytes_per_frame_ub_c[k]);
2066 
2067 			PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2068 					myPipe[k].VRatioChroma,
2069 					myPipe[k].VTapsChroma,
2070 					myPipe[k].InterlaceEnable,
2071 					myPipe[k].ProgressiveToInterlaceUnitInOPP,
2072 					myPipe[k].SwathHeightC,
2073 					myPipe[k].SourceRotation,
2074 					myPipe[k].ViewportStationary,
2075 					SwathWidthC[k],
2076 					myPipe[k].ViewportHeightChroma,
2077 					myPipe[k].ViewportXStartC,
2078 					myPipe[k].ViewportYStartC,
2079 
2080 					/* Output */
2081 					&VInitPreFillC[k],
2082 					&MaxNumSwathC[k]);
2083 		} else {
2084 			PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2085 			PTEBufferSizeInRequestsForChroma[k] = 0;
2086 			PixelPTEBytesPerRowC[k] = 0;
2087 			PDEAndMetaPTEBytesFrameC = 0;
2088 			MetaRowByteC[k] = 0;
2089 			MaxNumSwathC[k] = 0;
2090 			PrefetchSourceLinesC[k] = 0;
2091 			dpte_row_height_chroma_one_row_per_frame[k] = 0;
2092 			dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2093 			PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2094 		}
2095 
2096 		PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2097 				myPipe[k].ViewportStationary,
2098 				myPipe[k].DCCEnable,
2099 				myPipe[k].DPPPerSurface,
2100 				myPipe[k].BlockHeight256BytesY,
2101 				myPipe[k].BlockWidth256BytesY,
2102 				myPipe[k].SourcePixelFormat,
2103 				myPipe[k].SurfaceTiling,
2104 				myPipe[k].BytePerPixelY,
2105 				myPipe[k].SourceRotation,
2106 				SwathWidthY[k],
2107 				myPipe[k].ViewportHeight,
2108 				myPipe[k].ViewportXStart,
2109 				myPipe[k].ViewportYStart,
2110 				GPUVMEnable,
2111 				HostVMEnable,
2112 				HostVMMaxNonCachedPageTableLevels,
2113 				GPUVMMaxPageTableLevels,
2114 				GPUVMMinPageSizeKBytes[k],
2115 				HostVMMinPageSize,
2116 				PTEBufferSizeInRequestsForLuma[k],
2117 				myPipe[k].PitchY,
2118 				myPipe[k].DCCMetaPitchY,
2119 				myPipe[k].BlockWidthY,
2120 				myPipe[k].BlockHeightY,
2121 
2122 				/* Output */
2123 				&MetaRowByteY[k],
2124 				&PixelPTEBytesPerRowY[k],
2125 				&dpte_row_width_luma_ub[k],
2126 				&dpte_row_height_luma[k],
2127 				&dpte_row_height_linear_luma[k],
2128 				&PixelPTEBytesPerRowY_one_row_per_frame[k],
2129 				&dpte_row_width_luma_ub_one_row_per_frame[k],
2130 				&dpte_row_height_luma_one_row_per_frame[k],
2131 				&meta_req_width[k],
2132 				&meta_req_height[k],
2133 				&meta_row_width[k],
2134 				&meta_row_height[k],
2135 				&PixelPTEReqWidthY[k],
2136 				&PixelPTEReqHeightY[k],
2137 				&PTERequestSizeY[k],
2138 				&dpde0_bytes_per_frame_ub_l[k],
2139 				&meta_pte_bytes_per_frame_ub_l[k]);
2140 
2141 		PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2142 				myPipe[k].VRatio,
2143 				myPipe[k].VTaps,
2144 				myPipe[k].InterlaceEnable,
2145 				myPipe[k].ProgressiveToInterlaceUnitInOPP,
2146 				myPipe[k].SwathHeightY,
2147 				myPipe[k].SourceRotation,
2148 				myPipe[k].ViewportStationary,
2149 				SwathWidthY[k],
2150 				myPipe[k].ViewportHeight,
2151 				myPipe[k].ViewportXStart,
2152 				myPipe[k].ViewportYStart,
2153 
2154 				/* Output */
2155 				&VInitPreFillY[k],
2156 				&MaxNumSwathY[k]);
2157 
2158 		PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2159 		MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
2160 
2161 		if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
2162 				PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
2163 			PTEBufferSizeNotExceeded[k] = true;
2164 		} else {
2165 			PTEBufferSizeNotExceeded[k] = false;
2166 		}
2167 
2168 		one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2169 			PTEBufferSizeInRequestsForLuma[k] &&
2170 			PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
2171 	}
2172 
2173 	dml32_CalculateMALLUseForStaticScreen(
2174 			NumberOfActiveSurfaces,
2175 			MALLAllocatedForDCN,
2176 			UseMALLForStaticScreen,   // mode
2177 			SurfaceSizeInMALL,
2178 			one_row_per_frame_fits_in_buffer,
2179 			/* Output */
2180 			UsesMALLForStaticScreen); // boolen
2181 
2182 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2183 		PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2184 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2185 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2186 				(GPUVMMinPageSizeKBytes[k] > 64);
2187 		BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
2188 	}
2189 
2190 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2191 #ifdef __DML_VBA_DEBUG__
2192 		dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n",  __func__, k, SurfaceSizeInMALL[k]);
2193 		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2194 #endif
2195 		use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2196 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2197 				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2198 				(GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
2199 
2200 		use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2201 				!(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
2202 
2203 		if (use_one_row_for_frame[k]) {
2204 			dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
2205 			dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
2206 			PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
2207 			dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
2208 			dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
2209 			PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
2210 			PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
2211 		}
2212 
2213 		if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2214 			DCCMetaBufferSizeNotExceeded[k] = true;
2215 		else
2216 			DCCMetaBufferSizeNotExceeded[k] = false;
2217 
2218 		PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
2219 		if (use_one_row_for_frame[k])
2220 			PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
2221 
2222 		dml32_CalculateRowBandwidth(
2223 				GPUVMEnable,
2224 				myPipe[k].SourcePixelFormat,
2225 				myPipe[k].VRatio,
2226 				myPipe[k].VRatioChroma,
2227 				myPipe[k].DCCEnable,
2228 				myPipe[k].HTotal / myPipe[k].PixelClock,
2229 				MetaRowByteY[k], MetaRowByteC[k],
2230 				meta_row_height[k],
2231 				meta_row_height_chroma[k],
2232 				PixelPTEBytesPerRowY[k],
2233 				PixelPTEBytesPerRowC[k],
2234 				dpte_row_height_luma[k],
2235 				dpte_row_height_chroma[k],
2236 
2237 				/* Output */
2238 				&meta_row_bw[k],
2239 				&dpte_row_bw[k]);
2240 #ifdef __DML_VBA_DEBUG__
2241 		dml_print("DML::%s: k=%d, use_one_row_for_frame        = %d\n",  __func__, k, use_one_row_for_frame[k]);
2242 		dml_print("DML::%s: k=%d, use_one_row_for_frame_flip   = %d\n",
2243 				__func__, k, use_one_row_for_frame_flip[k]);
2244 		dml_print("DML::%s: k=%d, UseMALLForPStateChange       = %d\n",
2245 				__func__, k, UseMALLForPStateChange[k]);
2246 		dml_print("DML::%s: k=%d, dpte_row_height_luma         = %d\n",  __func__, k, dpte_row_height_luma[k]);
2247 		dml_print("DML::%s: k=%d, dpte_row_width_luma_ub       = %d\n",
2248 				__func__, k, dpte_row_width_luma_ub[k]);
2249 		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY         = %d\n",  __func__, k, PixelPTEBytesPerRowY[k]);
2250 		dml_print("DML::%s: k=%d, dpte_row_height_chroma       = %d\n",
2251 				__func__, k, dpte_row_height_chroma[k]);
2252 		dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub     = %d\n",
2253 				__func__, k, dpte_row_width_chroma_ub[k]);
2254 		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC         = %d\n",  __func__, k, PixelPTEBytesPerRowC[k]);
2255 		dml_print("DML::%s: k=%d, PixelPTEBytesPerRow          = %d\n",  __func__, k, PixelPTEBytesPerRow[k]);
2256 		dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded     = %d\n",
2257 				__func__, k, PTEBufferSizeNotExceeded[k]);
2258 		dml_print("DML::%s: k=%d, PTE_BUFFER_MODE              = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2259 		dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE           = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2260 #endif
2261 	}
2262 } // CalculateVMRowAndSwath
2263 
2264 unsigned int dml32_CalculateVMAndRowBytes(
2265 		bool ViewportStationary,
2266 		bool DCCEnable,
2267 		unsigned int NumberOfDPPs,
2268 		unsigned int BlockHeight256Bytes,
2269 		unsigned int BlockWidth256Bytes,
2270 		enum source_format_class SourcePixelFormat,
2271 		unsigned int SurfaceTiling,
2272 		unsigned int BytePerPixel,
2273 		enum dm_rotation_angle SourceRotation,
2274 		double SwathWidth,
2275 		unsigned int ViewportHeight,
2276 		unsigned int    ViewportXStart,
2277 		unsigned int    ViewportYStart,
2278 		bool GPUVMEnable,
2279 		bool HostVMEnable,
2280 		unsigned int HostVMMaxNonCachedPageTableLevels,
2281 		unsigned int GPUVMMaxPageTableLevels,
2282 		unsigned int GPUVMMinPageSizeKBytes,
2283 		unsigned int HostVMMinPageSize,
2284 		unsigned int PTEBufferSizeInRequests,
2285 		unsigned int Pitch,
2286 		unsigned int DCCMetaPitch,
2287 		unsigned int MacroTileWidth,
2288 		unsigned int MacroTileHeight,
2289 
2290 		/* Output */
2291 		unsigned int *MetaRowByte,
2292 		unsigned int *PixelPTEBytesPerRow,
2293 		unsigned int    *dpte_row_width_ub,
2294 		unsigned int *dpte_row_height,
2295 		unsigned int *dpte_row_height_linear,
2296 		unsigned int    *PixelPTEBytesPerRow_one_row_per_frame,
2297 		unsigned int    *dpte_row_width_ub_one_row_per_frame,
2298 		unsigned int    *dpte_row_height_one_row_per_frame,
2299 		unsigned int *MetaRequestWidth,
2300 		unsigned int *MetaRequestHeight,
2301 		unsigned int *meta_row_width,
2302 		unsigned int *meta_row_height,
2303 		unsigned int *PixelPTEReqWidth,
2304 		unsigned int *PixelPTEReqHeight,
2305 		unsigned int *PTERequestSize,
2306 		unsigned int    *DPDE0BytesFrame,
2307 		unsigned int    *MetaPTEBytesFrame)
2308 {
2309 	unsigned int MPDEBytesFrame;
2310 	unsigned int DCCMetaSurfaceBytes;
2311 	unsigned int ExtraDPDEBytesFrame;
2312 	unsigned int PDEAndMetaPTEBytesFrame;
2313 	unsigned int HostVMDynamicLevels = 0;
2314 	unsigned int    MacroTileSizeBytes;
2315 	unsigned int    vp_height_meta_ub;
2316 	unsigned int    vp_height_dpte_ub;
2317 	unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2318 
2319 	if (GPUVMEnable == true && HostVMEnable == true) {
2320 		if (HostVMMinPageSize < 2048)
2321 			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2322 		else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2323 			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2324 		else
2325 			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2326 	}
2327 
2328 	*MetaRequestHeight = 8 * BlockHeight256Bytes;
2329 	*MetaRequestWidth = 8 * BlockWidth256Bytes;
2330 	if (SurfaceTiling == dm_sw_linear) {
2331 		*meta_row_height = 32;
2332 		*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2333 				- dml_floor(ViewportXStart, *MetaRequestWidth);
2334 	} else if (!IsVertical(SourceRotation)) {
2335 		*meta_row_height = *MetaRequestHeight;
2336 		if (ViewportStationary && NumberOfDPPs == 1) {
2337 			*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2338 					*MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2339 		} else {
2340 			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2341 		}
2342 		*MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2343 	} else {
2344 		*meta_row_height = *MetaRequestWidth;
2345 		if (ViewportStationary && NumberOfDPPs == 1) {
2346 			*meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2347 					*MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2348 		} else {
2349 			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2350 		}
2351 		*MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2352 	}
2353 
2354 	if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2355 		vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2356 				64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2357 	} else if (!IsVertical(SourceRotation)) {
2358 		vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2359 	} else {
2360 		vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2361 	}
2362 
2363 	DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2364 
2365 	if (GPUVMEnable == true) {
2366 		*MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2367 				(8 * 4.0 * 1024), 1) + 1) * 64;
2368 		MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2369 	} else {
2370 		*MetaPTEBytesFrame = 0;
2371 		MPDEBytesFrame = 0;
2372 	}
2373 
2374 	if (DCCEnable != true) {
2375 		*MetaPTEBytesFrame = 0;
2376 		MPDEBytesFrame = 0;
2377 		*MetaRowByte = 0;
2378 	}
2379 
2380 	MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2381 
2382 	if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2383 		if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2384 			vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2385 					MacroTileHeight - 1, MacroTileHeight) -
2386 					dml_floor(ViewportYStart, MacroTileHeight);
2387 		} else if (!IsVertical(SourceRotation)) {
2388 			vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2389 		} else {
2390 			vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2391 		}
2392 		*DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2393 				(8 * 2097152), 1) + 1);
2394 		ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2395 	} else {
2396 		*DPDE0BytesFrame = 0;
2397 		ExtraDPDEBytesFrame = 0;
2398 		vp_height_dpte_ub = 0;
2399 	}
2400 
2401 	PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2402 
2403 #ifdef __DML_VBA_DEBUG__
2404 	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2405 	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2406 	dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2407 	dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2408 	dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2409 	dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2410 	dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2411 	dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2412 	dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2413 	dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2414 	dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2415 	dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2416 	dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2417 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2418 	dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2419 	dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2420 	dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2421 #endif
2422 
2423 	if (HostVMEnable == true)
2424 		PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2425 
2426 	if (SurfaceTiling == dm_sw_linear) {
2427 		*PixelPTEReqHeight = 1;
2428 		*PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2429 		PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2430 		*PTERequestSize = 64;
2431 	} else if (GPUVMMinPageSizeKBytes == 4) {
2432 		*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2433 		*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2434 		*PTERequestSize = 128;
2435 	} else {
2436 		*PixelPTEReqHeight = MacroTileHeight;
2437 		*PixelPTEReqWidth = 8 *  1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2438 		*PTERequestSize = 64;
2439 	}
2440 #ifdef __DML_VBA_DEBUG__
2441 	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2442 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2443 	dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2444 	dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2445 	dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2446 	dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2447 	dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2448 #endif
2449 
2450 	*dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2451 	*dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2452 			(double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2453 					(double) *PixelPTEReqWidth;
2454 	*PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2455 			*PTERequestSize;
2456 
2457 	if (SurfaceTiling == dm_sw_linear) {
2458 		*dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2459 				*PixelPTEReqWidth / Pitch), 1));
2460 #ifdef __DML_VBA_DEBUG__
2461 		dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2462 				PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2463 		dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2464 				dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2465 		dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2466 				dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2467 		dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2468 				1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2469 						*PixelPTEReqWidth / Pitch), 1));
2470 		dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2471 #endif
2472 		*dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2473 				(double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2474 		*PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2475 
2476 		// VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2477 		*dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2478 				PixelPTEReqWidth_linear / Pitch), 1);
2479 		if (*dpte_row_height_linear > 128)
2480 			*dpte_row_height_linear = 128;
2481 
2482 	} else if (!IsVertical(SourceRotation)) {
2483 		*dpte_row_height = *PixelPTEReqHeight;
2484 
2485 		if (GPUVMMinPageSizeKBytes > 64) {
2486 			*dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2487 					*PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2488 		} else if (ViewportStationary && (NumberOfDPPs == 1)) {
2489 			*dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2490 					*PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2491 					dml_floor(ViewportXStart, *PixelPTEReqWidth);
2492 		} else {
2493 			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2494 					*PixelPTEReqWidth;
2495 		}
2496 
2497 		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2498 	} else {
2499 		*dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2500 
2501 		if (ViewportStationary && (NumberOfDPPs == 1)) {
2502 			*dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2503 					*PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2504 		} else {
2505 			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2506 					* *PixelPTEReqHeight;
2507 		}
2508 
2509 		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2510 	}
2511 
2512 	if (GPUVMEnable != true)
2513 		*PixelPTEBytesPerRow = 0;
2514 	if (HostVMEnable == true)
2515 		*PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2516 
2517 #ifdef __DML_VBA_DEBUG__
2518 	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2519 	dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2520 	dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2521 	dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2522 	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2523 	dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2524 	dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2525 	dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2526 			__func__, *dpte_row_width_ub_one_row_per_frame);
2527 	dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2528 			__func__, *PixelPTEBytesPerRow_one_row_per_frame);
2529 	dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2530 			*MetaPTEBytesFrame);
2531 #endif
2532 
2533 	return PDEAndMetaPTEBytesFrame;
2534 } // CalculateVMAndRowBytes
2535 
2536 double dml32_CalculatePrefetchSourceLines(
2537 		double VRatio,
2538 		unsigned int VTaps,
2539 		bool Interlace,
2540 		bool ProgressiveToInterlaceUnitInOPP,
2541 		unsigned int SwathHeight,
2542 		enum dm_rotation_angle SourceRotation,
2543 		bool ViewportStationary,
2544 		double SwathWidth,
2545 		unsigned int ViewportHeight,
2546 		unsigned int ViewportXStart,
2547 		unsigned int ViewportYStart,
2548 
2549 		/* Output */
2550 		double *VInitPreFill,
2551 		unsigned int *MaxNumSwath)
2552 {
2553 
2554 	unsigned int vp_start_rot;
2555 	unsigned int sw0_tmp;
2556 	unsigned int MaxPartialSwath;
2557 	double numLines;
2558 
2559 #ifdef __DML_VBA_DEBUG__
2560 	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2561 	dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2562 	dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2563 	dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2564 	dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2565 	dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2566 #endif
2567 	if (ProgressiveToInterlaceUnitInOPP)
2568 		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2569 	else
2570 		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2571 
2572 	if (ViewportStationary) {
2573 		if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2574 			vp_start_rot = SwathHeight -
2575 					(((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2576 		} else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2577 			vp_start_rot = ViewportXStart;
2578 		} else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2579 			vp_start_rot = SwathHeight -
2580 					(((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2581 		} else {
2582 			vp_start_rot = ViewportYStart;
2583 		}
2584 		sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2585 		if (sw0_tmp < *VInitPreFill)
2586 			*MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2587 		else
2588 			*MaxNumSwath = 1;
2589 		MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2590 	} else {
2591 		*MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2592 		if (*VInitPreFill > 1)
2593 			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2594 		else
2595 			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2596 	}
2597 	numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2598 
2599 #ifdef __DML_VBA_DEBUG__
2600 	dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2601 	dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2602 	dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2603 	dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2604 	dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2605 #endif
2606 	return numLines;
2607 
2608 } // CalculatePrefetchSourceLines
2609 
2610 void dml32_CalculateMALLUseForStaticScreen(
2611 		unsigned int NumberOfActiveSurfaces,
2612 		unsigned int MALLAllocatedForDCNFinal,
2613 		enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2614 		unsigned int SurfaceSizeInMALL[],
2615 		bool one_row_per_frame_fits_in_buffer[],
2616 
2617 		/* output */
2618 		bool UsesMALLForStaticScreen[])
2619 {
2620 	unsigned int k;
2621 	unsigned int SurfaceToAddToMALL;
2622 	bool CanAddAnotherSurfaceToMALL;
2623 	unsigned int TotalSurfaceSizeInMALL;
2624 
2625 	TotalSurfaceSizeInMALL = 0;
2626 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2627 		UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2628 		if (UsesMALLForStaticScreen[k])
2629 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2630 #ifdef __DML_VBA_DEBUG__
2631 		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2632 		dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n",  __func__, k, TotalSurfaceSizeInMALL);
2633 #endif
2634 	}
2635 
2636 	SurfaceToAddToMALL = 0;
2637 	CanAddAnotherSurfaceToMALL = true;
2638 	while (CanAddAnotherSurfaceToMALL) {
2639 		CanAddAnotherSurfaceToMALL = false;
2640 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2641 			if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2642 					!UsesMALLForStaticScreen[k] &&
2643 					UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2644 					one_row_per_frame_fits_in_buffer[k] &&
2645 					(!CanAddAnotherSurfaceToMALL ||
2646 					SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2647 				CanAddAnotherSurfaceToMALL = true;
2648 				SurfaceToAddToMALL = k;
2649 #ifdef __DML_VBA_DEBUG__
2650 				dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2651 						__func__, k, UseMALLForStaticScreen[k]);
2652 #endif
2653 			}
2654 		}
2655 		if (CanAddAnotherSurfaceToMALL) {
2656 			UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2657 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2658 
2659 #ifdef __DML_VBA_DEBUG__
2660 			dml_print("DML::%s: SurfaceToAddToMALL       = %d\n",  __func__, SurfaceToAddToMALL);
2661 			dml_print("DML::%s: TotalSurfaceSizeInMALL   = %d\n",  __func__, TotalSurfaceSizeInMALL);
2662 #endif
2663 
2664 		}
2665 	}
2666 }
2667 
2668 void dml32_CalculateRowBandwidth(
2669 		bool GPUVMEnable,
2670 		enum source_format_class SourcePixelFormat,
2671 		double VRatio,
2672 		double VRatioChroma,
2673 		bool DCCEnable,
2674 		double LineTime,
2675 		unsigned int MetaRowByteLuma,
2676 		unsigned int MetaRowByteChroma,
2677 		unsigned int meta_row_height_luma,
2678 		unsigned int meta_row_height_chroma,
2679 		unsigned int PixelPTEBytesPerRowLuma,
2680 		unsigned int PixelPTEBytesPerRowChroma,
2681 		unsigned int dpte_row_height_luma,
2682 		unsigned int dpte_row_height_chroma,
2683 		/* Output */
2684 		double *meta_row_bw,
2685 		double *dpte_row_bw)
2686 {
2687 	if (DCCEnable != true) {
2688 		*meta_row_bw = 0;
2689 	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2690 			SourcePixelFormat == dm_rgbe_alpha) {
2691 		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2692 				MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2693 	} else {
2694 		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2695 	}
2696 
2697 	if (GPUVMEnable != true) {
2698 		*dpte_row_bw = 0;
2699 	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2700 			SourcePixelFormat == dm_rgbe_alpha) {
2701 		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2702 				VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2703 	} else {
2704 		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2705 	}
2706 }
2707 
/*
 * Return the urgent latency: the largest of the three latency inputs, plus,
 * when DoUrgentLatencyAdjustment is set,
 * UrgentLatencyAdjustmentFabricClockComponent *
 * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool   DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double worst_case = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return worst_case;

	/* Fabric clock dependent correction on top of the worst case. */
	return worst_case + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2726 
2727 void dml32_CalculateUrgentBurstFactor(
2728 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2729 		unsigned int    swath_width_luma_ub,
2730 		unsigned int    swath_width_chroma_ub,
2731 		unsigned int SwathHeightY,
2732 		unsigned int SwathHeightC,
2733 		double  LineTime,
2734 		double  UrgentLatency,
2735 		double  CursorBufferSize,
2736 		unsigned int CursorWidth,
2737 		unsigned int CursorBPP,
2738 		double  VRatio,
2739 		double  VRatioC,
2740 		double  BytePerPixelInDETY,
2741 		double  BytePerPixelInDETC,
2742 		unsigned int    DETBufferSizeY,
2743 		unsigned int    DETBufferSizeC,
2744 		/* Output */
2745 		double *UrgentBurstFactorCursor,
2746 		double *UrgentBurstFactorLuma,
2747 		double *UrgentBurstFactorChroma,
2748 		bool   *NotEnoughUrgentLatencyHiding)
2749 {
2750 	double       LinesInDETLuma;
2751 	double       LinesInDETChroma;
2752 	unsigned int LinesInCursorBuffer;
2753 	double       CursorBufferSizeInTime;
2754 	double       DETBufferSizeInTimeLuma;
2755 	double       DETBufferSizeInTimeChroma;
2756 
2757 	*NotEnoughUrgentLatencyHiding = 0;
2758 
2759 	if (CursorWidth > 0) {
2760 		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2761 				(CursorWidth * CursorBPP / 8.0)), 1.0);
2762 		if (VRatio > 0) {
2763 			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2764 			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2765 				*NotEnoughUrgentLatencyHiding = 1;
2766 				*UrgentBurstFactorCursor = 0;
2767 			} else {
2768 				*UrgentBurstFactorCursor = CursorBufferSizeInTime /
2769 						(CursorBufferSizeInTime - UrgentLatency);
2770 			}
2771 		} else {
2772 			*UrgentBurstFactorCursor = 1;
2773 		}
2774 	}
2775 
2776 	LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2777 			DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2778 
2779 	if (VRatio > 0) {
2780 		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2781 		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2782 			*NotEnoughUrgentLatencyHiding = 1;
2783 			*UrgentBurstFactorLuma = 0;
2784 		} else {
2785 			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2786 		}
2787 	} else {
2788 		*UrgentBurstFactorLuma = 1;
2789 	}
2790 
2791 	if (BytePerPixelInDETC > 0) {
2792 		LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2793 					1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2794 					/ swath_width_chroma_ub;
2795 
2796 		if (VRatio > 0) {
2797 			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2798 			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2799 				*NotEnoughUrgentLatencyHiding = 1;
2800 				*UrgentBurstFactorChroma = 0;
2801 			} else {
2802 				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2803 						/ (DETBufferSizeInTimeChroma - UrgentLatency);
2804 			}
2805 		} else {
2806 			*UrgentBurstFactorChroma = 1;
2807 		}
2808 	}
2809 } // CalculateUrgentBurstFactor
2810 
2811 void dml32_CalculateDCFCLKDeepSleep(
2812 		unsigned int NumberOfActiveSurfaces,
2813 		unsigned int BytePerPixelY[],
2814 		unsigned int BytePerPixelC[],
2815 		double VRatio[],
2816 		double VRatioChroma[],
2817 		double SwathWidthY[],
2818 		double SwathWidthC[],
2819 		unsigned int DPPPerSurface[],
2820 		double HRatio[],
2821 		double HRatioChroma[],
2822 		double PixelClock[],
2823 		double PSCL_THROUGHPUT[],
2824 		double PSCL_THROUGHPUT_CHROMA[],
2825 		double Dppclk[],
2826 		double ReadBandwidthLuma[],
2827 		double ReadBandwidthChroma[],
2828 		unsigned int ReturnBusWidth,
2829 
2830 		/* Output */
2831 		double *DCFClkDeepSleep)
2832 {
2833 	unsigned int k;
2834 	double   DisplayPipeLineDeliveryTimeLuma;
2835 	double   DisplayPipeLineDeliveryTimeChroma;
2836 	double   DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2837 	double ReadBandwidth = 0.0;
2838 
2839 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2840 
2841 		if (VRatio[k] <= 1) {
2842 			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2843 					/ PixelClock[k];
2844 		} else {
2845 			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2846 		}
2847 		if (BytePerPixelC[k] == 0) {
2848 			DisplayPipeLineDeliveryTimeChroma = 0;
2849 		} else {
2850 			if (VRatioChroma[k] <= 1) {
2851 				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2852 						DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2853 			} else {
2854 				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2855 						/ Dppclk[k];
2856 			}
2857 		}
2858 
2859 		if (BytePerPixelC[k] > 0) {
2860 			DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2861 					BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2862 					__DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2863 					32.0 / DisplayPipeLineDeliveryTimeChroma);
2864 		} else {
2865 			DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2866 					64.0 / DisplayPipeLineDeliveryTimeLuma;
2867 		}
2868 		DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2869 
2870 #ifdef __DML_VBA_DEBUG__
2871 		dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2872 		dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2873 #endif
2874 	}
2875 
2876 	for (k = 0; k < NumberOfActiveSurfaces; ++k)
2877 		ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2878 
2879 	*DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2880 
2881 #ifdef __DML_VBA_DEBUG__
2882 	dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2883 	dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2884 	dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2885 	dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2886 #endif
2887 
2888 	for (k = 0; k < NumberOfActiveSurfaces; ++k)
2889 		*DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2890 #ifdef __DML_VBA_DEBUG__
2891 	dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2892 #endif
2893 } // CalculateDCFCLKDeepSleep
2894 
2895 double dml32_CalculateWriteBackDelay(
2896 		enum source_format_class WritebackPixelFormat,
2897 		double WritebackHRatio,
2898 		double WritebackVRatio,
2899 		unsigned int WritebackVTaps,
2900 		unsigned int         WritebackDestinationWidth,
2901 		unsigned int         WritebackDestinationHeight,
2902 		unsigned int         WritebackSourceHeight,
2903 		unsigned int HTotal)
2904 {
2905 	double CalculateWriteBackDelay;
2906 	double Line_length;
2907 	double Output_lines_last_notclamped;
2908 	double WritebackVInit;
2909 
2910 	WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
2911 	Line_length = dml_max((double) WritebackDestinationWidth,
2912 			dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
2913 	Output_lines_last_notclamped = WritebackDestinationHeight - 1 -
2914 			dml_ceil(((double)WritebackSourceHeight -
2915 					(double) WritebackVInit) / (double)WritebackVRatio, 1.0);
2916 	if (Output_lines_last_notclamped < 0) {
2917 		CalculateWriteBackDelay = 0;
2918 	} else {
2919 		CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length +
2920 				(HTotal - WritebackDestinationWidth) + 80;
2921 	}
2922 	return CalculateWriteBackDelay;
2923 }
2924 
2925 void dml32_UseMinimumDCFCLK(
2926 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2927 		bool DRRDisplay[],
2928 		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2929 		unsigned int MaxInterDCNTileRepeaters,
2930 		unsigned int MaxPrefetchMode,
2931 		double DRAMClockChangeLatencyFinal,
2932 		double FCLKChangeLatency,
2933 		double SREnterPlusExitTime,
2934 		unsigned int ReturnBusWidth,
2935 		unsigned int RoundTripPingLatencyCycles,
2936 		unsigned int ReorderingBytes,
2937 		unsigned int PixelChunkSizeInKByte,
2938 		unsigned int MetaChunkSize,
2939 		bool GPUVMEnable,
2940 		unsigned int GPUVMMaxPageTableLevels,
2941 		bool HostVMEnable,
2942 		unsigned int NumberOfActiveSurfaces,
2943 		double HostVMMinPageSize,
2944 		unsigned int HostVMMaxNonCachedPageTableLevels,
2945 		bool DynamicMetadataVMEnabled,
2946 		bool ImmediateFlipRequirement,
2947 		bool ProgressiveToInterlaceUnitInOPP,
2948 		double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2949 		double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2950 		unsigned int VTotal[],
2951 		unsigned int VActive[],
2952 		unsigned int DynamicMetadataTransmittedBytes[],
2953 		unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2954 		bool Interlace[],
2955 		double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2956 		double RequiredDISPCLK[][2],
2957 		double UrgLatency[],
2958 		unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2959 		double ProjectedDCFClkDeepSleep[][2],
2960 		double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2961 		unsigned int TotalNumberOfActiveDPP[][2],
2962 		unsigned int TotalNumberOfDCCActiveDPP[][2],
2963 		unsigned int dpte_group_bytes[],
2964 		double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2965 		double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2966 		unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2967 		unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2968 		unsigned int BytePerPixelY[],
2969 		unsigned int BytePerPixelC[],
2970 		unsigned int HTotal[],
2971 		double PixelClock[],
2972 		double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2973 		double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2974 		double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2975 		bool DynamicMetadataEnable[],
2976 		double ReadBandwidthLuma[],
2977 		double ReadBandwidthChroma[],
2978 		double DCFCLKPerState[],
2979 		/* Output */
2980 		double DCFCLKState[][2])
2981 {
2982 	unsigned int i, j, k;
2983 	unsigned int     dummy1;
2984 	double dummy2, dummy3;
2985 	double   NormalEfficiency;
2986 	double   TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2987 
2988 	NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2989 	for  (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2990 		for  (j = 0; j <= 1; ++j) {
2991 			double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2992 			double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2993 			double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2994 			double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2995 			double MinimumTWait = 0.0;
2996 			double DPTEBandwidth;
2997 			double DCFCLKRequiredForAverageBandwidth;
2998 			unsigned int ExtraLatencyBytes;
2999 			double ExtraLatencyCycles;
3000 			double DCFCLKRequiredForPeakBandwidth;
3001 			unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
3002 			double MinimumTvmPlus2Tr0;
3003 
3004 			TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
3005 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3006 				TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
3007 						+ NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
3008 								/ (15.75 * HTotal[k] / PixelClock[k]);
3009 			}
3010 
3011 			for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
3012 				NoOfDPPState[k] = NoOfDPP[i][j][k];
3013 
3014 			DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
3015 			DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
3016 
3017 			ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
3018 					TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
3019 					TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
3020 					NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
3021 					HostVMMaxNonCachedPageTableLevels);
3022 			ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
3023 					+ ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
3024 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3025 				double DCFCLKCyclesRequiredInPrefetch;
3026 				double PrefetchTime;
3027 
3028 				PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
3029 						* swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
3030 						+ PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
3031 								* BytePerPixelC[k]) / NormalEfficiency
3032 						/ ReturnBusWidth;
3033 				DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
3034 						+ PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
3035 								/ NormalEfficiency / ReturnBusWidth
3036 								* (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
3037 						+ 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
3038 								/ ReturnBusWidth
3039 						+ 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
3040 						+ PixelDCFCLKCyclesRequiredInPrefetch[k];
3041 				PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
3042 						* HTotal[k] / PixelClock[k];
3043 				DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
3044 						DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
3045 						UrgLatency[i] * GPUVMMaxPageTableLevels *
3046 						(HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
3047 
3048 				MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
3049 						UseMALLForPStateChange[k],
3050 						SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3051 						DRRDisplay[k],
3052 						DRAMClockChangeLatencyFinal,
3053 						FCLKChangeLatency,
3054 						UrgLatency[i],
3055 						SREnterPlusExitTime);
3056 
3057 				PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3058 						MinimumTWait - UrgLatency[i] *
3059 						((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3060 						GPUVMMaxPageTableLevels - 2) *  (HostVMEnable == true ?
3061 						HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3062 						DynamicMetadataVMExtraLatency[k];
3063 
3064 				if (PrefetchTime > 0) {
3065 					double ExpectedVRatioPrefetch;
3066 
3067 					ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3068 							PixelDCFCLKCyclesRequiredInPrefetch[k] /
3069 							DCFCLKCyclesRequiredInPrefetch);
3070 					DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3071 							PixelDCFCLKCyclesRequiredInPrefetch[k] /
3072 							PrefetchPixelLinesTime[k] *
3073 							dml_max(1.0, ExpectedVRatioPrefetch) *
3074 							dml_max(1.0, ExpectedVRatioPrefetch / 4);
3075 					if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3076 						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3077 								DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3078 								NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3079 								NormalEfficiency / ReturnBusWidth;
3080 					}
3081 				} else {
3082 					DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3083 				}
3084 				if (DynamicMetadataEnable[k] == true) {
3085 					double TSetupPipe;
3086 					double TdmbfPipe;
3087 					double TdmsksPipe;
3088 					double TdmecPipe;
3089 					double AllowedTimeForUrgentExtraLatency;
3090 
3091 					dml32_CalculateVUpdateAndDynamicMetadataParameters(
3092 							MaxInterDCNTileRepeaters,
3093 							RequiredDPPCLKPerSurface[i][j][k],
3094 							RequiredDISPCLK[i][j],
3095 							ProjectedDCFClkDeepSleep[i][j],
3096 							PixelClock[k],
3097 							HTotal[k],
3098 							VTotal[k] - VActive[k],
3099 							DynamicMetadataTransmittedBytes[k],
3100 							DynamicMetadataLinesBeforeActiveRequired[k],
3101 							Interlace[k],
3102 							ProgressiveToInterlaceUnitInOPP,
3103 
3104 							/* output */
3105 							&TSetupPipe,
3106 							&TdmbfPipe,
3107 							&TdmecPipe,
3108 							&TdmsksPipe,
3109 							&dummy1,
3110 							&dummy2,
3111 							&dummy3);
3112 					AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3113 							PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3114 							TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3115 					if (AllowedTimeForUrgentExtraLatency > 0)
3116 						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3117 								dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3118 								ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3119 					else
3120 						DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3121 				}
3122 			}
3123 			DCFCLKRequiredForPeakBandwidth = 0;
3124 			for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3125 				DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3126 						DCFCLKRequiredForPeakBandwidthPerSurface[k];
3127 			}
3128 			MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3129 					(HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3130 					(HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3131 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3132 				double MaximumTvmPlus2Tr0PlusTsw;
3133 
3134 				MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3135 						PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3136 				if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3137 					DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3138 				} else {
3139 					DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3140 							2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3141 								MinimumTvmPlus2Tr0 -
3142 								PrefetchPixelLinesTime[k] / 4),
3143 							(2 * ExtraLatencyCycles +
3144 								PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3145 								(MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3146 				}
3147 			}
3148 			DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3149 					dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3150 		}
3151 	}
3152 }
3153 
/*
 * dml32_CalculateExtraLatencyBytes() - byte count behind the extra-latency term.
 *
 * The total is ReorderingBytes plus the pixel and meta chunk contributions
 * (chunk sizes are multiplied by 1024), and, when GPUVMEnable is set, one
 * DPTE group term per active surface scaled by the host VM level count and
 * HostVMInefficiencyFactor.
 *
 * The host VM level count is only non-zero when both GPUVMEnable and
 * HostVMEnable are set; it is then HostVMMaxNonCachedPageTableLevels reduced
 * by 0, 1 or 2 (never below 0) depending on HostVMMinPageSize.
 *
 * Return: the accumulated value converted to unsigned int.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int host_vm_levels = 0;
	unsigned int level_factor;
	unsigned int surf;
	double total_bytes;

	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048)
			host_vm_levels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			host_vm_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			host_vm_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	total_bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (GPUVMEnable != true)
		return total_bytes;

	/* The integer part of each per-surface term is evaluated in unsigned arithmetic. */
	level_factor = 1 + 8 * host_vm_levels;
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf)
		total_bytes += NumberOfDPP[surf] * dpte_group_bytes[surf] * level_factor *
				HostVMInefficiencyFactor;

	return total_bytes;
}
3194 
/*
 * dml32_CalculateVUpdateAndDynamicMetadataParameters() - compute the VUpdate /
 * VReady pixel offsets and the dynamic metadata timing terms.
 *
 * Outputs:
 *   *VUpdateWidthPix  - ceil((14 / DCFClkDeepSleep + 12 / Dppclk +
 *                       repeater delay) * PixelClock)
 *   *VReadyOffsetPix  - ceil(max(150 / Dppclk, repeater delay +
 *                       20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *   *VUpdateOffsetPix - ceil(HTotal / 4)
 *   *TSetup           - sum of the three pixel counts above / PixelClock
 *   *Tdmbf            - DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   *Tdmec            - HTotal / PixelClock
 *   *Tdmsks           - half of VBlank (in line times) when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0,
 *                       otherwise that many line times; halved again when
 *                       InterlaceEnable is 1 and
 *                       ProgressiveToInterlaceUnitInOPP is false.
 *
 * The repeater delay is MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 * The clocks are used as divisors without any zero check.
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix  =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix  = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	/* VBlank * HTotal and Lines * HTotal are evaluated in unsigned arithmetic. */
	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so print them with %f. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3249 
3250 double dml32_CalculateTWait(
3251 		unsigned int PrefetchMode,
3252 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3253 		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3254 		bool DRRDisplay,
3255 		double DRAMClockChangeLatency,
3256 		double FCLKChangeLatency,
3257 		double UrgentLatency,
3258 		double SREnterPlusExitTime)
3259 {
3260 	double TWait = 0.0;
3261 
3262 	if (PrefetchMode == 0 &&
3263 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3264 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3265 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3266 			!(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3267 		TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3268 	} else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3269 		TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3270 	} else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3271 		TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3272 	} else {
3273 		TWait = UrgentLatency;
3274 	}
3275 
3276 #ifdef __DML_VBA_DEBUG__
3277 	dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3278 	dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3279 #endif
3280 	return TWait;
3281 } // CalculateTWait
3282 
3283 // Function: get_return_bw_mbps
3284 // Megabyte per second
3285 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3286 		const int VoltageLevel,
3287 		const bool HostVMEnable,
3288 		const double DCFCLK,
3289 		const double FabricClock,
3290 		const double DRAMSpeed)
3291 {
3292 	double ReturnBW = 0.;
3293 	double IdealSDPPortBandwidth    = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3294 	double IdealFabricBandwidth     = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3295 	double IdealDRAMBandwidth       = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3296 	double PixelDataOnlyReturnBW    = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3297 			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3298 			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe  :
3299 					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3300 	double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3301 			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3302 			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3303 					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3304 
3305 	if (HostVMEnable != true)
3306 		ReturnBW = PixelDataOnlyReturnBW;
3307 	else
3308 		ReturnBW = PixelMixedWithVMDataReturnBW;
3309 
3310 #ifdef __DML_VBA_DEBUG__
3311 	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3312 	dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3313 	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3314 	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3315 	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3316 	dml_print("DML::%s: IdealSDPPortBandwidth        = %f\n", __func__, IdealSDPPortBandwidth);
3317 	dml_print("DML::%s: IdealFabricBandwidth         = %f\n", __func__, IdealFabricBandwidth);
3318 	dml_print("DML::%s: IdealDRAMBandwidth           = %f\n", __func__, IdealDRAMBandwidth);
3319 	dml_print("DML::%s: PixelDataOnlyReturnBW        = %f\n", __func__, PixelDataOnlyReturnBW);
3320 	dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3321 	dml_print("DML::%s: ReturnBW                     = %f MBps\n", __func__, ReturnBW);
3322 #endif
3323 	return ReturnBW;
3324 }
3325 
3326 // Function: get_return_bw_mbps_vm_only
3327 // Megabyte per second
3328 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3329 		const int VoltageLevel,
3330 		const double DCFCLK,
3331 		const double FabricClock,
3332 		const double DRAMSpeed)
3333 {
3334 	double VMDataOnlyReturnBW = dml_min3(
3335 			soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3336 			FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3337 					* soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3338 			DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3339 					* (VoltageLevel < 2 ?
3340 							soc->pct_ideal_dram_bw_after_urgent_strobe :
3341 							soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3342 #ifdef __DML_VBA_DEBUG__
3343 	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3344 	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3345 	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3346 	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3347 	dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3348 #endif
3349 	return VMDataOnlyReturnBW;
3350 }
3351 
3352 double dml32_CalculateExtraLatency(
3353 		unsigned int RoundTripPingLatencyCycles,
3354 		unsigned int ReorderingBytes,
3355 		double DCFCLK,
3356 		unsigned int TotalNumberOfActiveDPP,
3357 		unsigned int PixelChunkSizeInKByte,
3358 		unsigned int TotalNumberOfDCCActiveDPP,
3359 		unsigned int MetaChunkSize,
3360 		double ReturnBW,
3361 		bool GPUVMEnable,
3362 		bool HostVMEnable,
3363 		unsigned int NumberOfActiveSurfaces,
3364 		unsigned int NumberOfDPP[],
3365 		unsigned int dpte_group_bytes[],
3366 		double HostVMInefficiencyFactor,
3367 		double HostVMMinPageSize,
3368 		unsigned int HostVMMaxNonCachedPageTableLevels)
3369 {
3370 	double ExtraLatencyBytes;
3371 	double ExtraLatency;
3372 
3373 	ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3374 			ReorderingBytes,
3375 			TotalNumberOfActiveDPP,
3376 			PixelChunkSizeInKByte,
3377 			TotalNumberOfDCCActiveDPP,
3378 			MetaChunkSize,
3379 			GPUVMEnable,
3380 			HostVMEnable,
3381 			NumberOfActiveSurfaces,
3382 			NumberOfDPP,
3383 			dpte_group_bytes,
3384 			HostVMInefficiencyFactor,
3385 			HostVMMinPageSize,
3386 			HostVMMaxNonCachedPageTableLevels);
3387 
3388 	ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3389 
3390 #ifdef __DML_VBA_DEBUG__
3391 	dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3392 	dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3393 	dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3394 	dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3395 	dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3396 #endif
3397 
3398 	return ExtraLatency;
3399 } // CalculateExtraLatency
3400 
3401 bool dml32_CalculatePrefetchSchedule(
3402 		struct vba_vars_st *v,
3403 		unsigned int k,
3404 		double HostVMInefficiencyFactor,
3405 		DmlPipe *myPipe,
3406 		unsigned int DSCDelay,
3407 		unsigned int DPP_RECOUT_WIDTH,
3408 		unsigned int VStartup,
3409 		unsigned int MaxVStartup,
3410 		double UrgentLatency,
3411 		double UrgentExtraLatency,
3412 		double TCalc,
3413 		unsigned int PDEAndMetaPTEBytesFrame,
3414 		unsigned int MetaRowByte,
3415 		unsigned int PixelPTEBytesPerRow,
3416 		double PrefetchSourceLinesY,
3417 		unsigned int SwathWidthY,
3418 		unsigned int VInitPreFillY,
3419 		unsigned int MaxNumSwathY,
3420 		double PrefetchSourceLinesC,
3421 		unsigned int SwathWidthC,
3422 		unsigned int VInitPreFillC,
3423 		unsigned int MaxNumSwathC,
3424 		unsigned int swath_width_luma_ub,
3425 		unsigned int swath_width_chroma_ub,
3426 		unsigned int SwathHeightY,
3427 		unsigned int SwathHeightC,
3428 		double TWait,
3429 		double TPreReq,
3430 		/* Output */
3431 		double   *DSTXAfterScaler,
3432 		double   *DSTYAfterScaler,
3433 		double *DestinationLinesForPrefetch,
3434 		double *PrefetchBandwidth,
3435 		double *DestinationLinesToRequestVMInVBlank,
3436 		double *DestinationLinesToRequestRowInVBlank,
3437 		double *VRatioPrefetchY,
3438 		double *VRatioPrefetchC,
3439 		double *RequiredPrefetchPixDataBWLuma,
3440 		double *RequiredPrefetchPixDataBWChroma,
3441 		bool   *NotEnoughTimeForDynamicMetadata,
3442 		double *Tno_bw,
3443 		double *prefetch_vmrow_bw,
3444 		double *Tdmdl_vm,
3445 		double *Tdmdl,
3446 		double *TSetup,
3447 		unsigned int   *VUpdateOffsetPix,
3448 		double   *VUpdateWidthPix,
3449 		double   *VReadyOffsetPix)
3450 {
3451 	double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
3452 	bool MyError = false;
3453 	unsigned int DPPCycles, DISPCLKCycles;
3454 	double DSTTotalPixelsAfterScaler;
3455 	double LineTime;
3456 	double dst_y_prefetch_equ;
3457 	double prefetch_bw_oto;
3458 	double Tvm_oto;
3459 	double Tr0_oto;
3460 	double Tvm_oto_lines;
3461 	double Tr0_oto_lines;
3462 	double dst_y_prefetch_oto;
3463 	double TimeForFetchingMetaPTE = 0;
3464 	double TimeForFetchingRowInVBlank = 0;
3465 	double LinesToRequestPrefetchPixelData = 0;
3466 	unsigned int HostVMDynamicLevelsTrips;
3467 	double  trip_to_mem;
3468 	double  Tvm_trips;
3469 	double  Tr0_trips;
3470 	double  Tvm_trips_rounded;
3471 	double  Tr0_trips_rounded;
3472 	double  Lsw_oto;
3473 	double  Tpre_rounded;
3474 	double  prefetch_bw_equ;
3475 	double  Tvm_equ;
3476 	double  Tr0_equ;
3477 	double  Tdmbf;
3478 	double  Tdmec;
3479 	double  Tdmsks;
3480 	double  prefetch_sw_bytes;
3481 	double  bytes_pp;
3482 	double  dep_bytes;
3483 	unsigned int max_vratio_pre = v->MaxVRatioPre;
3484 	double  min_Lsw;
3485 	double  Tsw_est1 = 0;
3486 	double  Tsw_est3 = 0;
3487 
3488 	if (v->GPUVMEnable == true && v->HostVMEnable == true)
3489 		HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3490 	else
3491 		HostVMDynamicLevelsTrips = 0;
3492 #ifdef __DML_VBA_DEBUG__
3493 	dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable);
3494 	dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels);
3495 	dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3496 	dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3497 			__func__, v->HostVMEnable, HostVMInefficiencyFactor);
3498 #endif
3499 	dml32_CalculateVUpdateAndDynamicMetadataParameters(
3500 			v->MaxInterDCNTileRepeaters,
3501 			myPipe->Dppclk,
3502 			myPipe->Dispclk,
3503 			myPipe->DCFClkDeepSleep,
3504 			myPipe->PixelClock,
3505 			myPipe->HTotal,
3506 			myPipe->VBlank,
3507 			v->DynamicMetadataTransmittedBytes[k],
3508 			v->DynamicMetadataLinesBeforeActiveRequired[k],
3509 			myPipe->InterlaceEnable,
3510 			myPipe->ProgressiveToInterlaceUnitInOPP,
3511 			TSetup,
3512 
3513 			/* output */
3514 			&Tdmbf,
3515 			&Tdmec,
3516 			&Tdmsks,
3517 			VUpdateOffsetPix,
3518 			VUpdateWidthPix,
3519 			VReadyOffsetPix);
3520 
3521 	LineTime = myPipe->HTotal / myPipe->PixelClock;
3522 	trip_to_mem = UrgentLatency;
3523 	Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
3524 
3525 	if (v->DynamicMetadataVMEnabled == true)
3526 		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
3527 	else
3528 		*Tdmdl = TWait + UrgentExtraLatency;
3529 
3530 #ifdef __DML_VBA_ALLOW_DELTA__
3531 	if (v->DynamicMetadataEnable[k] == false)
3532 		*Tdmdl = 0.0;
3533 #endif
3534 
3535 	if (v->DynamicMetadataEnable[k] == true) {
3536 		if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
3537 			*NotEnoughTimeForDynamicMetadata = true;
3538 #ifdef __DML_VBA_DEBUG__
3539 			dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3540 			dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3541 					__func__, Tdmbf);
3542 			dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3543 			dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3544 					__func__, Tdmsks);
3545 			dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3546 					__func__, *Tdmdl);
3547 #endif
3548 		} else {
3549 			*NotEnoughTimeForDynamicMetadata = false;
3550 		}
3551 	} else {
3552 		*NotEnoughTimeForDynamicMetadata = false;
3553 	}
3554 
3555 	*Tdmdl_vm =  (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true &&
3556 			v->GPUVMEnable == true ? TWait + Tvm_trips : 0);
3557 
3558 	if (myPipe->ScalerEnabled)
3559 		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
3560 	else
3561 		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;
3562 
3563 	DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;
3564 
3565 	DISPCLKCycles = v->DISPCLKDelaySubtotal;
3566 
3567 	if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3568 		return true;
3569 
3570 	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
3571 			myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3572 
3573 	*DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3574 			+ (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3575 			+ ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3576 					myPipe->HActive / 2 : 0)
3577 			+ ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3578 
3579 #ifdef __DML_VBA_DEBUG__
3580 	dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
3581 	dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3582 	dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3583 	dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
3584 	dml_print("DML::%s: DISPCLK: %f\n", __func__,  myPipe->Dispclk);
3585 	dml_print("DML::%s: DSCDelay: %d\n", __func__,  DSCDelay);
3586 	dml_print("DML::%s: ODMMode: %d\n", __func__,  myPipe->ODMMode);
3587 	dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3588 	dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__,  *DSTXAfterScaler);
3589 #endif
3590 
3591 	if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3592 		*DSTYAfterScaler = 1;
3593 	else
3594 		*DSTYAfterScaler = 0;
3595 
3596 	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3597 	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3598 	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3599 #ifdef __DML_VBA_DEBUG__
3600 	dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__,  *DSTXAfterScaler);
3601 	dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3602 #endif
3603 
3604 	MyError = false;
3605 
3606 	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
3607 
3608 	if (v->GPUVMEnable == true) {
3609 		Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
3610 		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3611 		if (v->GPUVMMaxPageTableLevels >= 3) {
3612 			*Tno_bw = UrgentExtraLatency + trip_to_mem *
3613 					(double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
3614 		} else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) {
3615 			Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
3616 					4.0 * LineTime; // VBA_ERROR
3617 			*Tno_bw = UrgentExtraLatency;
3618 		} else {
3619 			*Tno_bw = 0;
3620 		}
3621 	} else if (myPipe->DCCEnable == true) {
3622 		Tvm_trips_rounded = LineTime / 4.0;
3623 		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3624 		*Tno_bw = 0;
3625 	} else {
3626 		Tvm_trips_rounded = LineTime / 4.0;
3627 		Tr0_trips_rounded = LineTime / 2.0;
3628 		*Tno_bw = 0;
3629 	}
3630 	Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
3631 	Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
3632 
3633 	if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3634 			|| myPipe->SourcePixelFormat == dm_420_12) {
3635 		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3636 	} else {
3637 		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3638 	}
3639 
3640 	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3641 			+ PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3642 	prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3643 			prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
3644 
3645 	min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
3646 	min_Lsw = dml_max(min_Lsw, 1.0);
3647 	Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
3648 
3649 	if (v->GPUVMEnable == true) {
3650 		Tvm_oto = dml_max3(
3651 				Tvm_trips,
3652 				*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
3653 				LineTime / 4.0);
3654 	} else
3655 		Tvm_oto = LineTime / 4.0;
3656 
3657 	if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3658 		Tr0_oto = dml_max4(
3659 				Tr0_trips,
3660 				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
3661 				(LineTime - Tvm_oto)/2.0,
3662 				LineTime / 4.0);
3663 #ifdef __DML_VBA_DEBUG__
3664 		dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3665 				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
3666 		dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
3667 		dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
3668 		dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
3669 #endif
3670 	} else
3671 		Tr0_oto = (LineTime - Tvm_oto) / 2.0;
3672 
3673 	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
3674 	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
3675 	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
3676 
3677 	dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
3678 			(*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3679 
3680 	dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__);
3681 #ifdef __DML_VBA_DEBUG__
3682 	dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3683 	dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
3684 	dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3685 	dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3686 	dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
3687 	dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3688 	dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3689 	dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3690 	dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3691 	dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3692 	dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3693 	dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
3694 	dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
3695 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3696 	dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3697 	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3698 	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3699 	dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
3700 	dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
3701 	dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
3702 	dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
3703 	dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
3704 	dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
3705 	dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
3706 	dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
3707 	dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
3708 	dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
3709 #endif
3710 
3711 	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
3712 	Tpre_rounded = dst_y_prefetch_equ * LineTime;
3713 #ifdef __DML_VBA_DEBUG__
3714 	dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
3715 	dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
3716 	dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3717 	dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3718 			__func__, VStartup * LineTime);
3719 	dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3720 	dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3721 	dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
3722 	dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3723 	dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3724 	dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3725 	dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3726 			__func__, *DSTYAfterScaler);
3727 #endif
3728 	dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3729 			MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3730 
3731 	if (prefetch_sw_bytes < dep_bytes)
3732 		prefetch_sw_bytes = 2 * dep_bytes;
3733 
3734 	*PrefetchBandwidth = 0;
3735 	*DestinationLinesToRequestVMInVBlank = 0;
3736 	*DestinationLinesToRequestRowInVBlank = 0;
3737 	*VRatioPrefetchY = 0;
3738 	*VRatioPrefetchC = 0;
3739 	*RequiredPrefetchPixDataBWLuma = 0;
3740 	if (dst_y_prefetch_equ > 1 &&
3741 			(Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) {
3742 		double PrefetchBandwidth1;
3743 		double PrefetchBandwidth2;
3744 		double PrefetchBandwidth3;
3745 		double PrefetchBandwidth4;
3746 
3747 		if (Tpre_rounded - *Tno_bw > 0) {
3748 			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3749 					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3750 					+ prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
3751 			Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
3752 		} else
3753 			PrefetchBandwidth1 = 0;
3754 
3755 		if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
3756 				&& Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
3757 			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3758 					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3759 					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
3760 		}
3761 
3762 		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
3763 			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
3764 			(Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
3765 		else
3766 			PrefetchBandwidth2 = 0;
3767 
3768 		if (Tpre_rounded - Tvm_trips_rounded > 0) {
3769 			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3770 					+ prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
3771 			Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
3772 		} else
3773 			PrefetchBandwidth3 = 0;
3774 
3775 
3776 		if (VStartup == MaxVStartup &&
3777 				(Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
3778 				LineTime - Tvm_trips_rounded > 0) {
3779 			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3780 					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
3781 		}
3782 
3783 		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
3784 			PrefetchBandwidth4 = prefetch_sw_bytes /
3785 					(Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
3786 		} else {
3787 			PrefetchBandwidth4 = 0;
3788 		}
3789 
3790 #ifdef __DML_VBA_DEBUG__
3791 		dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
3792 		dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3793 		dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
3794 		dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
3795 		dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
3796 		dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3797 		dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3798 		dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3799 		dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3800 #endif
3801 		{
3802 			bool Case1OK;
3803 			bool Case2OK;
3804 			bool Case3OK;
3805 
3806 			if (PrefetchBandwidth1 > 0) {
3807 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3808 						>= Tvm_trips_rounded
3809 						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3810 								/ PrefetchBandwidth1 >= Tr0_trips_rounded) {
3811 					Case1OK = true;
3812 				} else {
3813 					Case1OK = false;
3814 				}
3815 			} else {
3816 				Case1OK = false;
3817 			}
3818 
3819 			if (PrefetchBandwidth2 > 0) {
3820 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3821 						>= Tvm_trips_rounded
3822 						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3823 						/ PrefetchBandwidth2 < Tr0_trips_rounded) {
3824 					Case2OK = true;
3825 				} else {
3826 					Case2OK = false;
3827 				}
3828 			} else {
3829 				Case2OK = false;
3830 			}
3831 
3832 			if (PrefetchBandwidth3 > 0) {
3833 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3834 						Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3835 								HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3836 								Tr0_trips_rounded) {
3837 					Case3OK = true;
3838 				} else {
3839 					Case3OK = false;
3840 				}
3841 			} else {
3842 				Case3OK = false;
3843 			}
3844 
3845 			if (Case1OK)
3846 				prefetch_bw_equ = PrefetchBandwidth1;
3847 			else if (Case2OK)
3848 				prefetch_bw_equ = PrefetchBandwidth2;
3849 			else if (Case3OK)
3850 				prefetch_bw_equ = PrefetchBandwidth3;
3851 			else
3852 				prefetch_bw_equ = PrefetchBandwidth4;
3853 
3854 #ifdef __DML_VBA_DEBUG__
3855 			dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3856 			dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3857 			dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3858 			dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
3859 #endif
3860 
3861 			if (prefetch_bw_equ > 0) {
3862 				if (v->GPUVMEnable == true) {
3863 					Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3864 							HostVMInefficiencyFactor / prefetch_bw_equ,
3865 							Tvm_trips, LineTime / 4);
3866 				} else {
3867 					Tvm_equ = LineTime / 4;
3868 				}
3869 
3870 				if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3871 					Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3872 							HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
3873 							(LineTime - Tvm_equ) / 2, LineTime / 4);
3874 				} else {
3875 					Tr0_equ = (LineTime - Tvm_equ) / 2;
3876 				}
3877 			} else {
3878 				Tvm_equ = 0;
3879 				Tr0_equ = 0;
3880 #ifdef __DML_VBA_DEBUG__
3881 				dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3882 #endif
3883 			}
3884 		}
3885 
3886 		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
3887 			if (dst_y_prefetch_oto * LineTime < TPreReq) {
3888 				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
3889 			} else {
3890 				*DestinationLinesForPrefetch = dst_y_prefetch_oto;
3891 			}
3892 			TimeForFetchingMetaPTE = Tvm_oto;
3893 			TimeForFetchingRowInVBlank = Tr0_oto;
3894 			*PrefetchBandwidth = prefetch_bw_oto;
3895 		} else {
3896 			*DestinationLinesForPrefetch = dst_y_prefetch_equ;
3897 			TimeForFetchingMetaPTE = Tvm_equ;
3898 			TimeForFetchingRowInVBlank = Tr0_equ;
3899 			*PrefetchBandwidth = prefetch_bw_equ;
3900 		}
3901 
3902 		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
3903 
3904 		*DestinationLinesToRequestRowInVBlank =
3905 				dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
3906 
3907 		LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch -
3908 				*DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3909 
3910 #ifdef __DML_VBA_DEBUG__
3911 		dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3912 		dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3913 				__func__, *DestinationLinesToRequestVMInVBlank);
3914 		dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
3915 		dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3916 		dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3917 				__func__, *DestinationLinesToRequestRowInVBlank);
3918 		dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3919 		dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
3920 #endif
3921 
3922 		if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
3923 			*VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
3924 			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3925 #ifdef __DML_VBA_DEBUG__
3926 			dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3927 			dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3928 			dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3929 #endif
3930 			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3931 				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3932 					*VRatioPrefetchY =
3933 							dml_max((double) PrefetchSourceLinesY /
3934 									LinesToRequestPrefetchPixelData,
3935 									(double) MaxNumSwathY * SwathHeightY /
3936 									(LinesToRequestPrefetchPixelData -
3937 									(VInitPreFillY - 3.0) / 2.0));
3938 					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3939 				} else {
3940 					MyError = true;
3941 					*VRatioPrefetchY = 0;
3942 				}
3943 #ifdef __DML_VBA_DEBUG__
3944 				dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3945 				dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3946 				dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3947 #endif
3948 			}
3949 
3950 			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
3951 			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3952 
3953 #ifdef __DML_VBA_DEBUG__
3954 			dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3955 			dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3956 			dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3957 #endif
3958 			if ((SwathHeightC > 4)) {
3959 				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3960 					*VRatioPrefetchC =
3961 						dml_max(*VRatioPrefetchC,
3962 							(double) MaxNumSwathC * SwathHeightC /
3963 							(LinesToRequestPrefetchPixelData -
3964 							(VInitPreFillC - 3.0) / 2.0));
3965 					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3966 				} else {
3967 					MyError = true;
3968 					*VRatioPrefetchC = 0;
3969 				}
3970 #ifdef __DML_VBA_DEBUG__
3971 				dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3972 				dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3973 				dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3974 #endif
3975 			}
3976 
3977 			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
3978 					/ LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
3979 					/ LineTime;
3980 
3981 #ifdef __DML_VBA_DEBUG__
3982 			dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3983 			dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3984 			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3985 			dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
3986 					__func__, *RequiredPrefetchPixDataBWLuma);
3987 #endif
3988 			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
3989 					LinesToRequestPrefetchPixelData
3990 					* myPipe->BytePerPixelC
3991 					* swath_width_chroma_ub / LineTime;
3992 		} else {
3993 			MyError = true;
3994 #ifdef __DML_VBA_DEBUG__
3995 			dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
3996 					__func__, LinesToRequestPrefetchPixelData);
3997 #endif
3998 			*VRatioPrefetchY = 0;
3999 			*VRatioPrefetchC = 0;
4000 			*RequiredPrefetchPixDataBWLuma = 0;
4001 			*RequiredPrefetchPixDataBWChroma = 0;
4002 		}
4003 #ifdef __DML_VBA_DEBUG__
4004 		dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
4005 			(double)LinesToRequestPrefetchPixelData * LineTime +
4006 			2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
4007 		dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
4008 		dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
4009 			(*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
4010 		dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
4011 		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
4012 			TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
4013 			((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
4014 		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
4015 				PixelPTEBytesPerRow);
4016 #endif
4017 	} else {
4018 		MyError = true;
4019 #ifdef __DML_VBA_DEBUG__
4020 		dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
4021 				__func__, dst_y_prefetch_equ);
4022 #endif
4023 	}
4024 
4025 	{
4026 		double prefetch_vm_bw;
4027 		double prefetch_row_bw;
4028 
4029 		if (PDEAndMetaPTEBytesFrame == 0) {
4030 			prefetch_vm_bw = 0;
4031 		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
4032 #ifdef __DML_VBA_DEBUG__
4033 			dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
4034 			dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
4035 			dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
4036 					__func__, *DestinationLinesToRequestVMInVBlank);
4037 			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
4038 #endif
4039 			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
4040 					(*DestinationLinesToRequestVMInVBlank * LineTime);
4041 #ifdef __DML_VBA_DEBUG__
4042 			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
4043 #endif
4044 		} else {
4045 			prefetch_vm_bw = 0;
4046 			MyError = true;
4047 #ifdef __DML_VBA_DEBUG__
4048 			dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
4049 					__func__, *DestinationLinesToRequestVMInVBlank);
4050 #endif
4051 		}
4052 
4053 		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
4054 			prefetch_row_bw = 0;
4055 		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
4056 			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
4057 					(*DestinationLinesToRequestRowInVBlank * LineTime);
4058 
4059 #ifdef __DML_VBA_DEBUG__
4060 			dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
4061 			dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
4062 			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
4063 					__func__, *DestinationLinesToRequestRowInVBlank);
4064 			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
4065 #endif
4066 		} else {
4067 			prefetch_row_bw = 0;
4068 			MyError = true;
4069 #ifdef __DML_VBA_DEBUG__
4070 			dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
4071 					__func__, *DestinationLinesToRequestRowInVBlank);
4072 #endif
4073 		}
4074 
4075 		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
4076 	}
4077 
4078 	if (MyError) {
4079 		*PrefetchBandwidth = 0;
4080 		TimeForFetchingMetaPTE = 0;
4081 		TimeForFetchingRowInVBlank = 0;
4082 		*DestinationLinesToRequestVMInVBlank = 0;
4083 		*DestinationLinesToRequestRowInVBlank = 0;
4084 		*DestinationLinesForPrefetch = 0;
4085 		LinesToRequestPrefetchPixelData = 0;
4086 		*VRatioPrefetchY = 0;
4087 		*VRatioPrefetchC = 0;
4088 		*RequiredPrefetchPixDataBWLuma = 0;
4089 		*RequiredPrefetchPixDataBWChroma = 0;
4090 	}
4091 
4092 	return MyError;
4093 } // CalculatePrefetchSchedule
4094 
4095 void dml32_CalculateFlipSchedule(
4096 		double HostVMInefficiencyFactor,
4097 		double UrgentExtraLatency,
4098 		double UrgentLatency,
4099 		unsigned int GPUVMMaxPageTableLevels,
4100 		bool HostVMEnable,
4101 		unsigned int HostVMMaxNonCachedPageTableLevels,
4102 		bool GPUVMEnable,
4103 		double HostVMMinPageSize,
4104 		double PDEAndMetaPTEBytesPerFrame,
4105 		double MetaRowBytes,
4106 		double DPTEBytesPerRow,
4107 		double BandwidthAvailableForImmediateFlip,
4108 		unsigned int TotImmediateFlipBytes,
4109 		enum source_format_class SourcePixelFormat,
4110 		double LineTime,
4111 		double VRatio,
4112 		double VRatioChroma,
4113 		double Tno_bw,
4114 		bool DCCEnable,
4115 		unsigned int dpte_row_height,
4116 		unsigned int meta_row_height,
4117 		unsigned int dpte_row_height_chroma,
4118 		unsigned int meta_row_height_chroma,
4119 		bool    use_one_row_for_frame_flip,
4120 
4121 		/* Output */
4122 		double *DestinationLinesToRequestVMInImmediateFlip,
4123 		double *DestinationLinesToRequestRowInImmediateFlip,
4124 		double *final_flip_bw,
4125 		bool *ImmediateFlipSupportedForPipe)
4126 {
4127 	double min_row_time = 0.0;
4128 	unsigned int HostVMDynamicLevelsTrips;
4129 	double TimeForFetchingMetaPTEImmediateFlip;
4130 	double TimeForFetchingRowInVBlankImmediateFlip;
4131 	double ImmediateFlipBW;
4132 
4133 	if (GPUVMEnable == true && HostVMEnable == true)
4134 		HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4135 	else
4136 		HostVMDynamicLevelsTrips = 0;
4137 
4138 #ifdef __DML_VBA_DEBUG__
4139 	dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4140 	dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4141 #endif
4142 
4143 	if (TotImmediateFlipBytes > 0) {
4144 		if (use_one_row_for_frame_flip) {
4145 			ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4146 					BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4147 		} else {
4148 			ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4149 					BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4150 		}
4151 		if (GPUVMEnable == true) {
4152 			TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4153 					HostVMInefficiencyFactor / ImmediateFlipBW,
4154 					UrgentExtraLatency + UrgentLatency *
4155 					(GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4156 					LineTime / 4.0);
4157 		} else {
4158 			TimeForFetchingMetaPTEImmediateFlip = 0;
4159 		}
4160 		if ((GPUVMEnable == true || DCCEnable == true)) {
4161 			TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4162 					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4163 					UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4164 		} else {
4165 			TimeForFetchingRowInVBlankImmediateFlip = 0;
4166 		}
4167 
4168 		*DestinationLinesToRequestVMInImmediateFlip =
4169 				dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4170 		*DestinationLinesToRequestRowInImmediateFlip =
4171 				dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4172 
4173 		if (GPUVMEnable == true) {
4174 			*final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4175 					(*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4176 					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4177 					(*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4178 		} else if ((GPUVMEnable == true || DCCEnable == true)) {
4179 			*final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4180 					(*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4181 		} else {
4182 			*final_flip_bw = 0;
4183 		}
4184 	} else {
4185 		TimeForFetchingMetaPTEImmediateFlip = 0;
4186 		TimeForFetchingRowInVBlankImmediateFlip = 0;
4187 		*DestinationLinesToRequestVMInImmediateFlip = 0;
4188 		*DestinationLinesToRequestRowInImmediateFlip = 0;
4189 		*final_flip_bw = 0;
4190 	}
4191 
4192 	if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4193 		if (GPUVMEnable == true && DCCEnable != true) {
4194 			min_row_time = dml_min(dpte_row_height *
4195 					LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4196 		} else if (GPUVMEnable != true && DCCEnable == true) {
4197 			min_row_time = dml_min(meta_row_height *
4198 					LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4199 		} else {
4200 			min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4201 					LineTime / VRatio, dpte_row_height_chroma * LineTime /
4202 					VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4203 		}
4204 	} else {
4205 		if (GPUVMEnable == true && DCCEnable != true) {
4206 			min_row_time = dpte_row_height * LineTime / VRatio;
4207 		} else if (GPUVMEnable != true && DCCEnable == true) {
4208 			min_row_time = meta_row_height * LineTime / VRatio;
4209 		} else {
4210 			min_row_time =
4211 				dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4212 		}
4213 	}
4214 
4215 	if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4216 			|| TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4217 					> min_row_time) {
4218 		*ImmediateFlipSupportedForPipe = false;
4219 	} else {
4220 		*ImmediateFlipSupportedForPipe = true;
4221 	}
4222 
4223 #ifdef __DML_VBA_DEBUG__
4224 	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4225 	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4226 	dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4227 			__func__, *DestinationLinesToRequestVMInImmediateFlip);
4228 	dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4229 			__func__, *DestinationLinesToRequestRowInImmediateFlip);
4230 	dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4231 	dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4232 			__func__, TimeForFetchingRowInVBlankImmediateFlip);
4233 	dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4234 	dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4235 #endif
4236 } // CalculateFlipSchedule
4237 
4238 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4239 		struct vba_vars_st *v,
4240 		unsigned int PrefetchMode,
4241 		double DCFCLK,
4242 		double ReturnBW,
4243 		SOCParametersList mmSOCParameters,
4244 		double SOCCLK,
4245 		double DCFClkDeepSleep,
4246 		unsigned int DETBufferSizeY[],
4247 		unsigned int DETBufferSizeC[],
4248 		unsigned int SwathHeightY[],
4249 		unsigned int SwathHeightC[],
4250 		double SwathWidthY[],
4251 		double SwathWidthC[],
4252 		unsigned int DPPPerSurface[],
4253 		double BytePerPixelDETY[],
4254 		double BytePerPixelDETC[],
4255 		double DSTXAfterScaler[],
4256 		double DSTYAfterScaler[],
4257 		bool UnboundedRequestEnabled,
4258 		unsigned int CompressedBufferSizeInkByte,
4259 
4260 		/* Output */
4261 		enum clock_change_support *DRAMClockChangeSupport,
4262 		double MaxActiveDRAMClockChangeLatencySupported[],
4263 		unsigned int SubViewportLinesNeededInMALL[],
4264 		enum dm_fclock_change_support *FCLKChangeSupport,
4265 		double *MinActiveFCLKChangeLatencySupported,
4266 		bool *USRRetrainingSupport,
4267 		double ActiveDRAMClockChangeLatencyMargin[])
4268 {
4269 	unsigned int i, j, k;
4270 	unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
4271 	unsigned int DRAMClockChangeSupportNumber = 0;
4272 	unsigned int LastSurfaceWithoutMargin;
4273 	unsigned int DRAMClockChangeMethod = 0;
4274 	bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4275 	double MinActiveFCLKChangeMargin = 0.;
4276 	double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4277 	double ActiveClockChangeLatencyHidingY;
4278 	double ActiveClockChangeLatencyHidingC;
4279 	double ActiveClockChangeLatencyHiding;
4280 	double EffectiveDETBufferSizeY;
4281 	double     ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
4282 	double     USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
4283 	double TotalPixelBW = 0.0;
4284 	bool    SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
4285 	double     EffectiveLBLatencyHidingY;
4286 	double     EffectiveLBLatencyHidingC;
4287 	double     LinesInDETY[DC__NUM_DPP__MAX];
4288 	double     LinesInDETC[DC__NUM_DPP__MAX];
4289 	unsigned int    LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
4290 	unsigned int    LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
4291 	double     FullDETBufferingTimeY;
4292 	double     FullDETBufferingTimeC;
4293 	double     WritebackDRAMClockChangeLatencyMargin;
4294 	double     WritebackFCLKChangeLatencyMargin;
4295 	double     WritebackLatencyHiding;
4296 	bool    SameTimingForFCLKChange;
4297 
4298 	unsigned int    TotalActiveWriteback = 0;
4299 	unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
4300 	unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
4301 
4302 	v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4303 	v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4304 			+ mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4305 	v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
4306 	v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
4307 	v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4308 			+ 10 / DCFClkDeepSleep;
4309 	v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4310 			+ 10 / DCFClkDeepSleep;
4311 	v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4312 			+ 10 / DCFClkDeepSleep;
4313 	v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4314 			+ mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4315 
4316 #ifdef __DML_VBA_DEBUG__
4317 	dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4318 	dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4319 	dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4320 	dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark);
4321 	dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark);
4322 	dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark);
4323 	dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark);
4324 	dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark);
4325 	dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark);
4326 	dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark);
4327 	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4328 			__func__, v->Watermark.Z8StutterEnterPlusExitWatermark);
4329 #endif
4330 
4331 
4332 	TotalActiveWriteback = 0;
4333 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4334 		if (v->WritebackEnable[k] == true)
4335 			TotalActiveWriteback = TotalActiveWriteback + 1;
4336 	}
4337 
4338 	if (TotalActiveWriteback <= 1) {
4339 		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4340 	} else {
4341 		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4342 				+ v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4343 	}
4344 	if (v->USRRetrainingRequiredFinal)
4345 		v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark
4346 				+ mmSOCParameters.USRRetrainingLatency;
4347 
4348 	if (TotalActiveWriteback <= 1) {
4349 		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4350 				+ mmSOCParameters.WritebackLatency;
4351 		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4352 				+ mmSOCParameters.WritebackLatency;
4353 	} else {
4354 		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4355 				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4356 		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4357 				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK;
4358 	}
4359 
4360 	if (v->USRRetrainingRequiredFinal)
4361 		v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
4362 				+ mmSOCParameters.USRRetrainingLatency;
4363 
4364 	if (v->USRRetrainingRequiredFinal)
4365 		v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
4366 				+ mmSOCParameters.USRRetrainingLatency;
4367 
4368 #ifdef __DML_VBA_DEBUG__
4369 	dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4370 			__func__, v->Watermark.WritebackDRAMClockChangeWatermark);
4371 	dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark);
4372 	dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark);
4373 	dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal);
4374 	dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4375 #endif
4376 
4377 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4378 		TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
4379 				SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
4380 	}
4381 
4382 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4383 
4384 		LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
4385 		LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
4386 
4387 
4388 #ifdef __DML_VBA_DEBUG__
4389 		dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines);
4390 		dml_print("DML::%s: k=%d, v->LineBufferSizeFinal     = %d\n", __func__, k, v->LineBufferSizeFinal);
4391 		dml_print("DML::%s: k=%d, v->LBBitPerPixel      = %d\n", __func__, k, v->LBBitPerPixel[k]);
4392 		dml_print("DML::%s: k=%d, v->HRatio             = %f\n", __func__, k, v->HRatio[k]);
4393 		dml_print("DML::%s: k=%d, v->vtaps              = %d\n", __func__, k, v->vtaps[k]);
4394 #endif
4395 
4396 		EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
4397 		EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
4398 		EffectiveDETBufferSizeY = DETBufferSizeY[k];
4399 
4400 		if (UnboundedRequestEnabled) {
4401 			EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
4402 					+ CompressedBufferSizeInkByte * 1024
4403 							* (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
4404 							/ (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
4405 		}
4406 
4407 		LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4408 		LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
4409 		FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
4410 
4411 		ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
4412 				- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];
4413 
4414 		if (v->NumberOfActiveSurfaces > 1) {
4415 			ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
4416 					- (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
4417 							/ v->PixelClock[k] / v->VRatio[k];
4418 		}
4419 
4420 		if (BytePerPixelDETC[k] > 0) {
4421 			LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4422 			LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
4423 			FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
4424 					/ v->VRatioChroma[k];
4425 			ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
4426 					- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
4427 							/ v->PixelClock[k];
4428 			if (v->NumberOfActiveSurfaces > 1) {
4429 				ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
4430 						- (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
4431 								/ v->PixelClock[k] / v->VRatioChroma[k];
4432 			}
4433 			ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
4434 					ActiveClockChangeLatencyHidingC);
4435 		} else {
4436 			ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
4437 		}
4438 
4439 		ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4440 				- v->Watermark.DRAMClockChangeWatermark;
4441 		ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4442 				- v->Watermark.FCLKChangeWatermark;
4443 		USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;
4444 
4445 		if (v->WritebackEnable[k]) {
4446 			WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
4447 					/ (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4448 							/ (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
4449 			if (v->WritebackPixelFormat[k] == dm_444_64)
4450 				WritebackLatencyHiding = WritebackLatencyHiding / 2;
4451 
4452 			WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
4453 					- v->Watermark.WritebackDRAMClockChangeWatermark;
4454 
4455 			WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
4456 					- v->Watermark.WritebackFCLKChangeWatermark;
4457 
4458 			ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4459 					WritebackFCLKChangeLatencyMargin);
4460 			ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
4461 					WritebackDRAMClockChangeLatencyMargin);
4462 		}
4463 		MaxActiveDRAMClockChangeLatencySupported[k] =
4464 				(v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4465 						0 :
4466 						(ActiveDRAMClockChangeLatencyMargin[k]
4467 								+ mmSOCParameters.DRAMClockChangeLatency);
4468 	}
4469 
4470 	for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
4471 		for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
4472 			if (i == j ||
4473 					(v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
4474 					(v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
4475 					(v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
4476 					(v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
4477 					v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
4478 					v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4479 					(v->DRRDisplay[i] || v->DRRDisplay[j]))) {
4480 				SynchronizedSurfaces[i][j] = true;
4481 			} else {
4482 				SynchronizedSurfaces[i][j] = false;
4483 			}
4484 		}
4485 	}
4486 
4487 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4488 		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4489 				(!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4490 				ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
4491 			FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4492 			MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
4493 			SurfaceWithMinActiveFCLKChangeMargin = k;
4494 		}
4495 	}
4496 
4497 	*MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4498 
4499 	SameTimingForFCLKChange = true;
4500 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4501 		if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
4502 			if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4503 					(SameTimingForFCLKChange ||
4504 					ActiveFCLKChangeLatencyMargin[k] <
4505 					SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4506 				SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
4507 			}
4508 			SameTimingForFCLKChange = false;
4509 		}
4510 	}
4511 
4512 	if (MinActiveFCLKChangeMargin > 0) {
4513 		*FCLKChangeSupport = dm_fclock_change_vactive;
4514 	} else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4515 			(PrefetchMode <= 1)) {
4516 		*FCLKChangeSupport = dm_fclock_change_vblank;
4517 	} else {
4518 		*FCLKChangeSupport = dm_fclock_change_unsupported;
4519 	}
4520 
4521 	*USRRetrainingSupport = true;
4522 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4523 		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4524 				(USRRetrainingLatencyMargin[k] < 0)) {
4525 			*USRRetrainingSupport = false;
4526 		}
4527 	}
4528 
4529 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4530 		if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4531 				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4532 				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4533 				ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4534 			if (PrefetchMode > 0) {
4535 				DRAMClockChangeSupportNumber = 2;
4536 			} else if (DRAMClockChangeSupportNumber == 0) {
4537 				DRAMClockChangeSupportNumber = 1;
4538 				LastSurfaceWithoutMargin = k;
4539 			} else if (DRAMClockChangeSupportNumber == 1 &&
4540 					!SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
4541 				DRAMClockChangeSupportNumber = 2;
4542 			}
4543 		}
4544 	}
4545 
4546 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4547 		if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4548 			DRAMClockChangeMethod = 1;
4549 		else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4550 			DRAMClockChangeMethod = 2;
4551 	}
4552 
4553 	if (DRAMClockChangeMethod == 0) {
4554 		if (DRAMClockChangeSupportNumber == 0)
4555 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4556 		else if (DRAMClockChangeSupportNumber == 1)
4557 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4558 		else
4559 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4560 	} else if (DRAMClockChangeMethod == 1) {
4561 		if (DRAMClockChangeSupportNumber == 0)
4562 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4563 		else if (DRAMClockChangeSupportNumber == 1)
4564 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4565 		else
4566 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4567 	} else {
4568 		if (DRAMClockChangeSupportNumber == 0)
4569 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4570 		else if (DRAMClockChangeSupportNumber == 1)
4571 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4572 		else
4573 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4574 	}
4575 
4576 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4577 		unsigned int dst_y_pstate;
4578 		unsigned int src_y_pstate_l;
4579 		unsigned int src_y_pstate_c;
4580 		unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4581 
4582 		dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1);
4583 		src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]);
4584 		src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
4585 		sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];
4586 
4587 #ifdef __DML_VBA_DEBUG__
4588 dml_print("DML::%s: k=%d, DETBufferSizeY               = %d\n", __func__, k, DETBufferSizeY[k]);
4589 dml_print("DML::%s: k=%d, BytePerPixelDETY             = %f\n", __func__, k, BytePerPixelDETY[k]);
4590 dml_print("DML::%s: k=%d, SwathWidthY                  = %d\n", __func__, k, SwathWidthY[k]);
4591 dml_print("DML::%s: k=%d, SwathHeightY                 = %d\n", __func__, k, SwathHeightY[k]);
4592 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
4593 dml_print("DML::%s: k=%d, dst_y_pstate      = %d\n", __func__, k, dst_y_pstate);
4594 dml_print("DML::%s: k=%d, src_y_pstate_l    = %d\n", __func__, k, src_y_pstate_l);
4595 dml_print("DML::%s: k=%d, src_y_ahead_l     = %d\n", __func__, k, src_y_ahead_l);
4596 dml_print("DML::%s: k=%d, v->meta_row_height   = %d\n", __func__, k, v->meta_row_height[k]);
4597 dml_print("DML::%s: k=%d, sub_vp_lines_l    = %d\n", __func__, k, sub_vp_lines_l);
4598 #endif
4599 		SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4600 
4601 		if (BytePerPixelDETC[k] > 0) {
4602 			src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]);
4603 			src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
4604 			sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
4605 			SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4606 
4607 #ifdef __DML_VBA_DEBUG__
4608 dml_print("DML::%s: k=%d, src_y_pstate_c            = %d\n", __func__, k, src_y_pstate_c);
4609 dml_print("DML::%s: k=%d, src_y_ahead_c             = %d\n", __func__, k, src_y_ahead_c);
4610 dml_print("DML::%s: k=%d, v->meta_row_height_chroma    = %d\n", __func__, k, v->meta_row_height_chroma[k]);
4611 dml_print("DML::%s: k=%d, sub_vp_lines_c            = %d\n", __func__, k, sub_vp_lines_c);
4612 #endif
4613 		}
4614 	}
4615 #ifdef __DML_VBA_DEBUG__
4616 	dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4617 	dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4618 	dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4619 			__func__, *MinActiveFCLKChangeLatencySupported);
4620 	dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4621 #endif
4622 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4623 
4624 double dml32_CalculateWriteBackDISPCLK(
4625 		enum source_format_class WritebackPixelFormat,
4626 		double PixelClock,
4627 		double WritebackHRatio,
4628 		double WritebackVRatio,
4629 		unsigned int WritebackHTaps,
4630 		unsigned int WritebackVTaps,
4631 		unsigned int   WritebackSourceWidth,
4632 		unsigned int   WritebackDestinationWidth,
4633 		unsigned int HTotal,
4634 		unsigned int WritebackLineBufferSize,
4635 		double DISPCLKDPPCLKVCOSpeed)
4636 {
4637 	double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
4638 
4639 	DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
4640 	DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
4641 	DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth *
4642 			WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
4643 	return dml32_RoundToDFSGranularity(dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB), 1, DISPCLKDPPCLKVCOSpeed);
4644 }
4645 
4646 void dml32_CalculateMinAndMaxPrefetchMode(
4647 		enum dm_prefetch_modes   AllowForPStateChangeOrStutterInVBlankFinal,
4648 		unsigned int             *MinPrefetchMode,
4649 		unsigned int             *MaxPrefetchMode)
4650 {
4651 	if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4652 		*MinPrefetchMode = 3;
4653 		*MaxPrefetchMode = 3;
4654 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4655 		*MinPrefetchMode = 2;
4656 		*MaxPrefetchMode = 2;
4657 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4658 		*MinPrefetchMode = 1;
4659 		*MaxPrefetchMode = 1;
4660 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4661 		*MinPrefetchMode = 0;
4662 		*MaxPrefetchMode = 0;
4663 	} else {
4664 		*MinPrefetchMode = 0;
4665 		*MaxPrefetchMode = 3;
4666 	}
4667 } // CalculateMinAndMaxPrefetchMode
4668 
4669 void dml32_CalculatePixelDeliveryTimes(
4670 		unsigned int             NumberOfActiveSurfaces,
4671 		double              VRatio[],
4672 		double              VRatioChroma[],
4673 		double              VRatioPrefetchY[],
4674 		double              VRatioPrefetchC[],
4675 		unsigned int             swath_width_luma_ub[],
4676 		unsigned int             swath_width_chroma_ub[],
4677 		unsigned int             DPPPerSurface[],
4678 		double              HRatio[],
4679 		double              HRatioChroma[],
4680 		double              PixelClock[],
4681 		double              PSCL_THROUGHPUT[],
4682 		double              PSCL_THROUGHPUT_CHROMA[],
4683 		double              Dppclk[],
4684 		unsigned int             BytePerPixelC[],
4685 		enum dm_rotation_angle   SourceRotation[],
4686 		unsigned int             NumberOfCursors[],
4687 		unsigned int             CursorWidth[][DC__NUM_CURSOR__MAX],
4688 		unsigned int             CursorBPP[][DC__NUM_CURSOR__MAX],
4689 		unsigned int             BlockWidth256BytesY[],
4690 		unsigned int             BlockHeight256BytesY[],
4691 		unsigned int             BlockWidth256BytesC[],
4692 		unsigned int             BlockHeight256BytesC[],
4693 
4694 		/* Output */
4695 		double              DisplayPipeLineDeliveryTimeLuma[],
4696 		double              DisplayPipeLineDeliveryTimeChroma[],
4697 		double              DisplayPipeLineDeliveryTimeLumaPrefetch[],
4698 		double              DisplayPipeLineDeliveryTimeChromaPrefetch[],
4699 		double              DisplayPipeRequestDeliveryTimeLuma[],
4700 		double              DisplayPipeRequestDeliveryTimeChroma[],
4701 		double              DisplayPipeRequestDeliveryTimeLumaPrefetch[],
4702 		double              DisplayPipeRequestDeliveryTimeChromaPrefetch[],
4703 		double              CursorRequestDeliveryTime[],
4704 		double              CursorRequestDeliveryTimePrefetch[])
4705 {
4706 	double   req_per_swath_ub;
4707 	unsigned int k;
4708 
4709 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4710 
4711 #ifdef __DML_VBA_DEBUG__
4712 		dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
4713 		dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
4714 		dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
4715 		dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
4716 		dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
4717 		dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
4718 		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
4719 		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
4720 		dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
4721 		dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
4722 		dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
4723 #endif
4724 
4725 		if (VRatio[k] <= 1) {
4726 			DisplayPipeLineDeliveryTimeLuma[k] =
4727 					swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4728 		} else {
4729 			DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4730 		}
4731 
4732 		if (BytePerPixelC[k] == 0) {
4733 			DisplayPipeLineDeliveryTimeChroma[k] = 0;
4734 		} else {
4735 			if (VRatioChroma[k] <= 1) {
4736 				DisplayPipeLineDeliveryTimeChroma[k] =
4737 					swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4738 			} else {
4739 				DisplayPipeLineDeliveryTimeChroma[k] =
4740 					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4741 			}
4742 		}
4743 
4744 		if (VRatioPrefetchY[k] <= 1) {
4745 			DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4746 					swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4747 		} else {
4748 			DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4749 					swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4750 		}
4751 
4752 		if (BytePerPixelC[k] == 0) {
4753 			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
4754 		} else {
4755 			if (VRatioPrefetchC[k] <= 1) {
4756 				DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
4757 						DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4758 			} else {
4759 				DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
4760 						swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4761 			}
4762 		}
4763 #ifdef __DML_VBA_DEBUG__
4764 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
4765 				__func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
4766 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
4767 				__func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
4768 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
4769 				__func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
4770 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
4771 				__func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
4772 #endif
4773 	}
4774 
4775 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4776 		if (!IsVertical(SourceRotation[k]))
4777 			req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
4778 		else
4779 			req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
4780 #ifdef __DML_VBA_DEBUG__
4781 		dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
4782 #endif
4783 
4784 		DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
4785 		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
4786 				DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
4787 		if (BytePerPixelC[k] == 0) {
4788 			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
4789 			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
4790 		} else {
4791 			if (!IsVertical(SourceRotation[k]))
4792 				req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
4793 			else
4794 				req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
4795 #ifdef __DML_VBA_DEBUG__
4796 			dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
4797 #endif
4798 			DisplayPipeRequestDeliveryTimeChroma[k] =
4799 					DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
4800 			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
4801 					DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
4802 		}
4803 #ifdef __DML_VBA_DEBUG__
4804 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
4805 				__func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
4806 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
4807 				__func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
4808 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
4809 				__func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
4810 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
4811 				__func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
4812 #endif
4813 	}
4814 
4815 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4816 		unsigned int cursor_req_per_width;
4817 
4818 		cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
4819 				256.0 / 8.0, 1.0);
4820 		if (NumberOfCursors[k] > 0) {
4821 			if (VRatio[k] <= 1) {
4822 				CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4823 						HRatio[k] / PixelClock[k] / cursor_req_per_width;
4824 			} else {
4825 				CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4826 						PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4827 			}
4828 			if (VRatioPrefetchY[k] <= 1) {
4829 				CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4830 						HRatio[k] / PixelClock[k] / cursor_req_per_width;
4831 			} else {
4832 				CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4833 						PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4834 			}
4835 		} else {
4836 			CursorRequestDeliveryTime[k] = 0;
4837 			CursorRequestDeliveryTimePrefetch[k] = 0;
4838 		}
4839 #ifdef __DML_VBA_DEBUG__
4840 		dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
4841 				__func__, k, NumberOfCursors[k]);
4842 		dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
4843 				__func__, k, CursorRequestDeliveryTime[k]);
4844 		dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
4845 				__func__, k, CursorRequestDeliveryTimePrefetch[k]);
4846 #endif
4847 	}
4848 } // CalculatePixelDeliveryTimes
4849 
4850 void dml32_CalculateMetaAndPTETimes(
4851 		bool use_one_row_for_frame[],
4852 		unsigned int NumberOfActiveSurfaces,
4853 		bool GPUVMEnable,
4854 		unsigned int MetaChunkSize,
4855 		unsigned int MinMetaChunkSizeBytes,
4856 		unsigned int    HTotal[],
4857 		double  VRatio[],
4858 		double  VRatioChroma[],
4859 		double  DestinationLinesToRequestRowInVBlank[],
4860 		double  DestinationLinesToRequestRowInImmediateFlip[],
4861 		bool DCCEnable[],
4862 		double  PixelClock[],
4863 		unsigned int BytePerPixelY[],
4864 		unsigned int BytePerPixelC[],
4865 		enum dm_rotation_angle SourceRotation[],
4866 		unsigned int dpte_row_height[],
4867 		unsigned int dpte_row_height_chroma[],
4868 		unsigned int meta_row_width[],
4869 		unsigned int meta_row_width_chroma[],
4870 		unsigned int meta_row_height[],
4871 		unsigned int meta_row_height_chroma[],
4872 		unsigned int meta_req_width[],
4873 		unsigned int meta_req_width_chroma[],
4874 		unsigned int meta_req_height[],
4875 		unsigned int meta_req_height_chroma[],
4876 		unsigned int dpte_group_bytes[],
4877 		unsigned int    PTERequestSizeY[],
4878 		unsigned int    PTERequestSizeC[],
4879 		unsigned int    PixelPTEReqWidthY[],
4880 		unsigned int    PixelPTEReqHeightY[],
4881 		unsigned int    PixelPTEReqWidthC[],
4882 		unsigned int    PixelPTEReqHeightC[],
4883 		unsigned int    dpte_row_width_luma_ub[],
4884 		unsigned int    dpte_row_width_chroma_ub[],
4885 
4886 		/* Output */
4887 		double DST_Y_PER_PTE_ROW_NOM_L[],
4888 		double DST_Y_PER_PTE_ROW_NOM_C[],
4889 		double DST_Y_PER_META_ROW_NOM_L[],
4890 		double DST_Y_PER_META_ROW_NOM_C[],
4891 		double TimePerMetaChunkNominal[],
4892 		double TimePerChromaMetaChunkNominal[],
4893 		double TimePerMetaChunkVBlank[],
4894 		double TimePerChromaMetaChunkVBlank[],
4895 		double TimePerMetaChunkFlip[],
4896 		double TimePerChromaMetaChunkFlip[],
4897 		double time_per_pte_group_nom_luma[],
4898 		double time_per_pte_group_vblank_luma[],
4899 		double time_per_pte_group_flip_luma[],
4900 		double time_per_pte_group_nom_chroma[],
4901 		double time_per_pte_group_vblank_chroma[],
4902 		double time_per_pte_group_flip_chroma[])
4903 {
4904 	unsigned int   meta_chunk_width;
4905 	unsigned int   min_meta_chunk_width;
4906 	unsigned int   meta_chunk_per_row_int;
4907 	unsigned int   meta_row_remainder;
4908 	unsigned int   meta_chunk_threshold;
4909 	unsigned int   meta_chunks_per_row_ub;
4910 	unsigned int   meta_chunk_width_chroma;
4911 	unsigned int   min_meta_chunk_width_chroma;
4912 	unsigned int   meta_chunk_per_row_int_chroma;
4913 	unsigned int   meta_row_remainder_chroma;
4914 	unsigned int   meta_chunk_threshold_chroma;
4915 	unsigned int   meta_chunks_per_row_ub_chroma;
4916 	unsigned int   dpte_group_width_luma;
4917 	unsigned int   dpte_groups_per_row_luma_ub;
4918 	unsigned int   dpte_group_width_chroma;
4919 	unsigned int   dpte_groups_per_row_chroma_ub;
4920 	unsigned int k;
4921 
4922 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4923 		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4924 		if (BytePerPixelC[k] == 0)
4925 			DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4926 		else
4927 			DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4928 		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4929 		if (BytePerPixelC[k] == 0)
4930 			DST_Y_PER_META_ROW_NOM_C[k] = 0;
4931 		else
4932 			DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4933 	}
4934 
4935 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4936 		if (DCCEnable[k] == true) {
4937 			meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4938 			min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4939 			meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4940 			meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4941 			if (!IsVertical(SourceRotation[k]))
4942 				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4943 			else
4944 				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4945 
4946 			if (meta_row_remainder <= meta_chunk_threshold)
4947 				meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4948 			else
4949 				meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4950 
4951 			TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4952 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4953 			TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4954 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4955 			TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4956 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4957 			if (BytePerPixelC[k] == 0) {
4958 				TimePerChromaMetaChunkNominal[k] = 0;
4959 				TimePerChromaMetaChunkVBlank[k] = 0;
4960 				TimePerChromaMetaChunkFlip[k] = 0;
4961 			} else {
4962 				meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4963 						meta_row_height_chroma[k];
4964 				min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4965 						meta_row_height_chroma[k];
4966 				meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4967 						meta_chunk_width_chroma;
4968 				meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4969 				if (!IsVertical(SourceRotation[k])) {
4970 					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4971 							meta_req_width_chroma[k];
4972 				} else {
4973 					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4974 							meta_req_height_chroma[k];
4975 				}
4976 				if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
4977 					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
4978 				else
4979 					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
4980 
4981 				TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
4982 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4983 				TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4984 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4985 				TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4986 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
4987 			}
4988 		} else {
4989 			TimePerMetaChunkNominal[k] = 0;
4990 			TimePerMetaChunkVBlank[k] = 0;
4991 			TimePerMetaChunkFlip[k] = 0;
4992 			TimePerChromaMetaChunkNominal[k] = 0;
4993 			TimePerChromaMetaChunkVBlank[k] = 0;
4994 			TimePerChromaMetaChunkFlip[k] = 0;
4995 		}
4996 	}
4997 
4998 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4999 		if (GPUVMEnable == true) {
5000 			if (!IsVertical(SourceRotation[k])) {
5001 				dpte_group_width_luma = (double) dpte_group_bytes[k] /
5002 						(double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5003 			} else {
5004 				dpte_group_width_luma = (double) dpte_group_bytes[k] /
5005 						(double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
5006 			}
5007 
5008 			if (use_one_row_for_frame[k]) {
5009 				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5010 						(double) dpte_group_width_luma / 2.0, 1.0);
5011 			} else {
5012 				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5013 						(double) dpte_group_width_luma, 1.0);
5014 			}
5015 #ifdef __DML_VBA_DEBUG__
5016 			dml_print("DML::%s: k=%0d, use_one_row_for_frame        = %d\n",
5017 					__func__, k, use_one_row_for_frame[k]);
5018 			dml_print("DML::%s: k=%0d, dpte_group_bytes             = %d\n",
5019 					__func__, k, dpte_group_bytes[k]);
5020 			dml_print("DML::%s: k=%0d, PTERequestSizeY              = %d\n",
5021 					__func__, k, PTERequestSizeY[k]);
5022 			dml_print("DML::%s: k=%0d, PixelPTEReqWidthY            = %d\n",
5023 					__func__, k, PixelPTEReqWidthY[k]);
5024 			dml_print("DML::%s: k=%0d, PixelPTEReqHeightY           = %d\n",
5025 					__func__, k, PixelPTEReqHeightY[k]);
5026 			dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub       = %d\n",
5027 					__func__, k, dpte_row_width_luma_ub[k]);
5028 			dml_print("DML::%s: k=%0d, dpte_group_width_luma        = %d\n",
5029 					__func__, k, dpte_group_width_luma);
5030 			dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub  = %d\n",
5031 					__func__, k, dpte_groups_per_row_luma_ub);
5032 #endif
5033 
5034 			time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
5035 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5036 			time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
5037 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5038 			time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5039 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5040 			if (BytePerPixelC[k] == 0) {
5041 				time_per_pte_group_nom_chroma[k] = 0;
5042 				time_per_pte_group_vblank_chroma[k] = 0;
5043 				time_per_pte_group_flip_chroma[k] = 0;
5044 			} else {
5045 				if (!IsVertical(SourceRotation[k])) {
5046 					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5047 							(double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5048 				} else {
5049 					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5050 							(double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5051 				}
5052 
5053 				if (use_one_row_for_frame[k]) {
5054 					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5055 							(double) dpte_group_width_chroma / 2.0, 1.0);
5056 				} else {
5057 					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5058 							(double) dpte_group_width_chroma, 1.0);
5059 				}
5060 #ifdef __DML_VBA_DEBUG__
5061 				dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub        = %d\n",
5062 						__func__, k, dpte_row_width_chroma_ub[k]);
5063 				dml_print("DML::%s: k=%0d, dpte_group_width_chroma        = %d\n",
5064 						__func__, k, dpte_group_width_chroma);
5065 				dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub  = %d\n",
5066 						__func__, k, dpte_groups_per_row_chroma_ub);
5067 #endif
5068 				time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5069 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5070 				time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5071 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5072 				time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5073 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5074 			}
5075 		} else {
5076 			time_per_pte_group_nom_luma[k] = 0;
5077 			time_per_pte_group_vblank_luma[k] = 0;
5078 			time_per_pte_group_flip_luma[k] = 0;
5079 			time_per_pte_group_nom_chroma[k] = 0;
5080 			time_per_pte_group_vblank_chroma[k] = 0;
5081 			time_per_pte_group_flip_chroma[k] = 0;
5082 		}
5083 #ifdef __DML_VBA_DEBUG__
5084 		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank         = %f\n",
5085 				__func__, k, DestinationLinesToRequestRowInVBlank[k]);
5086 		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip  = %f\n",
5087 				__func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5088 		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L                      = %f\n",
5089 				__func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5090 		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C                      = %f\n",
5091 				__func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5092 		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L                     = %f\n",
5093 				__func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5094 		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C                     = %f\n",
5095 				__func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5096 		dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal          = %f\n",
5097 				__func__, k, TimePerMetaChunkNominal[k]);
5098 		dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank           = %f\n",
5099 				__func__, k, TimePerMetaChunkVBlank[k]);
5100 		dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip             = %f\n",
5101 				__func__, k, TimePerMetaChunkFlip[k]);
5102 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal    = %f\n",
5103 				__func__, k, TimePerChromaMetaChunkNominal[k]);
5104 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank     = %f\n",
5105 				__func__, k, TimePerChromaMetaChunkVBlank[k]);
5106 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip       = %f\n",
5107 				__func__, k, TimePerChromaMetaChunkFlip[k]);
5108 		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma      = %f\n",
5109 				__func__, k, time_per_pte_group_nom_luma[k]);
5110 		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma   = %f\n",
5111 				__func__, k, time_per_pte_group_vblank_luma[k]);
5112 		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma     = %f\n",
5113 				__func__, k, time_per_pte_group_flip_luma[k]);
5114 		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma    = %f\n",
5115 				__func__, k, time_per_pte_group_nom_chroma[k]);
5116 		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5117 				__func__, k, time_per_pte_group_vblank_chroma[k]);
5118 		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma   = %f\n",
5119 				__func__, k, time_per_pte_group_flip_chroma[k]);
5120 #endif
5121 	}
5122 } // CalculateMetaAndPTETimes
5123 
/*
 * dml32_CalculateVMGroupAndRequestTimes - per-surface VM group/request times.
 *
 * For each of the NumberOfActiveSurfaces surfaces the four output arrays are
 * filled with
 *     DestinationLinesToRequestVM*[k] * HTotal[k] / PixelClock[k] / count
 * where "count" is either the group count or the request count of the lower
 * VM stage.  Which byte totals feed those counts depends on the surface:
 *   - DCCEnable[k] false:                          dpde0 bytes only
 *   - DCCEnable[k] true, exactly 1 page-table level: meta PTE bytes only
 *   - DCCEnable[k] true, otherwise:                dpde0 and meta PTE bytes,
 *                                                  plus one extra group per
 *                                                  plane (1 or 2)
 * Chroma totals are included only when BytePerPixelC[k] > 0.  A byte total
 * contributes ceil(bytes / vm_group_bytes[k]) groups and bytes / 64 requests.
 *
 * All four results are halved when GPUVMMaxPageTableLevels > 2.  They are set
 * to 0 when GPUVMEnable is false, or when DCCEnable[k] is false and
 * GPUVMMaxPageTableLevels is not above 1.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are accepted but not
 * referenced by this routine.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int     NumberOfActiveSurfaces,
		bool     GPUVMEnable,
		unsigned int     GPUVMMaxPageTableLevels,
		unsigned int     HTotal[],
		unsigned int     BytePerPixelC[],
		double      DestinationLinesToRequestVMInVBlank[],
		double      DestinationLinesToRequestVMInImmediateFlip[],
		bool     DCCEnable[],
		double      PixelClock[],
		unsigned int        dpte_row_width_luma_ub[],
		unsigned int        dpte_row_width_chroma_ub[],
		unsigned int     vm_group_bytes[],
		unsigned int     dpde0_bytes_per_frame_ub_l[],
		unsigned int     dpde0_bytes_per_frame_ub_c[],
		unsigned int        meta_pte_bytes_per_frame_ub_l[],
		unsigned int        meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double      TimePerVMGroupVBlank[],
		double      TimePerVMGroupFlip[],
		double      TimePerVMRequestVBlank[],
		double      TimePerVMRequestFlip[])
{
	unsigned int k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (!(GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1))) {
			/* Nothing to pace for this surface. */
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		} else {
			const bool has_chroma = BytePerPixelC[k] > 0;
			/* dpde0 totals count unless DCC is on with exactly one level. */
			const bool count_dpde0 = (DCCEnable[k] == false) ||
					(GPUVMMaxPageTableLevels != 1);
			/* meta PTE totals count whenever DCC is on. */
			const bool count_meta_pte = (DCCEnable[k] != false);
			double group_total = 0.0;
			unsigned int req_total = 0;
			unsigned int num_group_per_lower_vm_stage;
			unsigned int num_req_per_lower_vm_stage;

			/* When both kinds contribute, one extra group is added per plane. */
			if (count_dpde0 && count_meta_pte)
				group_total = has_chroma ? 2 : 1;

			if (count_dpde0) {
				group_total += dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1.0);
				req_total += dpde0_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_total += dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					req_total += dpde0_bytes_per_frame_ub_c[k] / 64;
				}
			}

			if (count_meta_pte) {
				group_total += dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1.0);
				req_total += meta_pte_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_total += dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					req_total += meta_pte_bytes_per_frame_ub_c[k] / 64;
				}
			}

			/* The group count is stored as an unsigned integer before use. */
			num_group_per_lower_vm_stage = group_total;
			num_req_per_lower_vm_stage = req_total;

			TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
			TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage;
			TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;
			TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage;

			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] /= 2;
				TimePerVMGroupFlip[k] /= 2;
				TimePerVMRequestVBlank[k] /= 2;
				TimePerVMRequestFlip[k] /= 2;
			}
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
5277 
5278 void dml32_CalculateDCCConfiguration(
5279 		bool             DCCEnabled,
5280 		bool             DCCProgrammingAssumesScanDirectionUnknown,
5281 		enum source_format_class SourcePixelFormat,
5282 		unsigned int             SurfaceWidthLuma,
5283 		unsigned int             SurfaceWidthChroma,
5284 		unsigned int             SurfaceHeightLuma,
5285 		unsigned int             SurfaceHeightChroma,
5286 		unsigned int                nomDETInKByte,
5287 		unsigned int             RequestHeight256ByteLuma,
5288 		unsigned int             RequestHeight256ByteChroma,
5289 		enum dm_swizzle_mode     TilingFormat,
5290 		unsigned int             BytePerPixelY,
5291 		unsigned int             BytePerPixelC,
5292 		double              BytePerPixelDETY,
5293 		double              BytePerPixelDETC,
5294 		enum dm_rotation_angle   SourceRotation,
5295 		/* Output */
5296 		unsigned int        *MaxUncompressedBlockLuma,
5297 		unsigned int        *MaxUncompressedBlockChroma,
5298 		unsigned int        *MaxCompressedBlockLuma,
5299 		unsigned int        *MaxCompressedBlockChroma,
5300 		unsigned int        *IndependentBlockLuma,
5301 		unsigned int        *IndependentBlockChroma)
5302 {
5303 	typedef enum {
5304 		REQ_256Bytes,
5305 		REQ_128BytesNonContiguous,
5306 		REQ_128BytesContiguous,
5307 		REQ_NA
5308 	} RequestType;
5309 
5310 	RequestType   RequestLuma;
5311 	RequestType   RequestChroma;
5312 
5313 	unsigned int   segment_order_horz_contiguous_luma;
5314 	unsigned int   segment_order_horz_contiguous_chroma;
5315 	unsigned int   segment_order_vert_contiguous_luma;
5316 	unsigned int   segment_order_vert_contiguous_chroma;
5317 	unsigned int req128_horz_wc_l;
5318 	unsigned int req128_horz_wc_c;
5319 	unsigned int req128_vert_wc_l;
5320 	unsigned int req128_vert_wc_c;
5321 	unsigned int MAS_vp_horz_limit;
5322 	unsigned int MAS_vp_vert_limit;
5323 	unsigned int max_vp_horz_width;
5324 	unsigned int max_vp_vert_height;
5325 	unsigned int eff_surf_width_l;
5326 	unsigned int eff_surf_width_c;
5327 	unsigned int eff_surf_height_l;
5328 	unsigned int eff_surf_height_c;
5329 	unsigned int full_swath_bytes_horz_wc_l;
5330 	unsigned int full_swath_bytes_horz_wc_c;
5331 	unsigned int full_swath_bytes_vert_wc_l;
5332 	unsigned int full_swath_bytes_vert_wc_c;
5333 	unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5334 
5335 	unsigned int   yuv420;
5336 	unsigned int   horz_div_l;
5337 	unsigned int   horz_div_c;
5338 	unsigned int   vert_div_l;
5339 	unsigned int   vert_div_c;
5340 
5341 	unsigned int     swath_buf_size;
5342 	double   detile_buf_vp_horz_limit;
5343 	double   detile_buf_vp_vert_limit;
5344 
5345 	yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5346 			SourcePixelFormat == dm_420_12) ? 1 : 0);
5347 	horz_div_l = 1;
5348 	horz_div_c = 1;
5349 	vert_div_l = 1;
5350 	vert_div_c = 1;
5351 
5352 	if (BytePerPixelY == 1)
5353 		vert_div_l = 0;
5354 	if (BytePerPixelC == 1)
5355 		vert_div_c = 0;
5356 
5357 	if (BytePerPixelC == 0) {
5358 		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5359 		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5360 				BytePerPixelY / (1 + horz_div_l));
5361 		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5362 				(1 + vert_div_l));
5363 	} else {
5364 		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5365 		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5366 				BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5367 				BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5368 		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5369 				(1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5370 				(1 + vert_div_c) / (1 + yuv420));
5371 	}
5372 
5373 	if (SourcePixelFormat == dm_420_10) {
5374 		detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5375 		detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5376 	}
5377 
5378 	detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5379 	detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5380 
5381 	MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5382 	MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5383 	max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5384 	max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5385 	eff_surf_width_l =  (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5386 	eff_surf_width_c =  eff_surf_width_l / (1 + yuv420);
5387 	eff_surf_height_l =  (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5388 	eff_surf_height_c =  eff_surf_height_l / (1 + yuv420);
5389 
5390 	full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5391 	full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5392 	if (BytePerPixelC > 0) {
5393 		full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5394 		full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5395 	} else {
5396 		full_swath_bytes_horz_wc_c = 0;
5397 		full_swath_bytes_vert_wc_c = 0;
5398 	}
5399 
5400 	if (SourcePixelFormat == dm_420_10) {
5401 		full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5402 		full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5403 		full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5404 		full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5405 	}
5406 
5407 	if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5408 		req128_horz_wc_l = 0;
5409 		req128_horz_wc_c = 0;
5410 	} else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5411 			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5412 		req128_horz_wc_l = 0;
5413 		req128_horz_wc_c = 1;
5414 	} else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5415 			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5416 		req128_horz_wc_l = 1;
5417 		req128_horz_wc_c = 0;
5418 	} else {
5419 		req128_horz_wc_l = 1;
5420 		req128_horz_wc_c = 1;
5421 	}
5422 
5423 	if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5424 		req128_vert_wc_l = 0;
5425 		req128_vert_wc_c = 0;
5426 	} else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5427 			full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5428 		req128_vert_wc_l = 0;
5429 		req128_vert_wc_c = 1;
5430 	} else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5431 			full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5432 		req128_vert_wc_l = 1;
5433 		req128_vert_wc_c = 0;
5434 	} else {
5435 		req128_vert_wc_l = 1;
5436 		req128_vert_wc_c = 1;
5437 	}
5438 
5439 	if (BytePerPixelY == 2) {
5440 		segment_order_horz_contiguous_luma = 0;
5441 		segment_order_vert_contiguous_luma = 1;
5442 	} else {
5443 		segment_order_horz_contiguous_luma = 1;
5444 		segment_order_vert_contiguous_luma = 0;
5445 	}
5446 
5447 	if (BytePerPixelC == 2) {
5448 		segment_order_horz_contiguous_chroma = 0;
5449 		segment_order_vert_contiguous_chroma = 1;
5450 	} else {
5451 		segment_order_horz_contiguous_chroma = 1;
5452 		segment_order_vert_contiguous_chroma = 0;
5453 	}
5454 #ifdef __DML_VBA_DEBUG__
5455 	dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5456 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5457 	dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5458 	dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5459 	dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5460 	dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5461 	dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5462 	dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5463 	dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5464 			__func__, segment_order_horz_contiguous_chroma);
5465 #endif
5466 
5467 	if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5468 		if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5469 			RequestLuma = REQ_256Bytes;
5470 		else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5471 				(req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5472 			RequestLuma = REQ_128BytesNonContiguous;
5473 		else
5474 			RequestLuma = REQ_128BytesContiguous;
5475 
5476 		if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5477 			RequestChroma = REQ_256Bytes;
5478 		else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5479 				(req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5480 			RequestChroma = REQ_128BytesNonContiguous;
5481 		else
5482 			RequestChroma = REQ_128BytesContiguous;
5483 
5484 	} else if (!IsVertical(SourceRotation)) {
5485 		if (req128_horz_wc_l == 0)
5486 			RequestLuma = REQ_256Bytes;
5487 		else if (segment_order_horz_contiguous_luma == 0)
5488 			RequestLuma = REQ_128BytesNonContiguous;
5489 		else
5490 			RequestLuma = REQ_128BytesContiguous;
5491 
5492 		if (req128_horz_wc_c == 0)
5493 			RequestChroma = REQ_256Bytes;
5494 		else if (segment_order_horz_contiguous_chroma == 0)
5495 			RequestChroma = REQ_128BytesNonContiguous;
5496 		else
5497 			RequestChroma = REQ_128BytesContiguous;
5498 
5499 	} else {
5500 		if (req128_vert_wc_l == 0)
5501 			RequestLuma = REQ_256Bytes;
5502 		else if (segment_order_vert_contiguous_luma == 0)
5503 			RequestLuma = REQ_128BytesNonContiguous;
5504 		else
5505 			RequestLuma = REQ_128BytesContiguous;
5506 
5507 		if (req128_vert_wc_c == 0)
5508 			RequestChroma = REQ_256Bytes;
5509 		else if (segment_order_vert_contiguous_chroma == 0)
5510 			RequestChroma = REQ_128BytesNonContiguous;
5511 		else
5512 			RequestChroma = REQ_128BytesContiguous;
5513 	}
5514 
5515 	if (RequestLuma == REQ_256Bytes) {
5516 		*MaxUncompressedBlockLuma = 256;
5517 		*MaxCompressedBlockLuma = 256;
5518 		*IndependentBlockLuma = 0;
5519 	} else if (RequestLuma == REQ_128BytesContiguous) {
5520 		*MaxUncompressedBlockLuma = 256;
5521 		*MaxCompressedBlockLuma = 128;
5522 		*IndependentBlockLuma = 128;
5523 	} else {
5524 		*MaxUncompressedBlockLuma = 256;
5525 		*MaxCompressedBlockLuma = 64;
5526 		*IndependentBlockLuma = 64;
5527 	}
5528 
5529 	if (RequestChroma == REQ_256Bytes) {
5530 		*MaxUncompressedBlockChroma = 256;
5531 		*MaxCompressedBlockChroma = 256;
5532 		*IndependentBlockChroma = 0;
5533 	} else if (RequestChroma == REQ_128BytesContiguous) {
5534 		*MaxUncompressedBlockChroma = 256;
5535 		*MaxCompressedBlockChroma = 128;
5536 		*IndependentBlockChroma = 128;
5537 	} else {
5538 		*MaxUncompressedBlockChroma = 256;
5539 		*MaxCompressedBlockChroma = 64;
5540 		*IndependentBlockChroma = 64;
5541 	}
5542 
5543 	if (DCCEnabled != true || BytePerPixelC == 0) {
5544 		*MaxUncompressedBlockChroma = 0;
5545 		*MaxCompressedBlockChroma = 0;
5546 		*IndependentBlockChroma = 0;
5547 	}
5548 
5549 	if (DCCEnabled != true) {
5550 		*MaxUncompressedBlockLuma = 0;
5551 		*MaxCompressedBlockLuma = 0;
5552 		*IndependentBlockLuma = 0;
5553 	}
5554 
5555 #ifdef __DML_VBA_DEBUG__
5556 	dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5557 	dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5558 	dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5559 	dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5560 	dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5561 	dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5562 #endif
5563 
5564 } // CalculateDCCConfiguration
5565 
5566 void dml32_CalculateStutterEfficiency(
5567 		unsigned int      CompressedBufferSizeInkByte,
5568 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5569 		bool   UnboundedRequestEnabled,
5570 		unsigned int      MetaFIFOSizeInKEntries,
5571 		unsigned int      ZeroSizeBufferEntries,
5572 		unsigned int      PixelChunkSizeInKByte,
5573 		unsigned int   NumberOfActiveSurfaces,
5574 		unsigned int      ROBBufferSizeInKByte,
5575 		double    TotalDataReadBandwidth,
5576 		double    DCFCLK,
5577 		double    ReturnBW,
5578 		unsigned int      CompbufReservedSpace64B,
5579 		unsigned int      CompbufReservedSpaceZs,
5580 		double    SRExitTime,
5581 		double    SRExitZ8Time,
5582 		bool   SynchronizeTimingsFinal,
5583 		unsigned int   BlendingAndTiming[],
5584 		double    StutterEnterPlusExitWatermark,
5585 		double    Z8StutterEnterPlusExitWatermark,
5586 		bool   ProgressiveToInterlaceUnitInOPP,
5587 		bool   Interlace[],
5588 		double    MinTTUVBlank[],
5589 		unsigned int   DPPPerSurface[],
5590 		unsigned int      DETBufferSizeY[],
5591 		unsigned int   BytePerPixelY[],
5592 		double    BytePerPixelDETY[],
5593 		double      SwathWidthY[],
5594 		unsigned int   SwathHeightY[],
5595 		unsigned int   SwathHeightC[],
5596 		double    NetDCCRateLuma[],
5597 		double    NetDCCRateChroma[],
5598 		double    DCCFractionOfZeroSizeRequestsLuma[],
5599 		double    DCCFractionOfZeroSizeRequestsChroma[],
5600 		unsigned int      HTotal[],
5601 		unsigned int      VTotal[],
5602 		double    PixelClock[],
5603 		double    VRatio[],
5604 		enum dm_rotation_angle SourceRotation[],
5605 		unsigned int   BlockHeight256BytesY[],
5606 		unsigned int   BlockWidth256BytesY[],
5607 		unsigned int   BlockHeight256BytesC[],
5608 		unsigned int   BlockWidth256BytesC[],
5609 		unsigned int   DCCYMaxUncompressedBlock[],
5610 		unsigned int   DCCCMaxUncompressedBlock[],
5611 		unsigned int      VActive[],
5612 		bool   DCCEnable[],
5613 		bool   WritebackEnable[],
5614 		double    ReadBandwidthSurfaceLuma[],
5615 		double    ReadBandwidthSurfaceChroma[],
5616 		double    meta_row_bw[],
5617 		double    dpte_row_bw[],
5618 
5619 		/* Output */
5620 		double   *StutterEfficiencyNotIncludingVBlank,
5621 		double   *StutterEfficiency,
5622 		unsigned int     *NumberOfStutterBurstsPerFrame,
5623 		double   *Z8StutterEfficiencyNotIncludingVBlank,
5624 		double   *Z8StutterEfficiency,
5625 		unsigned int     *Z8NumberOfStutterBurstsPerFrame,
5626 		double   *StutterPeriod,
5627 		bool  *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5628 {
5629 
5630 	bool FoundCriticalSurface = false;
5631 	unsigned int SwathSizeCriticalSurface = 0;
5632 	unsigned int LastChunkOfSwathSize;
5633 	unsigned int MissingPartOfLastSwathOfDETSize;
5634 	double LastZ8StutterPeriod = 0.0;
5635 	double LastStutterPeriod = 0.0;
5636 	unsigned int TotalNumberOfActiveOTG = 0;
5637 	double doublePixelClock;
5638 	unsigned int doubleHTotal;
5639 	unsigned int doubleVTotal;
5640 	bool SameTiming = true;
5641 	double DETBufferingTimeY;
5642 	double SwathWidthYCriticalSurface = 0.0;
5643 	double SwathHeightYCriticalSurface = 0.0;
5644 	double VActiveTimeCriticalSurface = 0.0;
5645 	double FrameTimeCriticalSurface = 0.0;
5646 	unsigned int BytePerPixelYCriticalSurface = 0;
5647 	double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5648 	unsigned int DETBufferSizeYCriticalSurface = 0;
5649 	double MinTTUVBlankCriticalSurface = 0.0;
5650 	unsigned int BlockWidth256BytesYCriticalSurface = 0;
5651 	bool doublePlaneCriticalSurface = 0;
5652 	bool doublePipeCriticalSurface = 0;
5653 	double TotalCompressedReadBandwidth;
5654 	double TotalRowReadBandwidth;
5655 	double AverageDCCCompressionRate;
5656 	double EffectiveCompressedBufferSize;
5657 	double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5658 	double StutterBurstTime;
5659 	unsigned int TotalActiveWriteback;
5660 	double LinesInDETY;
5661 	double LinesInDETYRoundedDownToSwath;
5662 	double MaximumEffectiveCompressionLuma;
5663 	double MaximumEffectiveCompressionChroma;
5664 	double TotalZeroSizeRequestReadBandwidth;
5665 	double TotalZeroSizeCompressedReadBandwidth;
5666 	double AverageDCCZeroSizeFraction;
5667 	double AverageZeroSizeCompressionRate;
5668 	unsigned int k;
5669 
5670 	TotalZeroSizeRequestReadBandwidth = 0;
5671 	TotalZeroSizeCompressedReadBandwidth = 0;
5672 	TotalRowReadBandwidth = 0;
5673 	TotalCompressedReadBandwidth = 0;
5674 
5675 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5676 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5677 			if (DCCEnable[k] == true) {
5678 				if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5679 						|| (!IsVertical(SourceRotation[k])
5680 								&& BlockHeight256BytesY[k] > SwathHeightY[k])
5681 						|| DCCYMaxUncompressedBlock[k] < 256) {
5682 					MaximumEffectiveCompressionLuma = 2;
5683 				} else {
5684 					MaximumEffectiveCompressionLuma = 4;
5685 				}
5686 				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5687 						+ ReadBandwidthSurfaceLuma[k]
5688 								/ dml_min(NetDCCRateLuma[k],
5689 										MaximumEffectiveCompressionLuma);
5690 #ifdef __DML_VBA_DEBUG__
5691 				dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5692 						__func__, k, ReadBandwidthSurfaceLuma[k]);
5693 				dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5694 						__func__, k, NetDCCRateLuma[k]);
5695 				dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5696 						__func__, k, MaximumEffectiveCompressionLuma);
5697 #endif
5698 				TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5699 						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5700 				TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5701 						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5702 								/ MaximumEffectiveCompressionLuma;
5703 
5704 				if (ReadBandwidthSurfaceChroma[k] > 0) {
5705 					if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5706 							|| (!IsVertical(SourceRotation[k])
5707 									&& BlockHeight256BytesC[k] > SwathHeightC[k])
5708 							|| DCCCMaxUncompressedBlock[k] < 256) {
5709 						MaximumEffectiveCompressionChroma = 2;
5710 					} else {
5711 						MaximumEffectiveCompressionChroma = 4;
5712 					}
5713 					TotalCompressedReadBandwidth =
5714 							TotalCompressedReadBandwidth
5715 							+ ReadBandwidthSurfaceChroma[k]
5716 							/ dml_min(NetDCCRateChroma[k],
5717 							MaximumEffectiveCompressionChroma);
5718 #ifdef __DML_VBA_DEBUG__
5719 					dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5720 							__func__, k, ReadBandwidthSurfaceChroma[k]);
5721 					dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5722 							__func__, k, NetDCCRateChroma[k]);
5723 					dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5724 							__func__, k, MaximumEffectiveCompressionChroma);
5725 #endif
5726 					TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5727 							+ ReadBandwidthSurfaceChroma[k]
5728 									* DCCFractionOfZeroSizeRequestsChroma[k];
5729 					TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5730 							+ ReadBandwidthSurfaceChroma[k]
5731 									* DCCFractionOfZeroSizeRequestsChroma[k]
5732 									/ MaximumEffectiveCompressionChroma;
5733 				}
5734 			} else {
5735 				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5736 						+ ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5737 			}
5738 			TotalRowReadBandwidth = TotalRowReadBandwidth
5739 					+ DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5740 		}
5741 	}
5742 
5743 	AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5744 	AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5745 
5746 #ifdef __DML_VBA_DEBUG__
5747 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5748 	dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5749 	dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5750 	dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5751 			__func__, TotalZeroSizeCompressedReadBandwidth);
5752 	dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5753 	dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5754 	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5755 	dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5756 	dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5757 	dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5758 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5759 #endif
5760 	if (AverageDCCZeroSizeFraction == 1) {
5761 		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5762 				/ TotalZeroSizeCompressedReadBandwidth;
5763 		EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5764 				* AverageZeroSizeCompressionRate
5765 				+ ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5766 						* AverageZeroSizeCompressionRate;
5767 	} else if (AverageDCCZeroSizeFraction > 0) {
5768 		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5769 				/ TotalZeroSizeCompressedReadBandwidth;
5770 		EffectiveCompressedBufferSize = dml_min(
5771 				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5772 				(double) MetaFIFOSizeInKEntries * 1024 * 64
5773 					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5774 					+ 1 / AverageDCCCompressionRate))
5775 					+ dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5776 					* AverageDCCCompressionRate,
5777 					((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5778 					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5779 
5780 #ifdef __DML_VBA_DEBUG__
5781 		dml_print("DML::%s: min 1 = %f\n", __func__,
5782 				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5783 		dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5784 				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5785 						AverageDCCCompressionRate));
5786 		dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5787 				CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5788 		dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5789 				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5790 #endif
5791 	} else {
5792 		EffectiveCompressedBufferSize = dml_min(
5793 				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5794 				(double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5795 				+ ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5796 						* AverageDCCCompressionRate;
5797 
5798 #ifdef __DML_VBA_DEBUG__
5799 		dml_print("DML::%s: min 1 = %f\n", __func__,
5800 				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5801 		dml_print("DML::%s: min 2 = %f\n", __func__,
5802 				MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5803 #endif
5804 	}
5805 
5806 #ifdef __DML_VBA_DEBUG__
5807 	dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5808 	dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5809 	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5810 #endif
5811 
5812 	*StutterPeriod = 0;
5813 
5814 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5815 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5816 			LinesInDETY = ((double) DETBufferSizeY[k]
5817 					+ (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5818 							* ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5819 					/ BytePerPixelDETY[k] / SwathWidthY[k];
5820 			LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5821 			DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5822 					/ VRatio[k];
5823 #ifdef __DML_VBA_DEBUG__
5824 			dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5825 			dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5826 			dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5827 			dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5828 					__func__, k, ReadBandwidthSurfaceLuma[k]);
5829 			dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5830 			dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5831 			dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5832 					__func__, k, LinesInDETYRoundedDownToSwath);
5833 			dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5834 			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5835 			dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5836 			dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5837 			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5838 #endif
5839 
5840 			if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5841 				bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5842 
5843 				FoundCriticalSurface = true;
5844 				*StutterPeriod = DETBufferingTimeY;
5845 				FrameTimeCriticalSurface = (
5846 						isInterlaceTiming ?
5847 								dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5848 						* (double) HTotal[k] / PixelClock[k];
5849 				VActiveTimeCriticalSurface = (
5850 						isInterlaceTiming ?
5851 								dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5852 						* (double) HTotal[k] / PixelClock[k];
5853 				BytePerPixelYCriticalSurface = BytePerPixelY[k];
5854 				SwathWidthYCriticalSurface = SwathWidthY[k];
5855 				SwathHeightYCriticalSurface = SwathHeightY[k];
5856 				BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5857 				LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5858 						- (LinesInDETY - LinesInDETYRoundedDownToSwath);
5859 				DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5860 				MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5861 				doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5862 				doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5863 
5864 #ifdef __DML_VBA_DEBUG__
5865 				dml_print("DML::%s: k=%0d, FoundCriticalSurface                = %d\n",
5866 						__func__, k, FoundCriticalSurface);
5867 				dml_print("DML::%s: k=%0d, StutterPeriod                       = %f\n",
5868 						__func__, k, *StutterPeriod);
5869 				dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface         = %f\n",
5870 						__func__, k, MinTTUVBlankCriticalSurface);
5871 				dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface            = %f\n",
5872 						__func__, k, FrameTimeCriticalSurface);
5873 				dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface          = %f\n",
5874 						__func__, k, VActiveTimeCriticalSurface);
5875 				dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface        = %d\n",
5876 						__func__, k, BytePerPixelYCriticalSurface);
5877 				dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface          = %f\n",
5878 						__func__, k, SwathWidthYCriticalSurface);
5879 				dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface         = %f\n",
5880 						__func__, k, SwathHeightYCriticalSurface);
5881 				dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface  = %d\n",
5882 						__func__, k, BlockWidth256BytesYCriticalSurface);
5883 				dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface          = %d\n",
5884 						__func__, k, doublePlaneCriticalSurface);
5885 				dml_print("DML::%s: k=%0d, doublePipeCriticalSurface           = %d\n",
5886 						__func__, k, doublePipeCriticalSurface);
5887 				dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5888 						__func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5889 #endif
5890 			}
5891 		}
5892 	}
5893 
5894 	PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5895 			EffectiveCompressedBufferSize);
5896 #ifdef __DML_VBA_DEBUG__
5897 	dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5898 	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5899 	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5900 			__func__, *StutterPeriod * TotalDataReadBandwidth);
5901 	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5902 	dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5903 			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5904 	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5905 	dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5906 	dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5907 	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5908 #endif
5909 
5910 	StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5911 			/ ReturnBW
5912 			+ (*StutterPeriod * TotalDataReadBandwidth
5913 					- PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5914 			+ *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5915 #ifdef __DML_VBA_DEBUG__
5916 	dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5917 			AverageDCCCompressionRate / ReturnBW);
5918 	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5919 			__func__, (*StutterPeriod * TotalDataReadBandwidth));
5920 	dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5921 			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5922 	dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5923 	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5924 #endif
5925 	StutterBurstTime = dml_max(StutterBurstTime,
5926 			LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5927 					* SwathWidthYCriticalSurface / ReturnBW);
5928 
5929 #ifdef __DML_VBA_DEBUG__
5930 	dml_print("DML::%s: Time to finish residue swath=%f\n",
5931 			__func__,
5932 			LinesToFinishSwathTransferStutterCriticalSurface *
5933 			BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5934 #endif
5935 
5936 	TotalActiveWriteback = 0;
5937 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5938 		if (WritebackEnable[k])
5939 			TotalActiveWriteback = TotalActiveWriteback + 1;
5940 	}
5941 
5942 	if (TotalActiveWriteback == 0) {
5943 #ifdef __DML_VBA_DEBUG__
5944 		dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5945 		dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5946 		dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5947 		dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5948 #endif
5949 		*StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5950 				1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5951 		*Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5952 				1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5953 		*NumberOfStutterBurstsPerFrame = (
5954 				*StutterEfficiencyNotIncludingVBlank > 0 ?
5955 						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5956 		*Z8NumberOfStutterBurstsPerFrame = (
5957 				*Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5958 						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5959 	} else {
5960 		*StutterEfficiencyNotIncludingVBlank = 0.;
5961 		*Z8StutterEfficiencyNotIncludingVBlank = 0.;
5962 		*NumberOfStutterBurstsPerFrame = 0;
5963 		*Z8NumberOfStutterBurstsPerFrame = 0;
5964 	}
5965 #ifdef __DML_VBA_DEBUG__
5966 	dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5967 	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5968 			__func__, *StutterEfficiencyNotIncludingVBlank);
5969 	dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5970 			__func__, *Z8StutterEfficiencyNotIncludingVBlank);
5971 	dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5972 	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5973 #endif
5974 
5975 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5976 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5977 			if (BlendingAndTiming[k] == k) {
5978 				if (TotalNumberOfActiveOTG == 0) {
5979 					doublePixelClock = PixelClock[k];
5980 					doubleHTotal = HTotal[k];
5981 					doubleVTotal = VTotal[k];
5982 				} else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
5983 						|| doubleVTotal != VTotal[k]) {
5984 					SameTiming = false;
5985 				}
5986 				TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
5987 			}
5988 		}
5989 	}
5990 
5991 	if (*StutterEfficiencyNotIncludingVBlank > 0) {
5992 		LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
5993 
5994 		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
5995 				&& LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
5996 			*StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
5997 						+ StutterBurstTime * VActiveTimeCriticalSurface
5998 						/ *StutterPeriod) / FrameTimeCriticalSurface) * 100;
5999 		} else {
6000 			*StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6001 		}
6002 	} else {
6003 		*StutterEfficiency = 0;
6004 	}
6005 
6006 	if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6007 		LastZ8StutterPeriod = VActiveTimeCriticalSurface
6008 				- (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6009 		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
6010 				MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
6011 			*Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
6012 				* VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6013 		} else {
6014 			*Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6015 		}
6016 	} else {
6017 		*Z8StutterEfficiency = 0.;
6018 	}
6019 
6020 #ifdef __DML_VBA_DEBUG__
6021 	dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6022 	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6023 	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6024 	dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6025 	dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6026 	dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6027 	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
6028 			__func__, *StutterEfficiencyNotIncludingVBlank);
6029 	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6030 #endif
6031 
6032 	SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
6033 			* dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
6034 	LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6035 	MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
6036 			- DETBufferSizeYCriticalSurface;
6037 
6038 	*DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6039 			&& doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6040 			&& (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6041 			&& (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6042 
6043 #ifdef __DML_VBA_DEBUG__
6044 	dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6045 	dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6046 	dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6047 	dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6048 #endif
6049 } // CalculateStutterEfficiency
6050 
/*
 * Compute the DET sizing limits that follow from the return-buffer and ROB
 * sizes.
 *
 * Outputs:
 *   MaxTotalDETInKByte             - dml_ceil() of 4/5 of
 *                                    (ConfigReturnBufferSizeInKByte +
 *                                    ROBBufferSizeInKByte), with a granularity
 *                                    argument of 64.
 *   nomDETInKByte                  - dml_floor() of MaxTotalDETInKByte /
 *                                    MaxNumDPP with a granularity argument of
 *                                    64, unless nomDETInKByteOverrideEnable is
 *                                    set, in which case
 *                                    nomDETInKByteOverrideValue is stored
 *                                    verbatim.
 *   MinCompressedBufferSizeInKByte - ConfigReturnBufferSizeInKByte minus
 *                                    MaxTotalDETInKByte.
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int    ConfigReturnBufferSizeInKByte,
		unsigned int    ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue,  // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	/* Combined return buffer + ROB capacity, in KByte. */
	const double BufferSumInKByte = (double) ConfigReturnBufferSizeInKByte
			+ (double) ROBBufferSizeInKByte;

	*MaxTotalDETInKByte = dml_ceil(BufferSumInKByte * 4.0 / 5.0, 64);
	*nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* The caller-supplied override replaces the computed nominal size. */
	if (nomDETInKByteOverrideEnable) {
		*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
	}
} // CalculateMaxDETAndMinCompressedBufferSize
6087 
/*
 * Decide whether the summed active-display bandwidth fits within ReturnBW.
 *
 * For each of the NumberOfActiveSurfaces surfaces the demand is the luma,
 * chroma and cursor bandwidths, each scaled by its urgent burst factor, plus
 * NumberOfDPP[k] times the meta-row and dpte-row bandwidths.
 *
 * Returns true only when the total demand is <= ReturnBW and no entry of
 * NotUrgentLatencyHiding[] is set.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int surf;
	bool AnyLatencyNotHidden = false;
	bool Supported;
	double DemandBW = 0;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		AnyLatencyNotHidden = AnyLatencyNotHidden || NotUrgentLatencyHiding[surf];

		/* Accumulate one term at a time, in the same left-to-right order. */
		DemandBW += ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf];
		DemandBW += ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf];
		DemandBW += cursor_bw[surf] * UrgentBurstFactorCursor[surf];
		DemandBW += NumberOfDPP[surf] * meta_row_bandwidth[surf];
		DemandBW += NumberOfDPP[surf] * dpte_row_bandwidth[surf];
	}

	Supported = !AnyLatencyNotHidden && (DemandBW <= ReturnBW);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding        = %d\n", __func__, AnyLatencyNotHidden);
	dml_print("DML::%s: VActiveBandwith                     = %f\n", __func__, DemandBW);
	dml_print("DML::%s: ReturnBW                            = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, Supported);
#endif
	return Supported;
}
6126 
6127 void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
6128 		double ReturnBW,
6129 		bool NotUrgentLatencyHiding[],
6130 		double ReadBandwidthLuma[],
6131 		double ReadBandwidthChroma[],
6132 		double PrefetchBandwidthLuma[],
6133 		double PrefetchBandwidthChroma[],
6134 		double cursor_bw[],
6135 		double meta_row_bandwidth[],
6136 		double dpte_row_bandwidth[],
6137 		double cursor_bw_pre[],
6138 		double prefetch_vmrow_bw[],
6139 		unsigned int NumberOfDPP[],
6140 		double UrgentBurstFactorLuma[],
6141 		double UrgentBurstFactorChroma[],
6142 		double UrgentBurstFactorCursor[],
6143 		double UrgentBurstFactorLumaPre[],
6144 		double UrgentBurstFactorChromaPre[],
6145 		double UrgentBurstFactorCursorPre[],
6146 		double PrefetchBW[],
6147 		double VRatio[],
6148 		double MaxVRatioPre,
6149 
6150 		/* output */
6151 		double  *MaxPrefetchBandwidth,
6152 		double  *FractionOfUrgentBandwidth,
6153 		bool *PrefetchBandwidthSupport)
6154 {
6155 	unsigned int k;
6156 	double ActiveBandwidthPerSurface;
6157 	bool NotEnoughUrgentLatencyHiding = false;
6158 	double TotalActiveBandwidth = 0;
6159 	double TotalPrefetchBandwidth = 0;
6160 
6161 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6162 		if (NotUrgentLatencyHiding[k]) {
6163 			NotEnoughUrgentLatencyHiding = true;
6164 		}
6165 	}
6166 
6167 	*MaxPrefetchBandwidth = 0;
6168 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6169 		ActiveBandwidthPerSurface = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]);
6170 
6171 		TotalActiveBandwidth += ActiveBandwidthPerSurface;
6172 
6173 		TotalPrefetchBandwidth = TotalPrefetchBandwidth + PrefetchBW[k] * VRatio[k];
6174 
6175 		*MaxPrefetchBandwidth = *MaxPrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6176 				ActiveBandwidthPerSurface,
6177 				NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6178 	}
6179 
6180 	if (MaxVRatioPre == __DML_MAX_VRATIO_PRE__)
6181 		*PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && (TotalPrefetchBandwidth <= TotalActiveBandwidth * __DML_MAX_BW_RATIO_PRE__) && !NotEnoughUrgentLatencyHiding;
6182 	else
6183 		*PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
6184 
6185 	*FractionOfUrgentBandwidth = *MaxPrefetchBandwidth / ReturnBW;
6186 }
6187 
/*
 * Return the part of ReturnBW left over for immediate flip.
 *
 * Starting from ReturnBW, each surface subtracts the larger (dml_max) of:
 *   - its active demand: luma, chroma and cursor bandwidth, each scaled by
 *     the matching urgent burst factor;
 *   - its prefetch demand: NumberOfDPP * (scaled prefetch luma + scaled
 *     prefetch chroma) plus the scaled prefetch cursor bandwidth.
 *
 * The result is not clamped by this function.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	unsigned int surf;
	double RemainingBW = ReturnBW;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		const double ActiveBW = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf];
		const double PrefetchDemandBW = NumberOfDPP[surf]
				* (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
				+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
				+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		RemainingBW = RemainingBW - dml_max(ActiveBW, PrefetchDemandBW);
	}

	return RemainingBW;
}
6214 
6215 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6216 		double ReturnBW,
6217 		enum immediate_flip_requirement ImmediateFlipRequirement[],
6218 		double final_flip_bw[],
6219 		double ReadBandwidthLuma[],
6220 		double ReadBandwidthChroma[],
6221 		double PrefetchBandwidthLuma[],
6222 		double PrefetchBandwidthChroma[],
6223 		double cursor_bw[],
6224 		double meta_row_bandwidth[],
6225 		double dpte_row_bandwidth[],
6226 		double cursor_bw_pre[],
6227 		double prefetch_vmrow_bw[],
6228 		unsigned int NumberOfDPP[],
6229 		double UrgentBurstFactorLuma[],
6230 		double UrgentBurstFactorChroma[],
6231 		double UrgentBurstFactorCursor[],
6232 		double UrgentBurstFactorLumaPre[],
6233 		double UrgentBurstFactorChromaPre[],
6234 		double UrgentBurstFactorCursorPre[],
6235 
6236 		/* output */
6237 		double  *TotalBandwidth,
6238 		double  *FractionOfUrgentBandwidth,
6239 		bool *ImmediateFlipBandwidthSupport)
6240 {
6241 	unsigned int k;
6242 	*TotalBandwidth = 0;
6243 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6244 		if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6245 			*TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6246 					NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6247 					NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6248 		} else {
6249 			*TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6250 					NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6251 					NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6252 		}
6253 	}
6254 	*ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6255 	*FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6256 }
6257 
6258 bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
6259 		double ReturnBW,
6260 		double UrgentLatency,
6261 		unsigned int SwathHeightY[],
6262 		unsigned int SwathHeightC[],
6263 		unsigned int SwathWidthY[],
6264 		unsigned int SwathWidthC[],
6265 		double  BytePerPixelInDETY[],
6266 		double  BytePerPixelInDETC[],
6267 		unsigned int    DETBufferSizeY[],
6268 		unsigned int    DETBufferSizeC[],
6269 		unsigned int	NumOfDPP[],
6270 		unsigned int	HTotal[],
6271 		double	PixelClock[],
6272 		double	VRatioY[],
6273 		double	VRatioC[],
6274 		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
6275 		enum unbounded_requesting_policy UseUnboundedRequesting)
6276 {
6277 	int k;
6278 	double SwathSizeAllSurfaces = 0;
6279 	double SwathSizeAllSurfacesInFetchTimeUs;
6280 	double DETSwathLatencyHidingUs;
6281 	double DETSwathLatencyHidingYUs;
6282 	double DETSwathLatencyHidingCUs;
6283 	double SwathSizePerSurfaceY[DC__NUM_DPP__MAX];
6284 	double SwathSizePerSurfaceC[DC__NUM_DPP__MAX];
6285 	bool NotEnoughDETSwathFillLatencyHiding = false;
6286 
6287 	if (UseUnboundedRequesting == dm_unbounded_requesting)
6288 		return false;
6289 
6290 	/* calculate sum of single swath size for all pipes in bytes */
6291 	for (k = 0; k < NumberOfActiveSurfaces; k++) {
6292 		SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k];
6293 
6294 		if (SwathHeightC[k] != 0)
6295 			SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k];
6296 		else
6297 			SwathSizePerSurfaceC[k] = 0;
6298 
6299 		SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k];
6300 	}
6301 
6302 	SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency;
6303 
6304 	/* ensure all DET - 1 swath can hide a fetch for all surfaces */
6305 	for (k = 0; k < NumberOfActiveSurfaces; k++) {
6306 		double LineTime = HTotal[k] / PixelClock[k];
6307 
6308 		/* only care if surface is not phantom */
6309 		if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
6310 			DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime;
6311 
6312 			if (SwathHeightC[k] != 0) {
6313 				DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime;
6314 
6315 				DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs);
6316 			} else {
6317 				DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs;
6318 			}
6319 
6320 			/* DET must be able to hide time to fetch 1 swath for each surface */
6321 			if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) {
6322 				NotEnoughDETSwathFillLatencyHiding = true;
6323 				break;
6324 			}
6325 		}
6326 	}
6327 
6328 	return NotEnoughDETSwathFillLatencyHiding;
6329 }
6330