1 /*
2  * Copyright 2022 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: AMD
23  *
24  */
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
29 
30 #define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096
31 
dml32_dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)32 unsigned int dml32_dscceComputeDelay(
33 		unsigned int bpc,
34 		double BPP,
35 		unsigned int sliceWidth,
36 		unsigned int numSlices,
37 		enum output_format_class pixelFormat,
38 		enum output_encoder_class Output)
39 {
40 	// valid bpc         = source bits per component in the set of {8, 10, 12}
41 	// valid bpp         = increments of 1/16 of a bit
42 	//                    min = 6/7/8 in N420/N422/444, respectively
43 	//                    max = such that compression is 1:1
44 	//valid sliceWidth  = number of pixels per slice line,
45 	//	must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
46 	//valid numSlices   = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
47 	//valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
48 
49 	// fixed value
50 	unsigned int rcModelSize = 8192;
51 
52 	// N422/N420 operate at 2 pixels per clock
53 	unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
54 	Delay, pixels;
55 
56 	if (pixelFormat == dm_420)
57 		pixelsPerClock = 2;
58 	else if (pixelFormat == dm_n422)
59 		pixelsPerClock = 2;
60 	// #all other modes operate at 1 pixel per clock
61 	else
62 		pixelsPerClock = 1;
63 
64 	//initial transmit delay as per PPS
65 	initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
66 
67 	//compute ssm delay
68 	if (bpc == 8)
69 		D = 81;
70 	else if (bpc == 10)
71 		D = 89;
72 	else
73 		D = 113;
74 
75 	//divide by pixel per cycle to compute slice width as seen by DSC
76 	w = sliceWidth / pixelsPerClock;
77 
78 	//422 mode has an additional cycle of delay
79 	if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
80 		s = 0;
81 	else
82 		s = 1;
83 
84 	//main calculation for the dscce
85 	ix = initalXmitDelay + 45;
86 	wx = (w + 2) / 3;
87 	p = 3 * wx - w;
88 	l0 = ix / w;
89 	a = ix + p * l0;
90 	ax = (a + 2) / 3 + D + 6 + 1;
91 	L = (ax + wx - 1) / wx;
92 	if ((ix % w) == 0 && p != 0)
93 		lstall = 1;
94 	else
95 		lstall = 0;
96 	Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
97 
98 	//dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
99 	pixels = Delay * 3 * pixelsPerClock;
100 
101 #ifdef __DML_VBA_DEBUG__
102 	dml_print("DML::%s: bpc: %d\n", __func__, bpc);
103 	dml_print("DML::%s: BPP: %f\n", __func__, BPP);
104 	dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
105 	dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
106 	dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
107 	dml_print("DML::%s: Output: %d\n", __func__, Output);
108 	dml_print("DML::%s: pixels: %d\n", __func__, pixels);
109 #endif
110 
111 	return pixels;
112 }
113 
dml32_dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)114 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
115 {
116 	unsigned int Delay = 0;
117 
118 	if (pixelFormat == dm_420) {
119 		//   sfr
120 		Delay = Delay + 2;
121 		//   dsccif
122 		Delay = Delay + 0;
123 		//   dscc - input deserializer
124 		Delay = Delay + 3;
125 		//   dscc gets pixels every other cycle
126 		Delay = Delay + 2;
127 		//   dscc - input cdc fifo
128 		Delay = Delay + 12;
129 		//   dscc gets pixels every other cycle
130 		Delay = Delay + 13;
131 		//   dscc - cdc uncertainty
132 		Delay = Delay + 2;
133 		//   dscc - output cdc fifo
134 		Delay = Delay + 7;
135 		//   dscc gets pixels every other cycle
136 		Delay = Delay + 3;
137 		//   dscc - cdc uncertainty
138 		Delay = Delay + 2;
139 		//   dscc - output serializer
140 		Delay = Delay + 1;
141 		//   sft
142 		Delay = Delay + 1;
143 	} else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
144 		//   sfr
145 		Delay = Delay + 2;
146 		//   dsccif
147 		Delay = Delay + 1;
148 		//   dscc - input deserializer
149 		Delay = Delay + 5;
150 		//  dscc - input cdc fifo
151 		Delay = Delay + 25;
152 		//   dscc - cdc uncertainty
153 		Delay = Delay + 2;
154 		//   dscc - output cdc fifo
155 		Delay = Delay + 10;
156 		//   dscc - cdc uncertainty
157 		Delay = Delay + 2;
158 		//   dscc - output serializer
159 		Delay = Delay + 1;
160 		//   sft
161 		Delay = Delay + 1;
162 	} else {
163 		//   sfr
164 		Delay = Delay + 2;
165 		//   dsccif
166 		Delay = Delay + 0;
167 		//   dscc - input deserializer
168 		Delay = Delay + 3;
169 		//   dscc - input cdc fifo
170 		Delay = Delay + 12;
171 		//   dscc - cdc uncertainty
172 		Delay = Delay + 2;
173 		//   dscc - output cdc fifo
174 		Delay = Delay + 7;
175 		//   dscc - output serializer
176 		Delay = Delay + 1;
177 		//   dscc - cdc uncertainty
178 		Delay = Delay + 2;
179 		//   sft
180 		Delay = Delay + 1;
181 	}
182 
183 	return Delay;
184 }
185 
186 
IsVertical(enum dm_rotation_angle Scan)187 bool IsVertical(enum dm_rotation_angle Scan)
188 {
189 	bool is_vert = false;
190 
191 	if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
192 		is_vert = true;
193 	else
194 		is_vert = false;
195 	return is_vert;
196 }
197 
dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(double HRatio,double HRatioChroma,double VRatio,double VRatioChroma,double MaxDCHUBToPSCLThroughput,double MaxPSCLToLBThroughput,double PixelClock,enum source_format_class SourcePixelFormat,unsigned int HTaps,unsigned int HTapsChroma,unsigned int VTaps,unsigned int VTapsChroma,double * PSCL_THROUGHPUT,double * PSCL_THROUGHPUT_CHROMA,double * DPPCLKUsingSingleDPP)198 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
199 		double HRatio,
200 		double HRatioChroma,
201 		double VRatio,
202 		double VRatioChroma,
203 		double MaxDCHUBToPSCLThroughput,
204 		double MaxPSCLToLBThroughput,
205 		double PixelClock,
206 		enum source_format_class SourcePixelFormat,
207 		unsigned int HTaps,
208 		unsigned int HTapsChroma,
209 		unsigned int VTaps,
210 		unsigned int VTapsChroma,
211 
212 		/* output */
213 		double *PSCL_THROUGHPUT,
214 		double *PSCL_THROUGHPUT_CHROMA,
215 		double *DPPCLKUsingSingleDPP)
216 {
217 	double DPPCLKUsingSingleDPPLuma;
218 	double DPPCLKUsingSingleDPPChroma;
219 
220 	if (HRatio > 1) {
221 		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
222 				dml_ceil((double) HTaps / 6.0, 1.0));
223 	} else {
224 		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
225 	}
226 
227 	DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
228 			*PSCL_THROUGHPUT, 1);
229 
230 	if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
231 		DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
232 
233 	if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
234 			SourcePixelFormat != dm_rgbe_alpha)) {
235 		*PSCL_THROUGHPUT_CHROMA = 0;
236 		*DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
237 	} else {
238 		if (HRatioChroma > 1) {
239 			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
240 					HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
241 		} else {
242 			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
243 		}
244 		DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
245 				HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
246 		if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
247 			DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
248 		*DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
249 	}
250 }
251 
dml32_CalculateBytePerPixelAndBlockSizes(enum source_format_class SourcePixelFormat,enum dm_swizzle_mode SurfaceTiling,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC,unsigned int * MacroTileHeightY,unsigned int * MacroTileHeightC,unsigned int * MacroTileWidthY,unsigned int * MacroTileWidthC)252 void dml32_CalculateBytePerPixelAndBlockSizes(
253 		enum source_format_class SourcePixelFormat,
254 		enum dm_swizzle_mode SurfaceTiling,
255 
256 		/* Output */
257 		unsigned int *BytePerPixelY,
258 		unsigned int *BytePerPixelC,
259 		double  *BytePerPixelDETY,
260 		double  *BytePerPixelDETC,
261 		unsigned int *BlockHeight256BytesY,
262 		unsigned int *BlockHeight256BytesC,
263 		unsigned int *BlockWidth256BytesY,
264 		unsigned int *BlockWidth256BytesC,
265 		unsigned int *MacroTileHeightY,
266 		unsigned int *MacroTileHeightC,
267 		unsigned int *MacroTileWidthY,
268 		unsigned int *MacroTileWidthC)
269 {
270 	if (SourcePixelFormat == dm_444_64) {
271 		*BytePerPixelDETY = 8;
272 		*BytePerPixelDETC = 0;
273 		*BytePerPixelY = 8;
274 		*BytePerPixelC = 0;
275 	} else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
276 		*BytePerPixelDETY = 4;
277 		*BytePerPixelDETC = 0;
278 		*BytePerPixelY = 4;
279 		*BytePerPixelC = 0;
280 	} else if (SourcePixelFormat == dm_444_16) {
281 		*BytePerPixelDETY = 2;
282 		*BytePerPixelDETC = 0;
283 		*BytePerPixelY = 2;
284 		*BytePerPixelC = 0;
285 	} else if (SourcePixelFormat == dm_444_8) {
286 		*BytePerPixelDETY = 1;
287 		*BytePerPixelDETC = 0;
288 		*BytePerPixelY = 1;
289 		*BytePerPixelC = 0;
290 	} else if (SourcePixelFormat == dm_rgbe_alpha) {
291 		*BytePerPixelDETY = 4;
292 		*BytePerPixelDETC = 1;
293 		*BytePerPixelY = 4;
294 		*BytePerPixelC = 1;
295 	} else if (SourcePixelFormat == dm_420_8) {
296 		*BytePerPixelDETY = 1;
297 		*BytePerPixelDETC = 2;
298 		*BytePerPixelY = 1;
299 		*BytePerPixelC = 2;
300 	} else if (SourcePixelFormat == dm_420_12) {
301 		*BytePerPixelDETY = 2;
302 		*BytePerPixelDETC = 4;
303 		*BytePerPixelY = 2;
304 		*BytePerPixelC = 4;
305 	} else {
306 		*BytePerPixelDETY = 4.0 / 3;
307 		*BytePerPixelDETC = 8.0 / 3;
308 		*BytePerPixelY = 2;
309 		*BytePerPixelC = 4;
310 	}
311 #ifdef __DML_VBA_DEBUG__
312 	dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
313 	dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
314 	dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
315 	dml_print("DML::%s: BytePerPixelY    = %d\n", __func__, *BytePerPixelY);
316 	dml_print("DML::%s: BytePerPixelC    = %d\n", __func__, *BytePerPixelC);
317 #endif
318 	if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
319 			|| SourcePixelFormat == dm_444_16
320 			|| SourcePixelFormat == dm_444_8
321 			|| SourcePixelFormat == dm_mono_16
322 			|| SourcePixelFormat == dm_mono_8
323 			|| SourcePixelFormat == dm_rgbe)) {
324 		if (SurfaceTiling == dm_sw_linear)
325 			*BlockHeight256BytesY = 1;
326 		else if (SourcePixelFormat == dm_444_64)
327 			*BlockHeight256BytesY = 4;
328 		else if (SourcePixelFormat == dm_444_8)
329 			*BlockHeight256BytesY = 16;
330 		else
331 			*BlockHeight256BytesY = 8;
332 
333 		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
334 		*BlockHeight256BytesC = 0;
335 		*BlockWidth256BytesC = 0;
336 	} else {
337 		if (SurfaceTiling == dm_sw_linear) {
338 			*BlockHeight256BytesY = 1;
339 			*BlockHeight256BytesC = 1;
340 		} else if (SourcePixelFormat == dm_rgbe_alpha) {
341 			*BlockHeight256BytesY = 8;
342 			*BlockHeight256BytesC = 16;
343 		} else if (SourcePixelFormat == dm_420_8) {
344 			*BlockHeight256BytesY = 16;
345 			*BlockHeight256BytesC = 8;
346 		} else {
347 			*BlockHeight256BytesY = 8;
348 			*BlockHeight256BytesC = 8;
349 		}
350 		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
351 		*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
352 	}
353 #ifdef __DML_VBA_DEBUG__
354 	dml_print("DML::%s: BlockWidth256BytesY  = %d\n", __func__, *BlockWidth256BytesY);
355 	dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
356 	dml_print("DML::%s: BlockWidth256BytesC  = %d\n", __func__, *BlockWidth256BytesC);
357 	dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
358 #endif
359 
360 	if (SurfaceTiling == dm_sw_linear) {
361 		*MacroTileHeightY = *BlockHeight256BytesY;
362 		*MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
363 		*MacroTileHeightC = *BlockHeight256BytesC;
364 		if (*MacroTileHeightC == 0)
365 			*MacroTileWidthC = 0;
366 		else
367 			*MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
368 	} else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
369 			SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
370 		*MacroTileHeightY = 16 * *BlockHeight256BytesY;
371 		*MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
372 		*MacroTileHeightC = 16 * *BlockHeight256BytesC;
373 		if (*MacroTileHeightC == 0)
374 			*MacroTileWidthC = 0;
375 		else
376 			*MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
377 	} else {
378 		*MacroTileHeightY = 32 * *BlockHeight256BytesY;
379 		*MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
380 		*MacroTileHeightC = 32 * *BlockHeight256BytesC;
381 		if (*MacroTileHeightC == 0)
382 			*MacroTileWidthC = 0;
383 		else
384 			*MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385 	}
386 
387 #ifdef __DML_VBA_DEBUG__
388 	dml_print("DML::%s: MacroTileWidthY  = %d\n", __func__, *MacroTileWidthY);
389 	dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
390 	dml_print("DML::%s: MacroTileWidthC  = %d\n", __func__, *MacroTileWidthC);
391 	dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
392 #endif
393 } // CalculateBytePerPixelAndBlockSizes
394 
dml32_CalculateSwathAndDETConfiguration(unsigned int DETSizeOverride[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],unsigned int ConfigReturnBufferSizeInKByte,unsigned int MaxTotalDETInKByte,unsigned int MinCompressedBufferSizeInKByte,double ForceSingleDPP,unsigned int NumberOfActiveSurfaces,unsigned int nomDETInKByte,enum unbounded_requesting_policy UseUnboundedRequestingFinal,bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,unsigned int PixelChunkSizeKBytes,unsigned int ROBSizeKBytes,unsigned int CompressedBufferSegmentSizeInkByteFinal,enum output_encoder_class Output[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double MaximumSwathWidthLuma[],double MaximumSwathWidthChroma[],enum dm_rotation_angle SourceRotation[],bool ViewportStationary[],enum source_format_class SourcePixelFormat[],enum dm_swizzle_mode SurfaceTiling[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int ViewportXStart[],unsigned int ViewportYStart[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],enum odm_combine_mode ODMMode[],unsigned int BlendingAndTiming[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],double BytePerPixDETY[],double BytePerPixDETC[],unsigned int HActive[],double HRatio[],double HRatioChroma[],unsigned int DPPPerSurface[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],double SwathWidth[],double SwathWidthChroma[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int DETBufferSizeInKByte[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],bool * UnboundedRequestEnabled,unsigned int * CompressedBufferSizeInkByte,unsigned int * CompBufReservedSpaceKBytes,bool * 
CompBufReservedSpaceNeedAdjustment,bool ViewportSizeSupportPerSurface[],bool * ViewportSizeSupport)395 void dml32_CalculateSwathAndDETConfiguration(
396 		unsigned int DETSizeOverride[],
397 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
398 		unsigned int ConfigReturnBufferSizeInKByte,
399 		unsigned int MaxTotalDETInKByte,
400 		unsigned int MinCompressedBufferSizeInKByte,
401 		double ForceSingleDPP,
402 		unsigned int NumberOfActiveSurfaces,
403 		unsigned int nomDETInKByte,
404 		enum unbounded_requesting_policy UseUnboundedRequestingFinal,
405 		bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
406 		unsigned int PixelChunkSizeKBytes,
407 		unsigned int ROBSizeKBytes,
408 		unsigned int CompressedBufferSegmentSizeInkByteFinal,
409 		enum output_encoder_class Output[],
410 		double ReadBandwidthLuma[],
411 		double ReadBandwidthChroma[],
412 		double MaximumSwathWidthLuma[],
413 		double MaximumSwathWidthChroma[],
414 		enum dm_rotation_angle SourceRotation[],
415 		bool ViewportStationary[],
416 		enum source_format_class SourcePixelFormat[],
417 		enum dm_swizzle_mode SurfaceTiling[],
418 		unsigned int ViewportWidth[],
419 		unsigned int ViewportHeight[],
420 		unsigned int ViewportXStart[],
421 		unsigned int ViewportYStart[],
422 		unsigned int ViewportXStartC[],
423 		unsigned int ViewportYStartC[],
424 		unsigned int SurfaceWidthY[],
425 		unsigned int SurfaceWidthC[],
426 		unsigned int SurfaceHeightY[],
427 		unsigned int SurfaceHeightC[],
428 		unsigned int Read256BytesBlockHeightY[],
429 		unsigned int Read256BytesBlockHeightC[],
430 		unsigned int Read256BytesBlockWidthY[],
431 		unsigned int Read256BytesBlockWidthC[],
432 		enum odm_combine_mode ODMMode[],
433 		unsigned int BlendingAndTiming[],
434 		unsigned int BytePerPixY[],
435 		unsigned int BytePerPixC[],
436 		double BytePerPixDETY[],
437 		double BytePerPixDETC[],
438 		unsigned int HActive[],
439 		double HRatio[],
440 		double HRatioChroma[],
441 		unsigned int DPPPerSurface[],
442 
443 		/* Output */
444 		unsigned int swath_width_luma_ub[],
445 		unsigned int swath_width_chroma_ub[],
446 		double SwathWidth[],
447 		double SwathWidthChroma[],
448 		unsigned int SwathHeightY[],
449 		unsigned int SwathHeightC[],
450 		unsigned int DETBufferSizeInKByte[],
451 		unsigned int DETBufferSizeY[],
452 		unsigned int DETBufferSizeC[],
453 		bool *UnboundedRequestEnabled,
454 		unsigned int *CompressedBufferSizeInkByte,
455 		unsigned int *CompBufReservedSpaceKBytes,
456 		bool *CompBufReservedSpaceNeedAdjustment,
457 		bool ViewportSizeSupportPerSurface[],
458 		bool *ViewportSizeSupport)
459 {
460 	unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
461 	unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
462 	unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
463 	unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
464 	unsigned int RoundedUpSwathSizeBytesY;
465 	unsigned int RoundedUpSwathSizeBytesC;
466 	double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
467 	double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
468 	unsigned int k;
469 	unsigned int TotalActiveDPP = 0;
470 	bool NoChromaSurfaces = true;
471 	unsigned int DETBufferSizeInKByteForSwathCalculation;
472 
473 #ifdef __DML_VBA_DEBUG__
474 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
475 	dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
476 	dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
477 #endif
478 	dml32_CalculateSwathWidth(ForceSingleDPP,
479 			NumberOfActiveSurfaces,
480 			SourcePixelFormat,
481 			SourceRotation,
482 			ViewportStationary,
483 			ViewportWidth,
484 			ViewportHeight,
485 			ViewportXStart,
486 			ViewportYStart,
487 			ViewportXStartC,
488 			ViewportYStartC,
489 			SurfaceWidthY,
490 			SurfaceWidthC,
491 			SurfaceHeightY,
492 			SurfaceHeightC,
493 			ODMMode,
494 			BytePerPixY,
495 			BytePerPixC,
496 			Read256BytesBlockHeightY,
497 			Read256BytesBlockHeightC,
498 			Read256BytesBlockWidthY,
499 			Read256BytesBlockWidthC,
500 			BlendingAndTiming,
501 			HActive,
502 			HRatio,
503 			DPPPerSurface,
504 
505 			/* Output */
506 			SwathWidthdoubleDPP,
507 			SwathWidthdoubleDPPChroma,
508 			SwathWidth,
509 			SwathWidthChroma,
510 			MaximumSwathHeightY,
511 			MaximumSwathHeightC,
512 			swath_width_luma_ub,
513 			swath_width_chroma_ub);
514 
515 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
516 		RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
517 		RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
518 #ifdef __DML_VBA_DEBUG__
519 		dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
520 		dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
521 		dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
522 		dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
523 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
524 				RoundedUpMaxSwathSizeBytesY[k]);
525 		dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
526 		dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
527 		dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
528 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
529 				RoundedUpMaxSwathSizeBytesC[k]);
530 #endif
531 
532 		if (SourcePixelFormat[k] == dm_420_10) {
533 			RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
534 			RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
535 		}
536 	}
537 
538 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
539 		TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
540 		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
541 				SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
542 			NoChromaSurfaces = false;
543 		}
544 	}
545 
546 	// By default, just set the reserved space to 2 pixel chunks size
547 	*CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
548 
549 	// if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
550 	// - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
551 	// - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
552 	*CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
553 
554 	if (*CompBufReservedSpaceNeedAdjustment == 1) {
555 		*CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
556 	}
557 
558 	#ifdef __DML_VBA_DEBUG__
559 		dml_print("DML::%s: CompBufReservedSpaceKBytes          = %d\n",  __func__, *CompBufReservedSpaceKBytes);
560 		dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, *CompBufReservedSpaceNeedAdjustment);
561 	#endif
562 
563 	*UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
564 
565 	dml32_CalculateDETBufferSize(DETSizeOverride,
566 			UseMALLForPStateChange,
567 			ForceSingleDPP,
568 			NumberOfActiveSurfaces,
569 			*UnboundedRequestEnabled,
570 			nomDETInKByte,
571 			MaxTotalDETInKByte,
572 			ConfigReturnBufferSizeInKByte,
573 			MinCompressedBufferSizeInKByte,
574 			CompressedBufferSegmentSizeInkByteFinal,
575 			SourcePixelFormat,
576 			ReadBandwidthLuma,
577 			ReadBandwidthChroma,
578 			RoundedUpMaxSwathSizeBytesY,
579 			RoundedUpMaxSwathSizeBytesC,
580 			DPPPerSurface,
581 
582 			/* Output */
583 			DETBufferSizeInKByte,    // per hubp pipe
584 			CompressedBufferSizeInkByte);
585 
586 #ifdef __DML_VBA_DEBUG__
587 	dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
588 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
589 	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
590 	dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
591 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
592 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
593 #endif
594 
595 	*ViewportSizeSupport = true;
596 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
597 
598 		DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
599 				dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
600 #ifdef __DML_VBA_DEBUG__
601 		dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
602 				DETBufferSizeInKByteForSwathCalculation);
603 #endif
604 
605 		if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
606 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
607 			SwathHeightY[k] = MaximumSwathHeightY[k];
608 			SwathHeightC[k] = MaximumSwathHeightC[k];
609 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
610 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
611 		} else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
612 				RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
613 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
614 			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
615 			SwathHeightC[k] = MaximumSwathHeightC[k];
616 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
617 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
618 		} else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
619 				RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
620 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
621 			SwathHeightY[k] = MaximumSwathHeightY[k];
622 			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
623 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
624 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
625 		} else {
626 			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
627 			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
628 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
629 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
630 		}
631 
632 		if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
633 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
634 				|| SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
635 						SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
636 			*ViewportSizeSupport = false;
637 			ViewportSizeSupportPerSurface[k] = false;
638 		} else {
639 			ViewportSizeSupportPerSurface[k] = true;
640 		}
641 
642 		if (SwathHeightC[k] == 0) {
643 #ifdef __DML_VBA_DEBUG__
644 			dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
645 #endif
646 			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
647 			DETBufferSizeC[k] = 0;
648 		} else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
649 #ifdef __DML_VBA_DEBUG__
650 			dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
651 #endif
652 			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
653 			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
654 		} else {
655 #ifdef __DML_VBA_DEBUG__
656 			dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
657 #endif
658 			DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
659 			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
660 		}
661 
662 #ifdef __DML_VBA_DEBUG__
663 		dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
664 		dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
665 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
666 				k, RoundedUpMaxSwathSizeBytesY[k]);
667 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
668 				k, RoundedUpMaxSwathSizeBytesC[k]);
669 		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
670 		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
671 		dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
672 		dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
673 		dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
674 		dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
675 				ViewportSizeSupportPerSurface[k]);
676 #endif
677 
678 	}
679 } // CalculateSwathAndDETConfiguration
680 
dml32_CalculateSwathWidth(bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,enum source_format_class SourcePixelFormat[],enum dm_rotation_angle SourceRotation[],bool ViewportStationary[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int ViewportXStart[],unsigned int ViewportYStart[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],enum odm_combine_mode ODMMode[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int BlendingAndTiming[],unsigned int HActive[],double HRatio[],unsigned int DPPPerSurface[],double SwathWidthdoubleDPPY[],double SwathWidthdoubleDPPC[],double SwathWidthY[],double SwathWidthC[],unsigned int MaximumSwathHeightY[],unsigned int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])681 void dml32_CalculateSwathWidth(
682 		bool				ForceSingleDPP,
683 		unsigned int			NumberOfActiveSurfaces,
684 		enum source_format_class	SourcePixelFormat[],
685 		enum dm_rotation_angle		SourceRotation[],
686 		bool				ViewportStationary[],
687 		unsigned int			ViewportWidth[],
688 		unsigned int			ViewportHeight[],
689 		unsigned int			ViewportXStart[],
690 		unsigned int			ViewportYStart[],
691 		unsigned int			ViewportXStartC[],
692 		unsigned int			ViewportYStartC[],
693 		unsigned int			SurfaceWidthY[],
694 		unsigned int			SurfaceWidthC[],
695 		unsigned int			SurfaceHeightY[],
696 		unsigned int			SurfaceHeightC[],
697 		enum odm_combine_mode		ODMMode[],
698 		unsigned int			BytePerPixY[],
699 		unsigned int			BytePerPixC[],
700 		unsigned int			Read256BytesBlockHeightY[],
701 		unsigned int			Read256BytesBlockHeightC[],
702 		unsigned int			Read256BytesBlockWidthY[],
703 		unsigned int			Read256BytesBlockWidthC[],
704 		unsigned int			BlendingAndTiming[],
705 		unsigned int			HActive[],
706 		double				HRatio[],
707 		unsigned int			DPPPerSurface[],
708 
709 		/* Output */
710 		double				SwathWidthdoubleDPPY[],
711 		double				SwathWidthdoubleDPPC[],
712 		double				SwathWidthY[], // per-pipe
713 		double				SwathWidthC[], // per-pipe
714 		unsigned int			MaximumSwathHeightY[],
715 		unsigned int			MaximumSwathHeightC[],
716 		unsigned int			swath_width_luma_ub[], // per-pipe
717 		unsigned int			swath_width_chroma_ub[]) // per-pipe
718 {
719 	unsigned int k, j;
720 	enum odm_combine_mode MainSurfaceODMMode;
721 
722 	unsigned int surface_width_ub_l;
723 	unsigned int surface_height_ub_l;
724 	unsigned int surface_width_ub_c = 0;
725 	unsigned int surface_height_ub_c = 0;
726 
727 #ifdef __DML_VBA_DEBUG__
728 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
729 	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
730 #endif
731 
732 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
733 		if (!IsVertical(SourceRotation[k]))
734 			SwathWidthdoubleDPPY[k] = ViewportWidth[k];
735 		else
736 			SwathWidthdoubleDPPY[k] = ViewportHeight[k];
737 
738 #ifdef __DML_VBA_DEBUG__
739 		dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
740 		dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
741 #endif
742 
743 		MainSurfaceODMMode = ODMMode[k];
744 		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
745 			if (BlendingAndTiming[k] == j)
746 				MainSurfaceODMMode = ODMMode[j];
747 		}
748 
749 		if (ForceSingleDPP) {
750 			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
751 		} else {
752 			if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
753 				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
754 						dml_round(HActive[k] / 4.0 * HRatio[k]));
755 			} else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
756 				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
757 						dml_round(HActive[k] / 2.0 * HRatio[k]));
758 			} else if (DPPPerSurface[k] == 2) {
759 				SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
760 			} else {
761 				SwathWidthY[k] = SwathWidthdoubleDPPY[k];
762 			}
763 		}
764 
765 #ifdef __DML_VBA_DEBUG__
766 		dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
767 		dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
768 		dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
769 		dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
770 		dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
771 #endif
772 
773 		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
774 				SourcePixelFormat[k] == dm_420_12) {
775 			SwathWidthC[k] = SwathWidthY[k] / 2;
776 			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
777 		} else {
778 			SwathWidthC[k] = SwathWidthY[k];
779 			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
780 		}
781 
782 		if (ForceSingleDPP == true) {
783 			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
784 			SwathWidthC[k] = SwathWidthdoubleDPPC[k];
785 		}
786 
787 		surface_width_ub_l  = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
788 		surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
789 
790 		if (!IsVertical(SourceRotation[k])) {
791 			MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
792 			MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
793 			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
794 				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
795 						dml_floor(ViewportXStart[k] +
796 								SwathWidthY[k] +
797 								Read256BytesBlockWidthY[k] - 1,
798 								Read256BytesBlockWidthY[k]) -
799 								dml_floor(ViewportXStart[k],
800 								Read256BytesBlockWidthY[k]));
801 			} else {
802 				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
803 						dml_ceil(SwathWidthY[k] - 1,
804 								Read256BytesBlockWidthY[k]) +
805 								Read256BytesBlockWidthY[k]);
806 			}
807 			if (BytePerPixC[k] > 0) {
808 				surface_width_ub_c  = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
809 				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
810 					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
811 							dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
812 									Read256BytesBlockWidthC[k] - 1,
813 									Read256BytesBlockWidthC[k]) -
814 									dml_floor(ViewportXStartC[k],
815 									Read256BytesBlockWidthC[k]));
816 				} else {
817 					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
818 							dml_ceil(SwathWidthC[k] - 1,
819 								Read256BytesBlockWidthC[k]) +
820 								Read256BytesBlockWidthC[k]);
821 				}
822 			} else {
823 				swath_width_chroma_ub[k] = 0;
824 			}
825 		} else {
826 			MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
827 			MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
828 
829 			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
830 				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
831 						SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
832 						Read256BytesBlockHeightY[k]) -
833 						dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
834 			} else {
835 				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
836 						Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
837 			}
838 			if (BytePerPixC[k] > 0) {
839 				surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
840 				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
841 					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
842 							dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
843 									Read256BytesBlockHeightC[k] - 1,
844 									Read256BytesBlockHeightC[k]) -
845 									dml_floor(ViewportYStartC[k],
846 											Read256BytesBlockHeightC[k]));
847 				} else {
848 					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
849 							dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
850 							Read256BytesBlockHeightC[k]);
851 				}
852 			} else {
853 				swath_width_chroma_ub[k] = 0;
854 			}
855 		}
856 
857 #ifdef __DML_VBA_DEBUG__
858 		dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
859 		dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
860 		dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
861 		dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
862 		dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
863 		dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
864 		dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
865 		dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
866 		dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
867 		dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
868 		dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
869 		dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
870 		dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
871 		dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
872 #endif
873 
874 	}
875 } // CalculateSwathWidth
876 
dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,unsigned int TotalNumberOfActiveDPP,bool NoChroma,enum output_encoder_class Output,enum dm_swizzle_mode SurfaceTiling,bool CompBufReservedSpaceNeedAdjustment,bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)877 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
878 			unsigned int TotalNumberOfActiveDPP,
879 			bool NoChroma,
880 			enum output_encoder_class Output,
881 			enum dm_swizzle_mode SurfaceTiling,
882 			bool CompBufReservedSpaceNeedAdjustment,
883 			bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
884 {
885 	bool ret_val = false;
886 
887 	ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
888 			TotalNumberOfActiveDPP == 1 && NoChroma);
889 	if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
890 		ret_val = false;
891 
892 	if (SurfaceTiling == dm_sw_linear)
893 		ret_val = false;
894 
895 	if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
896 		ret_val = false;
897 
898 #ifdef __DML_VBA_DEBUG__
899 	dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, CompBufReservedSpaceNeedAdjustment);
900 	dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
901 	dml_print("DML::%s: ret_val = %d\n",  __func__, ret_val);
902 #endif
903 
904 	return (ret_val);
905 }
906 
dml32_CalculateDETBufferSize(unsigned int DETSizeOverride[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,bool UnboundedRequestEnabled,unsigned int nomDETInKByte,unsigned int MaxTotalDETInKByte,unsigned int ConfigReturnBufferSizeInKByte,unsigned int MinCompressedBufferSizeInKByte,unsigned int CompressedBufferSegmentSizeInkByteFinal,enum source_format_class SourcePixelFormat[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int RoundedUpMaxSwathSizeBytesY[],unsigned int RoundedUpMaxSwathSizeBytesC[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeInKByte[],unsigned int * CompressedBufferSizeInkByte)907 void dml32_CalculateDETBufferSize(
908 		unsigned int DETSizeOverride[],
909 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
910 		bool ForceSingleDPP,
911 		unsigned int NumberOfActiveSurfaces,
912 		bool UnboundedRequestEnabled,
913 		unsigned int nomDETInKByte,
914 		unsigned int MaxTotalDETInKByte,
915 		unsigned int ConfigReturnBufferSizeInKByte,
916 		unsigned int MinCompressedBufferSizeInKByte,
917 		unsigned int CompressedBufferSegmentSizeInkByteFinal,
918 		enum source_format_class SourcePixelFormat[],
919 		double ReadBandwidthLuma[],
920 		double ReadBandwidthChroma[],
921 		unsigned int RoundedUpMaxSwathSizeBytesY[],
922 		unsigned int RoundedUpMaxSwathSizeBytesC[],
923 		unsigned int DPPPerSurface[],
924 		/* Output */
925 		unsigned int DETBufferSizeInKByte[],
926 		unsigned int *CompressedBufferSizeInkByte)
927 {
928 	unsigned int DETBufferSizePoolInKByte;
929 	unsigned int NextDETBufferPieceInKByte;
930 	bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
931 	bool NextPotentialSurfaceToAssignDETPieceFound;
932 	unsigned int NextSurfaceToAssignDETPiece;
933 	double TotalBandwidth;
934 	double BandwidthOfSurfacesNotAssignedDETPiece;
935 	unsigned int max_minDET;
936 	unsigned int minDET;
937 	unsigned int minDET_pipe;
938 	unsigned int j, k;
939 
940 #ifdef __DML_VBA_DEBUG__
941 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
942 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
943 	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
944 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
945 	dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
946 	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
947 	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
948 	dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
949 			CompressedBufferSegmentSizeInkByteFinal);
950 #endif
951 
952 	// Note: Will use default det size if that fits 2 swaths
953 	if (UnboundedRequestEnabled) {
954 		if (DETSizeOverride[0] > 0) {
955 			DETBufferSizeInKByte[0] = DETSizeOverride[0];
956 		} else {
957 			DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
958 					((double) RoundedUpMaxSwathSizeBytesY[0] +
959 							(double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
960 		}
961 		*CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
962 	} else {
963 		DETBufferSizePoolInKByte = MaxTotalDETInKByte;
964 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
965 			DETBufferSizeInKByte[k] = nomDETInKByte;
966 			if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
967 					SourcePixelFormat[k] == dm_420_12) {
968 				max_minDET = nomDETInKByte - 64;
969 			} else {
970 				max_minDET = nomDETInKByte;
971 			}
972 			minDET = 128;
973 			minDET_pipe = 0;
974 
975 			// add DET resource until can hold 2 full swaths
976 			while (minDET <= max_minDET && minDET_pipe == 0) {
977 				if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
978 						(double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
979 					minDET_pipe = minDET;
980 				minDET = minDET + 64;
981 			}
982 
983 #ifdef __DML_VBA_DEBUG__
984 			dml_print("DML::%s: k=%0d minDET        = %d\n", __func__, k, minDET);
985 			dml_print("DML::%s: k=%0d max_minDET    = %d\n", __func__, k, max_minDET);
986 			dml_print("DML::%s: k=%0d minDET_pipe   = %d\n", __func__, k, minDET_pipe);
987 			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
988 					RoundedUpMaxSwathSizeBytesY[k]);
989 			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
990 					RoundedUpMaxSwathSizeBytesC[k]);
991 #endif
992 
993 			if (minDET_pipe == 0) {
994 				minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
995 						(double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
996 #ifdef __DML_VBA_DEBUG__
997 				dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
998 						__func__, k, minDET_pipe);
999 #endif
1000 			}
1001 
1002 			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1003 				DETBufferSizeInKByte[k] = 0;
1004 			} else if (DETSizeOverride[k] > 0) {
1005 				DETBufferSizeInKByte[k] = DETSizeOverride[k];
1006 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1007 						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1008 			} else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1009 				DETBufferSizeInKByte[k] = minDET_pipe;
1010 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1011 						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1012 			}
1013 
1014 #ifdef __DML_VBA_DEBUG__
1015 			dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1016 			dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1017 			dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1018 			dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
1019 #endif
1020 		}
1021 
1022 		TotalBandwidth = 0;
1023 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1024 			if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1025 				TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1026 		}
1027 #ifdef __DML_VBA_DEBUG__
1028 		dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1029 		for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1030 			dml_print("DML::%s: k=%d DETBufferSizeInKByte   = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1031 		dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1032 		dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
1033 #endif
1034 		BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1035 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1036 
1037 			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1038 				DETPieceAssignedToThisSurfaceAlready[k] = true;
1039 			} else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1040 					(double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1041 					((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1042 				DETPieceAssignedToThisSurfaceAlready[k] = true;
1043 				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1044 						ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1045 			} else {
1046 				DETPieceAssignedToThisSurfaceAlready[k] = false;
1047 			}
1048 #ifdef __DML_VBA_DEBUG__
1049 			dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1050 					DETPieceAssignedToThisSurfaceAlready[k]);
1051 			dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1052 					BandwidthOfSurfacesNotAssignedDETPiece);
1053 #endif
1054 		}
1055 
1056 		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1057 			NextPotentialSurfaceToAssignDETPieceFound = false;
1058 			NextSurfaceToAssignDETPiece = 0;
1059 
1060 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1061 #ifdef __DML_VBA_DEBUG__
1062 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1063 						ReadBandwidthLuma[k]);
1064 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1065 						ReadBandwidthChroma[k]);
1066 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1067 						ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1068 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1069 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1070 				dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1071 						NextSurfaceToAssignDETPiece);
1072 #endif
1073 				if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1074 						(!NextPotentialSurfaceToAssignDETPieceFound ||
1075 						ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1076 						ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1077 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1078 					NextSurfaceToAssignDETPiece = k;
1079 					NextPotentialSurfaceToAssignDETPieceFound = true;
1080 				}
1081 #ifdef __DML_VBA_DEBUG__
1082 				dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1083 						__func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1084 				dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1085 						__func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1086 #endif
1087 			}
1088 
1089 			if (NextPotentialSurfaceToAssignDETPieceFound) {
1090 				// Note: To show the banker's rounding behavior in VBA and also the fact
1091 				// that the DET buffer size varies due to precision issue
1092 				//
1093 				//double tmp1 =  ((double) DETBufferSizePoolInKByte *
1094 				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1095 				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1096 				// BandwidthOfSurfacesNotAssignedDETPiece /
1097 				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1098 				//double tmp2 =  dml_round((double) DETBufferSizePoolInKByte *
1099 				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1100 				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1101 				 //BandwidthOfSurfacesNotAssignedDETPiece /
1102 				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1103 				//
1104 				//dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1105 				//dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1106 
1107 				NextDETBufferPieceInKByte = dml_min(
1108 					dml_round((double) DETBufferSizePoolInKByte *
1109 						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1110 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1111 						BandwidthOfSurfacesNotAssignedDETPiece /
1112 						((ForceSingleDPP ? 1 :
1113 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1114 						(ForceSingleDPP ? 1 :
1115 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1116 						dml_floor((double) DETBufferSizePoolInKByte,
1117 						(ForceSingleDPP ? 1 :
1118 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1119 
1120 				// Above calculation can assign the entire DET buffer allocation to a single pipe.
1121 				// We should limit the per-pipe DET size to the nominal / max per pipe.
1122 				if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1123 					if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1124 							nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1125 						NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1126 								DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1127 					} else {
1128 						// Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1129 						// already has the max per-pipe value
1130 						NextDETBufferPieceInKByte = 0;
1131 					}
1132 				}
1133 
1134 #ifdef __DML_VBA_DEBUG__
1135 				dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1136 					DETBufferSizePoolInKByte);
1137 				dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1138 					NextSurfaceToAssignDETPiece);
1139 				dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1140 					NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1141 				dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1142 					NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1143 				dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1144 					__func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1145 				dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1146 					NextDETBufferPieceInKByte);
1147 				dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1148 					__func__, j, NextSurfaceToAssignDETPiece,
1149 					DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1150 #endif
1151 
1152 				DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1153 						DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1154 						+ NextDETBufferPieceInKByte
1155 						/ (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1156 #ifdef __DML_VBA_DEBUG__
1157 				dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1158 #endif
1159 
1160 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1161 				DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1162 				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1163 						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1164 								ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1165 			}
1166 		}
1167 		*CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1168 	}
1169 	*CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1170 
1171 #ifdef __DML_VBA_DEBUG__
1172 	dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1173 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1174 	for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1175 		dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1176 				__func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1177 	}
1178 #endif
1179 } // CalculateDETBufferSize
1180 
dml32_CalculateODMMode(unsigned int MaximumPixelsPerLinePerDSCUnit,unsigned int HActive,enum output_format_class OutFormat,enum output_encoder_class Output,enum odm_combine_policy ODMUse,double StateDispclk,double MaxDispclk,bool DSCEnable,unsigned int TotalNumberOfActiveDPP,unsigned int MaxNumDPP,double PixelClock,double DISPCLKDPPCLKDSCCLKDownSpreading,double DISPCLKRampingMargin,double DISPCLKDPPCLKVCOSpeed,unsigned int NumberOfDSCSlices,bool * TotalAvailablePipesSupport,unsigned int * NumberOfDPP,enum odm_combine_mode * ODMMode,double * RequiredDISPCLKPerSurface)1181 void dml32_CalculateODMMode(
1182 		unsigned int MaximumPixelsPerLinePerDSCUnit,
1183 		unsigned int HActive,
1184 		enum output_format_class OutFormat,
1185 		enum output_encoder_class Output,
1186 		enum odm_combine_policy ODMUse,
1187 		double StateDispclk,
1188 		double MaxDispclk,
1189 		bool DSCEnable,
1190 		unsigned int TotalNumberOfActiveDPP,
1191 		unsigned int MaxNumDPP,
1192 		double PixelClock,
1193 		double DISPCLKDPPCLKDSCCLKDownSpreading,
1194 		double DISPCLKRampingMargin,
1195 		double DISPCLKDPPCLKVCOSpeed,
1196 		unsigned int NumberOfDSCSlices,
1197 
1198 		/* Output */
1199 		bool *TotalAvailablePipesSupport,
1200 		unsigned int *NumberOfDPP,
1201 		enum odm_combine_mode *ODMMode,
1202 		double *RequiredDISPCLKPerSurface)
1203 {
1204 
1205 	double SurfaceRequiredDISPCLKWithoutODMCombine;
1206 	double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1207 	double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1208 
1209 	SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1210 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1211 			MaxDispclk);
1212 	SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1213 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1214 			MaxDispclk);
1215 	SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1216 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1217 			MaxDispclk);
1218 	*TotalAvailablePipesSupport = true;
1219 	*ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1220 
1221 	if (ODMUse == dm_odm_combine_policy_none)
1222 		*ODMMode = dm_odm_combine_mode_disabled;
1223 
1224 	*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1225 	*NumberOfDPP = 0;
1226 
1227 	// FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1228 	// (ODMUse == "" || ODMUse == "CombineAsNeeded")
1229 
1230 	if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1231 			((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1232 					(DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
1233 					|| NumberOfDSCSlices > 8)))) {
1234 		if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1235 			*ODMMode = dm_odm_combine_mode_4to1;
1236 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1237 			*NumberOfDPP = 4;
1238 		} else {
1239 			*TotalAvailablePipesSupport = false;
1240 		}
1241 	} else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1242 			(((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1243 					SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1244 					(DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
1245 					|| (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
1246 		if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1247 			*ODMMode = dm_odm_combine_mode_2to1;
1248 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1249 			*NumberOfDPP = 2;
1250 		} else {
1251 			*TotalAvailablePipesSupport = false;
1252 		}
1253 	} else {
1254 		if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1255 			*NumberOfDPP = 1;
1256 		else
1257 			*TotalAvailablePipesSupport = false;
1258 	}
1259 	if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH &&
1260 			ODMUse != dm_odm_combine_policy_4to1) {
1261 		if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) {
1262 			*ODMMode = dm_odm_combine_mode_disabled;
1263 			*NumberOfDPP = 0;
1264 			*TotalAvailablePipesSupport = false;
1265 		} else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 ||
1266 				*ODMMode == dm_odm_combine_mode_4to1) {
1267 			*ODMMode = dm_odm_combine_mode_4to1;
1268 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1269 			*NumberOfDPP = 4;
1270 		} else {
1271 			*ODMMode = dm_odm_combine_mode_2to1;
1272 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1273 			*NumberOfDPP = 2;
1274 		}
1275 	}
1276 	if (Output == dm_hdmi && OutFormat == dm_420 &&
1277 			HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) {
1278 		*ODMMode = dm_odm_combine_mode_disabled;
1279 		*NumberOfDPP = 0;
1280 		*TotalAvailablePipesSupport = false;
1281 	}
1282 }
1283 
dml32_CalculateRequiredDispclk(enum odm_combine_mode ODMMode,double PixelClock,double DISPCLKDPPCLKDSCCLKDownSpreading,double DISPCLKRampingMargin,double DISPCLKDPPCLKVCOSpeed,double MaxDispclk)1284 double dml32_CalculateRequiredDispclk(
1285 		enum odm_combine_mode ODMMode,
1286 		double PixelClock,
1287 		double DISPCLKDPPCLKDSCCLKDownSpreading,
1288 		double DISPCLKRampingMargin,
1289 		double DISPCLKDPPCLKVCOSpeed,
1290 		double MaxDispclk)
1291 {
1292 	double RequiredDispclk = 0.;
1293 	double PixelClockAfterODM;
1294 	double DISPCLKWithRampingRoundedToDFSGranularity;
1295 	double DISPCLKWithoutRampingRoundedToDFSGranularity;
1296 	double MaxDispclkRoundedDownToDFSGranularity;
1297 
1298 	if (ODMMode == dm_odm_combine_mode_4to1)
1299 		PixelClockAfterODM = PixelClock / 4;
1300 	else if (ODMMode == dm_odm_combine_mode_2to1)
1301 		PixelClockAfterODM = PixelClock / 2;
1302 	else
1303 		PixelClockAfterODM = PixelClock;
1304 
1305 
1306 	DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1307 			PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1308 					* (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1309 
1310 	DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1311 			PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1312 
1313 	MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1314 
1315 	if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1316 		RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1317 	else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1318 		RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1319 	else
1320 		RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1321 
1322 	return RequiredDispclk;
1323 }
1324 
/*
 * dml32_RoundToDFSGranularity - snap a clock to a value of the form
 * VCOSpeed * 4 / divider, where the divider is made integral by
 * dml_floor() or dml_ceil() with a granularity of 1.0.
 *
 * round_up selects dml_floor() for the divider (presumably yielding a clock
 * at or above the request); otherwise dml_ceil() is used.
 * A Clock of zero or less returns 0.0.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double divider;

	if (Clock <= 0.0)
		return 0.0;

	divider = round_up ? dml_floor(VCOSpeed * 4.0 / Clock, 1.0)
			   : dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);

	return VCOSpeed * 4.0 / divider;
}
1335 
dml32_CalculateOutputLink(double PHYCLKPerState,double PHYCLKD18PerState,double PHYCLKD32PerState,double Downspreading,bool IsMainSurfaceUsingTheIndicatedTiming,enum output_encoder_class Output,enum output_format_class OutputFormat,unsigned int HTotal,unsigned int HActive,double PixelClockBackEnd,double ForcedOutputLinkBPP,unsigned int DSCInputBitPerComponent,unsigned int NumberOfDSCSlices,double AudioSampleRate,unsigned int AudioSampleLayout,enum odm_combine_mode ODMModeNoDSC,enum odm_combine_mode ODMModeDSC,bool DSCEnable,unsigned int OutputLinkDPLanes,enum dm_output_link_dp_rate OutputLinkDPRate,bool * RequiresDSC,double * RequiresFEC,double * OutBpp,enum dm_output_type * OutputType,enum dm_output_rate * OutputRate,unsigned int * RequiredSlots)1336 void dml32_CalculateOutputLink(
1337 		double PHYCLKPerState,
1338 		double PHYCLKD18PerState,
1339 		double PHYCLKD32PerState,
1340 		double Downspreading,
1341 		bool IsMainSurfaceUsingTheIndicatedTiming,
1342 		enum output_encoder_class Output,
1343 		enum output_format_class OutputFormat,
1344 		unsigned int HTotal,
1345 		unsigned int HActive,
1346 		double PixelClockBackEnd,
1347 		double ForcedOutputLinkBPP,
1348 		unsigned int DSCInputBitPerComponent,
1349 		unsigned int NumberOfDSCSlices,
1350 		double AudioSampleRate,
1351 		unsigned int AudioSampleLayout,
1352 		enum odm_combine_mode ODMModeNoDSC,
1353 		enum odm_combine_mode ODMModeDSC,
1354 		bool DSCEnable,
1355 		unsigned int OutputLinkDPLanes,
1356 		enum dm_output_link_dp_rate OutputLinkDPRate,
1357 
1358 		/* Output */
1359 		bool *RequiresDSC,
1360 		double *RequiresFEC,
1361 		double  *OutBpp,
1362 		enum dm_output_type *OutputType,
1363 		enum dm_output_rate *OutputRate,
1364 		unsigned int *RequiredSlots)
1365 {
1366 	bool LinkDSCEnable;
1367 	unsigned int dummy;
1368 	*RequiresDSC = false;
1369 	*RequiresFEC = false;
1370 	*OutBpp = 0;
1371 	*OutputType = dm_output_type_unknown;
1372 	*OutputRate = dm_output_rate_unknown;
1373 
1374 	if (IsMainSurfaceUsingTheIndicatedTiming) {
1375 		if (Output == dm_hdmi) {
1376 			*RequiresDSC = false;
1377 			*RequiresFEC = false;
1378 			*OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1379 					PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1380 					DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1381 					ODMModeNoDSC, ODMModeDSC, &dummy);
1382 			//OutputTypeAndRate = "HDMI";
1383 			*OutputType = dm_output_type_hdmi;
1384 
1385 		} else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1386 			if (DSCEnable == true) {
1387 				*RequiresDSC = true;
1388 				LinkDSCEnable = true;
1389 				if (Output == dm_dp || Output == dm_dp2p0)
1390 					*RequiresFEC = true;
1391 				else
1392 					*RequiresFEC = false;
1393 			} else {
1394 				*RequiresDSC = false;
1395 				LinkDSCEnable = false;
1396 				if (Output == dm_dp2p0)
1397 					*RequiresFEC = true;
1398 				else
1399 					*RequiresFEC = false;
1400 			}
1401 			if (Output == dm_dp2p0) {
1402 				*OutBpp = 0;
1403 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1404 						PHYCLKD32PerState >= 10000 / 32) {
1405 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1406 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1407 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1408 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1409 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1410 					if (*OutBpp == 0 && PHYCLKD32PerState < 13500 / 32 && DSCEnable == true &&
1411 							ForcedOutputLinkBPP == 0) {
1412 						*RequiresDSC = true;
1413 						LinkDSCEnable = true;
1414 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1415 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1416 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1417 								OutputFormat, DSCInputBitPerComponent,
1418 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1419 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1420 					}
1421 					//OutputTypeAndRate = Output & " UHBR10";
1422 					*OutputType = dm_output_type_dp2p0;
1423 					*OutputRate = dm_output_rate_dp_rate_uhbr10;
1424 				}
1425 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1426 						*OutBpp == 0 && PHYCLKD32PerState >= 13500 / 32) {
1427 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1428 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1429 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1430 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1431 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1432 
1433 					if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1434 							ForcedOutputLinkBPP == 0) {
1435 						*RequiresDSC = true;
1436 						LinkDSCEnable = true;
1437 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1438 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1439 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1440 								OutputFormat, DSCInputBitPerComponent,
1441 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1442 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1443 					}
1444 					//OutputTypeAndRate = Output & " UHBR13p5";
1445 					*OutputType = dm_output_type_dp2p0;
1446 					*OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1447 				}
1448 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1449 						*OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1450 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1451 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1452 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1453 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1454 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1455 					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1456 						*RequiresDSC = true;
1457 						LinkDSCEnable = true;
1458 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1459 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1460 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1461 								OutputFormat, DSCInputBitPerComponent,
1462 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1463 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1464 					}
1465 					//OutputTypeAndRate = Output & " UHBR20";
1466 					*OutputType = dm_output_type_dp2p0;
1467 					*OutputRate = dm_output_rate_dp_rate_uhbr20;
1468 				}
1469 			} else {
1470 				*OutBpp = 0;
1471 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1472 						PHYCLKPerState >= 270) {
1473 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1474 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1475 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1476 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1477 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1478 					if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1479 							ForcedOutputLinkBPP == 0) {
1480 						*RequiresDSC = true;
1481 						LinkDSCEnable = true;
1482 						if (Output == dm_dp)
1483 							*RequiresFEC = true;
1484 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1485 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1486 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1487 								OutputFormat, DSCInputBitPerComponent,
1488 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1489 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1490 					}
1491 					//OutputTypeAndRate = Output & " HBR";
1492 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1493 					*OutputRate = dm_output_rate_dp_rate_hbr;
1494 				}
1495 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1496 						*OutBpp == 0 && PHYCLKPerState >= 540) {
1497 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1498 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1499 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1500 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1501 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1502 
1503 					if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1504 							ForcedOutputLinkBPP == 0) {
1505 						*RequiresDSC = true;
1506 						LinkDSCEnable = true;
1507 						if (Output == dm_dp)
1508 							*RequiresFEC = true;
1509 
1510 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1511 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1512 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1513 								OutputFormat, DSCInputBitPerComponent,
1514 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1515 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1516 					}
1517 					//OutputTypeAndRate = Output & " HBR2";
1518 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1519 					*OutputRate = dm_output_rate_dp_rate_hbr2;
1520 				}
1521 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1522 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1523 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1524 							ForcedOutputLinkBPP, LinkDSCEnable, Output,
1525 							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1526 							AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1527 							RequiredSlots);
1528 
1529 					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1530 						*RequiresDSC = true;
1531 						LinkDSCEnable = true;
1532 						if (Output == dm_dp)
1533 							*RequiresFEC = true;
1534 
1535 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1536 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1537 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1538 								OutputFormat, DSCInputBitPerComponent,
1539 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1540 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1541 					}
1542 					//OutputTypeAndRate = Output & " HBR3";
1543 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1544 					*OutputRate = dm_output_rate_dp_rate_hbr3;
1545 				}
1546 			}
1547 		}
1548 	}
1549 }
1550 
/*
 * Derive the global DPPCLK and the per-surface DPPCLK values.
 *
 * Pass 1: Dppclk[k] = DPPCLKUsingSingleDPP[k] / DPPPerSurface[k], scaled by
 * the down-spread percentage; the largest of these is tracked.
 * The largest value is then run through dml32_RoundToDFSGranularity() with
 * round_up set and stored in *GlobalDPPCLK.
 * Pass 2: every Dppclk[k] is replaced by *GlobalDPPCLK / 255 times the
 * dml_ceil() of (Dppclk[k] * 255 / *GlobalDPPCLK).
 *
 * NOTE(review): when every pass-1 value is 0 the rounded global clock is 0
 * and pass 2 divides by it - confirm callers always have a non-zero clock.
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	const double spread_factor = 1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100;
	double peak_dppclk = 0;
	double global_dppclk;
	unsigned int surf;

	for (surf = 0; surf < NumberOfActiveSurfaces; surf++) {
		Dppclk[surf] = DPPCLKUsingSingleDPP[surf] / DPPPerSurface[surf] * spread_factor;
		peak_dppclk = dml_max(peak_dppclk, Dppclk[surf]);
	}

	global_dppclk = dml32_RoundToDFSGranularity(peak_dppclk, 1, DISPCLKDPPCLKVCOSpeed);
	*GlobalDPPCLK = global_dppclk;

	for (surf = 0; surf < NumberOfActiveSurfaces; surf++)
		Dppclk[surf] = global_dppclk / 255 *
				dml_ceil(Dppclk[surf] * 255.0 / global_dppclk, 1.0);
}
1572 
dml32_TruncToValidBPP(double LinkBitRate,unsigned int Lanes,unsigned int HTotal,unsigned int HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum output_encoder_class Output,enum output_format_class Format,unsigned int DSCInputBitPerComponent,unsigned int DSCSlices,unsigned int AudioRate,unsigned int AudioLayout,enum odm_combine_mode ODMModeNoDSC,enum odm_combine_mode ODMModeDSC,unsigned int * RequiredSlots)1573 double dml32_TruncToValidBPP(
1574 		double LinkBitRate,
1575 		unsigned int Lanes,
1576 		unsigned int HTotal,
1577 		unsigned int HActive,
1578 		double PixelClock,
1579 		double DesiredBPP,
1580 		bool DSCEnable,
1581 		enum output_encoder_class Output,
1582 		enum output_format_class Format,
1583 		unsigned int DSCInputBitPerComponent,
1584 		unsigned int DSCSlices,
1585 		unsigned int AudioRate,
1586 		unsigned int AudioLayout,
1587 		enum odm_combine_mode ODMModeNoDSC,
1588 		enum odm_combine_mode ODMModeDSC,
1589 		/* Output */
1590 		unsigned int *RequiredSlots)
1591 {
1592 	double    MaxLinkBPP;
1593 	unsigned int   MinDSCBPP;
1594 	double    MaxDSCBPP;
1595 	unsigned int   NonDSCBPP0;
1596 	unsigned int   NonDSCBPP1;
1597 	unsigned int   NonDSCBPP2;
1598 
1599 	if (Format == dm_420) {
1600 		NonDSCBPP0 = 12;
1601 		NonDSCBPP1 = 15;
1602 		NonDSCBPP2 = 18;
1603 		MinDSCBPP = 6;
1604 		MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16;
1605 	} else if (Format == dm_444) {
1606 		NonDSCBPP0 = 24;
1607 		NonDSCBPP1 = 30;
1608 		NonDSCBPP2 = 36;
1609 		MinDSCBPP = 8;
1610 		MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1611 	} else {
1612 		if (Output == dm_hdmi) {
1613 			NonDSCBPP0 = 24;
1614 			NonDSCBPP1 = 24;
1615 			NonDSCBPP2 = 24;
1616 		} else {
1617 			NonDSCBPP0 = 16;
1618 			NonDSCBPP1 = 20;
1619 			NonDSCBPP2 = 24;
1620 		}
1621 		if (Format == dm_n422) {
1622 			MinDSCBPP = 7;
1623 			MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1624 		} else {
1625 			MinDSCBPP = 8;
1626 			MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1627 		}
1628 	}
1629 	if (Output == dm_dp2p0) {
1630 		MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1631 	} else if (DSCEnable && Output == dm_dp) {
1632 		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1633 	} else {
1634 		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1635 	}
1636 
1637 	if (DSCEnable) {
1638 		if (ODMModeDSC == dm_odm_combine_mode_4to1)
1639 			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1640 		else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1641 			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1642 		else if (ODMModeDSC == dm_odm_split_mode_1to2)
1643 			MaxLinkBPP = 2 * MaxLinkBPP;
1644 	} else {
1645 		if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1646 			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1647 		else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1648 			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1649 		else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1650 			MaxLinkBPP = 2 * MaxLinkBPP;
1651 	}
1652 
1653 	if (DesiredBPP == 0) {
1654 		if (DSCEnable) {
1655 			if (MaxLinkBPP < MinDSCBPP)
1656 				return BPP_INVALID;
1657 			else if (MaxLinkBPP >= MaxDSCBPP)
1658 				return MaxDSCBPP;
1659 			else
1660 				return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1661 		} else {
1662 			if (MaxLinkBPP >= NonDSCBPP2)
1663 				return NonDSCBPP2;
1664 			else if (MaxLinkBPP >= NonDSCBPP1)
1665 				return NonDSCBPP1;
1666 			else if (MaxLinkBPP >= NonDSCBPP0)
1667 				return 16.0;
1668 			else
1669 				return BPP_INVALID;
1670 		}
1671 	} else {
1672 		if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1673 				DesiredBPP <= NonDSCBPP0)) ||
1674 				(DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1675 			return BPP_INVALID;
1676 		else
1677 			return DesiredBPP;
1678 	}
1679 
1680 	*RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1681 
1682 	return BPP_INVALID;
1683 } // TruncToValidBPP
1684 
dml32_RequiredDTBCLK(bool DSCEnable,double PixelClock,enum output_format_class OutputFormat,double OutputBpp,unsigned int DSCSlices,unsigned int HTotal,unsigned int HActive,unsigned int AudioRate,unsigned int AudioLayout)1685 double dml32_RequiredDTBCLK(
1686 		bool              DSCEnable,
1687 		double               PixelClock,
1688 		enum output_format_class  OutputFormat,
1689 		double               OutputBpp,
1690 		unsigned int              DSCSlices,
1691 		unsigned int                 HTotal,
1692 		unsigned int                 HActive,
1693 		unsigned int              AudioRate,
1694 		unsigned int              AudioLayout)
1695 {
1696 	double PixelWordRate;
1697 	double HCActive;
1698 	double HCBlank;
1699 	double AverageTribyteRate;
1700 	double HActiveTribyteRate;
1701 
1702 	if (DSCEnable != true)
1703 		return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1704 
1705 	PixelWordRate = PixelClock /  (OutputFormat == dm_444 ? 1 : 2);
1706 	HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1707 			dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1708 	HCBlank = 64 + 32 *
1709 			dml_ceil(AudioRate *  (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1710 	AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1711 	HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1712 	return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
1713 }
1714 
dml32_DSCDelayRequirement(bool DSCEnabled,enum odm_combine_mode ODMMode,unsigned int DSCInputBitPerComponent,double OutputBpp,unsigned int HActive,unsigned int HTotal,unsigned int NumberOfDSCSlices,enum output_format_class OutputFormat,enum output_encoder_class Output,double PixelClock,double PixelClockBackEnd,double dsc_delay_factor_wa)1715 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1716 		enum odm_combine_mode ODMMode,
1717 		unsigned int DSCInputBitPerComponent,
1718 		double OutputBpp,
1719 		unsigned int HActive,
1720 		unsigned int HTotal,
1721 		unsigned int NumberOfDSCSlices,
1722 		enum output_format_class  OutputFormat,
1723 		enum output_encoder_class Output,
1724 		double PixelClock,
1725 		double PixelClockBackEnd,
1726 		double dsc_delay_factor_wa)
1727 {
1728 	unsigned int DSCDelayRequirement_val;
1729 
1730 	if (DSCEnabled == true && OutputBpp != 0) {
1731 		if (ODMMode == dm_odm_combine_mode_4to1) {
1732 			DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1733 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1734 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1735 		} else if (ODMMode == dm_odm_combine_mode_2to1) {
1736 			DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1737 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1738 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1739 		} else {
1740 			DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1741 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1742 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
1743 		}
1744 
1745 		DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1746 				dml_ceil((double)DSCDelayRequirement_val / HActive, 1);
1747 
1748 		DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1749 
1750 	} else {
1751 		DSCDelayRequirement_val = 0;
1752 	}
1753 
1754 #ifdef __DML_VBA_DEBUG__
1755 	dml_print("DML::%s: DSCEnabled              = %d\n", __func__, DSCEnabled);
1756 	dml_print("DML::%s: OutputBpp               = %f\n", __func__, OutputBpp);
1757 	dml_print("DML::%s: HActive                 = %d\n", __func__, HActive);
1758 	dml_print("DML::%s: OutputFormat            = %d\n", __func__, OutputFormat);
1759 	dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1760 	dml_print("DML::%s: NumberOfDSCSlices       = %d\n", __func__, NumberOfDSCSlices);
1761 	dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1762 #endif
1763 
1764 	return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1);
1765 }
1766 
dml32_CalculateSurfaceSizeInMall(unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCN,enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],bool DCCEnable[],bool ViewportStationary[],unsigned int ViewportXStartY[],unsigned int ViewportYStartY[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int ViewportWidthY[],unsigned int ViewportHeightY[],unsigned int BytesPerPixelY[],unsigned int ViewportWidthC[],unsigned int ViewportHeightC[],unsigned int BytesPerPixelC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int ReadBlockWidthY[],unsigned int ReadBlockWidthC[],unsigned int ReadBlockHeightY[],unsigned int ReadBlockHeightC[],unsigned int DCCMetaPitchY[],unsigned int DCCMetaPitchC[],unsigned int SurfaceSizeInMALL[],bool * ExceededMALLSize)1767 void dml32_CalculateSurfaceSizeInMall(
1768 		unsigned int NumberOfActiveSurfaces,
1769 		unsigned int MALLAllocatedForDCN,
1770 		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1771 		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
1772 		bool DCCEnable[],
1773 		bool ViewportStationary[],
1774 		unsigned int ViewportXStartY[],
1775 		unsigned int ViewportYStartY[],
1776 		unsigned int ViewportXStartC[],
1777 		unsigned int ViewportYStartC[],
1778 		unsigned int ViewportWidthY[],
1779 		unsigned int ViewportHeightY[],
1780 		unsigned int BytesPerPixelY[],
1781 		unsigned int ViewportWidthC[],
1782 		unsigned int ViewportHeightC[],
1783 		unsigned int BytesPerPixelC[],
1784 		unsigned int SurfaceWidthY[],
1785 		unsigned int SurfaceWidthC[],
1786 		unsigned int SurfaceHeightY[],
1787 		unsigned int SurfaceHeightC[],
1788 		unsigned int Read256BytesBlockWidthY[],
1789 		unsigned int Read256BytesBlockWidthC[],
1790 		unsigned int Read256BytesBlockHeightY[],
1791 		unsigned int Read256BytesBlockHeightC[],
1792 		unsigned int ReadBlockWidthY[],
1793 		unsigned int ReadBlockWidthC[],
1794 		unsigned int ReadBlockHeightY[],
1795 		unsigned int ReadBlockHeightC[],
1796 		unsigned int DCCMetaPitchY[],
1797 		unsigned int DCCMetaPitchC[],
1798 
1799 		/* Output */
1800 		unsigned int    SurfaceSizeInMALL[],
1801 		bool *ExceededMALLSize)
1802 {
1803 	unsigned int k;
1804 	unsigned int TotalSurfaceSizeInMALLForSS = 0;
1805 	unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
1806 	unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;
1807 
1808 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1809 		if (ViewportStationary[k]) {
1810 			SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1811 					dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1812 						ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1813 						ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1814 						ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1815 						ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1816 						dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
1817 
1818 			if (ReadBlockWidthC[k] > 0) {
1819 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1820 						dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1821 							dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1822 							ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1823 							dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1824 							dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1825 							dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1826 							ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1827 							dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
1828 							BytesPerPixelC[k];
1829 			}
1830 			if (DCCEnable[k] == true) {
1831 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1832 						(dml_min(dml_ceil(DCCMetaPitchY[k], 8 * Read256BytesBlockWidthY[k]),
1833 							dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1834 							Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1835 							- dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1836 							* dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1837 							Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1838 							ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1839 							Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 *
1840 							Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256) + (64 * 1024);
1841 				if (Read256BytesBlockWidthC[k] > 0) {
1842 					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1843 							dml_min(dml_ceil(DCCMetaPitchC[k], 8 *
1844 								Read256BytesBlockWidthC[k]),
1845 								dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1846 								* Read256BytesBlockWidthC[k] - 1, 8 *
1847 								Read256BytesBlockWidthC[k]) -
1848 								dml_floor(ViewportXStartC[k], 8 *
1849 								Read256BytesBlockWidthC[k])) *
1850 								dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1851 								Read256BytesBlockHeightC[k]),
1852 								dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1853 								8 * Read256BytesBlockHeightC[k] - 1, 8 *
1854 								Read256BytesBlockHeightC[k]) -
1855 								dml_floor(ViewportYStartC[k], 8 *
1856 								Read256BytesBlockHeightC[k])) *
1857 								BytesPerPixelC[k] / 256;
1858 				}
1859 			}
1860 		} else {
1861 			SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1862 					ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1863 					dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1864 							ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1865 							BytesPerPixelY[k];
1866 			if (ReadBlockWidthC[k] > 0) {
1867 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1868 						dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1869 								ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1870 						dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1871 								ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
1872 								BytesPerPixelC[k];
1873 			}
1874 			if (DCCEnable[k] == true) {
1875 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1876 						(dml_ceil(dml_min(DCCMetaPitchY[k], ViewportWidthY[k] + 8 *
1877 								Read256BytesBlockWidthY[k] - 1), 8 *
1878 								Read256BytesBlockWidthY[k]) *
1879 						dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1880 								Read256BytesBlockHeightY[k] - 1), 8 *
1881 								Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024);
1882 
1883 				if (Read256BytesBlockWidthC[k] > 0) {
1884 					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1885 							dml_ceil(dml_min(DCCMetaPitchC[k], ViewportWidthC[k] + 8 *
1886 									Read256BytesBlockWidthC[k] - 1), 8 *
1887 									Read256BytesBlockWidthC[k]) *
1888 							dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1889 									Read256BytesBlockHeightC[k] - 1), 8 *
1890 									Read256BytesBlockHeightC[k]) *
1891 									BytesPerPixelC[k] / 256;
1892 				}
1893 			}
1894 		}
1895 	}
1896 
1897 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1898 		/* SS and Subvp counted separate as they are never used at the same time */
1899 		if (UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)
1900 			TotalSurfaceSizeInMALLForSubVP = TotalSurfaceSizeInMALLForSubVP + SurfaceSizeInMALL[k];
1901 		else if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1902 			TotalSurfaceSizeInMALLForSS = TotalSurfaceSizeInMALLForSS + SurfaceSizeInMALL[k];
1903 	}
1904 	*ExceededMALLSize =  (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
1905 							(TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
1906 } // CalculateSurfaceSizeInMall
1907 
/*
 * dml32_CalculateVMRowAndSwath - per-surface page-table and row sizing.
 *
 * All array parameters are indexed by surface (0 .. NumberOfActiveSurfaces-1).
 * For every surface the function:
 *   1. selects vm_group_bytes / dpte_group_bytes from HostVMEnable/GPUVMEnable,
 *   2. divides the PTE request buffer between luma and chroma,
 *   3. calls dml32_CalculateVMAndRowBytes() and
 *      dml32_CalculatePrefetchSourceLines() for the chroma plane (only for
 *      dm_420_8, dm_420_10, dm_420_12 and dm_rgbe_alpha) and for the luma plane,
 *   4. records whether the PTE rows fit the request buffer, both for the
 *      regular layout and for the "one row per frame" layout,
 *   5. calls dml32_CalculateMALLUseForStaticScreen(),
 *   6. derives PTE_BUFFER_MODE, BIGK_FRAGMENT_SIZE, use_one_row_for_frame and
 *      use_one_row_for_frame_flip, substitutes the one-row-per-frame values
 *      where that mode is selected, and calls dml32_CalculateRowBandwidth().
 *
 * Everything listed after the "Output" marker is written by this function;
 * there is no return value.
 */
void dml32_CalculateVMRowAndSwath(
		unsigned int NumberOfActiveSurfaces,
		DmlPipe myPipe[],
		unsigned int SurfaceSizeInMALL[],
		unsigned int PTEBufferSizeInRequestsLuma,
		unsigned int PTEBufferSizeInRequestsChroma,
		unsigned int DCCMetaBufferSizeBytes,
		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
		unsigned int MALLAllocatedForDCN,
		double SwathWidthY[],
		double SwathWidthC[],
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int HostVMMaxNonCachedPageTableLevels,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int GPUVMMinPageSizeKBytes[],
		unsigned int HostVMMinPageSize,

		/* Output */
		bool PTEBufferSizeNotExceeded[],
		bool DCCMetaBufferSizeNotExceeded[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],
		unsigned int dpte_row_height_luma[],
		unsigned int dpte_row_height_chroma[],
		unsigned int dpte_row_height_linear_luma[],     // VBA_DELTA
		unsigned int dpte_row_height_linear_chroma[],   // VBA_DELTA
		unsigned int meta_req_width[],
		unsigned int meta_req_width_chroma[],
		unsigned int meta_req_height[],
		unsigned int meta_req_height_chroma[],
		unsigned int meta_row_width[],
		unsigned int meta_row_width_chroma[],
		unsigned int meta_row_height[],
		unsigned int meta_row_height_chroma[],
		unsigned int vm_group_bytes[],
		unsigned int dpte_group_bytes[],
		unsigned int PixelPTEReqWidthY[],
		unsigned int PixelPTEReqHeightY[],
		unsigned int PTERequestSizeY[],
		unsigned int PixelPTEReqWidthC[],
		unsigned int PixelPTEReqHeightC[],
		unsigned int PTERequestSizeC[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int meta_pte_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		unsigned int meta_pte_bytes_per_frame_ub_c[],
		double PrefetchSourceLinesY[],
		double PrefetchSourceLinesC[],
		double VInitPreFillY[],
		double VInitPreFillC[],
		unsigned int MaxNumSwathY[],
		unsigned int MaxNumSwathC[],
		double meta_row_bw[],
		double dpte_row_bw[],
		double PixelPTEBytesPerRow[],
		double PDEAndMetaPTEBytesFrame[],
		double MetaRowByte[],
		bool use_one_row_for_frame[],
		bool use_one_row_for_frame_flip[],
		bool UsesMALLForStaticScreen[],
		bool PTE_BUFFER_MODE[],
		unsigned int BIGK_FRAGMENT_SIZE[])
{
	unsigned int k;
	unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
	unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
	unsigned int PDEAndMetaPTEBytesFrameY;
	unsigned int PDEAndMetaPTEBytesFrameC;
	unsigned int MetaRowByteY[DC__NUM_DPP__MAX];
	unsigned int MetaRowByteC[DC__NUM_DPP__MAX];
	unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
	unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
	unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
	bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		/* Group sizes: HostVM takes precedence over GPUVM; both off means 0. */
		if (HostVMEnable == true) {
			vm_group_bytes[k] = 512;
			dpte_group_bytes[k] = 512;
		} else if (GPUVMEnable == true) {
			vm_group_bytes[k] = 2048;
			if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
				dpte_group_bytes[k] = 512;
			else
				dpte_group_bytes[k] = 2048;
		} else {
			vm_group_bytes[k] = 0;
			dpte_group_bytes[k] = 0;
		}

		if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
				myPipe[k].SourcePixelFormat == dm_420_12 ||
				myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
			/*
			 * Two-plane formats. Non-rotated 4:2:0 10/12-bit surfaces share the
			 * combined request buffer evenly; every other two-plane case keeps
			 * the separate luma/chroma budgets.
			 */
			if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
					!IsVertical(myPipe[k].SourceRotation)) {
				PTEBufferSizeInRequestsForLuma[k] =
						(PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
			} else {
				PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
			}

			PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
					myPipe[k].ViewportStationary,
					myPipe[k].DCCEnable,
					myPipe[k].DPPPerSurface,
					myPipe[k].BlockHeight256BytesC,
					myPipe[k].BlockWidth256BytesC,
					myPipe[k].SourcePixelFormat,
					myPipe[k].SurfaceTiling,
					myPipe[k].BytePerPixelC,
					myPipe[k].SourceRotation,
					SwathWidthC[k],
					myPipe[k].ViewportHeightChroma,
					myPipe[k].ViewportXStartC,
					myPipe[k].ViewportYStartC,
					GPUVMEnable,
					HostVMEnable,
					HostVMMaxNonCachedPageTableLevels,
					GPUVMMaxPageTableLevels,
					GPUVMMinPageSizeKBytes[k],
					HostVMMinPageSize,
					PTEBufferSizeInRequestsForChroma[k],
					myPipe[k].PitchC,
					myPipe[k].DCCMetaPitchC,
					myPipe[k].BlockWidthC,
					myPipe[k].BlockHeightC,

					/* Output */
					&MetaRowByteC[k],
					&PixelPTEBytesPerRowC[k],
					&dpte_row_width_chroma_ub[k],
					&dpte_row_height_chroma[k],
					&dpte_row_height_linear_chroma[k],
					&PixelPTEBytesPerRowC_one_row_per_frame[k],
					&dpte_row_width_chroma_ub_one_row_per_frame[k],
					&dpte_row_height_chroma_one_row_per_frame[k],
					&meta_req_width_chroma[k],
					&meta_req_height_chroma[k],
					&meta_row_width_chroma[k],
					&meta_row_height_chroma[k],
					&PixelPTEReqWidthC[k],
					&PixelPTEReqHeightC[k],
					&PTERequestSizeC[k],
					&dpde0_bytes_per_frame_ub_c[k],
					&meta_pte_bytes_per_frame_ub_c[k]);

			PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
					myPipe[k].VRatioChroma,
					myPipe[k].VTapsChroma,
					myPipe[k].InterlaceEnable,
					myPipe[k].ProgressiveToInterlaceUnitInOPP,
					myPipe[k].SwathHeightC,
					myPipe[k].SourceRotation,
					myPipe[k].ViewportStationary,
					SwathWidthC[k],
					myPipe[k].ViewportHeightChroma,
					myPipe[k].ViewportXStartC,
					myPipe[k].ViewportYStartC,

					/* Output */
					&VInitPreFillC[k],
					&MaxNumSwathC[k]);
		} else {
			/* Single-plane format: luma gets the whole buffer, chroma is zeroed. */
			PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
			PTEBufferSizeInRequestsForChroma[k] = 0;
			PixelPTEBytesPerRowC[k] = 0;
			PDEAndMetaPTEBytesFrameC = 0;
			MetaRowByteC[k] = 0;
			MaxNumSwathC[k] = 0;
			PrefetchSourceLinesC[k] = 0;
			dpte_row_height_chroma_one_row_per_frame[k] = 0;
			dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
			PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
		}

		PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
				myPipe[k].ViewportStationary,
				myPipe[k].DCCEnable,
				myPipe[k].DPPPerSurface,
				myPipe[k].BlockHeight256BytesY,
				myPipe[k].BlockWidth256BytesY,
				myPipe[k].SourcePixelFormat,
				myPipe[k].SurfaceTiling,
				myPipe[k].BytePerPixelY,
				myPipe[k].SourceRotation,
				SwathWidthY[k],
				myPipe[k].ViewportHeight,
				myPipe[k].ViewportXStart,
				myPipe[k].ViewportYStart,
				GPUVMEnable,
				HostVMEnable,
				HostVMMaxNonCachedPageTableLevels,
				GPUVMMaxPageTableLevels,
				GPUVMMinPageSizeKBytes[k],
				HostVMMinPageSize,
				PTEBufferSizeInRequestsForLuma[k],
				myPipe[k].PitchY,
				myPipe[k].DCCMetaPitchY,
				myPipe[k].BlockWidthY,
				myPipe[k].BlockHeightY,

				/* Output */
				&MetaRowByteY[k],
				&PixelPTEBytesPerRowY[k],
				&dpte_row_width_luma_ub[k],
				&dpte_row_height_luma[k],
				&dpte_row_height_linear_luma[k],
				&PixelPTEBytesPerRowY_one_row_per_frame[k],
				&dpte_row_width_luma_ub_one_row_per_frame[k],
				&dpte_row_height_luma_one_row_per_frame[k],
				&meta_req_width[k],
				&meta_req_height[k],
				&meta_row_width[k],
				&meta_row_height[k],
				&PixelPTEReqWidthY[k],
				&PixelPTEReqHeightY[k],
				&PTERequestSizeY[k],
				&dpde0_bytes_per_frame_ub_l[k],
				&meta_pte_bytes_per_frame_ub_l[k]);

		PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
				myPipe[k].VRatio,
				myPipe[k].VTaps,
				myPipe[k].InterlaceEnable,
				myPipe[k].ProgressiveToInterlaceUnitInOPP,
				myPipe[k].SwathHeightY,
				myPipe[k].SourceRotation,
				myPipe[k].ViewportStationary,
				SwathWidthY[k],
				myPipe[k].ViewportHeight,
				myPipe[k].ViewportXStart,
				myPipe[k].ViewportYStart,

				/* Output */
				&VInitPreFillY[k],
				&MaxNumSwathY[k]);

		PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
		MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];

		/* Regular layout: each request slot accounts for 64 bytes of PTE row. */
		PTEBufferSizeNotExceeded[k] =
				PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
				PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k];

		/* One-row-per-frame layout is checked against twice that budget. */
		one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
			PTEBufferSizeInRequestsForLuma[k] &&
			PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
	}

	dml32_CalculateMALLUseForStaticScreen(
			NumberOfActiveSurfaces,
			MALLAllocatedForDCN,
			UseMALLForStaticScreen,   // mode
			SurfaceSizeInMALL,
			one_row_per_frame_fits_in_buffer,
			/* Output */
			UsesMALLForStaticScreen); // boolen

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
				(GPUVMMinPageSizeKBytes[k] > 64);
		/* log2 of the page size in bytes, relative to 2^12. */
		BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
	}

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n",  __func__, k, SurfaceSizeInMALL[k]);
		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
#endif
		/*
		 * Same conditions as PTE_BUFFER_MODE, except that the large-page
		 * condition additionally requires a vertical (rotated) scan.
		 */
		use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
				(GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));

		use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
				!(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);

		if (use_one_row_for_frame[k]) {
			/* Replace the regular row geometry with the one-row-per-frame one. */
			dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
			dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
			PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
			dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
			dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
			PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
			PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
		}

		DCCMetaBufferSizeNotExceeded[k] = (MetaRowByte[k] <= DCCMetaBufferSizeBytes);

		PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
		if (use_one_row_for_frame[k])
			PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;

		dml32_CalculateRowBandwidth(
				GPUVMEnable,
				myPipe[k].SourcePixelFormat,
				myPipe[k].VRatio,
				myPipe[k].VRatioChroma,
				myPipe[k].DCCEnable,
				myPipe[k].HTotal / myPipe[k].PixelClock,
				MetaRowByteY[k], MetaRowByteC[k],
				meta_row_height[k],
				meta_row_height_chroma[k],
				PixelPTEBytesPerRowY[k],
				PixelPTEBytesPerRowC[k],
				dpte_row_height_luma[k],
				dpte_row_height_chroma[k],

				/* Output */
				&meta_row_bw[k],
				&dpte_row_bw[k]);
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, use_one_row_for_frame        = %d\n",  __func__, k, use_one_row_for_frame[k]);
		dml_print("DML::%s: k=%d, use_one_row_for_frame_flip   = %d\n",
				__func__, k, use_one_row_for_frame_flip[k]);
		dml_print("DML::%s: k=%d, UseMALLForPStateChange       = %d\n",
				__func__, k, UseMALLForPStateChange[k]);
		dml_print("DML::%s: k=%d, dpte_row_height_luma         = %d\n",  __func__, k, dpte_row_height_luma[k]);
		dml_print("DML::%s: k=%d, dpte_row_width_luma_ub       = %d\n",
				__func__, k, dpte_row_width_luma_ub[k]);
		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY         = %d\n",  __func__, k, PixelPTEBytesPerRowY[k]);
		dml_print("DML::%s: k=%d, dpte_row_height_chroma       = %d\n",
				__func__, k, dpte_row_height_chroma[k]);
		dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub     = %d\n",
				__func__, k, dpte_row_width_chroma_ub[k]);
		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC         = %d\n",  __func__, k, PixelPTEBytesPerRowC[k]);
		/* PixelPTEBytesPerRow[] is double, so it needs a floating conversion. */
		dml_print("DML::%s: k=%d, PixelPTEBytesPerRow          = %f\n",  __func__, k, PixelPTEBytesPerRow[k]);
		dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded     = %d\n",
				__func__, k, PTEBufferSizeNotExceeded[k]);
		dml_print("DML::%s: k=%d, PTE_BUFFER_MODE              = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
		dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE           = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
#endif
	}
} // CalculateVMRowAndSwath
2259 
dml32_CalculateVMAndRowBytes(bool ViewportStationary,bool DCCEnable,unsigned int NumberOfDPPs,unsigned int BlockHeight256Bytes,unsigned int BlockWidth256Bytes,enum source_format_class SourcePixelFormat,unsigned int SurfaceTiling,unsigned int BytePerPixel,enum dm_rotation_angle SourceRotation,double SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMaxPageTableLevels,unsigned int GPUVMMinPageSizeKBytes,unsigned int HostVMMinPageSize,unsigned int PTEBufferSizeInRequests,unsigned int Pitch,unsigned int DCCMetaPitch,unsigned int MacroTileWidth,unsigned int MacroTileHeight,unsigned int * MetaRowByte,unsigned int * PixelPTEBytesPerRow,unsigned int * dpte_row_width_ub,unsigned int * dpte_row_height,unsigned int * dpte_row_height_linear,unsigned int * PixelPTEBytesPerRow_one_row_per_frame,unsigned int * dpte_row_width_ub_one_row_per_frame,unsigned int * dpte_row_height_one_row_per_frame,unsigned int * MetaRequestWidth,unsigned int * MetaRequestHeight,unsigned int * meta_row_width,unsigned int * meta_row_height,unsigned int * PixelPTEReqWidth,unsigned int * PixelPTEReqHeight,unsigned int * PTERequestSize,unsigned int * DPDE0BytesFrame,unsigned int * MetaPTEBytesFrame)2260 unsigned int dml32_CalculateVMAndRowBytes(
2261 		bool ViewportStationary,
2262 		bool DCCEnable,
2263 		unsigned int NumberOfDPPs,
2264 		unsigned int BlockHeight256Bytes,
2265 		unsigned int BlockWidth256Bytes,
2266 		enum source_format_class SourcePixelFormat,
2267 		unsigned int SurfaceTiling,
2268 		unsigned int BytePerPixel,
2269 		enum dm_rotation_angle SourceRotation,
2270 		double SwathWidth,
2271 		unsigned int ViewportHeight,
2272 		unsigned int    ViewportXStart,
2273 		unsigned int    ViewportYStart,
2274 		bool GPUVMEnable,
2275 		bool HostVMEnable,
2276 		unsigned int HostVMMaxNonCachedPageTableLevels,
2277 		unsigned int GPUVMMaxPageTableLevels,
2278 		unsigned int GPUVMMinPageSizeKBytes,
2279 		unsigned int HostVMMinPageSize,
2280 		unsigned int PTEBufferSizeInRequests,
2281 		unsigned int Pitch,
2282 		unsigned int DCCMetaPitch,
2283 		unsigned int MacroTileWidth,
2284 		unsigned int MacroTileHeight,
2285 
2286 		/* Output */
2287 		unsigned int *MetaRowByte,
2288 		unsigned int *PixelPTEBytesPerRow,
2289 		unsigned int    *dpte_row_width_ub,
2290 		unsigned int *dpte_row_height,
2291 		unsigned int *dpte_row_height_linear,
2292 		unsigned int    *PixelPTEBytesPerRow_one_row_per_frame,
2293 		unsigned int    *dpte_row_width_ub_one_row_per_frame,
2294 		unsigned int    *dpte_row_height_one_row_per_frame,
2295 		unsigned int *MetaRequestWidth,
2296 		unsigned int *MetaRequestHeight,
2297 		unsigned int *meta_row_width,
2298 		unsigned int *meta_row_height,
2299 		unsigned int *PixelPTEReqWidth,
2300 		unsigned int *PixelPTEReqHeight,
2301 		unsigned int *PTERequestSize,
2302 		unsigned int    *DPDE0BytesFrame,
2303 		unsigned int    *MetaPTEBytesFrame)
2304 {
2305 	unsigned int MPDEBytesFrame;
2306 	unsigned int DCCMetaSurfaceBytes;
2307 	unsigned int ExtraDPDEBytesFrame;
2308 	unsigned int PDEAndMetaPTEBytesFrame;
2309 	unsigned int HostVMDynamicLevels = 0;
2310 	unsigned int    MacroTileSizeBytes;
2311 	unsigned int    vp_height_meta_ub;
2312 	unsigned int    vp_height_dpte_ub;
2313 	unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2314 
2315 	if (GPUVMEnable == true && HostVMEnable == true) {
2316 		if (HostVMMinPageSize < 2048)
2317 			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2318 		else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2319 			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2320 		else
2321 			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2322 	}
2323 
2324 	*MetaRequestHeight = 8 * BlockHeight256Bytes;
2325 	*MetaRequestWidth = 8 * BlockWidth256Bytes;
2326 	if (SurfaceTiling == dm_sw_linear) {
2327 		*meta_row_height = 32;
2328 		*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2329 				- dml_floor(ViewportXStart, *MetaRequestWidth);
2330 	} else if (!IsVertical(SourceRotation)) {
2331 		*meta_row_height = *MetaRequestHeight;
2332 		if (ViewportStationary && NumberOfDPPs == 1) {
2333 			*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2334 					*MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2335 		} else {
2336 			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2337 		}
2338 		*MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2339 	} else {
2340 		*meta_row_height = *MetaRequestWidth;
2341 		if (ViewportStationary && NumberOfDPPs == 1) {
2342 			*meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2343 					*MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2344 		} else {
2345 			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2346 		}
2347 		*MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2348 	}
2349 
2350 	if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2351 		vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2352 				64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2353 	} else if (!IsVertical(SourceRotation)) {
2354 		vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2355 	} else {
2356 		vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2357 	}
2358 
2359 	DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2360 
2361 	if (GPUVMEnable == true) {
2362 		*MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2363 				(8 * 4.0 * 1024), 1) + 1) * 64;
2364 		MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2365 	} else {
2366 		*MetaPTEBytesFrame = 0;
2367 		MPDEBytesFrame = 0;
2368 	}
2369 
2370 	if (DCCEnable != true) {
2371 		*MetaPTEBytesFrame = 0;
2372 		MPDEBytesFrame = 0;
2373 		*MetaRowByte = 0;
2374 	}
2375 
2376 	MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2377 
2378 	if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2379 		if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2380 			vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2381 					MacroTileHeight - 1, MacroTileHeight) -
2382 					dml_floor(ViewportYStart, MacroTileHeight);
2383 		} else if (!IsVertical(SourceRotation)) {
2384 			vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2385 		} else {
2386 			vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2387 		}
2388 		*DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2389 				(8 * 2097152), 1) + 1);
2390 		ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2391 	} else {
2392 		*DPDE0BytesFrame = 0;
2393 		ExtraDPDEBytesFrame = 0;
2394 		vp_height_dpte_ub = 0;
2395 	}
2396 
2397 	PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2398 
2399 #ifdef __DML_VBA_DEBUG__
2400 	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2401 	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2402 	dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2403 	dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2404 	dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2405 	dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2406 	dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2407 	dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2408 	dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2409 	dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2410 	dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2411 	dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2412 	dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2413 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2414 	dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2415 	dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2416 	dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2417 #endif
2418 
2419 	if (HostVMEnable == true)
2420 		PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2421 
2422 	if (SurfaceTiling == dm_sw_linear) {
2423 		*PixelPTEReqHeight = 1;
2424 		*PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2425 		PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2426 		*PTERequestSize = 64;
2427 	} else if (GPUVMMinPageSizeKBytes == 4) {
2428 		*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2429 		*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2430 		*PTERequestSize = 128;
2431 	} else {
2432 		*PixelPTEReqHeight = MacroTileHeight;
2433 		*PixelPTEReqWidth = 8 *  1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2434 		*PTERequestSize = 64;
2435 	}
2436 #ifdef __DML_VBA_DEBUG__
2437 	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2438 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2439 	dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2440 	dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2441 	dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2442 	dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2443 	dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2444 #endif
2445 
2446 	*dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2447 	*dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2448 			(double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2449 					(double) *PixelPTEReqWidth;
2450 	*PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2451 			*PTERequestSize;
2452 
2453 	if (SurfaceTiling == dm_sw_linear) {
2454 		*dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2455 				*PixelPTEReqWidth / Pitch), 1));
2456 #ifdef __DML_VBA_DEBUG__
2457 		dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2458 				PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2459 		dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2460 				dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2461 		dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2462 				dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2463 		dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2464 				1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2465 						*PixelPTEReqWidth / Pitch), 1));
2466 		dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2467 #endif
2468 		*dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2469 				(double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2470 		*PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2471 
2472 		// VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2473 		*dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2474 				PixelPTEReqWidth_linear / Pitch), 1);
2475 		if (*dpte_row_height_linear > 128)
2476 			*dpte_row_height_linear = 128;
2477 
2478 	} else if (!IsVertical(SourceRotation)) {
2479 		*dpte_row_height = *PixelPTEReqHeight;
2480 
2481 		if (GPUVMMinPageSizeKBytes > 64) {
2482 			*dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2483 					*PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2484 		} else if (ViewportStationary && (NumberOfDPPs == 1)) {
2485 			*dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2486 					*PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2487 					dml_floor(ViewportXStart, *PixelPTEReqWidth);
2488 		} else {
2489 			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2490 					*PixelPTEReqWidth;
2491 		}
2492 
2493 		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2494 	} else {
2495 		*dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2496 
2497 		if (ViewportStationary && (NumberOfDPPs == 1)) {
2498 			*dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2499 					*PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2500 		} else {
2501 			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2502 					* *PixelPTEReqHeight;
2503 		}
2504 
2505 		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2506 	}
2507 
2508 	if (GPUVMEnable != true)
2509 		*PixelPTEBytesPerRow = 0;
2510 	if (HostVMEnable == true)
2511 		*PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2512 
2513 #ifdef __DML_VBA_DEBUG__
2514 	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2515 	dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2516 	dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2517 	dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2518 	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2519 	dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2520 	dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2521 	dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2522 			__func__, *dpte_row_width_ub_one_row_per_frame);
2523 	dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2524 			__func__, *PixelPTEBytesPerRow_one_row_per_frame);
2525 	dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2526 			*MetaPTEBytesFrame);
2527 #endif
2528 
2529 	return PDEAndMetaPTEBytesFrame;
2530 } // CalculateVMAndRowBytes
2531 
dml32_CalculatePrefetchSourceLines(double VRatio,unsigned int VTaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,enum dm_rotation_angle SourceRotation,bool ViewportStationary,double SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,double * VInitPreFill,unsigned int * MaxNumSwath)2532 double dml32_CalculatePrefetchSourceLines(
2533 		double VRatio,
2534 		unsigned int VTaps,
2535 		bool Interlace,
2536 		bool ProgressiveToInterlaceUnitInOPP,
2537 		unsigned int SwathHeight,
2538 		enum dm_rotation_angle SourceRotation,
2539 		bool ViewportStationary,
2540 		double SwathWidth,
2541 		unsigned int ViewportHeight,
2542 		unsigned int ViewportXStart,
2543 		unsigned int ViewportYStart,
2544 
2545 		/* Output */
2546 		double *VInitPreFill,
2547 		unsigned int *MaxNumSwath)
2548 {
2549 
2550 	unsigned int vp_start_rot;
2551 	unsigned int sw0_tmp;
2552 	unsigned int MaxPartialSwath;
2553 	double numLines;
2554 
2555 #ifdef __DML_VBA_DEBUG__
2556 	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2557 	dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2558 	dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2559 	dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2560 	dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2561 	dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2562 #endif
2563 	if (ProgressiveToInterlaceUnitInOPP)
2564 		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2565 	else
2566 		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2567 
2568 	if (ViewportStationary) {
2569 		if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2570 			vp_start_rot = SwathHeight -
2571 					(((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2572 		} else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2573 			vp_start_rot = ViewportXStart;
2574 		} else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2575 			vp_start_rot = SwathHeight -
2576 					(((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2577 		} else {
2578 			vp_start_rot = ViewportYStart;
2579 		}
2580 		sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2581 		if (sw0_tmp < *VInitPreFill)
2582 			*MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2583 		else
2584 			*MaxNumSwath = 1;
2585 		MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2586 	} else {
2587 		*MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2588 		if (*VInitPreFill > 1)
2589 			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2590 		else
2591 			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2592 	}
2593 	numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2594 
2595 #ifdef __DML_VBA_DEBUG__
2596 	dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2597 	dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2598 	dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2599 	dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2600 	dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2601 #endif
2602 	return numLines;
2603 
2604 } // CalculatePrefetchSourceLines
2605 
dml32_CalculateMALLUseForStaticScreen(unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCNFinal,enum dm_use_mall_for_static_screen_mode * UseMALLForStaticScreen,unsigned int SurfaceSizeInMALL[],bool one_row_per_frame_fits_in_buffer[],bool UsesMALLForStaticScreen[])2606 void dml32_CalculateMALLUseForStaticScreen(
2607 		unsigned int NumberOfActiveSurfaces,
2608 		unsigned int MALLAllocatedForDCNFinal,
2609 		enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2610 		unsigned int SurfaceSizeInMALL[],
2611 		bool one_row_per_frame_fits_in_buffer[],
2612 
2613 		/* output */
2614 		bool UsesMALLForStaticScreen[])
2615 {
2616 	unsigned int k;
2617 	unsigned int SurfaceToAddToMALL;
2618 	bool CanAddAnotherSurfaceToMALL;
2619 	unsigned int TotalSurfaceSizeInMALL;
2620 
2621 	TotalSurfaceSizeInMALL = 0;
2622 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2623 		UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2624 		if (UsesMALLForStaticScreen[k])
2625 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2626 #ifdef __DML_VBA_DEBUG__
2627 		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2628 		dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n",  __func__, k, TotalSurfaceSizeInMALL);
2629 #endif
2630 	}
2631 
2632 	SurfaceToAddToMALL = 0;
2633 	CanAddAnotherSurfaceToMALL = true;
2634 	while (CanAddAnotherSurfaceToMALL) {
2635 		CanAddAnotherSurfaceToMALL = false;
2636 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2637 			if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2638 					!UsesMALLForStaticScreen[k] &&
2639 					UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2640 					one_row_per_frame_fits_in_buffer[k] &&
2641 					(!CanAddAnotherSurfaceToMALL ||
2642 					SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2643 				CanAddAnotherSurfaceToMALL = true;
2644 				SurfaceToAddToMALL = k;
2645 #ifdef __DML_VBA_DEBUG__
2646 				dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2647 						__func__, k, UseMALLForStaticScreen[k]);
2648 #endif
2649 			}
2650 		}
2651 		if (CanAddAnotherSurfaceToMALL) {
2652 			UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2653 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2654 
2655 #ifdef __DML_VBA_DEBUG__
2656 			dml_print("DML::%s: SurfaceToAddToMALL       = %d\n",  __func__, SurfaceToAddToMALL);
2657 			dml_print("DML::%s: TotalSurfaceSizeInMALL   = %d\n",  __func__, TotalSurfaceSizeInMALL);
2658 #endif
2659 
2660 		}
2661 	}
2662 }
2663 
dml32_CalculateRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)2664 void dml32_CalculateRowBandwidth(
2665 		bool GPUVMEnable,
2666 		enum source_format_class SourcePixelFormat,
2667 		double VRatio,
2668 		double VRatioChroma,
2669 		bool DCCEnable,
2670 		double LineTime,
2671 		unsigned int MetaRowByteLuma,
2672 		unsigned int MetaRowByteChroma,
2673 		unsigned int meta_row_height_luma,
2674 		unsigned int meta_row_height_chroma,
2675 		unsigned int PixelPTEBytesPerRowLuma,
2676 		unsigned int PixelPTEBytesPerRowChroma,
2677 		unsigned int dpte_row_height_luma,
2678 		unsigned int dpte_row_height_chroma,
2679 		/* Output */
2680 		double *meta_row_bw,
2681 		double *dpte_row_bw)
2682 {
2683 	if (DCCEnable != true) {
2684 		*meta_row_bw = 0;
2685 	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2686 			SourcePixelFormat == dm_rgbe_alpha) {
2687 		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2688 				MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2689 	} else {
2690 		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2691 	}
2692 
2693 	if (GPUVMEnable != true) {
2694 		*dpte_row_bw = 0;
2695 	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2696 			SourcePixelFormat == dm_rgbe_alpha) {
2697 		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2698 				VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2699 	} else {
2700 		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2701 	}
2702 }
2703 
/*
 * Return the largest of the three urgent latency inputs. When
 * DoUrgentLatencyAdjustment is set, the result is additionally offset by
 *   UrgentLatencyAdjustmentFabricClockComponent *
 *   (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool   DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double worst_latency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return worst_latency;

	return worst_latency + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2722 
dml32_CalculateUrgentBurstFactor(enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,unsigned int swath_width_luma_ub,unsigned int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double LineTime,double UrgentLatency,double CursorBufferSize,unsigned int CursorWidth,unsigned int CursorBPP,double VRatio,double VRatioC,double BytePerPixelInDETY,double BytePerPixelInDETC,unsigned int DETBufferSizeY,unsigned int DETBufferSizeC,double * UrgentBurstFactorCursor,double * UrgentBurstFactorLuma,double * UrgentBurstFactorChroma,bool * NotEnoughUrgentLatencyHiding)2723 void dml32_CalculateUrgentBurstFactor(
2724 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2725 		unsigned int    swath_width_luma_ub,
2726 		unsigned int    swath_width_chroma_ub,
2727 		unsigned int SwathHeightY,
2728 		unsigned int SwathHeightC,
2729 		double  LineTime,
2730 		double  UrgentLatency,
2731 		double  CursorBufferSize,
2732 		unsigned int CursorWidth,
2733 		unsigned int CursorBPP,
2734 		double  VRatio,
2735 		double  VRatioC,
2736 		double  BytePerPixelInDETY,
2737 		double  BytePerPixelInDETC,
2738 		unsigned int    DETBufferSizeY,
2739 		unsigned int    DETBufferSizeC,
2740 		/* Output */
2741 		double *UrgentBurstFactorCursor,
2742 		double *UrgentBurstFactorLuma,
2743 		double *UrgentBurstFactorChroma,
2744 		bool   *NotEnoughUrgentLatencyHiding)
2745 {
2746 	double       LinesInDETLuma;
2747 	double       LinesInDETChroma;
2748 	unsigned int LinesInCursorBuffer;
2749 	double       CursorBufferSizeInTime;
2750 	double       DETBufferSizeInTimeLuma;
2751 	double       DETBufferSizeInTimeChroma;
2752 
2753 	*NotEnoughUrgentLatencyHiding = 0;
2754 
2755 	if (CursorWidth > 0) {
2756 		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2757 				(CursorWidth * CursorBPP / 8.0)), 1.0);
2758 		if (VRatio > 0) {
2759 			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2760 			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2761 				*NotEnoughUrgentLatencyHiding = 1;
2762 				*UrgentBurstFactorCursor = 0;
2763 			} else {
2764 				*UrgentBurstFactorCursor = CursorBufferSizeInTime /
2765 						(CursorBufferSizeInTime - UrgentLatency);
2766 			}
2767 		} else {
2768 			*UrgentBurstFactorCursor = 1;
2769 		}
2770 	}
2771 
2772 	LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2773 			DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2774 
2775 	if (VRatio > 0) {
2776 		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2777 		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2778 			*NotEnoughUrgentLatencyHiding = 1;
2779 			*UrgentBurstFactorLuma = 0;
2780 		} else {
2781 			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2782 		}
2783 	} else {
2784 		*UrgentBurstFactorLuma = 1;
2785 	}
2786 
2787 	if (BytePerPixelInDETC > 0) {
2788 		LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2789 					1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2790 					/ swath_width_chroma_ub;
2791 
2792 		if (VRatio > 0) {
2793 			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2794 			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2795 				*NotEnoughUrgentLatencyHiding = 1;
2796 				*UrgentBurstFactorChroma = 0;
2797 			} else {
2798 				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2799 						/ (DETBufferSizeInTimeChroma - UrgentLatency);
2800 			}
2801 		} else {
2802 			*UrgentBurstFactorChroma = 1;
2803 		}
2804 	}
2805 } // CalculateUrgentBurstFactor
2806 
dml32_CalculateDCFCLKDeepSleep(unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],double VRatio[],double VRatioChroma[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerSurface[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int ReturnBusWidth,double * DCFClkDeepSleep)2807 void dml32_CalculateDCFCLKDeepSleep(
2808 		unsigned int NumberOfActiveSurfaces,
2809 		unsigned int BytePerPixelY[],
2810 		unsigned int BytePerPixelC[],
2811 		double VRatio[],
2812 		double VRatioChroma[],
2813 		double SwathWidthY[],
2814 		double SwathWidthC[],
2815 		unsigned int DPPPerSurface[],
2816 		double HRatio[],
2817 		double HRatioChroma[],
2818 		double PixelClock[],
2819 		double PSCL_THROUGHPUT[],
2820 		double PSCL_THROUGHPUT_CHROMA[],
2821 		double Dppclk[],
2822 		double ReadBandwidthLuma[],
2823 		double ReadBandwidthChroma[],
2824 		unsigned int ReturnBusWidth,
2825 
2826 		/* Output */
2827 		double *DCFClkDeepSleep)
2828 {
2829 	unsigned int k;
2830 	double   DisplayPipeLineDeliveryTimeLuma;
2831 	double   DisplayPipeLineDeliveryTimeChroma;
2832 	double   DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2833 	double ReadBandwidth = 0.0;
2834 
2835 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2836 
2837 		if (VRatio[k] <= 1) {
2838 			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2839 					/ PixelClock[k];
2840 		} else {
2841 			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2842 		}
2843 		if (BytePerPixelC[k] == 0) {
2844 			DisplayPipeLineDeliveryTimeChroma = 0;
2845 		} else {
2846 			if (VRatioChroma[k] <= 1) {
2847 				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2848 						DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2849 			} else {
2850 				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2851 						/ Dppclk[k];
2852 			}
2853 		}
2854 
2855 		if (BytePerPixelC[k] > 0) {
2856 			DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2857 					BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2858 					__DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2859 					32.0 / DisplayPipeLineDeliveryTimeChroma);
2860 		} else {
2861 			DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2862 					64.0 / DisplayPipeLineDeliveryTimeLuma;
2863 		}
2864 		DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2865 
2866 #ifdef __DML_VBA_DEBUG__
2867 		dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2868 		dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2869 #endif
2870 	}
2871 
2872 	for (k = 0; k < NumberOfActiveSurfaces; ++k)
2873 		ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2874 
2875 	*DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2876 
2877 #ifdef __DML_VBA_DEBUG__
2878 	dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2879 	dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2880 	dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2881 	dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2882 #endif
2883 
2884 	for (k = 0; k < NumberOfActiveSurfaces; ++k)
2885 		*DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2886 #ifdef __DML_VBA_DEBUG__
2887 	dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2888 #endif
2889 } // CalculateDCFCLKDeepSleep
2890 
/*
 * Compute the writeback delay.
 *
 * Returns 0 when the unclamped index of the last output line is negative,
 * otherwise that index times the line length, plus
 * (HTotal - WritebackDestinationWidth), plus 80.
 *
 * WritebackPixelFormat and WritebackHRatio are not read by this function.
 */
double dml32_CalculateWriteBackDelay(
		enum source_format_class WritebackPixelFormat,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackVTaps,
		unsigned int         WritebackDestinationWidth,
		unsigned int         WritebackDestinationHeight,
		unsigned int         WritebackSourceHeight,
		unsigned int HTotal)
{
	double v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;
	/* The wider of the destination width and ceil(width / 6) * VTaps. */
	double line_length = dml_max((double) WritebackDestinationWidth,
			dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
	double last_output_line = WritebackDestinationHeight - 1 -
			dml_ceil(((double)WritebackSourceHeight -
					(double) v_init) / (double)WritebackVRatio, 1.0);

	if (last_output_line < 0)
		return 0;

	return last_output_line * line_length +
			(HTotal - WritebackDestinationWidth) + 80;
}
2920 
dml32_UseMinimumDCFCLK(enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool DRRDisplay[],bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,unsigned int MaxInterDCNTileRepeaters,unsigned int MaxPrefetchMode,double DRAMClockChangeLatencyFinal,double FCLKChangeLatency,double SREnterPlusExitTime,unsigned int ReturnBusWidth,unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,unsigned int PixelChunkSizeInKByte,unsigned int MetaChunkSize,bool GPUVMEnable,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int NumberOfActiveSurfaces,double HostVMMinPageSize,unsigned int HostVMMaxNonCachedPageTableLevels,bool DynamicMetadataVMEnabled,bool ImmediateFlipRequirement,bool ProgressiveToInterlaceUnitInOPP,double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,unsigned int VTotal[],unsigned int VActive[],unsigned int DynamicMetadataTransmittedBytes[],unsigned int DynamicMetadataLinesBeforeActiveRequired[],bool Interlace[],double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],double RequiredDISPCLK[][2],double UrgLatency[],unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],double ProjectedDCFClkDeepSleep[][2],double MaximumVStartup[][2][DC__NUM_DPP__MAX],unsigned int TotalNumberOfActiveDPP[][2],unsigned int TotalNumberOfDCCActiveDPP[][2],unsigned int dpte_group_bytes[],double PrefetchLinesY[][2][DC__NUM_DPP__MAX],double PrefetchLinesC[][2][DC__NUM_DPP__MAX],unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],unsigned int HTotal[],double PixelClock[],double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],double MetaRowBytes[][2][DC__NUM_DPP__MAX],bool DynamicMetadataEnable[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double DCFCLKPerState[],double 
DCFCLKState[][2])2921 void dml32_UseMinimumDCFCLK(
2922 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2923 		bool DRRDisplay[],
2924 		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2925 		unsigned int MaxInterDCNTileRepeaters,
2926 		unsigned int MaxPrefetchMode,
2927 		double DRAMClockChangeLatencyFinal,
2928 		double FCLKChangeLatency,
2929 		double SREnterPlusExitTime,
2930 		unsigned int ReturnBusWidth,
2931 		unsigned int RoundTripPingLatencyCycles,
2932 		unsigned int ReorderingBytes,
2933 		unsigned int PixelChunkSizeInKByte,
2934 		unsigned int MetaChunkSize,
2935 		bool GPUVMEnable,
2936 		unsigned int GPUVMMaxPageTableLevels,
2937 		bool HostVMEnable,
2938 		unsigned int NumberOfActiveSurfaces,
2939 		double HostVMMinPageSize,
2940 		unsigned int HostVMMaxNonCachedPageTableLevels,
2941 		bool DynamicMetadataVMEnabled,
2942 		bool ImmediateFlipRequirement,
2943 		bool ProgressiveToInterlaceUnitInOPP,
2944 		double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2945 		double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2946 		unsigned int VTotal[],
2947 		unsigned int VActive[],
2948 		unsigned int DynamicMetadataTransmittedBytes[],
2949 		unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2950 		bool Interlace[],
2951 		double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2952 		double RequiredDISPCLK[][2],
2953 		double UrgLatency[],
2954 		unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2955 		double ProjectedDCFClkDeepSleep[][2],
2956 		double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2957 		unsigned int TotalNumberOfActiveDPP[][2],
2958 		unsigned int TotalNumberOfDCCActiveDPP[][2],
2959 		unsigned int dpte_group_bytes[],
2960 		double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2961 		double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2962 		unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2963 		unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2964 		unsigned int BytePerPixelY[],
2965 		unsigned int BytePerPixelC[],
2966 		unsigned int HTotal[],
2967 		double PixelClock[],
2968 		double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2969 		double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2970 		double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2971 		bool DynamicMetadataEnable[],
2972 		double ReadBandwidthLuma[],
2973 		double ReadBandwidthChroma[],
2974 		double DCFCLKPerState[],
2975 		/* Output */
2976 		double DCFCLKState[][2])
2977 {
2978 	unsigned int i, j, k;
2979 	unsigned int     dummy1;
2980 	double dummy2, dummy3;
2981 	double   NormalEfficiency;
2982 	double   TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2983 
2984 	NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2985 	for  (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2986 		for  (j = 0; j <= 1; ++j) {
2987 			double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2988 			double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2989 			double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2990 			double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2991 			double MinimumTWait = 0.0;
2992 			double DPTEBandwidth;
2993 			double DCFCLKRequiredForAverageBandwidth;
2994 			unsigned int ExtraLatencyBytes;
2995 			double ExtraLatencyCycles;
2996 			double DCFCLKRequiredForPeakBandwidth;
2997 			unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2998 			double MinimumTvmPlus2Tr0;
2999 
3000 			TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
3001 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3002 				TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
3003 						+ NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
3004 								/ (15.75 * HTotal[k] / PixelClock[k]);
3005 			}
3006 
3007 			for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
3008 				NoOfDPPState[k] = NoOfDPP[i][j][k];
3009 
3010 			DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
3011 			DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
3012 
3013 			ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
3014 					TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
3015 					TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
3016 					NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
3017 					HostVMMaxNonCachedPageTableLevels);
3018 			ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
3019 					+ ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
3020 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3021 				double DCFCLKCyclesRequiredInPrefetch;
3022 				double PrefetchTime;
3023 
3024 				PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
3025 						* swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
3026 						+ PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
3027 								* BytePerPixelC[k]) / NormalEfficiency
3028 						/ ReturnBusWidth;
3029 				DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
3030 						+ PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
3031 								/ NormalEfficiency / ReturnBusWidth
3032 								* (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
3033 						+ 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
3034 								/ ReturnBusWidth
3035 						+ 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
3036 						+ PixelDCFCLKCyclesRequiredInPrefetch[k];
3037 				PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
3038 						* HTotal[k] / PixelClock[k];
3039 				DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
3040 						DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
3041 						UrgLatency[i] * GPUVMMaxPageTableLevels *
3042 						(HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
3043 
3044 				MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
3045 						UseMALLForPStateChange[k],
3046 						SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3047 						DRRDisplay[k],
3048 						DRAMClockChangeLatencyFinal,
3049 						FCLKChangeLatency,
3050 						UrgLatency[i],
3051 						SREnterPlusExitTime);
3052 
3053 				PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3054 						MinimumTWait - UrgLatency[i] *
3055 						((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3056 						GPUVMMaxPageTableLevels - 2) *  (HostVMEnable == true ?
3057 						HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3058 						DynamicMetadataVMExtraLatency[k];
3059 
3060 				if (PrefetchTime > 0) {
3061 					double ExpectedVRatioPrefetch;
3062 
3063 					ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3064 							PixelDCFCLKCyclesRequiredInPrefetch[k] /
3065 							DCFCLKCyclesRequiredInPrefetch);
3066 					DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3067 							PixelDCFCLKCyclesRequiredInPrefetch[k] /
3068 							PrefetchPixelLinesTime[k] *
3069 							dml_max(1.0, ExpectedVRatioPrefetch) *
3070 							dml_max(1.0, ExpectedVRatioPrefetch / 4);
3071 					if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3072 						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3073 								DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3074 								NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3075 								NormalEfficiency / ReturnBusWidth;
3076 					}
3077 				} else {
3078 					DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3079 				}
3080 				if (DynamicMetadataEnable[k] == true) {
3081 					double TSetupPipe;
3082 					double TdmbfPipe;
3083 					double TdmsksPipe;
3084 					double TdmecPipe;
3085 					double AllowedTimeForUrgentExtraLatency;
3086 
3087 					dml32_CalculateVUpdateAndDynamicMetadataParameters(
3088 							MaxInterDCNTileRepeaters,
3089 							RequiredDPPCLKPerSurface[i][j][k],
3090 							RequiredDISPCLK[i][j],
3091 							ProjectedDCFClkDeepSleep[i][j],
3092 							PixelClock[k],
3093 							HTotal[k],
3094 							VTotal[k] - VActive[k],
3095 							DynamicMetadataTransmittedBytes[k],
3096 							DynamicMetadataLinesBeforeActiveRequired[k],
3097 							Interlace[k],
3098 							ProgressiveToInterlaceUnitInOPP,
3099 
3100 							/* output */
3101 							&TSetupPipe,
3102 							&TdmbfPipe,
3103 							&TdmecPipe,
3104 							&TdmsksPipe,
3105 							&dummy1,
3106 							&dummy2,
3107 							&dummy3);
3108 					AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3109 							PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3110 							TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3111 					if (AllowedTimeForUrgentExtraLatency > 0)
3112 						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3113 								dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3114 								ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3115 					else
3116 						DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3117 				}
3118 			}
3119 			DCFCLKRequiredForPeakBandwidth = 0;
3120 			for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3121 				DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3122 						DCFCLKRequiredForPeakBandwidthPerSurface[k];
3123 			}
3124 			MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3125 					(HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3126 					(HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3127 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3128 				double MaximumTvmPlus2Tr0PlusTsw;
3129 
3130 				MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3131 						PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3132 				if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3133 					DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3134 				} else {
3135 					DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3136 							2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3137 								MinimumTvmPlus2Tr0 -
3138 								PrefetchPixelLinesTime[k] / 4),
3139 							(2 * ExtraLatencyCycles +
3140 								PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3141 								(MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3142 				}
3143 			}
3144 			DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3145 					dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3146 		}
3147 	}
3148 }
3149 
/*
 * Compute the number of extra latency bytes.
 *
 * The base amount is ReorderingBytes plus 1024 times
 * (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
 *  TotalNumberOfDCCActiveDPP * MetaChunkSize).
 * When GPUVMEnable is set, each surface additionally contributes
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * levels) *
 *   HostVMInefficiencyFactor,
 * where levels is 0 unless both GPUVMEnable and HostVMEnable are set, and is
 * otherwise HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (clamped
 * at 0) depending on HostVMMinPageSize (< 2048, < 1048576, or larger).
 *
 * The sum is accumulated as a double and converted to unsigned int on return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int k;
	unsigned int host_vm_levels = 0;
	double total_bytes;

	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048)
			host_vm_levels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			host_vm_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			host_vm_levels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	total_bytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (GPUVMEnable != true)
		return total_bytes;

	for (k = 0; k < NumberOfActiveSurfaces; ++k)
		total_bytes += NumberOfDPP[k] * dpte_group_bytes[k] *
				(1 + 8 * host_vm_levels) * HostVMInefficiencyFactor;

	return total_bytes;
}
3190 
/*
 * Compute the VUpdate/VReady pixel offsets and the dynamic metadata timings.
 *
 * Outputs:
 *  *VUpdateWidthPix, *VReadyOffsetPix - clock-period sums scaled by PixelClock
 *      and rounded up to a whole number.
 *  *VUpdateOffsetPix - ceil(HTotal / 4).
 *  *TSetup - (VUpdateOffsetPix + VUpdateWidthPix + VReadyOffsetPix) / PixelClock.
 *  *Tdmbf  - DynamicMetadataTransmittedBytes / 4 / Dispclk.
 *  *Tdmec  - HTotal / PixelClock.
 *  *Tdmsks - half of the VBlank duration when
 *      DynamicMetadataLinesBeforeActiveRequired is 0, otherwise the duration
 *      of that many lines; halved again when InterlaceEnable is 1 and
 *      ProgressiveToInterlaceUnitInOPP is false.
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix  =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix  = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so they need %f, not %d. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3245 
dml32_CalculateTWait(unsigned int PrefetchMode,enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,bool DRRDisplay,double DRAMClockChangeLatency,double FCLKChangeLatency,double UrgentLatency,double SREnterPlusExitTime)3246 double dml32_CalculateTWait(
3247 		unsigned int PrefetchMode,
3248 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3249 		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3250 		bool DRRDisplay,
3251 		double DRAMClockChangeLatency,
3252 		double FCLKChangeLatency,
3253 		double UrgentLatency,
3254 		double SREnterPlusExitTime)
3255 {
3256 	double TWait = 0.0;
3257 
3258 	if (PrefetchMode == 0 &&
3259 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3260 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3261 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3262 			!(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3263 		TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3264 	} else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3265 		TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3266 	} else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3267 		TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3268 	} else {
3269 		TWait = UrgentLatency;
3270 	}
3271 
3272 #ifdef __DML_VBA_DEBUG__
3273 	dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3274 	dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3275 #endif
3276 	return TWait;
3277 } // CalculateTWait
3278 
// Function: dml32_get_return_bw_mbps
// Result is in megabytes per second (MBps)
dml32_get_return_bw_mbps(const soc_bounding_box_st * soc,const int VoltageLevel,const bool HostVMEnable,const double DCFCLK,const double FabricClock,const double DRAMSpeed)3281 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3282 		const int VoltageLevel,
3283 		const bool HostVMEnable,
3284 		const double DCFCLK,
3285 		const double FabricClock,
3286 		const double DRAMSpeed)
3287 {
3288 	double ReturnBW = 0.;
3289 	double IdealSDPPortBandwidth    = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3290 	double IdealFabricBandwidth     = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3291 	double IdealDRAMBandwidth       = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3292 	double PixelDataOnlyReturnBW    = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3293 			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3294 			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe  :
3295 					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3296 	double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3297 			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3298 			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3299 					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3300 
3301 	if (HostVMEnable != true)
3302 		ReturnBW = PixelDataOnlyReturnBW;
3303 	else
3304 		ReturnBW = PixelMixedWithVMDataReturnBW;
3305 
3306 #ifdef __DML_VBA_DEBUG__
3307 	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3308 	dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3309 	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3310 	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3311 	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3312 	dml_print("DML::%s: IdealSDPPortBandwidth        = %f\n", __func__, IdealSDPPortBandwidth);
3313 	dml_print("DML::%s: IdealFabricBandwidth         = %f\n", __func__, IdealFabricBandwidth);
3314 	dml_print("DML::%s: IdealDRAMBandwidth           = %f\n", __func__, IdealDRAMBandwidth);
3315 	dml_print("DML::%s: PixelDataOnlyReturnBW        = %f\n", __func__, PixelDataOnlyReturnBW);
3316 	dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3317 	dml_print("DML::%s: ReturnBW                     = %f MBps\n", __func__, ReturnBW);
3318 #endif
3319 	return ReturnBW;
3320 }
3321 
// Function: dml32_get_return_bw_mbps_vm_only
// Result is in megabytes per second
dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st * soc,const int VoltageLevel,const double DCFCLK,const double FabricClock,const double DRAMSpeed)3324 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3325 		const int VoltageLevel,
3326 		const double DCFCLK,
3327 		const double FabricClock,
3328 		const double DRAMSpeed)
3329 {
3330 	double VMDataOnlyReturnBW = dml_min3(
3331 			soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3332 			FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3333 					* soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3334 			DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3335 					* (VoltageLevel < 2 ?
3336 							soc->pct_ideal_dram_bw_after_urgent_strobe :
3337 							soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3338 #ifdef __DML_VBA_DEBUG__
3339 	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3340 	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3341 	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3342 	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3343 	dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3344 #endif
3345 	return VMDataOnlyReturnBW;
3346 }
3347 
dml32_CalculateExtraLatency(unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,double DCFCLK,unsigned int TotalNumberOfActiveDPP,unsigned int PixelChunkSizeInKByte,unsigned int TotalNumberOfDCCActiveDPP,unsigned int MetaChunkSize,double ReturnBW,bool GPUVMEnable,bool HostVMEnable,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],double HostVMInefficiencyFactor,double HostVMMinPageSize,unsigned int HostVMMaxNonCachedPageTableLevels)3348 double dml32_CalculateExtraLatency(
3349 		unsigned int RoundTripPingLatencyCycles,
3350 		unsigned int ReorderingBytes,
3351 		double DCFCLK,
3352 		unsigned int TotalNumberOfActiveDPP,
3353 		unsigned int PixelChunkSizeInKByte,
3354 		unsigned int TotalNumberOfDCCActiveDPP,
3355 		unsigned int MetaChunkSize,
3356 		double ReturnBW,
3357 		bool GPUVMEnable,
3358 		bool HostVMEnable,
3359 		unsigned int NumberOfActiveSurfaces,
3360 		unsigned int NumberOfDPP[],
3361 		unsigned int dpte_group_bytes[],
3362 		double HostVMInefficiencyFactor,
3363 		double HostVMMinPageSize,
3364 		unsigned int HostVMMaxNonCachedPageTableLevels)
3365 {
3366 	double ExtraLatencyBytes;
3367 	double ExtraLatency;
3368 
3369 	ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3370 			ReorderingBytes,
3371 			TotalNumberOfActiveDPP,
3372 			PixelChunkSizeInKByte,
3373 			TotalNumberOfDCCActiveDPP,
3374 			MetaChunkSize,
3375 			GPUVMEnable,
3376 			HostVMEnable,
3377 			NumberOfActiveSurfaces,
3378 			NumberOfDPP,
3379 			dpte_group_bytes,
3380 			HostVMInefficiencyFactor,
3381 			HostVMMinPageSize,
3382 			HostVMMaxNonCachedPageTableLevels);
3383 
3384 	ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3385 
3386 #ifdef __DML_VBA_DEBUG__
3387 	dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3388 	dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3389 	dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3390 	dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3391 	dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3392 #endif
3393 
3394 	return ExtraLatency;
3395 } // CalculateExtraLatency
3396 
dml32_CalculatePrefetchSchedule(struct vba_vars_st * v,unsigned int k,double HostVMInefficiencyFactor,DmlPipe * myPipe,unsigned int DSCDelay,unsigned int DPP_RECOUT_WIDTH,unsigned int VStartup,unsigned int MaxVStartup,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,unsigned int VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,unsigned int VInitPreFillC,unsigned int MaxNumSwathC,unsigned int swath_width_luma_ub,unsigned int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,double TPreReq,bool ExtendPrefetchIfPossible,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,double * TSetup,unsigned int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)3397 bool dml32_CalculatePrefetchSchedule(
3398 		struct vba_vars_st *v,
3399 		unsigned int k,
3400 		double HostVMInefficiencyFactor,
3401 		DmlPipe *myPipe,
3402 		unsigned int DSCDelay,
3403 		unsigned int DPP_RECOUT_WIDTH,
3404 		unsigned int VStartup,
3405 		unsigned int MaxVStartup,
3406 		double UrgentLatency,
3407 		double UrgentExtraLatency,
3408 		double TCalc,
3409 		unsigned int PDEAndMetaPTEBytesFrame,
3410 		unsigned int MetaRowByte,
3411 		unsigned int PixelPTEBytesPerRow,
3412 		double PrefetchSourceLinesY,
3413 		unsigned int SwathWidthY,
3414 		unsigned int VInitPreFillY,
3415 		unsigned int MaxNumSwathY,
3416 		double PrefetchSourceLinesC,
3417 		unsigned int SwathWidthC,
3418 		unsigned int VInitPreFillC,
3419 		unsigned int MaxNumSwathC,
3420 		unsigned int swath_width_luma_ub,
3421 		unsigned int swath_width_chroma_ub,
3422 		unsigned int SwathHeightY,
3423 		unsigned int SwathHeightC,
3424 		double TWait,
3425 		double TPreReq,
3426 		bool ExtendPrefetchIfPossible,
3427 		/* Output */
3428 		double   *DSTXAfterScaler,
3429 		double   *DSTYAfterScaler,
3430 		double *DestinationLinesForPrefetch,
3431 		double *PrefetchBandwidth,
3432 		double *DestinationLinesToRequestVMInVBlank,
3433 		double *DestinationLinesToRequestRowInVBlank,
3434 		double *VRatioPrefetchY,
3435 		double *VRatioPrefetchC,
3436 		double *RequiredPrefetchPixDataBWLuma,
3437 		double *RequiredPrefetchPixDataBWChroma,
3438 		bool   *NotEnoughTimeForDynamicMetadata,
3439 		double *Tno_bw,
3440 		double *prefetch_vmrow_bw,
3441 		double *Tdmdl_vm,
3442 		double *Tdmdl,
3443 		double *TSetup,
3444 		unsigned int   *VUpdateOffsetPix,
3445 		double   *VUpdateWidthPix,
3446 		double   *VReadyOffsetPix)
3447 {
3448 	double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
3449 	bool MyError = false;
3450 	unsigned int DPPCycles, DISPCLKCycles;
3451 	double DSTTotalPixelsAfterScaler;
3452 	double LineTime;
3453 	double dst_y_prefetch_equ;
3454 	double prefetch_bw_oto;
3455 	double Tvm_oto;
3456 	double Tr0_oto;
3457 	double Tvm_oto_lines;
3458 	double Tr0_oto_lines;
3459 	double dst_y_prefetch_oto;
3460 	double TimeForFetchingMetaPTE = 0;
3461 	double TimeForFetchingRowInVBlank = 0;
3462 	double LinesToRequestPrefetchPixelData = 0;
3463 	double LinesForPrefetchBandwidth = 0;
3464 	unsigned int HostVMDynamicLevelsTrips;
3465 	double  trip_to_mem;
3466 	double  Tvm_trips;
3467 	double  Tr0_trips;
3468 	double  Tvm_trips_rounded;
3469 	double  Tr0_trips_rounded;
3470 	double  Lsw_oto;
3471 	double  Tpre_rounded;
3472 	double  prefetch_bw_equ;
3473 	double  Tvm_equ;
3474 	double  Tr0_equ;
3475 	double  Tdmbf;
3476 	double  Tdmec;
3477 	double  Tdmsks;
3478 	double  prefetch_sw_bytes;
3479 	double  bytes_pp;
3480 	double  dep_bytes;
3481 	unsigned int max_vratio_pre = v->MaxVRatioPre;
3482 	double  min_Lsw;
3483 	double  Tsw_est1 = 0;
3484 	double  Tsw_est3 = 0;
3485 
3486 	if (v->GPUVMEnable == true && v->HostVMEnable == true)
3487 		HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3488 	else
3489 		HostVMDynamicLevelsTrips = 0;
3490 #ifdef __DML_VBA_DEBUG__
3491 	dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable);
3492 	dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels);
3493 	dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3494 	dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3495 			__func__, v->HostVMEnable, HostVMInefficiencyFactor);
3496 #endif
3497 	dml32_CalculateVUpdateAndDynamicMetadataParameters(
3498 			v->MaxInterDCNTileRepeaters,
3499 			myPipe->Dppclk,
3500 			myPipe->Dispclk,
3501 			myPipe->DCFClkDeepSleep,
3502 			myPipe->PixelClock,
3503 			myPipe->HTotal,
3504 			myPipe->VBlank,
3505 			v->DynamicMetadataTransmittedBytes[k],
3506 			v->DynamicMetadataLinesBeforeActiveRequired[k],
3507 			myPipe->InterlaceEnable,
3508 			myPipe->ProgressiveToInterlaceUnitInOPP,
3509 			TSetup,
3510 
3511 			/* output */
3512 			&Tdmbf,
3513 			&Tdmec,
3514 			&Tdmsks,
3515 			VUpdateOffsetPix,
3516 			VUpdateWidthPix,
3517 			VReadyOffsetPix);
3518 
3519 	LineTime = myPipe->HTotal / myPipe->PixelClock;
3520 	trip_to_mem = UrgentLatency;
3521 	Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
3522 
3523 	if (v->DynamicMetadataVMEnabled == true)
3524 		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
3525 	else
3526 		*Tdmdl = TWait + UrgentExtraLatency;
3527 
3528 #ifdef __DML_VBA_ALLOW_DELTA__
3529 	if (v->DynamicMetadataEnable[k] == false)
3530 		*Tdmdl = 0.0;
3531 #endif
3532 
3533 	if (v->DynamicMetadataEnable[k] == true) {
3534 		if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
3535 			*NotEnoughTimeForDynamicMetadata = true;
3536 #ifdef __DML_VBA_DEBUG__
3537 			dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3538 			dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3539 					__func__, Tdmbf);
3540 			dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3541 			dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3542 					__func__, Tdmsks);
3543 			dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3544 					__func__, *Tdmdl);
3545 #endif
3546 		} else {
3547 			*NotEnoughTimeForDynamicMetadata = false;
3548 		}
3549 	} else {
3550 		*NotEnoughTimeForDynamicMetadata = false;
3551 	}
3552 
3553 	*Tdmdl_vm =  (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true &&
3554 			v->GPUVMEnable == true ? TWait + Tvm_trips : 0);
3555 
3556 	if (myPipe->ScalerEnabled)
3557 		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
3558 	else
3559 		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;
3560 
3561 	DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;
3562 
3563 	DISPCLKCycles = v->DISPCLKDelaySubtotal;
3564 
3565 	if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3566 		return true;
3567 
3568 	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
3569 			myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3570 
3571 	*DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3572 			+ (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3573 			+ ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3574 					myPipe->HActive / 2 : 0)
3575 			+ ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3576 
3577 #ifdef __DML_VBA_DEBUG__
3578 	dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
3579 	dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3580 	dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3581 	dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
3582 	dml_print("DML::%s: DISPCLK: %f\n", __func__,  myPipe->Dispclk);
3583 	dml_print("DML::%s: DSCDelay: %d\n", __func__,  DSCDelay);
3584 	dml_print("DML::%s: ODMMode: %d\n", __func__,  myPipe->ODMMode);
3585 	dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3586 	dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__,  *DSTXAfterScaler);
3587 #endif
3588 
3589 	if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3590 		*DSTYAfterScaler = 1;
3591 	else
3592 		*DSTYAfterScaler = 0;
3593 
3594 	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3595 	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3596 	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3597 #ifdef __DML_VBA_DEBUG__
3598 	dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__,  *DSTXAfterScaler);
3599 	dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3600 #endif
3601 
3602 	MyError = false;
3603 
3604 	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
3605 
3606 	if (v->GPUVMEnable == true) {
3607 		Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
3608 		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3609 		if (v->GPUVMMaxPageTableLevels >= 3) {
3610 			*Tno_bw = UrgentExtraLatency + trip_to_mem *
3611 					(double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
3612 		} else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) {
3613 			Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
3614 					4.0 * LineTime; // VBA_ERROR
3615 			*Tno_bw = UrgentExtraLatency;
3616 		} else {
3617 			*Tno_bw = 0;
3618 		}
3619 	} else if (myPipe->DCCEnable == true) {
3620 		Tvm_trips_rounded = LineTime / 4.0;
3621 		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3622 		*Tno_bw = 0;
3623 	} else {
3624 		Tvm_trips_rounded = LineTime / 4.0;
3625 		Tr0_trips_rounded = LineTime / 2.0;
3626 		*Tno_bw = 0;
3627 	}
3628 	Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
3629 	Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
3630 
3631 	if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3632 			|| myPipe->SourcePixelFormat == dm_420_12) {
3633 		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3634 	} else {
3635 		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3636 	}
3637 
3638 	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3639 			+ PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3640 	prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3641 			prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
3642 
3643 	min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
3644 	min_Lsw = dml_max(min_Lsw, 1.0);
3645 	Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
3646 
3647 	if (v->GPUVMEnable == true) {
3648 		Tvm_oto = dml_max3(
3649 				Tvm_trips,
3650 				*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
3651 				LineTime / 4.0);
3652 	} else
3653 		Tvm_oto = LineTime / 4.0;
3654 
3655 	if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3656 		Tr0_oto = dml_max4(
3657 				Tr0_trips,
3658 				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
3659 				(LineTime - Tvm_oto)/2.0,
3660 				LineTime / 4.0);
3661 #ifdef __DML_VBA_DEBUG__
3662 		dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3663 				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
3664 		dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
3665 		dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
3666 		dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
3667 #endif
3668 	} else
3669 		Tr0_oto = (LineTime - Tvm_oto) / 2.0;
3670 
3671 	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
3672 	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
3673 	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
3674 
3675 	dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
3676 			(*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3677 
3678 	dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__);
3679 #ifdef __DML_VBA_DEBUG__
3680 	dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3681 	dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
3682 	dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3683 	dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3684 	dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
3685 	dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3686 	dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3687 	dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3688 	dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3689 	dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3690 	dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3691 	dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
3692 	dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
3693 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3694 	dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3695 	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3696 	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3697 	dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
3698 	dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
3699 	dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
3700 	dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
3701 	dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
3702 	dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
3703 	dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
3704 	dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
3705 	dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
3706 	dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
3707 #endif
3708 
3709 	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
3710 	Tpre_rounded = dst_y_prefetch_equ * LineTime;
3711 #ifdef __DML_VBA_DEBUG__
3712 	dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
3713 	dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
3714 	dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3715 	dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3716 			__func__, VStartup * LineTime);
3717 	dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3718 	dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3719 	dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
3720 	dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3721 	dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3722 	dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3723 	dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3724 			__func__, *DSTYAfterScaler);
3725 #endif
3726 	dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3727 			MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3728 
3729 	if (prefetch_sw_bytes < dep_bytes)
3730 		prefetch_sw_bytes = 2 * dep_bytes;
3731 
3732 	*PrefetchBandwidth = 0;
3733 	*DestinationLinesToRequestVMInVBlank = 0;
3734 	*DestinationLinesToRequestRowInVBlank = 0;
3735 	*VRatioPrefetchY = 0;
3736 	*VRatioPrefetchC = 0;
3737 	*RequiredPrefetchPixDataBWLuma = 0;
3738 	if (dst_y_prefetch_equ > 1 &&
3739 			(Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) {
3740 		double PrefetchBandwidth1;
3741 		double PrefetchBandwidth2;
3742 		double PrefetchBandwidth3;
3743 		double PrefetchBandwidth4;
3744 
3745 		if (Tpre_rounded - *Tno_bw > 0) {
3746 			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3747 					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3748 					+ prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
3749 			Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
3750 		} else
3751 			PrefetchBandwidth1 = 0;
3752 
3753 		if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
3754 				&& Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
3755 			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3756 					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3757 					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
3758 		}
3759 
3760 		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
3761 			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
3762 			(Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
3763 		else
3764 			PrefetchBandwidth2 = 0;
3765 
3766 		if (Tpre_rounded - Tvm_trips_rounded > 0) {
3767 			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3768 					+ prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
3769 			Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
3770 		} else
3771 			PrefetchBandwidth3 = 0;
3772 
3773 
3774 		if (VStartup == MaxVStartup &&
3775 				(Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
3776 				LineTime - Tvm_trips_rounded > 0) {
3777 			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3778 					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
3779 		}
3780 
3781 		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
3782 			PrefetchBandwidth4 = prefetch_sw_bytes /
3783 					(Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
3784 		} else {
3785 			PrefetchBandwidth4 = 0;
3786 		}
3787 
3788 #ifdef __DML_VBA_DEBUG__
3789 		dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
3790 		dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3791 		dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
3792 		dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
3793 		dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
3794 		dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3795 		dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3796 		dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3797 		dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3798 #endif
3799 		{
3800 			bool Case1OK;
3801 			bool Case2OK;
3802 			bool Case3OK;
3803 
3804 			if (PrefetchBandwidth1 > 0) {
3805 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3806 						>= Tvm_trips_rounded
3807 						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3808 								/ PrefetchBandwidth1 >= Tr0_trips_rounded) {
3809 					Case1OK = true;
3810 				} else {
3811 					Case1OK = false;
3812 				}
3813 			} else {
3814 				Case1OK = false;
3815 			}
3816 
3817 			if (PrefetchBandwidth2 > 0) {
3818 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3819 						>= Tvm_trips_rounded
3820 						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3821 						/ PrefetchBandwidth2 < Tr0_trips_rounded) {
3822 					Case2OK = true;
3823 				} else {
3824 					Case2OK = false;
3825 				}
3826 			} else {
3827 				Case2OK = false;
3828 			}
3829 
3830 			if (PrefetchBandwidth3 > 0) {
3831 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3832 						Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3833 								HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3834 								Tr0_trips_rounded) {
3835 					Case3OK = true;
3836 				} else {
3837 					Case3OK = false;
3838 				}
3839 			} else {
3840 				Case3OK = false;
3841 			}
3842 
3843 			if (Case1OK)
3844 				prefetch_bw_equ = PrefetchBandwidth1;
3845 			else if (Case2OK)
3846 				prefetch_bw_equ = PrefetchBandwidth2;
3847 			else if (Case3OK)
3848 				prefetch_bw_equ = PrefetchBandwidth3;
3849 			else
3850 				prefetch_bw_equ = PrefetchBandwidth4;
3851 
3852 #ifdef __DML_VBA_DEBUG__
3853 			dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3854 			dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3855 			dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3856 			dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
3857 #endif
3858 
3859 			if (prefetch_bw_equ > 0) {
3860 				if (v->GPUVMEnable == true) {
3861 					Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3862 							HostVMInefficiencyFactor / prefetch_bw_equ,
3863 							Tvm_trips, LineTime / 4);
3864 				} else {
3865 					Tvm_equ = LineTime / 4;
3866 				}
3867 
3868 				if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3869 					Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3870 							HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
3871 							(LineTime - Tvm_equ) / 2, LineTime / 4);
3872 				} else {
3873 					Tr0_equ = (LineTime - Tvm_equ) / 2;
3874 				}
3875 			} else {
3876 				Tvm_equ = 0;
3877 				Tr0_equ = 0;
3878 #ifdef __DML_VBA_DEBUG__
3879 				dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3880 #endif
3881 			}
3882 		}
3883 
3884 		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
3885 			if (dst_y_prefetch_oto * LineTime < TPreReq) {
3886 				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
3887 			} else {
3888 				*DestinationLinesForPrefetch = dst_y_prefetch_oto;
3889 			}
3890 			TimeForFetchingMetaPTE = Tvm_oto;
3891 			TimeForFetchingRowInVBlank = Tr0_oto;
3892 			*PrefetchBandwidth = prefetch_bw_oto;
3893 			/* Clamp to oto for bandwidth calculation */
3894 			LinesForPrefetchBandwidth = dst_y_prefetch_oto;
3895 		} else {
3896 			/* For mode programming we want to extend the prefetch as much as possible
3897 			 * (up to oto, or as long as we can for equ) if we're not already applying
3898 			 * the 60us prefetch requirement. This is to avoid intermittent underflow
3899 			 * issues during prefetch.
3900 			 *
3901 			 * The prefetch extension is applied under the following scenarios:
3902 			 * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank)
3903 			 * 2. We're using subvp or drr methods of p-state switch, in which case we
3904 			 *    we don't care if prefetch takes up more of the blanking time
3905 			 *
3906 			 * Mode programming typically chooses the smallest prefetch time possible
3907 			 * (i.e. highest bandwidth during prefetch) presumably to create margin between
3908 			 * p-states / c-states that happen in vblank and prefetch. Therefore we only
3909 			 * apply this prefetch extension when p-state in vblank is not required (UCLK
3910 			 * p-states take up the most vblank time).
3911 			 */
3912 			if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) {
3913 				MyError = true;
3914 			} else {
3915 				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
3916 				TimeForFetchingMetaPTE = Tvm_equ;
3917 				TimeForFetchingRowInVBlank = Tr0_equ;
3918 				*PrefetchBandwidth = prefetch_bw_equ;
3919 				/* Clamp to equ for bandwidth calculation */
3920 				LinesForPrefetchBandwidth = dst_y_prefetch_equ;
3921 			}
3922 		}
3923 
3924 		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
3925 
3926 		*DestinationLinesToRequestRowInVBlank =
3927 				dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
3928 
3929 		LinesToRequestPrefetchPixelData = LinesForPrefetchBandwidth -
3930 				*DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3931 
3932 #ifdef __DML_VBA_DEBUG__
3933 		dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3934 		dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3935 				__func__, *DestinationLinesToRequestVMInVBlank);
3936 		dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
3937 		dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3938 		dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3939 				__func__, *DestinationLinesToRequestRowInVBlank);
3940 		dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3941 		dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
3942 #endif
3943 
3944 		if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
3945 			*VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
3946 			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3947 #ifdef __DML_VBA_DEBUG__
3948 			dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3949 			dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3950 			dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3951 #endif
3952 			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3953 				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3954 					*VRatioPrefetchY =
3955 							dml_max((double) PrefetchSourceLinesY /
3956 									LinesToRequestPrefetchPixelData,
3957 									(double) MaxNumSwathY * SwathHeightY /
3958 									(LinesToRequestPrefetchPixelData -
3959 									(VInitPreFillY - 3.0) / 2.0));
3960 					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3961 				} else {
3962 					MyError = true;
3963 					*VRatioPrefetchY = 0;
3964 				}
3965 #ifdef __DML_VBA_DEBUG__
3966 				dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3967 				dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3968 				dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3969 #endif
3970 			}
3971 
3972 			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
3973 			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3974 
3975 #ifdef __DML_VBA_DEBUG__
3976 			dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3977 			dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3978 			dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3979 #endif
3980 			if ((SwathHeightC > 4)) {
3981 				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3982 					*VRatioPrefetchC =
3983 						dml_max(*VRatioPrefetchC,
3984 							(double) MaxNumSwathC * SwathHeightC /
3985 							(LinesToRequestPrefetchPixelData -
3986 							(VInitPreFillC - 3.0) / 2.0));
3987 					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3988 				} else {
3989 					MyError = true;
3990 					*VRatioPrefetchC = 0;
3991 				}
3992 #ifdef __DML_VBA_DEBUG__
3993 				dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3994 				dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3995 				dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3996 #endif
3997 			}
3998 
3999 			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
4000 					/ LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
4001 					/ LineTime;
4002 
4003 #ifdef __DML_VBA_DEBUG__
4004 			dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
4005 			dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
4006 			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
4007 			dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
4008 					__func__, *RequiredPrefetchPixDataBWLuma);
4009 #endif
4010 			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
4011 					LinesToRequestPrefetchPixelData
4012 					* myPipe->BytePerPixelC
4013 					* swath_width_chroma_ub / LineTime;
4014 		} else {
4015 			MyError = true;
4016 #ifdef __DML_VBA_DEBUG__
4017 			dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
4018 					__func__, LinesToRequestPrefetchPixelData);
4019 #endif
4020 			*VRatioPrefetchY = 0;
4021 			*VRatioPrefetchC = 0;
4022 			*RequiredPrefetchPixDataBWLuma = 0;
4023 			*RequiredPrefetchPixDataBWChroma = 0;
4024 		}
4025 #ifdef __DML_VBA_DEBUG__
4026 		dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
4027 			(double)LinesToRequestPrefetchPixelData * LineTime +
4028 			2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
4029 		dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
4030 		dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
4031 			(*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
4032 		dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
4033 		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
4034 			TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
4035 			((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
4036 		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
4037 				PixelPTEBytesPerRow);
4038 #endif
4039 	} else {
4040 		MyError = true;
4041 #ifdef __DML_VBA_DEBUG__
4042 		dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
4043 				__func__, dst_y_prefetch_equ);
4044 #endif
4045 	}
4046 
4047 	{
4048 		double prefetch_vm_bw;
4049 		double prefetch_row_bw;
4050 
4051 		if (PDEAndMetaPTEBytesFrame == 0) {
4052 			prefetch_vm_bw = 0;
4053 		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
4054 #ifdef __DML_VBA_DEBUG__
4055 			dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
4056 			dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
4057 			dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
4058 					__func__, *DestinationLinesToRequestVMInVBlank);
4059 			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
4060 #endif
4061 			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
4062 					(*DestinationLinesToRequestVMInVBlank * LineTime);
4063 #ifdef __DML_VBA_DEBUG__
4064 			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
4065 #endif
4066 		} else {
4067 			prefetch_vm_bw = 0;
4068 			MyError = true;
4069 #ifdef __DML_VBA_DEBUG__
4070 			dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
4071 					__func__, *DestinationLinesToRequestVMInVBlank);
4072 #endif
4073 		}
4074 
4075 		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
4076 			prefetch_row_bw = 0;
4077 		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
4078 			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
4079 					(*DestinationLinesToRequestRowInVBlank * LineTime);
4080 
4081 #ifdef __DML_VBA_DEBUG__
4082 			dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
4083 			dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
4084 			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
4085 					__func__, *DestinationLinesToRequestRowInVBlank);
4086 			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
4087 #endif
4088 		} else {
4089 			prefetch_row_bw = 0;
4090 			MyError = true;
4091 #ifdef __DML_VBA_DEBUG__
4092 			dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
4093 					__func__, *DestinationLinesToRequestRowInVBlank);
4094 #endif
4095 		}
4096 
4097 		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
4098 	}
4099 
4100 	if (MyError) {
4101 		*PrefetchBandwidth = 0;
4102 		TimeForFetchingMetaPTE = 0;
4103 		TimeForFetchingRowInVBlank = 0;
4104 		*DestinationLinesToRequestVMInVBlank = 0;
4105 		*DestinationLinesToRequestRowInVBlank = 0;
4106 		*DestinationLinesForPrefetch = 0;
4107 		LinesToRequestPrefetchPixelData = 0;
4108 		*VRatioPrefetchY = 0;
4109 		*VRatioPrefetchC = 0;
4110 		*RequiredPrefetchPixDataBWLuma = 0;
4111 		*RequiredPrefetchPixDataBWChroma = 0;
4112 	}
4113 
4114 	return MyError;
4115 } // CalculatePrefetchSchedule
4116 
dml32_CalculateFlipSchedule(double HostVMInefficiencyFactor,double UrgentExtraLatency,double UrgentLatency,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,bool GPUVMEnable,double HostVMMinPageSize,double PDEAndMetaPTEBytesPerFrame,double MetaRowBytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw,bool DCCEnable,unsigned int dpte_row_height,unsigned int meta_row_height,unsigned int dpte_row_height_chroma,unsigned int meta_row_height_chroma,bool use_one_row_for_frame_flip,double * DestinationLinesToRequestVMInImmediateFlip,double * DestinationLinesToRequestRowInImmediateFlip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)4117 void dml32_CalculateFlipSchedule(
4118 		double HostVMInefficiencyFactor,
4119 		double UrgentExtraLatency,
4120 		double UrgentLatency,
4121 		unsigned int GPUVMMaxPageTableLevels,
4122 		bool HostVMEnable,
4123 		unsigned int HostVMMaxNonCachedPageTableLevels,
4124 		bool GPUVMEnable,
4125 		double HostVMMinPageSize,
4126 		double PDEAndMetaPTEBytesPerFrame,
4127 		double MetaRowBytes,
4128 		double DPTEBytesPerRow,
4129 		double BandwidthAvailableForImmediateFlip,
4130 		unsigned int TotImmediateFlipBytes,
4131 		enum source_format_class SourcePixelFormat,
4132 		double LineTime,
4133 		double VRatio,
4134 		double VRatioChroma,
4135 		double Tno_bw,
4136 		bool DCCEnable,
4137 		unsigned int dpte_row_height,
4138 		unsigned int meta_row_height,
4139 		unsigned int dpte_row_height_chroma,
4140 		unsigned int meta_row_height_chroma,
4141 		bool    use_one_row_for_frame_flip,
4142 
4143 		/* Output */
4144 		double *DestinationLinesToRequestVMInImmediateFlip,
4145 		double *DestinationLinesToRequestRowInImmediateFlip,
4146 		double *final_flip_bw,
4147 		bool *ImmediateFlipSupportedForPipe)
4148 {
4149 	double min_row_time = 0.0;
4150 	unsigned int HostVMDynamicLevelsTrips;
4151 	double TimeForFetchingMetaPTEImmediateFlip;
4152 	double TimeForFetchingRowInVBlankImmediateFlip;
4153 	double ImmediateFlipBW = 1.0;
4154 
4155 	if (GPUVMEnable == true && HostVMEnable == true)
4156 		HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4157 	else
4158 		HostVMDynamicLevelsTrips = 0;
4159 
4160 #ifdef __DML_VBA_DEBUG__
4161 	dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4162 	dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4163 #endif
4164 
4165 	if (TotImmediateFlipBytes > 0) {
4166 		if (use_one_row_for_frame_flip) {
4167 			ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4168 					BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4169 		} else {
4170 			ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4171 					BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4172 		}
4173 		if (GPUVMEnable == true) {
4174 			TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4175 					HostVMInefficiencyFactor / ImmediateFlipBW,
4176 					UrgentExtraLatency + UrgentLatency *
4177 					(GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4178 					LineTime / 4.0);
4179 		} else {
4180 			TimeForFetchingMetaPTEImmediateFlip = 0;
4181 		}
4182 		if ((GPUVMEnable == true || DCCEnable == true)) {
4183 			TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4184 					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4185 					UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4186 		} else {
4187 			TimeForFetchingRowInVBlankImmediateFlip = 0;
4188 		}
4189 
4190 		*DestinationLinesToRequestVMInImmediateFlip =
4191 				dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4192 		*DestinationLinesToRequestRowInImmediateFlip =
4193 				dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4194 
4195 		if (GPUVMEnable == true) {
4196 			*final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4197 					(*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4198 					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4199 					(*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4200 		} else if ((GPUVMEnable == true || DCCEnable == true)) {
4201 			*final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4202 					(*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4203 		} else {
4204 			*final_flip_bw = 0;
4205 		}
4206 	} else {
4207 		TimeForFetchingMetaPTEImmediateFlip = 0;
4208 		TimeForFetchingRowInVBlankImmediateFlip = 0;
4209 		*DestinationLinesToRequestVMInImmediateFlip = 0;
4210 		*DestinationLinesToRequestRowInImmediateFlip = 0;
4211 		*final_flip_bw = 0;
4212 	}
4213 
4214 	if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4215 		if (GPUVMEnable == true && DCCEnable != true) {
4216 			min_row_time = dml_min(dpte_row_height *
4217 					LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4218 		} else if (GPUVMEnable != true && DCCEnable == true) {
4219 			min_row_time = dml_min(meta_row_height *
4220 					LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4221 		} else {
4222 			min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4223 					LineTime / VRatio, dpte_row_height_chroma * LineTime /
4224 					VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4225 		}
4226 	} else {
4227 		if (GPUVMEnable == true && DCCEnable != true) {
4228 			min_row_time = dpte_row_height * LineTime / VRatio;
4229 		} else if (GPUVMEnable != true && DCCEnable == true) {
4230 			min_row_time = meta_row_height * LineTime / VRatio;
4231 		} else {
4232 			min_row_time =
4233 				dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4234 		}
4235 	}
4236 
4237 	if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4238 			|| TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4239 					> min_row_time) {
4240 		*ImmediateFlipSupportedForPipe = false;
4241 	} else {
4242 		*ImmediateFlipSupportedForPipe = true;
4243 	}
4244 
4245 #ifdef __DML_VBA_DEBUG__
4246 	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4247 	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4248 	dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4249 			__func__, *DestinationLinesToRequestVMInImmediateFlip);
4250 	dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4251 			__func__, *DestinationLinesToRequestRowInImmediateFlip);
4252 	dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4253 	dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4254 			__func__, TimeForFetchingRowInVBlankImmediateFlip);
4255 	dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4256 	dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4257 #endif
4258 } // CalculateFlipSchedule
4259 
dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(struct vba_vars_st * v,unsigned int PrefetchMode,double DCFCLK,double ReturnBW,SOCParametersList mmSOCParameters,double SOCCLK,double DCFClkDeepSleep,unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerSurface[],double BytePerPixelDETY[],double BytePerPixelDETC[],double DSTXAfterScaler[],double DSTYAfterScaler[],bool UnboundedRequestEnabled,unsigned int CompressedBufferSizeInkByte,enum clock_change_support * DRAMClockChangeSupport,double MaxActiveDRAMClockChangeLatencySupported[],unsigned int SubViewportLinesNeededInMALL[],enum dm_fclock_change_support * FCLKChangeSupport,double * MinActiveFCLKChangeLatencySupported,bool * USRRetrainingSupport,double ActiveDRAMClockChangeLatencyMargin[])4260 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4261 		struct vba_vars_st *v,
4262 		unsigned int PrefetchMode,
4263 		double DCFCLK,
4264 		double ReturnBW,
4265 		SOCParametersList mmSOCParameters,
4266 		double SOCCLK,
4267 		double DCFClkDeepSleep,
4268 		unsigned int DETBufferSizeY[],
4269 		unsigned int DETBufferSizeC[],
4270 		unsigned int SwathHeightY[],
4271 		unsigned int SwathHeightC[],
4272 		double SwathWidthY[],
4273 		double SwathWidthC[],
4274 		unsigned int DPPPerSurface[],
4275 		double BytePerPixelDETY[],
4276 		double BytePerPixelDETC[],
4277 		double DSTXAfterScaler[],
4278 		double DSTYAfterScaler[],
4279 		bool UnboundedRequestEnabled,
4280 		unsigned int CompressedBufferSizeInkByte,
4281 
4282 		/* Output */
4283 		enum clock_change_support *DRAMClockChangeSupport,
4284 		double MaxActiveDRAMClockChangeLatencySupported[],
4285 		unsigned int SubViewportLinesNeededInMALL[],
4286 		enum dm_fclock_change_support *FCLKChangeSupport,
4287 		double *MinActiveFCLKChangeLatencySupported,
4288 		bool *USRRetrainingSupport,
4289 		double ActiveDRAMClockChangeLatencyMargin[])
4290 {
4291 	unsigned int i, j, k;
4292 	unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
4293 	unsigned int DRAMClockChangeSupportNumber = 0;
4294 	unsigned int LastSurfaceWithoutMargin;
4295 	unsigned int DRAMClockChangeMethod = 0;
4296 	bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4297 	double MinActiveFCLKChangeMargin = 0.;
4298 	double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4299 	double ActiveClockChangeLatencyHidingY;
4300 	double ActiveClockChangeLatencyHidingC;
4301 	double ActiveClockChangeLatencyHiding;
4302 	double EffectiveDETBufferSizeY;
4303 	double     ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
4304 	double     USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
4305 	double TotalPixelBW = 0.0;
4306 	bool    SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
4307 	double     EffectiveLBLatencyHidingY;
4308 	double     EffectiveLBLatencyHidingC;
4309 	double     LinesInDETY[DC__NUM_DPP__MAX];
4310 	double     LinesInDETC[DC__NUM_DPP__MAX];
4311 	unsigned int    LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
4312 	unsigned int    LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
4313 	double     FullDETBufferingTimeY;
4314 	double     FullDETBufferingTimeC;
4315 	double     WritebackDRAMClockChangeLatencyMargin;
4316 	double     WritebackFCLKChangeLatencyMargin;
4317 	double     WritebackLatencyHiding;
4318 	bool    SameTimingForFCLKChange;
4319 
4320 	unsigned int    TotalActiveWriteback = 0;
4321 	unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
4322 	unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
4323 
4324 	v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4325 	v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4326 			+ mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4327 	v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
4328 	v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
4329 	v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4330 			+ 10 / DCFClkDeepSleep;
4331 	v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4332 			+ 10 / DCFClkDeepSleep;
4333 	v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4334 			+ 10 / DCFClkDeepSleep;
4335 	v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4336 			+ mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4337 
4338 #ifdef __DML_VBA_DEBUG__
4339 	dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4340 	dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4341 	dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4342 	dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark);
4343 	dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark);
4344 	dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark);
4345 	dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark);
4346 	dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark);
4347 	dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark);
4348 	dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark);
4349 	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4350 			__func__, v->Watermark.Z8StutterEnterPlusExitWatermark);
4351 #endif
4352 
4353 
4354 	TotalActiveWriteback = 0;
4355 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4356 		if (v->WritebackEnable[k] == true)
4357 			TotalActiveWriteback = TotalActiveWriteback + 1;
4358 	}
4359 
4360 	if (TotalActiveWriteback <= 1) {
4361 		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4362 	} else {
4363 		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4364 				+ v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4365 	}
4366 	if (v->USRRetrainingRequiredFinal)
4367 		v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
4368 				+ mmSOCParameters.USRRetrainingLatency;
4369 
4370 	if (TotalActiveWriteback <= 1) {
4371 		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4372 				+ mmSOCParameters.WritebackLatency;
4373 		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4374 				+ mmSOCParameters.WritebackLatency;
4375 	} else {
4376 		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4377 				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4378 		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4379 				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK;
4380 	}
4381 
4382 	if (v->USRRetrainingRequiredFinal)
4383 		v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
4384 				+ mmSOCParameters.USRRetrainingLatency;
4385 
4386 	if (v->USRRetrainingRequiredFinal)
4387 		v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
4388 				+ mmSOCParameters.USRRetrainingLatency;
4389 
4390 #ifdef __DML_VBA_DEBUG__
4391 	dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4392 			__func__, v->Watermark.WritebackDRAMClockChangeWatermark);
4393 	dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark);
4394 	dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark);
4395 	dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal);
4396 	dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4397 #endif
4398 
4399 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4400 		TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
4401 				SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
4402 	}
4403 
4404 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4405 
4406 		LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
4407 		LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
4408 
4409 
4410 #ifdef __DML_VBA_DEBUG__
4411 		dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines);
4412 		dml_print("DML::%s: k=%d, v->LineBufferSizeFinal     = %d\n", __func__, k, v->LineBufferSizeFinal);
4413 		dml_print("DML::%s: k=%d, v->LBBitPerPixel      = %d\n", __func__, k, v->LBBitPerPixel[k]);
4414 		dml_print("DML::%s: k=%d, v->HRatio             = %f\n", __func__, k, v->HRatio[k]);
4415 		dml_print("DML::%s: k=%d, v->vtaps              = %d\n", __func__, k, v->vtaps[k]);
4416 #endif
4417 
4418 		EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
4419 		EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
4420 		EffectiveDETBufferSizeY = DETBufferSizeY[k];
4421 
4422 		if (UnboundedRequestEnabled) {
4423 			EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
4424 					+ CompressedBufferSizeInkByte * 1024
4425 							* (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
4426 							/ (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
4427 		}
4428 
4429 		LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4430 		LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
4431 		FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
4432 
4433 		ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
4434 				- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];
4435 
4436 		if (v->NumberOfActiveSurfaces > 1) {
4437 			ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
4438 					- (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
4439 							/ v->PixelClock[k] / v->VRatio[k];
4440 		}
4441 
4442 		if (BytePerPixelDETC[k] > 0) {
4443 			LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4444 			LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
4445 			FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
4446 					/ v->VRatioChroma[k];
4447 			ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
4448 					- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
4449 							/ v->PixelClock[k];
4450 			if (v->NumberOfActiveSurfaces > 1) {
4451 				ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
4452 						- (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
4453 								/ v->PixelClock[k] / v->VRatioChroma[k];
4454 			}
4455 			ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
4456 					ActiveClockChangeLatencyHidingC);
4457 		} else {
4458 			ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
4459 		}
4460 
4461 		ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4462 				- v->Watermark.DRAMClockChangeWatermark;
4463 		ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4464 				- v->Watermark.FCLKChangeWatermark;
4465 		USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;
4466 
4467 		if (v->WritebackEnable[k]) {
4468 			WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
4469 					/ (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4470 							/ (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
4471 			if (v->WritebackPixelFormat[k] == dm_444_64)
4472 				WritebackLatencyHiding = WritebackLatencyHiding / 2;
4473 
4474 			WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
4475 					- v->Watermark.WritebackDRAMClockChangeWatermark;
4476 
4477 			WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
4478 					- v->Watermark.WritebackFCLKChangeWatermark;
4479 
4480 			ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4481 					WritebackFCLKChangeLatencyMargin);
4482 			ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
4483 					WritebackDRAMClockChangeLatencyMargin);
4484 		}
4485 		MaxActiveDRAMClockChangeLatencySupported[k] =
4486 				(v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4487 						0 :
4488 						(ActiveDRAMClockChangeLatencyMargin[k]
4489 								+ mmSOCParameters.DRAMClockChangeLatency);
4490 	}
4491 
4492 	for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
4493 		for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
4494 			if (i == j ||
4495 					(v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
4496 					(v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
4497 					(v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
4498 					(v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
4499 					v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
4500 					v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4501 					(v->DRRDisplay[i] || v->DRRDisplay[j]))) {
4502 				SynchronizedSurfaces[i][j] = true;
4503 			} else {
4504 				SynchronizedSurfaces[i][j] = false;
4505 			}
4506 		}
4507 	}
4508 
4509 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4510 		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4511 				(!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4512 				ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
4513 			FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4514 			MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
4515 			SurfaceWithMinActiveFCLKChangeMargin = k;
4516 		}
4517 	}
4518 
4519 	*MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4520 
4521 	SameTimingForFCLKChange = true;
4522 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4523 		if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
4524 			if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4525 					(SameTimingForFCLKChange ||
4526 					ActiveFCLKChangeLatencyMargin[k] <
4527 					SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4528 				SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
4529 			}
4530 			SameTimingForFCLKChange = false;
4531 		}
4532 	}
4533 
4534 	if (MinActiveFCLKChangeMargin > 0) {
4535 		*FCLKChangeSupport = dm_fclock_change_vactive;
4536 	} else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4537 			(PrefetchMode <= 1)) {
4538 		*FCLKChangeSupport = dm_fclock_change_vblank;
4539 	} else {
4540 		*FCLKChangeSupport = dm_fclock_change_unsupported;
4541 	}
4542 
4543 	*USRRetrainingSupport = true;
4544 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4545 		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4546 				(USRRetrainingLatencyMargin[k] < 0)) {
4547 			*USRRetrainingSupport = false;
4548 		}
4549 	}
4550 
4551 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4552 		if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4553 				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4554 				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4555 				ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4556 			if (PrefetchMode > 0) {
4557 				DRAMClockChangeSupportNumber = 2;
4558 			} else if (DRAMClockChangeSupportNumber == 0) {
4559 				DRAMClockChangeSupportNumber = 1;
4560 				LastSurfaceWithoutMargin = k;
4561 			} else if (DRAMClockChangeSupportNumber == 1 &&
4562 					!SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
4563 				DRAMClockChangeSupportNumber = 2;
4564 			}
4565 		}
4566 	}
4567 
4568 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4569 		if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4570 			DRAMClockChangeMethod = 1;
4571 		else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4572 			DRAMClockChangeMethod = 2;
4573 	}
4574 
4575 	if (DRAMClockChangeMethod == 0) {
4576 		if (DRAMClockChangeSupportNumber == 0)
4577 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4578 		else if (DRAMClockChangeSupportNumber == 1)
4579 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4580 		else
4581 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4582 	} else if (DRAMClockChangeMethod == 1) {
4583 		if (DRAMClockChangeSupportNumber == 0)
4584 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4585 		else if (DRAMClockChangeSupportNumber == 1)
4586 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4587 		else
4588 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4589 	} else {
4590 		if (DRAMClockChangeSupportNumber == 0)
4591 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4592 		else if (DRAMClockChangeSupportNumber == 1)
4593 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4594 		else
4595 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4596 	}
4597 
4598 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4599 		unsigned int dst_y_pstate;
4600 		unsigned int src_y_pstate_l;
4601 		unsigned int src_y_pstate_c;
4602 		unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4603 
4604 		dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1);
4605 		src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]);
4606 		src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
4607 		sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];
4608 
4609 #ifdef __DML_VBA_DEBUG__
4610 dml_print("DML::%s: k=%d, DETBufferSizeY               = %d\n", __func__, k, DETBufferSizeY[k]);
4611 dml_print("DML::%s: k=%d, BytePerPixelDETY             = %f\n", __func__, k, BytePerPixelDETY[k]);
4612 dml_print("DML::%s: k=%d, SwathWidthY                  = %d\n", __func__, k, SwathWidthY[k]);
4613 dml_print("DML::%s: k=%d, SwathHeightY                 = %d\n", __func__, k, SwathHeightY[k]);
4614 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
4615 dml_print("DML::%s: k=%d, dst_y_pstate      = %d\n", __func__, k, dst_y_pstate);
4616 dml_print("DML::%s: k=%d, src_y_pstate_l    = %d\n", __func__, k, src_y_pstate_l);
4617 dml_print("DML::%s: k=%d, src_y_ahead_l     = %d\n", __func__, k, src_y_ahead_l);
4618 dml_print("DML::%s: k=%d, v->meta_row_height   = %d\n", __func__, k, v->meta_row_height[k]);
4619 dml_print("DML::%s: k=%d, sub_vp_lines_l    = %d\n", __func__, k, sub_vp_lines_l);
4620 #endif
4621 		SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4622 
4623 		if (BytePerPixelDETC[k] > 0) {
4624 			src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]);
4625 			src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
4626 			sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
4627 			SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4628 
4629 #ifdef __DML_VBA_DEBUG__
4630 dml_print("DML::%s: k=%d, src_y_pstate_c            = %d\n", __func__, k, src_y_pstate_c);
4631 dml_print("DML::%s: k=%d, src_y_ahead_c             = %d\n", __func__, k, src_y_ahead_c);
4632 dml_print("DML::%s: k=%d, v->meta_row_height_chroma    = %d\n", __func__, k, v->meta_row_height_chroma[k]);
4633 dml_print("DML::%s: k=%d, sub_vp_lines_c            = %d\n", __func__, k, sub_vp_lines_c);
4634 #endif
4635 		}
4636 	}
4637 #ifdef __DML_VBA_DEBUG__
4638 	dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4639 	dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4640 	dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4641 			__func__, *MinActiveFCLKChangeLatencySupported);
4642 	dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4643 #endif
4644 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4645 
/*
 * Compute the DISPCLK required by a writeback stream.
 *
 * Three candidate clocks are derived from the pixel clock, the scaler tap
 * counts, the source/destination widths, HTotal and the writeback line buffer
 * size. The largest candidate is handed to dml32_RoundToDFSGranularity()
 * together with DISPCLKDPPCLKVCOSpeed, and that result is returned.
 *
 * WritebackPixelFormat and WritebackVRatio are accepted for interface
 * compatibility but do not take part in the calculation.
 */
double dml32_CalculateWriteBackDISPCLK(
		enum source_format_class WritebackPixelFormat,
		double PixelClock,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackHTaps,
		unsigned int WritebackVTaps,
		unsigned int   WritebackSourceWidth,
		unsigned int   WritebackDestinationWidth,
		unsigned int HTotal,
		unsigned int WritebackLineBufferSize,
		double DISPCLKDPPCLKVCOSpeed)
{
	/* Horizontal taps, counted in whole groups of eight. */
	const double htap_groups = dml_ceil(WritebackHTaps / 8.0, 1);
	/* Vertical taps times destination width in whole groups of six, plus 8. */
	const double vtap_term = WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0;
	/* Destination width times vertical taps, less the line buffer size over 57. */
	const double hb_term = WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0;

	const double clk_h = PixelClock * htap_groups / WritebackHRatio;
	const double clk_v = PixelClock * vtap_term / HTotal;
	const double clk_hb = PixelClock * WritebackVTaps * hb_term / 6.0 / WritebackSourceWidth;

	return dml32_RoundToDFSGranularity(dml_max3(clk_h, clk_v, clk_hb), 1, DISPCLKDPPCLKVCOSpeed);
}
4667 
dml32_CalculateMinAndMaxPrefetchMode(enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,unsigned int * MinPrefetchMode,unsigned int * MaxPrefetchMode)4668 void dml32_CalculateMinAndMaxPrefetchMode(
4669 		enum dm_prefetch_modes   AllowForPStateChangeOrStutterInVBlankFinal,
4670 		unsigned int             *MinPrefetchMode,
4671 		unsigned int             *MaxPrefetchMode)
4672 {
4673 	if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4674 		*MinPrefetchMode = 3;
4675 		*MaxPrefetchMode = 3;
4676 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4677 		*MinPrefetchMode = 2;
4678 		*MaxPrefetchMode = 2;
4679 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4680 		*MinPrefetchMode = 1;
4681 		*MaxPrefetchMode = 1;
4682 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4683 		*MinPrefetchMode = 0;
4684 		*MaxPrefetchMode = 0;
4685 	} else if (AllowForPStateChangeOrStutterInVBlankFinal ==
4686 			dm_prefetch_support_uclk_fclk_and_stutter_if_possible) {
4687 		*MinPrefetchMode = 0;
4688 		*MaxPrefetchMode = 3;
4689 	} else {
4690 		*MinPrefetchMode = 0;
4691 		*MaxPrefetchMode = 3;
4692 	}
4693 } // CalculateMinAndMaxPrefetchMode
4694 
dml32_CalculatePixelDeliveryTimes(unsigned int NumberOfActiveSurfaces,double VRatio[],double VRatioChroma[],double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],unsigned int DPPPerSurface[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],unsigned int BytePerPixelC[],enum dm_rotation_angle SourceRotation[],unsigned int NumberOfCursors[],unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesC[],unsigned int BlockHeight256BytesC[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[],double CursorRequestDeliveryTime[],double CursorRequestDeliveryTimePrefetch[])4695 void dml32_CalculatePixelDeliveryTimes(
4696 		unsigned int             NumberOfActiveSurfaces,
4697 		double              VRatio[],
4698 		double              VRatioChroma[],
4699 		double              VRatioPrefetchY[],
4700 		double              VRatioPrefetchC[],
4701 		unsigned int             swath_width_luma_ub[],
4702 		unsigned int             swath_width_chroma_ub[],
4703 		unsigned int             DPPPerSurface[],
4704 		double              HRatio[],
4705 		double              HRatioChroma[],
4706 		double              PixelClock[],
4707 		double              PSCL_THROUGHPUT[],
4708 		double              PSCL_THROUGHPUT_CHROMA[],
4709 		double              Dppclk[],
4710 		unsigned int             BytePerPixelC[],
4711 		enum dm_rotation_angle   SourceRotation[],
4712 		unsigned int             NumberOfCursors[],
4713 		unsigned int             CursorWidth[][DC__NUM_CURSOR__MAX],
4714 		unsigned int             CursorBPP[][DC__NUM_CURSOR__MAX],
4715 		unsigned int             BlockWidth256BytesY[],
4716 		unsigned int             BlockHeight256BytesY[],
4717 		unsigned int             BlockWidth256BytesC[],
4718 		unsigned int             BlockHeight256BytesC[],
4719 
4720 		/* Output */
4721 		double              DisplayPipeLineDeliveryTimeLuma[],
4722 		double              DisplayPipeLineDeliveryTimeChroma[],
4723 		double              DisplayPipeLineDeliveryTimeLumaPrefetch[],
4724 		double              DisplayPipeLineDeliveryTimeChromaPrefetch[],
4725 		double              DisplayPipeRequestDeliveryTimeLuma[],
4726 		double              DisplayPipeRequestDeliveryTimeChroma[],
4727 		double              DisplayPipeRequestDeliveryTimeLumaPrefetch[],
4728 		double              DisplayPipeRequestDeliveryTimeChromaPrefetch[],
4729 		double              CursorRequestDeliveryTime[],
4730 		double              CursorRequestDeliveryTimePrefetch[])
4731 {
4732 	double   req_per_swath_ub;
4733 	unsigned int k;
4734 
4735 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4736 
4737 #ifdef __DML_VBA_DEBUG__
4738 		dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
4739 		dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
4740 		dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
4741 		dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
4742 		dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
4743 		dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
4744 		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
4745 		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
4746 		dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
4747 		dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
4748 		dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
4749 #endif
4750 
4751 		if (VRatio[k] <= 1) {
4752 			DisplayPipeLineDeliveryTimeLuma[k] =
4753 					swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4754 		} else {
4755 			DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4756 		}
4757 
4758 		if (BytePerPixelC[k] == 0) {
4759 			DisplayPipeLineDeliveryTimeChroma[k] = 0;
4760 		} else {
4761 			if (VRatioChroma[k] <= 1) {
4762 				DisplayPipeLineDeliveryTimeChroma[k] =
4763 					swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4764 			} else {
4765 				DisplayPipeLineDeliveryTimeChroma[k] =
4766 					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4767 			}
4768 		}
4769 
4770 		if (VRatioPrefetchY[k] <= 1) {
4771 			DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4772 					swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k];
4773 		} else {
4774 			DisplayPipeLineDeliveryTimeLumaPrefetch[k] =
4775 					swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
4776 		}
4777 
4778 		if (BytePerPixelC[k] == 0) {
4779 			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
4780 		} else {
4781 			if (VRatioPrefetchC[k] <= 1) {
4782 				DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] *
4783 						DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
4784 			} else {
4785 				DisplayPipeLineDeliveryTimeChromaPrefetch[k] =
4786 						swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
4787 			}
4788 		}
4789 #ifdef __DML_VBA_DEBUG__
4790 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
4791 				__func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
4792 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
4793 				__func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
4794 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
4795 				__func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
4796 		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
4797 				__func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
4798 #endif
4799 	}
4800 
4801 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4802 		if (!IsVertical(SourceRotation[k]))
4803 			req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
4804 		else
4805 			req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
4806 #ifdef __DML_VBA_DEBUG__
4807 		dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, req_per_swath_ub);
4808 #endif
4809 
4810 		DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
4811 		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
4812 				DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
4813 		if (BytePerPixelC[k] == 0) {
4814 			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
4815 			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
4816 		} else {
4817 			if (!IsVertical(SourceRotation[k]))
4818 				req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
4819 			else
4820 				req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
4821 #ifdef __DML_VBA_DEBUG__
4822 			dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, req_per_swath_ub);
4823 #endif
4824 			DisplayPipeRequestDeliveryTimeChroma[k] =
4825 					DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
4826 			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
4827 					DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
4828 		}
4829 #ifdef __DML_VBA_DEBUG__
4830 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
4831 				__func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
4832 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
4833 				__func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
4834 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
4835 				__func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
4836 		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
4837 				__func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
4838 #endif
4839 	}
4840 
4841 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4842 		unsigned int cursor_req_per_width;
4843 
4844 		cursor_req_per_width = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
4845 				256.0 / 8.0, 1.0);
4846 		if (NumberOfCursors[k] > 0) {
4847 			if (VRatio[k] <= 1) {
4848 				CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4849 						HRatio[k] / PixelClock[k] / cursor_req_per_width;
4850 			} else {
4851 				CursorRequestDeliveryTime[k] = (double) CursorWidth[k][0] /
4852 						PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4853 			}
4854 			if (VRatioPrefetchY[k] <= 1) {
4855 				CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4856 						HRatio[k] / PixelClock[k] / cursor_req_per_width;
4857 			} else {
4858 				CursorRequestDeliveryTimePrefetch[k] = (double) CursorWidth[k][0] /
4859 						PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_req_per_width;
4860 			}
4861 		} else {
4862 			CursorRequestDeliveryTime[k] = 0;
4863 			CursorRequestDeliveryTimePrefetch[k] = 0;
4864 		}
4865 #ifdef __DML_VBA_DEBUG__
4866 		dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
4867 				__func__, k, NumberOfCursors[k]);
4868 		dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
4869 				__func__, k, CursorRequestDeliveryTime[k]);
4870 		dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
4871 				__func__, k, CursorRequestDeliveryTimePrefetch[k]);
4872 #endif
4873 	}
4874 } // CalculatePixelDeliveryTimes
4875 
dml32_CalculateMetaAndPTETimes(bool use_one_row_for_frame[],unsigned int NumberOfActiveSurfaces,bool GPUVMEnable,unsigned int MetaChunkSize,unsigned int MinMetaChunkSizeBytes,unsigned int HTotal[],double VRatio[],double VRatioChroma[],double DestinationLinesToRequestRowInVBlank[],double DestinationLinesToRequestRowInImmediateFlip[],bool DCCEnable[],double PixelClock[],unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],enum dm_rotation_angle SourceRotation[],unsigned int dpte_row_height[],unsigned int dpte_row_height_chroma[],unsigned int meta_row_width[],unsigned int meta_row_width_chroma[],unsigned int meta_row_height[],unsigned int meta_row_height_chroma[],unsigned int meta_req_width[],unsigned int meta_req_width_chroma[],unsigned int meta_req_height[],unsigned int meta_req_height_chroma[],unsigned int dpte_group_bytes[],unsigned int PTERequestSizeY[],unsigned int PTERequestSizeC[],unsigned int PixelPTEReqWidthY[],unsigned int PixelPTEReqHeightY[],unsigned int PixelPTEReqWidthC[],unsigned int PixelPTEReqHeightC[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],double DST_Y_PER_PTE_ROW_NOM_L[],double DST_Y_PER_PTE_ROW_NOM_C[],double DST_Y_PER_META_ROW_NOM_L[],double DST_Y_PER_META_ROW_NOM_C[],double TimePerMetaChunkNominal[],double TimePerChromaMetaChunkNominal[],double TimePerMetaChunkVBlank[],double TimePerChromaMetaChunkVBlank[],double TimePerMetaChunkFlip[],double TimePerChromaMetaChunkFlip[],double time_per_pte_group_nom_luma[],double time_per_pte_group_vblank_luma[],double time_per_pte_group_flip_luma[],double time_per_pte_group_nom_chroma[],double time_per_pte_group_vblank_chroma[],double time_per_pte_group_flip_chroma[])4876 void dml32_CalculateMetaAndPTETimes(
4877 		bool use_one_row_for_frame[],
4878 		unsigned int NumberOfActiveSurfaces,
4879 		bool GPUVMEnable,
4880 		unsigned int MetaChunkSize,
4881 		unsigned int MinMetaChunkSizeBytes,
4882 		unsigned int    HTotal[],
4883 		double  VRatio[],
4884 		double  VRatioChroma[],
4885 		double  DestinationLinesToRequestRowInVBlank[],
4886 		double  DestinationLinesToRequestRowInImmediateFlip[],
4887 		bool DCCEnable[],
4888 		double  PixelClock[],
4889 		unsigned int BytePerPixelY[],
4890 		unsigned int BytePerPixelC[],
4891 		enum dm_rotation_angle SourceRotation[],
4892 		unsigned int dpte_row_height[],
4893 		unsigned int dpte_row_height_chroma[],
4894 		unsigned int meta_row_width[],
4895 		unsigned int meta_row_width_chroma[],
4896 		unsigned int meta_row_height[],
4897 		unsigned int meta_row_height_chroma[],
4898 		unsigned int meta_req_width[],
4899 		unsigned int meta_req_width_chroma[],
4900 		unsigned int meta_req_height[],
4901 		unsigned int meta_req_height_chroma[],
4902 		unsigned int dpte_group_bytes[],
4903 		unsigned int    PTERequestSizeY[],
4904 		unsigned int    PTERequestSizeC[],
4905 		unsigned int    PixelPTEReqWidthY[],
4906 		unsigned int    PixelPTEReqHeightY[],
4907 		unsigned int    PixelPTEReqWidthC[],
4908 		unsigned int    PixelPTEReqHeightC[],
4909 		unsigned int    dpte_row_width_luma_ub[],
4910 		unsigned int    dpte_row_width_chroma_ub[],
4911 
4912 		/* Output */
4913 		double DST_Y_PER_PTE_ROW_NOM_L[],
4914 		double DST_Y_PER_PTE_ROW_NOM_C[],
4915 		double DST_Y_PER_META_ROW_NOM_L[],
4916 		double DST_Y_PER_META_ROW_NOM_C[],
4917 		double TimePerMetaChunkNominal[],
4918 		double TimePerChromaMetaChunkNominal[],
4919 		double TimePerMetaChunkVBlank[],
4920 		double TimePerChromaMetaChunkVBlank[],
4921 		double TimePerMetaChunkFlip[],
4922 		double TimePerChromaMetaChunkFlip[],
4923 		double time_per_pte_group_nom_luma[],
4924 		double time_per_pte_group_vblank_luma[],
4925 		double time_per_pte_group_flip_luma[],
4926 		double time_per_pte_group_nom_chroma[],
4927 		double time_per_pte_group_vblank_chroma[],
4928 		double time_per_pte_group_flip_chroma[])
4929 {
4930 	unsigned int   meta_chunk_width;
4931 	unsigned int   min_meta_chunk_width;
4932 	unsigned int   meta_chunk_per_row_int;
4933 	unsigned int   meta_row_remainder;
4934 	unsigned int   meta_chunk_threshold;
4935 	unsigned int   meta_chunks_per_row_ub;
4936 	unsigned int   meta_chunk_width_chroma;
4937 	unsigned int   min_meta_chunk_width_chroma;
4938 	unsigned int   meta_chunk_per_row_int_chroma;
4939 	unsigned int   meta_row_remainder_chroma;
4940 	unsigned int   meta_chunk_threshold_chroma;
4941 	unsigned int   meta_chunks_per_row_ub_chroma;
4942 	unsigned int   dpte_group_width_luma;
4943 	unsigned int   dpte_groups_per_row_luma_ub;
4944 	unsigned int   dpte_group_width_chroma;
4945 	unsigned int   dpte_groups_per_row_chroma_ub;
4946 	unsigned int k;
4947 
4948 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4949 		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4950 		if (BytePerPixelC[k] == 0)
4951 			DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4952 		else
4953 			DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4954 		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4955 		if (BytePerPixelC[k] == 0)
4956 			DST_Y_PER_META_ROW_NOM_C[k] = 0;
4957 		else
4958 			DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4959 	}
4960 
4961 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4962 		if (DCCEnable[k] == true) {
4963 			meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4964 			min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4965 			meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4966 			meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4967 			if (!IsVertical(SourceRotation[k]))
4968 				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4969 			else
4970 				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4971 
4972 			if (meta_row_remainder <= meta_chunk_threshold)
4973 				meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4974 			else
4975 				meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4976 
4977 			TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4978 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4979 			TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4980 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4981 			TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4982 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4983 			if (BytePerPixelC[k] == 0) {
4984 				TimePerChromaMetaChunkNominal[k] = 0;
4985 				TimePerChromaMetaChunkVBlank[k] = 0;
4986 				TimePerChromaMetaChunkFlip[k] = 0;
4987 			} else {
4988 				meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4989 						meta_row_height_chroma[k];
4990 				min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4991 						meta_row_height_chroma[k];
4992 				meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4993 						meta_chunk_width_chroma;
4994 				meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4995 				if (!IsVertical(SourceRotation[k])) {
4996 					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4997 							meta_req_width_chroma[k];
4998 				} else {
4999 					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
5000 							meta_req_height_chroma[k];
5001 				}
5002 				if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
5003 					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
5004 				else
5005 					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
5006 
5007 				TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
5008 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5009 				TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
5010 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5011 				TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5012 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5013 			}
5014 		} else {
5015 			TimePerMetaChunkNominal[k] = 0;
5016 			TimePerMetaChunkVBlank[k] = 0;
5017 			TimePerMetaChunkFlip[k] = 0;
5018 			TimePerChromaMetaChunkNominal[k] = 0;
5019 			TimePerChromaMetaChunkVBlank[k] = 0;
5020 			TimePerChromaMetaChunkFlip[k] = 0;
5021 		}
5022 	}
5023 
5024 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5025 		if (GPUVMEnable == true) {
5026 			if (!IsVertical(SourceRotation[k])) {
5027 				dpte_group_width_luma = (double) dpte_group_bytes[k] /
5028 						(double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5029 			} else {
5030 				dpte_group_width_luma = (double) dpte_group_bytes[k] /
5031 						(double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
5032 			}
5033 
5034 			if (use_one_row_for_frame[k]) {
5035 				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5036 						(double) dpte_group_width_luma / 2.0, 1.0);
5037 			} else {
5038 				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5039 						(double) dpte_group_width_luma, 1.0);
5040 			}
5041 #ifdef __DML_VBA_DEBUG__
5042 			dml_print("DML::%s: k=%0d, use_one_row_for_frame        = %d\n",
5043 					__func__, k, use_one_row_for_frame[k]);
5044 			dml_print("DML::%s: k=%0d, dpte_group_bytes             = %d\n",
5045 					__func__, k, dpte_group_bytes[k]);
5046 			dml_print("DML::%s: k=%0d, PTERequestSizeY              = %d\n",
5047 					__func__, k, PTERequestSizeY[k]);
5048 			dml_print("DML::%s: k=%0d, PixelPTEReqWidthY            = %d\n",
5049 					__func__, k, PixelPTEReqWidthY[k]);
5050 			dml_print("DML::%s: k=%0d, PixelPTEReqHeightY           = %d\n",
5051 					__func__, k, PixelPTEReqHeightY[k]);
5052 			dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub       = %d\n",
5053 					__func__, k, dpte_row_width_luma_ub[k]);
5054 			dml_print("DML::%s: k=%0d, dpte_group_width_luma        = %d\n",
5055 					__func__, k, dpte_group_width_luma);
5056 			dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub  = %d\n",
5057 					__func__, k, dpte_groups_per_row_luma_ub);
5058 #endif
5059 
5060 			time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
5061 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5062 			time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
5063 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5064 			time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5065 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5066 			if (BytePerPixelC[k] == 0) {
5067 				time_per_pte_group_nom_chroma[k] = 0;
5068 				time_per_pte_group_vblank_chroma[k] = 0;
5069 				time_per_pte_group_flip_chroma[k] = 0;
5070 			} else {
5071 				if (!IsVertical(SourceRotation[k])) {
5072 					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5073 							(double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5074 				} else {
5075 					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5076 							(double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5077 				}
5078 
5079 				if (use_one_row_for_frame[k]) {
5080 					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5081 							(double) dpte_group_width_chroma / 2.0, 1.0);
5082 				} else {
5083 					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5084 							(double) dpte_group_width_chroma, 1.0);
5085 				}
5086 #ifdef __DML_VBA_DEBUG__
5087 				dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub        = %d\n",
5088 						__func__, k, dpte_row_width_chroma_ub[k]);
5089 				dml_print("DML::%s: k=%0d, dpte_group_width_chroma        = %d\n",
5090 						__func__, k, dpte_group_width_chroma);
5091 				dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub  = %d\n",
5092 						__func__, k, dpte_groups_per_row_chroma_ub);
5093 #endif
5094 				time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5095 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5096 				time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5097 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5098 				time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5099 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5100 			}
5101 		} else {
5102 			time_per_pte_group_nom_luma[k] = 0;
5103 			time_per_pte_group_vblank_luma[k] = 0;
5104 			time_per_pte_group_flip_luma[k] = 0;
5105 			time_per_pte_group_nom_chroma[k] = 0;
5106 			time_per_pte_group_vblank_chroma[k] = 0;
5107 			time_per_pte_group_flip_chroma[k] = 0;
5108 		}
5109 #ifdef __DML_VBA_DEBUG__
5110 		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank         = %f\n",
5111 				__func__, k, DestinationLinesToRequestRowInVBlank[k]);
5112 		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip  = %f\n",
5113 				__func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5114 		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L                      = %f\n",
5115 				__func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5116 		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C                      = %f\n",
5117 				__func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5118 		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L                     = %f\n",
5119 				__func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5120 		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C                     = %f\n",
5121 				__func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5122 		dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal          = %f\n",
5123 				__func__, k, TimePerMetaChunkNominal[k]);
5124 		dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank           = %f\n",
5125 				__func__, k, TimePerMetaChunkVBlank[k]);
5126 		dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip             = %f\n",
5127 				__func__, k, TimePerMetaChunkFlip[k]);
5128 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal    = %f\n",
5129 				__func__, k, TimePerChromaMetaChunkNominal[k]);
5130 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank     = %f\n",
5131 				__func__, k, TimePerChromaMetaChunkVBlank[k]);
5132 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip       = %f\n",
5133 				__func__, k, TimePerChromaMetaChunkFlip[k]);
5134 		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma      = %f\n",
5135 				__func__, k, time_per_pte_group_nom_luma[k]);
5136 		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma   = %f\n",
5137 				__func__, k, time_per_pte_group_vblank_luma[k]);
5138 		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma     = %f\n",
5139 				__func__, k, time_per_pte_group_flip_luma[k]);
5140 		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma    = %f\n",
5141 				__func__, k, time_per_pte_group_nom_chroma[k]);
5142 		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5143 				__func__, k, time_per_pte_group_vblank_chroma[k]);
5144 		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma   = %f\n",
5145 				__func__, k, time_per_pte_group_flip_chroma[k]);
5146 #endif
5147 	}
5148 } // CalculateMetaAndPTETimes
5149 
/*
 * dml32_CalculateVMGroupAndRequestTimes - time budget per VM group and per VM
 * request, for the vblank and immediate-flip windows of every active surface.
 *
 * For each surface k in [0, NumberOfActiveSurfaces):
 *  - If GPUVMEnable is set and either DCCEnable[k] is set or
 *    GPUVMMaxPageTableLevels > 1, a group count and a request count are
 *    derived from the dpde0 / meta PTE byte totals, and the window
 *    DestinationLinesToRequestVM*[k] * HTotal[k] / PixelClock[k] is divided
 *    by each count. All four results are halved when
 *    GPUVMMaxPageTableLevels > 2.
 *  - Otherwise all four outputs for that surface are set to 0.
 *
 * Which byte totals contribute:
 *  - DCC disabled:                      dpde0 totals only
 *  - DCC enabled, exactly one VM level: meta PTE totals only
 *  - DCC enabled, any other level count: both, and the group count gets an
 *    extra 2 (chroma present) or 1 (luma only)
 * Chroma totals are included only when BytePerPixelC[k] > 0.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are accepted but not
 * referenced by this function.
 *
 * Outputs (indexed by k): TimePerVMGroupVBlank, TimePerVMGroupFlip,
 * TimePerVMRequestVBlank, TimePerVMRequestFlip.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int     NumberOfActiveSurfaces,
		bool     GPUVMEnable,
		unsigned int     GPUVMMaxPageTableLevels,
		unsigned int     HTotal[],
		unsigned int     BytePerPixelC[],
		double      DestinationLinesToRequestVMInVBlank[],
		double      DestinationLinesToRequestVMInImmediateFlip[],
		bool     DCCEnable[],
		double      PixelClock[],
		unsigned int        dpte_row_width_luma_ub[],
		unsigned int        dpte_row_width_chroma_ub[],
		unsigned int     vm_group_bytes[],
		unsigned int     dpde0_bytes_per_frame_ub_l[],
		unsigned int     dpde0_bytes_per_frame_ub_c[],
		unsigned int        meta_pte_bytes_per_frame_ub_l[],
		unsigned int        meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double      TimePerVMGroupVBlank[],
		double      TimePerVMGroupFlip[],
		double      TimePerVMRequestVBlank[],
		double      TimePerVMRequestFlip[])
{
	unsigned int k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		bool chroma_present;
		bool walk_dpde0;
		bool walk_meta_pte;
		double group_total;
		unsigned int group_count;
		unsigned int request_count;
		double vblank_window;
		double flip_window;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (!(GPUVMEnable && (DCCEnable[k] || GPUVMMaxPageTableLevels > 1))) {
			/* Nothing to fetch for this surface. */
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		} else {
			chroma_present = BytePerPixelC[k] > 0;
			walk_meta_pte = DCCEnable[k];
			walk_dpde0 = !DCCEnable[k] || GPUVMMaxPageTableLevels != 1;

			/*
			 * When both tables are walked the group count carries a
			 * fixed extra term: 2 with chroma, 1 without.
			 */
			if (walk_dpde0 && walk_meta_pte)
				group_total = chroma_present ? 2 : 1;
			else
				group_total = 0;
			request_count = 0;

			if (walk_dpde0) {
				group_total += dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1.0);
				request_count += dpde0_bytes_per_frame_ub_l[k] / 64;
				if (chroma_present) {
					group_total += dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					request_count += dpde0_bytes_per_frame_ub_c[k] / 64;
				}
			}

			if (walk_meta_pte) {
				group_total += dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) /
						(double) (vm_group_bytes[k]), 1.0);
				request_count += meta_pte_bytes_per_frame_ub_l[k] / 64;
				if (chroma_present) {
					group_total += dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) /
							(double) (vm_group_bytes[k]), 1.0);
					request_count += meta_pte_bytes_per_frame_ub_c[k] / 64;
				}
			}

			/* The summed ceilings are narrowed to an integer count once. */
			group_count = group_total;

			vblank_window = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k];
			flip_window = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k];

			/*
			 * NOTE(review): request_count comes from integer "/ 64"
			 * divisions, so it is 0 whenever every contributing byte
			 * total is below 64; group_count can likewise be 0. The
			 * floating-point divisions below then divide by zero.
			 * Confirm callers never reach this path with such inputs.
			 */
			TimePerVMGroupVBlank[k] = vblank_window / group_count;
			TimePerVMGroupFlip[k] = flip_window / group_count;
			TimePerVMRequestVBlank[k] = vblank_window / request_count;
			TimePerVMRequestFlip[k] = flip_window / request_count;

			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] /= 2;
				TimePerVMGroupFlip[k] /= 2;
				TimePerVMRequestVBlank[k] /= 2;
				TimePerVMRequestFlip[k] /= 2;
			}
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
5303 
dml32_CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,unsigned int nomDETInKByte,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dm_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum dm_rotation_angle SourceRotation,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)5304 void dml32_CalculateDCCConfiguration(
5305 		bool             DCCEnabled,
5306 		bool             DCCProgrammingAssumesScanDirectionUnknown,
5307 		enum source_format_class SourcePixelFormat,
5308 		unsigned int             SurfaceWidthLuma,
5309 		unsigned int             SurfaceWidthChroma,
5310 		unsigned int             SurfaceHeightLuma,
5311 		unsigned int             SurfaceHeightChroma,
5312 		unsigned int                nomDETInKByte,
5313 		unsigned int             RequestHeight256ByteLuma,
5314 		unsigned int             RequestHeight256ByteChroma,
5315 		enum dm_swizzle_mode     TilingFormat,
5316 		unsigned int             BytePerPixelY,
5317 		unsigned int             BytePerPixelC,
5318 		double              BytePerPixelDETY,
5319 		double              BytePerPixelDETC,
5320 		enum dm_rotation_angle   SourceRotation,
5321 		/* Output */
5322 		unsigned int        *MaxUncompressedBlockLuma,
5323 		unsigned int        *MaxUncompressedBlockChroma,
5324 		unsigned int        *MaxCompressedBlockLuma,
5325 		unsigned int        *MaxCompressedBlockChroma,
5326 		unsigned int        *IndependentBlockLuma,
5327 		unsigned int        *IndependentBlockChroma)
5328 {
5329 	typedef enum {
5330 		REQ_256Bytes,
5331 		REQ_128BytesNonContiguous,
5332 		REQ_128BytesContiguous,
5333 		REQ_NA
5334 	} RequestType;
5335 
5336 	RequestType   RequestLuma;
5337 	RequestType   RequestChroma;
5338 
5339 	unsigned int   segment_order_horz_contiguous_luma;
5340 	unsigned int   segment_order_horz_contiguous_chroma;
5341 	unsigned int   segment_order_vert_contiguous_luma;
5342 	unsigned int   segment_order_vert_contiguous_chroma;
5343 	unsigned int req128_horz_wc_l;
5344 	unsigned int req128_horz_wc_c;
5345 	unsigned int req128_vert_wc_l;
5346 	unsigned int req128_vert_wc_c;
5347 	unsigned int MAS_vp_horz_limit;
5348 	unsigned int MAS_vp_vert_limit;
5349 	unsigned int max_vp_horz_width;
5350 	unsigned int max_vp_vert_height;
5351 	unsigned int eff_surf_width_l;
5352 	unsigned int eff_surf_width_c;
5353 	unsigned int eff_surf_height_l;
5354 	unsigned int eff_surf_height_c;
5355 	unsigned int full_swath_bytes_horz_wc_l;
5356 	unsigned int full_swath_bytes_horz_wc_c;
5357 	unsigned int full_swath_bytes_vert_wc_l;
5358 	unsigned int full_swath_bytes_vert_wc_c;
5359 	unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5360 
5361 	unsigned int   yuv420;
5362 	unsigned int   horz_div_l;
5363 	unsigned int   horz_div_c;
5364 	unsigned int   vert_div_l;
5365 	unsigned int   vert_div_c;
5366 
5367 	unsigned int     swath_buf_size;
5368 	double   detile_buf_vp_horz_limit;
5369 	double   detile_buf_vp_vert_limit;
5370 
5371 	yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5372 			SourcePixelFormat == dm_420_12) ? 1 : 0);
5373 	horz_div_l = 1;
5374 	horz_div_c = 1;
5375 	vert_div_l = 1;
5376 	vert_div_c = 1;
5377 
5378 	if (BytePerPixelY == 1)
5379 		vert_div_l = 0;
5380 	if (BytePerPixelC == 1)
5381 		vert_div_c = 0;
5382 
5383 	if (BytePerPixelC == 0) {
5384 		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5385 		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5386 				BytePerPixelY / (1 + horz_div_l));
5387 		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5388 				(1 + vert_div_l));
5389 	} else {
5390 		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5391 		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5392 				BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5393 				BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5394 		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5395 				(1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5396 				(1 + vert_div_c) / (1 + yuv420));
5397 	}
5398 
5399 	if (SourcePixelFormat == dm_420_10) {
5400 		detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5401 		detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5402 	}
5403 
5404 	detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5405 	detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5406 
5407 	MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5408 	MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5409 	max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5410 	max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5411 	eff_surf_width_l =  (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5412 	eff_surf_width_c =  eff_surf_width_l / (1 + yuv420);
5413 	eff_surf_height_l =  (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5414 	eff_surf_height_c =  eff_surf_height_l / (1 + yuv420);
5415 
5416 	full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5417 	full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5418 	if (BytePerPixelC > 0) {
5419 		full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5420 		full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5421 	} else {
5422 		full_swath_bytes_horz_wc_c = 0;
5423 		full_swath_bytes_vert_wc_c = 0;
5424 	}
5425 
5426 	if (SourcePixelFormat == dm_420_10) {
5427 		full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5428 		full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5429 		full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5430 		full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5431 	}
5432 
5433 	if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5434 		req128_horz_wc_l = 0;
5435 		req128_horz_wc_c = 0;
5436 	} else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5437 			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5438 		req128_horz_wc_l = 0;
5439 		req128_horz_wc_c = 1;
5440 	} else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5441 			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5442 		req128_horz_wc_l = 1;
5443 		req128_horz_wc_c = 0;
5444 	} else {
5445 		req128_horz_wc_l = 1;
5446 		req128_horz_wc_c = 1;
5447 	}
5448 
5449 	if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5450 		req128_vert_wc_l = 0;
5451 		req128_vert_wc_c = 0;
5452 	} else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5453 			full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5454 		req128_vert_wc_l = 0;
5455 		req128_vert_wc_c = 1;
5456 	} else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5457 			full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5458 		req128_vert_wc_l = 1;
5459 		req128_vert_wc_c = 0;
5460 	} else {
5461 		req128_vert_wc_l = 1;
5462 		req128_vert_wc_c = 1;
5463 	}
5464 
5465 	if (BytePerPixelY == 2) {
5466 		segment_order_horz_contiguous_luma = 0;
5467 		segment_order_vert_contiguous_luma = 1;
5468 	} else {
5469 		segment_order_horz_contiguous_luma = 1;
5470 		segment_order_vert_contiguous_luma = 0;
5471 	}
5472 
5473 	if (BytePerPixelC == 2) {
5474 		segment_order_horz_contiguous_chroma = 0;
5475 		segment_order_vert_contiguous_chroma = 1;
5476 	} else {
5477 		segment_order_horz_contiguous_chroma = 1;
5478 		segment_order_vert_contiguous_chroma = 0;
5479 	}
5480 #ifdef __DML_VBA_DEBUG__
5481 	dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5482 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5483 	dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5484 	dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5485 	dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5486 	dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5487 	dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5488 	dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5489 	dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5490 			__func__, segment_order_horz_contiguous_chroma);
5491 #endif
5492 
5493 	if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5494 		if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5495 			RequestLuma = REQ_256Bytes;
5496 		else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5497 				(req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5498 			RequestLuma = REQ_128BytesNonContiguous;
5499 		else
5500 			RequestLuma = REQ_128BytesContiguous;
5501 
5502 		if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5503 			RequestChroma = REQ_256Bytes;
5504 		else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5505 				(req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5506 			RequestChroma = REQ_128BytesNonContiguous;
5507 		else
5508 			RequestChroma = REQ_128BytesContiguous;
5509 
5510 	} else if (!IsVertical(SourceRotation)) {
5511 		if (req128_horz_wc_l == 0)
5512 			RequestLuma = REQ_256Bytes;
5513 		else if (segment_order_horz_contiguous_luma == 0)
5514 			RequestLuma = REQ_128BytesNonContiguous;
5515 		else
5516 			RequestLuma = REQ_128BytesContiguous;
5517 
5518 		if (req128_horz_wc_c == 0)
5519 			RequestChroma = REQ_256Bytes;
5520 		else if (segment_order_horz_contiguous_chroma == 0)
5521 			RequestChroma = REQ_128BytesNonContiguous;
5522 		else
5523 			RequestChroma = REQ_128BytesContiguous;
5524 
5525 	} else {
5526 		if (req128_vert_wc_l == 0)
5527 			RequestLuma = REQ_256Bytes;
5528 		else if (segment_order_vert_contiguous_luma == 0)
5529 			RequestLuma = REQ_128BytesNonContiguous;
5530 		else
5531 			RequestLuma = REQ_128BytesContiguous;
5532 
5533 		if (req128_vert_wc_c == 0)
5534 			RequestChroma = REQ_256Bytes;
5535 		else if (segment_order_vert_contiguous_chroma == 0)
5536 			RequestChroma = REQ_128BytesNonContiguous;
5537 		else
5538 			RequestChroma = REQ_128BytesContiguous;
5539 	}
5540 
5541 	if (RequestLuma == REQ_256Bytes) {
5542 		*MaxUncompressedBlockLuma = 256;
5543 		*MaxCompressedBlockLuma = 256;
5544 		*IndependentBlockLuma = 0;
5545 	} else if (RequestLuma == REQ_128BytesContiguous) {
5546 		*MaxUncompressedBlockLuma = 256;
5547 		*MaxCompressedBlockLuma = 128;
5548 		*IndependentBlockLuma = 128;
5549 	} else {
5550 		*MaxUncompressedBlockLuma = 256;
5551 		*MaxCompressedBlockLuma = 64;
5552 		*IndependentBlockLuma = 64;
5553 	}
5554 
5555 	if (RequestChroma == REQ_256Bytes) {
5556 		*MaxUncompressedBlockChroma = 256;
5557 		*MaxCompressedBlockChroma = 256;
5558 		*IndependentBlockChroma = 0;
5559 	} else if (RequestChroma == REQ_128BytesContiguous) {
5560 		*MaxUncompressedBlockChroma = 256;
5561 		*MaxCompressedBlockChroma = 128;
5562 		*IndependentBlockChroma = 128;
5563 	} else {
5564 		*MaxUncompressedBlockChroma = 256;
5565 		*MaxCompressedBlockChroma = 64;
5566 		*IndependentBlockChroma = 64;
5567 	}
5568 
5569 	if (DCCEnabled != true || BytePerPixelC == 0) {
5570 		*MaxUncompressedBlockChroma = 0;
5571 		*MaxCompressedBlockChroma = 0;
5572 		*IndependentBlockChroma = 0;
5573 	}
5574 
5575 	if (DCCEnabled != true) {
5576 		*MaxUncompressedBlockLuma = 0;
5577 		*MaxCompressedBlockLuma = 0;
5578 		*IndependentBlockLuma = 0;
5579 	}
5580 
5581 #ifdef __DML_VBA_DEBUG__
5582 	dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5583 	dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5584 	dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5585 	dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5586 	dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5587 	dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5588 #endif
5589 
5590 } // CalculateDCCConfiguration
5591 
dml32_CalculateStutterEfficiency(unsigned int CompressedBufferSizeInkByte,enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool UnboundedRequestEnabled,unsigned int MetaFIFOSizeInKEntries,unsigned int ZeroSizeBufferEntries,unsigned int PixelChunkSizeInKByte,unsigned int NumberOfActiveSurfaces,unsigned int ROBBufferSizeInKByte,double TotalDataReadBandwidth,double DCFCLK,double ReturnBW,unsigned int CompbufReservedSpace64B,unsigned int CompbufReservedSpaceZs,double SRExitTime,double SRExitZ8Time,bool SynchronizeTimingsFinal,unsigned int BlendingAndTiming[],double StutterEnterPlusExitWatermark,double Z8StutterEnterPlusExitWatermark,bool ProgressiveToInterlaceUnitInOPP,bool Interlace[],double MinTTUVBlank[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeY[],unsigned int BytePerPixelY[],double BytePerPixelDETY[],double SwathWidthY[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],double NetDCCRateLuma[],double NetDCCRateChroma[],double DCCFractionOfZeroSizeRequestsLuma[],double DCCFractionOfZeroSizeRequestsChroma[],unsigned int HTotal[],unsigned int VTotal[],double PixelClock[],double VRatio[],enum dm_rotation_angle SourceRotation[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesC[],unsigned int BlockWidth256BytesC[],unsigned int DCCYMaxUncompressedBlock[],unsigned int DCCCMaxUncompressedBlock[],unsigned int VActive[],bool DCCEnable[],bool WritebackEnable[],double ReadBandwidthSurfaceLuma[],double ReadBandwidthSurfaceChroma[],double meta_row_bw[],double dpte_row_bw[],double * StutterEfficiencyNotIncludingVBlank,double * StutterEfficiency,unsigned int * NumberOfStutterBurstsPerFrame,double * Z8StutterEfficiencyNotIncludingVBlank,double * Z8StutterEfficiency,unsigned int * Z8NumberOfStutterBurstsPerFrame,double * StutterPeriod,bool * DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)5592 void dml32_CalculateStutterEfficiency(
5593 		unsigned int      CompressedBufferSizeInkByte,
5594 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5595 		bool   UnboundedRequestEnabled,
5596 		unsigned int      MetaFIFOSizeInKEntries,
5597 		unsigned int      ZeroSizeBufferEntries,
5598 		unsigned int      PixelChunkSizeInKByte,
5599 		unsigned int   NumberOfActiveSurfaces,
5600 		unsigned int      ROBBufferSizeInKByte,
5601 		double    TotalDataReadBandwidth,
5602 		double    DCFCLK,
5603 		double    ReturnBW,
5604 		unsigned int      CompbufReservedSpace64B,
5605 		unsigned int      CompbufReservedSpaceZs,
5606 		double    SRExitTime,
5607 		double    SRExitZ8Time,
5608 		bool   SynchronizeTimingsFinal,
5609 		unsigned int   BlendingAndTiming[],
5610 		double    StutterEnterPlusExitWatermark,
5611 		double    Z8StutterEnterPlusExitWatermark,
5612 		bool   ProgressiveToInterlaceUnitInOPP,
5613 		bool   Interlace[],
5614 		double    MinTTUVBlank[],
5615 		unsigned int   DPPPerSurface[],
5616 		unsigned int      DETBufferSizeY[],
5617 		unsigned int   BytePerPixelY[],
5618 		double    BytePerPixelDETY[],
5619 		double      SwathWidthY[],
5620 		unsigned int   SwathHeightY[],
5621 		unsigned int   SwathHeightC[],
5622 		double    NetDCCRateLuma[],
5623 		double    NetDCCRateChroma[],
5624 		double    DCCFractionOfZeroSizeRequestsLuma[],
5625 		double    DCCFractionOfZeroSizeRequestsChroma[],
5626 		unsigned int      HTotal[],
5627 		unsigned int      VTotal[],
5628 		double    PixelClock[],
5629 		double    VRatio[],
5630 		enum dm_rotation_angle SourceRotation[],
5631 		unsigned int   BlockHeight256BytesY[],
5632 		unsigned int   BlockWidth256BytesY[],
5633 		unsigned int   BlockHeight256BytesC[],
5634 		unsigned int   BlockWidth256BytesC[],
5635 		unsigned int   DCCYMaxUncompressedBlock[],
5636 		unsigned int   DCCCMaxUncompressedBlock[],
5637 		unsigned int      VActive[],
5638 		bool   DCCEnable[],
5639 		bool   WritebackEnable[],
5640 		double    ReadBandwidthSurfaceLuma[],
5641 		double    ReadBandwidthSurfaceChroma[],
5642 		double    meta_row_bw[],
5643 		double    dpte_row_bw[],
5644 
5645 		/* Output */
5646 		double   *StutterEfficiencyNotIncludingVBlank,
5647 		double   *StutterEfficiency,
5648 		unsigned int     *NumberOfStutterBurstsPerFrame,
5649 		double   *Z8StutterEfficiencyNotIncludingVBlank,
5650 		double   *Z8StutterEfficiency,
5651 		unsigned int     *Z8NumberOfStutterBurstsPerFrame,
5652 		double   *StutterPeriod,
5653 		bool  *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5654 {
5655 
5656 	bool FoundCriticalSurface = false;
5657 	unsigned int SwathSizeCriticalSurface = 0;
5658 	unsigned int LastChunkOfSwathSize;
5659 	unsigned int MissingPartOfLastSwathOfDETSize;
5660 	double LastZ8StutterPeriod = 0.0;
5661 	double LastStutterPeriod = 0.0;
5662 	unsigned int TotalNumberOfActiveOTG = 0;
5663 	double doublePixelClock;
5664 	unsigned int doubleHTotal;
5665 	unsigned int doubleVTotal;
5666 	bool SameTiming = true;
5667 	double DETBufferingTimeY;
5668 	double SwathWidthYCriticalSurface = 0.0;
5669 	double SwathHeightYCriticalSurface = 0.0;
5670 	double VActiveTimeCriticalSurface = 0.0;
5671 	double FrameTimeCriticalSurface = 0.0;
5672 	unsigned int BytePerPixelYCriticalSurface = 0;
5673 	double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5674 	unsigned int DETBufferSizeYCriticalSurface = 0;
5675 	double MinTTUVBlankCriticalSurface = 0.0;
5676 	unsigned int BlockWidth256BytesYCriticalSurface = 0;
5677 	bool doublePlaneCriticalSurface = 0;
5678 	bool doublePipeCriticalSurface = 0;
5679 	double TotalCompressedReadBandwidth;
5680 	double TotalRowReadBandwidth;
5681 	double AverageDCCCompressionRate;
5682 	double EffectiveCompressedBufferSize;
5683 	double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5684 	double StutterBurstTime;
5685 	unsigned int TotalActiveWriteback;
5686 	double LinesInDETY;
5687 	double LinesInDETYRoundedDownToSwath;
5688 	double MaximumEffectiveCompressionLuma;
5689 	double MaximumEffectiveCompressionChroma;
5690 	double TotalZeroSizeRequestReadBandwidth;
5691 	double TotalZeroSizeCompressedReadBandwidth;
5692 	double AverageDCCZeroSizeFraction;
5693 	double AverageZeroSizeCompressionRate;
5694 	unsigned int k;
5695 
5696 	TotalZeroSizeRequestReadBandwidth = 0;
5697 	TotalZeroSizeCompressedReadBandwidth = 0;
5698 	TotalRowReadBandwidth = 0;
5699 	TotalCompressedReadBandwidth = 0;
5700 
5701 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5702 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5703 			if (DCCEnable[k] == true) {
5704 				if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5705 						|| (!IsVertical(SourceRotation[k])
5706 								&& BlockHeight256BytesY[k] > SwathHeightY[k])
5707 						|| DCCYMaxUncompressedBlock[k] < 256) {
5708 					MaximumEffectiveCompressionLuma = 2;
5709 				} else {
5710 					MaximumEffectiveCompressionLuma = 4;
5711 				}
5712 				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5713 						+ ReadBandwidthSurfaceLuma[k]
5714 								/ dml_min(NetDCCRateLuma[k],
5715 										MaximumEffectiveCompressionLuma);
5716 #ifdef __DML_VBA_DEBUG__
5717 				dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5718 						__func__, k, ReadBandwidthSurfaceLuma[k]);
5719 				dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5720 						__func__, k, NetDCCRateLuma[k]);
5721 				dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5722 						__func__, k, MaximumEffectiveCompressionLuma);
5723 #endif
5724 				TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5725 						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5726 				TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5727 						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5728 								/ MaximumEffectiveCompressionLuma;
5729 
5730 				if (ReadBandwidthSurfaceChroma[k] > 0) {
5731 					if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5732 							|| (!IsVertical(SourceRotation[k])
5733 									&& BlockHeight256BytesC[k] > SwathHeightC[k])
5734 							|| DCCCMaxUncompressedBlock[k] < 256) {
5735 						MaximumEffectiveCompressionChroma = 2;
5736 					} else {
5737 						MaximumEffectiveCompressionChroma = 4;
5738 					}
5739 					TotalCompressedReadBandwidth =
5740 							TotalCompressedReadBandwidth
5741 							+ ReadBandwidthSurfaceChroma[k]
5742 							/ dml_min(NetDCCRateChroma[k],
5743 							MaximumEffectiveCompressionChroma);
5744 #ifdef __DML_VBA_DEBUG__
5745 					dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5746 							__func__, k, ReadBandwidthSurfaceChroma[k]);
5747 					dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5748 							__func__, k, NetDCCRateChroma[k]);
5749 					dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5750 							__func__, k, MaximumEffectiveCompressionChroma);
5751 #endif
5752 					TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5753 							+ ReadBandwidthSurfaceChroma[k]
5754 									* DCCFractionOfZeroSizeRequestsChroma[k];
5755 					TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5756 							+ ReadBandwidthSurfaceChroma[k]
5757 									* DCCFractionOfZeroSizeRequestsChroma[k]
5758 									/ MaximumEffectiveCompressionChroma;
5759 				}
5760 			} else {
5761 				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5762 						+ ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5763 			}
5764 			TotalRowReadBandwidth = TotalRowReadBandwidth
5765 					+ DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5766 		}
5767 	}
5768 
5769 	AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5770 	AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5771 
5772 #ifdef __DML_VBA_DEBUG__
5773 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5774 	dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5775 	dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5776 	dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5777 			__func__, TotalZeroSizeCompressedReadBandwidth);
5778 	dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5779 	dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5780 	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5781 	dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5782 	dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5783 	dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5784 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5785 #endif
5786 	if (AverageDCCZeroSizeFraction == 1) {
5787 		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5788 				/ TotalZeroSizeCompressedReadBandwidth;
5789 		EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5790 				* AverageZeroSizeCompressionRate
5791 				+ ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5792 						* AverageZeroSizeCompressionRate;
5793 	} else if (AverageDCCZeroSizeFraction > 0) {
5794 		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5795 				/ TotalZeroSizeCompressedReadBandwidth;
5796 		EffectiveCompressedBufferSize = dml_min(
5797 				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5798 				(double) MetaFIFOSizeInKEntries * 1024 * 64
5799 					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5800 					+ 1 / AverageDCCCompressionRate))
5801 					+ dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5802 					* AverageDCCCompressionRate,
5803 					((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5804 					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5805 
5806 #ifdef __DML_VBA_DEBUG__
5807 		dml_print("DML::%s: min 1 = %f\n", __func__,
5808 				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5809 		dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5810 				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5811 						AverageDCCCompressionRate));
5812 		dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5813 				CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5814 		dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5815 				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5816 #endif
5817 	} else {
5818 		EffectiveCompressedBufferSize = dml_min(
5819 				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5820 				(double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5821 				+ ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5822 						* AverageDCCCompressionRate;
5823 
5824 #ifdef __DML_VBA_DEBUG__
5825 		dml_print("DML::%s: min 1 = %f\n", __func__,
5826 				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5827 		dml_print("DML::%s: min 2 = %f\n", __func__,
5828 				MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5829 #endif
5830 	}
5831 
5832 #ifdef __DML_VBA_DEBUG__
5833 	dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5834 	dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5835 	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5836 #endif
5837 
5838 	*StutterPeriod = 0;
5839 
5840 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5841 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5842 			LinesInDETY = ((double) DETBufferSizeY[k]
5843 					+ (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5844 							* ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5845 					/ BytePerPixelDETY[k] / SwathWidthY[k];
5846 			LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5847 			DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5848 					/ VRatio[k];
5849 #ifdef __DML_VBA_DEBUG__
5850 			dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5851 			dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5852 			dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5853 			dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5854 					__func__, k, ReadBandwidthSurfaceLuma[k]);
5855 			dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5856 			dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5857 			dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5858 					__func__, k, LinesInDETYRoundedDownToSwath);
5859 			dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5860 			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5861 			dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5862 			dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5863 			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5864 #endif
5865 
5866 			if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5867 				bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5868 
5869 				FoundCriticalSurface = true;
5870 				*StutterPeriod = DETBufferingTimeY;
5871 				FrameTimeCriticalSurface = (
5872 						isInterlaceTiming ?
5873 								dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5874 						* (double) HTotal[k] / PixelClock[k];
5875 				VActiveTimeCriticalSurface = (
5876 						isInterlaceTiming ?
5877 								dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5878 						* (double) HTotal[k] / PixelClock[k];
5879 				BytePerPixelYCriticalSurface = BytePerPixelY[k];
5880 				SwathWidthYCriticalSurface = SwathWidthY[k];
5881 				SwathHeightYCriticalSurface = SwathHeightY[k];
5882 				BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5883 				LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5884 						- (LinesInDETY - LinesInDETYRoundedDownToSwath);
5885 				DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5886 				MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5887 				doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5888 				doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5889 
5890 #ifdef __DML_VBA_DEBUG__
5891 				dml_print("DML::%s: k=%0d, FoundCriticalSurface                = %d\n",
5892 						__func__, k, FoundCriticalSurface);
5893 				dml_print("DML::%s: k=%0d, StutterPeriod                       = %f\n",
5894 						__func__, k, *StutterPeriod);
5895 				dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface         = %f\n",
5896 						__func__, k, MinTTUVBlankCriticalSurface);
5897 				dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface            = %f\n",
5898 						__func__, k, FrameTimeCriticalSurface);
5899 				dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface          = %f\n",
5900 						__func__, k, VActiveTimeCriticalSurface);
5901 				dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface        = %d\n",
5902 						__func__, k, BytePerPixelYCriticalSurface);
5903 				dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface          = %f\n",
5904 						__func__, k, SwathWidthYCriticalSurface);
5905 				dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface         = %f\n",
5906 						__func__, k, SwathHeightYCriticalSurface);
5907 				dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface  = %d\n",
5908 						__func__, k, BlockWidth256BytesYCriticalSurface);
5909 				dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface          = %d\n",
5910 						__func__, k, doublePlaneCriticalSurface);
5911 				dml_print("DML::%s: k=%0d, doublePipeCriticalSurface           = %d\n",
5912 						__func__, k, doublePipeCriticalSurface);
5913 				dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5914 						__func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5915 #endif
5916 			}
5917 		}
5918 	}
5919 
5920 	PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5921 			EffectiveCompressedBufferSize);
5922 #ifdef __DML_VBA_DEBUG__
5923 	dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5924 	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5925 	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5926 			__func__, *StutterPeriod * TotalDataReadBandwidth);
5927 	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5928 	dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5929 			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5930 	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5931 	dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5932 	dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5933 	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5934 #endif
5935 
5936 	StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5937 			/ ReturnBW
5938 			+ (*StutterPeriod * TotalDataReadBandwidth
5939 					- PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5940 			+ *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5941 #ifdef __DML_VBA_DEBUG__
5942 	dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5943 			AverageDCCCompressionRate / ReturnBW);
5944 	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5945 			__func__, (*StutterPeriod * TotalDataReadBandwidth));
5946 	dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5947 			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5948 	dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5949 	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5950 #endif
5951 	StutterBurstTime = dml_max(StutterBurstTime,
5952 			LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5953 					* SwathWidthYCriticalSurface / ReturnBW);
5954 
5955 #ifdef __DML_VBA_DEBUG__
5956 	dml_print("DML::%s: Time to finish residue swath=%f\n",
5957 			__func__,
5958 			LinesToFinishSwathTransferStutterCriticalSurface *
5959 			BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5960 #endif
5961 
5962 	TotalActiveWriteback = 0;
5963 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5964 		if (WritebackEnable[k])
5965 			TotalActiveWriteback = TotalActiveWriteback + 1;
5966 	}
5967 
5968 	if (TotalActiveWriteback == 0) {
5969 #ifdef __DML_VBA_DEBUG__
5970 		dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5971 		dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5972 		dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5973 		dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5974 #endif
5975 		*StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5976 				1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5977 		*Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5978 				1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5979 		*NumberOfStutterBurstsPerFrame = (
5980 				*StutterEfficiencyNotIncludingVBlank > 0 ?
5981 						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5982 		*Z8NumberOfStutterBurstsPerFrame = (
5983 				*Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5984 						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5985 	} else {
5986 		*StutterEfficiencyNotIncludingVBlank = 0.;
5987 		*Z8StutterEfficiencyNotIncludingVBlank = 0.;
5988 		*NumberOfStutterBurstsPerFrame = 0;
5989 		*Z8NumberOfStutterBurstsPerFrame = 0;
5990 	}
5991 #ifdef __DML_VBA_DEBUG__
5992 	dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5993 	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5994 			__func__, *StutterEfficiencyNotIncludingVBlank);
5995 	dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5996 			__func__, *Z8StutterEfficiencyNotIncludingVBlank);
5997 	dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5998 	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5999 #endif
6000 
6001 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6002 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
6003 			if (BlendingAndTiming[k] == k) {
6004 				if (TotalNumberOfActiveOTG == 0) {
6005 					doublePixelClock = PixelClock[k];
6006 					doubleHTotal = HTotal[k];
6007 					doubleVTotal = VTotal[k];
6008 				} else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
6009 						|| doubleVTotal != VTotal[k]) {
6010 					SameTiming = false;
6011 				}
6012 				TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6013 			}
6014 		}
6015 	}
6016 
6017 	if (*StutterEfficiencyNotIncludingVBlank > 0) {
6018 		LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6019 
6020 		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
6021 				&& LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
6022 			*StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
6023 						+ StutterBurstTime * VActiveTimeCriticalSurface
6024 						/ *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6025 		} else {
6026 			*StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6027 		}
6028 	} else {
6029 		*StutterEfficiency = 0;
6030 	}
6031 
6032 	if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6033 		LastZ8StutterPeriod = VActiveTimeCriticalSurface
6034 				- (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6035 		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
6036 				MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
6037 			*Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
6038 				* VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6039 		} else {
6040 			*Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6041 		}
6042 	} else {
6043 		*Z8StutterEfficiency = 0.;
6044 	}
6045 
6046 #ifdef __DML_VBA_DEBUG__
6047 	dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6048 	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6049 	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6050 	dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6051 	dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6052 	dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6053 	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
6054 			__func__, *StutterEfficiencyNotIncludingVBlank);
6055 	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6056 #endif
6057 
6058 	SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
6059 			* dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
6060 	LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6061 	MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
6062 			- DETBufferSizeYCriticalSurface;
6063 
6064 	*DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6065 			&& doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6066 			&& (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6067 			&& (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6068 
6069 #ifdef __DML_VBA_DEBUG__
6070 	dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6071 	dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6072 	dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6073 	dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6074 #endif
6075 } // CalculateStutterEfficiency
6076 
/*
 * dml32_CalculateMaxDETAndMinCompressedBufferSize - derive the DET size
 * limits and the leftover compressed buffer size.
 *
 * Inputs:
 *   ConfigReturnBufferSizeInKByte, ROBBufferSizeInKByte - buffer sizes that
 *       are summed to form the basis of the DET budget
 *   MaxNumDPP - divisor used to split the total DET budget
 *   nomDETInKByteOverrideEnable / nomDETInKByteOverrideValue - when the
 *       enable is set, the override value replaces the computed nominal size
 *
 * Outputs:
 *   *MaxTotalDETInKByte - 4/5 of the summed buffer sizes, passed through
 *       dml_ceil() with a granularity argument of 64
 *   *nomDETInKByte - *MaxTotalDETInKByte / MaxNumDPP passed through
 *       dml_floor() with a granularity argument of 64, or the override value
 *   *MinCompressedBufferSizeInKByte - ConfigReturnBufferSizeInKByte minus
 *       *MaxTotalDETInKByte
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int    ConfigReturnBufferSizeInKByte,
		unsigned int    ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue,  // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	const double return_plus_rob_kbyte = (double) ConfigReturnBufferSizeInKByte
			+ (double) ROBBufferSizeInKByte;

	/* Each output is stored before the next one is derived from it. */
	*MaxTotalDETInKByte = dml_ceil(return_plus_rob_kbyte * 4.0 / 5.0, 64);
	*nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* Without an override the computed nominal DET size stands. */
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} // CalculateMaxDETAndMinCompressedBufferSize
6113 
/*
 * dml32_CalculateVActiveBandwithSupport - check whether the summed per-surface
 * bandwidth fits within ReturnBW.
 *
 * For each of the first NumberOfActiveSurfaces entries the following is
 * accumulated:
 *   ReadBandwidthLuma * UrgentBurstFactorLuma
 *   + ReadBandwidthChroma * UrgentBurstFactorChroma
 *   + cursor_bw * UrgentBurstFactorCursor
 *   + NumberOfDPP * meta_row_bandwidth
 *   + NumberOfDPP * dpte_row_bandwidth
 *
 * Returns true only when that sum is <= ReturnBW and no entry of
 * NotUrgentLatencyHiding[] in the same range is set.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int surf;
	bool latency_hiding_failed = false;
	bool supported;
	double total_bw = 0;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		latency_hiding_failed = latency_hiding_failed || NotUrgentLatencyHiding[surf];

		/* Terms are added strictly left to right onto the running total. */
		total_bw = total_bw
				+ ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf]
				+ NumberOfDPP[surf] * meta_row_bandwidth[surf]
				+ NumberOfDPP[surf] * dpte_row_bandwidth[surf];
	}

	supported = !latency_hiding_failed && (total_bw <= ReturnBW);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding        = %d\n", __func__, latency_hiding_failed);
	dml_print("DML::%s: VActiveBandwith                     = %f\n", __func__, total_bw);
	dml_print("DML::%s: ReturnBW                            = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, supported);
#endif
	return supported;
}
6152 
dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,double ReturnBW,bool NotUrgentLatencyHiding[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double cursor_bw[],double meta_row_bandwidth[],double dpte_row_bandwidth[],double cursor_bw_pre[],double prefetch_vmrow_bw[],unsigned int NumberOfDPP[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double PrefetchBW[],double VRatio[],double MaxVRatioPre,double * MaxPrefetchBandwidth,double * FractionOfUrgentBandwidth,bool * PrefetchBandwidthSupport)6153 void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
6154 		double ReturnBW,
6155 		bool NotUrgentLatencyHiding[],
6156 		double ReadBandwidthLuma[],
6157 		double ReadBandwidthChroma[],
6158 		double PrefetchBandwidthLuma[],
6159 		double PrefetchBandwidthChroma[],
6160 		double cursor_bw[],
6161 		double meta_row_bandwidth[],
6162 		double dpte_row_bandwidth[],
6163 		double cursor_bw_pre[],
6164 		double prefetch_vmrow_bw[],
6165 		unsigned int NumberOfDPP[],
6166 		double UrgentBurstFactorLuma[],
6167 		double UrgentBurstFactorChroma[],
6168 		double UrgentBurstFactorCursor[],
6169 		double UrgentBurstFactorLumaPre[],
6170 		double UrgentBurstFactorChromaPre[],
6171 		double UrgentBurstFactorCursorPre[],
6172 		double PrefetchBW[],
6173 		double VRatio[],
6174 		double MaxVRatioPre,
6175 
6176 		/* output */
6177 		double  *MaxPrefetchBandwidth,
6178 		double  *FractionOfUrgentBandwidth,
6179 		bool *PrefetchBandwidthSupport)
6180 {
6181 	unsigned int k;
6182 	double ActiveBandwidthPerSurface;
6183 	bool NotEnoughUrgentLatencyHiding = false;
6184 	double TotalActiveBandwidth = 0;
6185 	double TotalPrefetchBandwidth = 0;
6186 
6187 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6188 		if (NotUrgentLatencyHiding[k]) {
6189 			NotEnoughUrgentLatencyHiding = true;
6190 		}
6191 	}
6192 
6193 	*MaxPrefetchBandwidth = 0;
6194 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6195 		ActiveBandwidthPerSurface = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]);
6196 
6197 		TotalActiveBandwidth += ActiveBandwidthPerSurface;
6198 
6199 		TotalPrefetchBandwidth = TotalPrefetchBandwidth + PrefetchBW[k] * VRatio[k];
6200 
6201 		*MaxPrefetchBandwidth = *MaxPrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6202 				ActiveBandwidthPerSurface,
6203 				NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6204 	}
6205 
6206 	if (MaxVRatioPre == __DML_MAX_VRATIO_PRE__)
6207 		*PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && (TotalPrefetchBandwidth <= TotalActiveBandwidth * __DML_MAX_BW_RATIO_PRE__) && !NotEnoughUrgentLatencyHiding;
6208 	else
6209 		*PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
6210 
6211 	*FractionOfUrgentBandwidth = *MaxPrefetchBandwidth / ReturnBW;
6212 }
6213 
/*
 * dml32_CalculateBandwidthAvailableForImmediateFlip - ReturnBW minus what the
 * surfaces already consume.
 *
 * For each of the first NumberOfActiveSurfaces entries, the larger (via
 * dml_max()) of these two values is subtracted from ReturnBW:
 *   - luma/chroma/cursor read bandwidth scaled by their urgent burst factors
 *   - NumberOfDPP * (luma/chroma prefetch bandwidth scaled by the "Pre" burst
 *     factors) plus cursor_bw_pre scaled by UrgentBurstFactorCursorPre
 *
 * Returns the remainder; nothing in this function clamps it at zero.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	unsigned int surf;
	double remaining_bw = ReturnBW;
	double active_bw;
	double prefetch_bw;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		active_bw = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf];

		prefetch_bw = NumberOfDPP[surf]
				* (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
					+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
				+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		remaining_bw = remaining_bw - dml_max(active_bw, prefetch_bw);
	}

	return remaining_bw;
}
6240 
dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,double ReturnBW,enum immediate_flip_requirement ImmediateFlipRequirement[],double final_flip_bw[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double cursor_bw[],double meta_row_bandwidth[],double dpte_row_bandwidth[],double cursor_bw_pre[],double prefetch_vmrow_bw[],unsigned int NumberOfDPP[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double * TotalBandwidth,double * FractionOfUrgentBandwidth,bool * ImmediateFlipBandwidthSupport)6241 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6242 		double ReturnBW,
6243 		enum immediate_flip_requirement ImmediateFlipRequirement[],
6244 		double final_flip_bw[],
6245 		double ReadBandwidthLuma[],
6246 		double ReadBandwidthChroma[],
6247 		double PrefetchBandwidthLuma[],
6248 		double PrefetchBandwidthChroma[],
6249 		double cursor_bw[],
6250 		double meta_row_bandwidth[],
6251 		double dpte_row_bandwidth[],
6252 		double cursor_bw_pre[],
6253 		double prefetch_vmrow_bw[],
6254 		unsigned int NumberOfDPP[],
6255 		double UrgentBurstFactorLuma[],
6256 		double UrgentBurstFactorChroma[],
6257 		double UrgentBurstFactorCursor[],
6258 		double UrgentBurstFactorLumaPre[],
6259 		double UrgentBurstFactorChromaPre[],
6260 		double UrgentBurstFactorCursorPre[],
6261 
6262 		/* output */
6263 		double  *TotalBandwidth,
6264 		double  *FractionOfUrgentBandwidth,
6265 		bool *ImmediateFlipBandwidthSupport)
6266 {
6267 	unsigned int k;
6268 	*TotalBandwidth = 0;
6269 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6270 		if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6271 			*TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6272 					NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6273 					NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6274 		} else {
6275 			*TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6276 					NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6277 					NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6278 		}
6279 	}
6280 	*ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6281 	*FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6282 }
6283 
dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,double ReturnBW,double UrgentLatency,unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],double BytePerPixelInDETY[],double BytePerPixelInDETC[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int NumOfDPP[],unsigned int HTotal[],double PixelClock[],double VRatioY[],double VRatioC[],enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],enum unbounded_requesting_policy UseUnboundedRequesting)6284 bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
6285 		double ReturnBW,
6286 		double UrgentLatency,
6287 		unsigned int SwathHeightY[],
6288 		unsigned int SwathHeightC[],
6289 		unsigned int SwathWidthY[],
6290 		unsigned int SwathWidthC[],
6291 		double  BytePerPixelInDETY[],
6292 		double  BytePerPixelInDETC[],
6293 		unsigned int    DETBufferSizeY[],
6294 		unsigned int    DETBufferSizeC[],
6295 		unsigned int	NumOfDPP[],
6296 		unsigned int	HTotal[],
6297 		double	PixelClock[],
6298 		double	VRatioY[],
6299 		double	VRatioC[],
6300 		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
6301 		enum unbounded_requesting_policy UseUnboundedRequesting)
6302 {
6303 	int k;
6304 	double SwathSizeAllSurfaces = 0;
6305 	double SwathSizeAllSurfacesInFetchTimeUs;
6306 	double DETSwathLatencyHidingUs;
6307 	double DETSwathLatencyHidingYUs;
6308 	double DETSwathLatencyHidingCUs;
6309 	double SwathSizePerSurfaceY[DC__NUM_DPP__MAX];
6310 	double SwathSizePerSurfaceC[DC__NUM_DPP__MAX];
6311 	bool NotEnoughDETSwathFillLatencyHiding = false;
6312 
6313 	if (UseUnboundedRequesting == dm_unbounded_requesting)
6314 		return false;
6315 
6316 	/* calculate sum of single swath size for all pipes in bytes */
6317 	for (k = 0; k < NumberOfActiveSurfaces; k++) {
6318 		SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k];
6319 
6320 		if (SwathHeightC[k] != 0)
6321 			SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k];
6322 		else
6323 			SwathSizePerSurfaceC[k] = 0;
6324 
6325 		SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k];
6326 	}
6327 
6328 	SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency;
6329 
6330 	/* ensure all DET - 1 swath can hide a fetch for all surfaces */
6331 	for (k = 0; k < NumberOfActiveSurfaces; k++) {
6332 		double LineTime = HTotal[k] / PixelClock[k];
6333 
6334 		/* only care if surface is not phantom */
6335 		if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
6336 			DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime;
6337 
6338 			if (SwathHeightC[k] != 0) {
6339 				DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime;
6340 
6341 				DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs);
6342 			} else {
6343 				DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs;
6344 			}
6345 
6346 			/* DET must be able to hide time to fetch 1 swath for each surface */
6347 			if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) {
6348 				NotEnoughDETSwathFillLatencyHiding = true;
6349 				break;
6350 			}
6351 		}
6352 	}
6353 
6354 	return NotEnoughDETSwathFillLatencyHiding;
6355 }
6356