1 /*
2  * Copyright 2022 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: AMD
23  *
24  */
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
29 
/*
 * Width limit (4096) that the name ties to the FMT 4:2:0 buffer.
 * NOTE(review): presumably expressed in pixels; the macro has no user in this
 * part of the file, so confirm the unit at its use sites.
 */
#define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096
31 
dml32_dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)32 unsigned int dml32_dscceComputeDelay(
33 		unsigned int bpc,
34 		double BPP,
35 		unsigned int sliceWidth,
36 		unsigned int numSlices,
37 		enum output_format_class pixelFormat,
38 		enum output_encoder_class Output)
39 {
40 	// valid bpc         = source bits per component in the set of {8, 10, 12}
41 	// valid bpp         = increments of 1/16 of a bit
42 	//                    min = 6/7/8 in N420/N422/444, respectively
43 	//                    max = such that compression is 1:1
44 	//valid sliceWidth  = number of pixels per slice line,
45 	//	must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
46 	//valid numSlices   = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
47 	//valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
48 
49 	// fixed value
50 	unsigned int rcModelSize = 8192;
51 
52 	// N422/N420 operate at 2 pixels per clock
53 	unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
54 	Delay, pixels;
55 
56 	if (pixelFormat == dm_420)
57 		pixelsPerClock = 2;
58 	else if (pixelFormat == dm_n422)
59 		pixelsPerClock = 2;
60 	// #all other modes operate at 1 pixel per clock
61 	else
62 		pixelsPerClock = 1;
63 
64 	//initial transmit delay as per PPS
65 	initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
66 
67 	//compute ssm delay
68 	if (bpc == 8)
69 		D = 81;
70 	else if (bpc == 10)
71 		D = 89;
72 	else
73 		D = 113;
74 
75 	//divide by pixel per cycle to compute slice width as seen by DSC
76 	w = sliceWidth / pixelsPerClock;
77 
78 	//422 mode has an additional cycle of delay
79 	if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
80 		s = 0;
81 	else
82 		s = 1;
83 
84 	//main calculation for the dscce
85 	ix = initalXmitDelay + 45;
86 	wx = (w + 2) / 3;
87 	p = 3 * wx - w;
88 	l0 = ix / w;
89 	a = ix + p * l0;
90 	ax = (a + 2) / 3 + D + 6 + 1;
91 	L = (ax + wx - 1) / wx;
92 	if ((ix % w) == 0 && p != 0)
93 		lstall = 1;
94 	else
95 		lstall = 0;
96 	Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
97 
98 	//dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
99 	pixels = Delay * 3 * pixelsPerClock;
100 
101 #ifdef __DML_VBA_DEBUG__
102 	dml_print("DML::%s: bpc: %d\n", __func__, bpc);
103 	dml_print("DML::%s: BPP: %f\n", __func__, BPP);
104 	dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
105 	dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
106 	dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
107 	dml_print("DML::%s: Output: %d\n", __func__, Output);
108 	dml_print("DML::%s: pixels: %d\n", __func__, pixels);
109 #endif
110 
111 	return pixels;
112 }
113 
dml32_dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)114 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
115 {
116 	unsigned int Delay = 0;
117 
118 	if (pixelFormat == dm_420) {
119 		//   sfr
120 		Delay = Delay + 2;
121 		//   dsccif
122 		Delay = Delay + 0;
123 		//   dscc - input deserializer
124 		Delay = Delay + 3;
125 		//   dscc gets pixels every other cycle
126 		Delay = Delay + 2;
127 		//   dscc - input cdc fifo
128 		Delay = Delay + 12;
129 		//   dscc gets pixels every other cycle
130 		Delay = Delay + 13;
131 		//   dscc - cdc uncertainty
132 		Delay = Delay + 2;
133 		//   dscc - output cdc fifo
134 		Delay = Delay + 7;
135 		//   dscc gets pixels every other cycle
136 		Delay = Delay + 3;
137 		//   dscc - cdc uncertainty
138 		Delay = Delay + 2;
139 		//   dscc - output serializer
140 		Delay = Delay + 1;
141 		//   sft
142 		Delay = Delay + 1;
143 	} else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
144 		//   sfr
145 		Delay = Delay + 2;
146 		//   dsccif
147 		Delay = Delay + 1;
148 		//   dscc - input deserializer
149 		Delay = Delay + 5;
150 		//  dscc - input cdc fifo
151 		Delay = Delay + 25;
152 		//   dscc - cdc uncertainty
153 		Delay = Delay + 2;
154 		//   dscc - output cdc fifo
155 		Delay = Delay + 10;
156 		//   dscc - cdc uncertainty
157 		Delay = Delay + 2;
158 		//   dscc - output serializer
159 		Delay = Delay + 1;
160 		//   sft
161 		Delay = Delay + 1;
162 	} else {
163 		//   sfr
164 		Delay = Delay + 2;
165 		//   dsccif
166 		Delay = Delay + 0;
167 		//   dscc - input deserializer
168 		Delay = Delay + 3;
169 		//   dscc - input cdc fifo
170 		Delay = Delay + 12;
171 		//   dscc - cdc uncertainty
172 		Delay = Delay + 2;
173 		//   dscc - output cdc fifo
174 		Delay = Delay + 7;
175 		//   dscc - output serializer
176 		Delay = Delay + 1;
177 		//   dscc - cdc uncertainty
178 		Delay = Delay + 2;
179 		//   sft
180 		Delay = Delay + 1;
181 	}
182 
183 	return Delay;
184 }
185 
186 
IsVertical(enum dm_rotation_angle Scan)187 bool IsVertical(enum dm_rotation_angle Scan)
188 {
189 	bool is_vert = false;
190 
191 	if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
192 		is_vert = true;
193 	else
194 		is_vert = false;
195 	return is_vert;
196 }
197 
dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(double HRatio,double HRatioChroma,double VRatio,double VRatioChroma,double MaxDCHUBToPSCLThroughput,double MaxPSCLToLBThroughput,double PixelClock,enum source_format_class SourcePixelFormat,unsigned int HTaps,unsigned int HTapsChroma,unsigned int VTaps,unsigned int VTapsChroma,double * PSCL_THROUGHPUT,double * PSCL_THROUGHPUT_CHROMA,double * DPPCLKUsingSingleDPP)198 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
199 		double HRatio,
200 		double HRatioChroma,
201 		double VRatio,
202 		double VRatioChroma,
203 		double MaxDCHUBToPSCLThroughput,
204 		double MaxPSCLToLBThroughput,
205 		double PixelClock,
206 		enum source_format_class SourcePixelFormat,
207 		unsigned int HTaps,
208 		unsigned int HTapsChroma,
209 		unsigned int VTaps,
210 		unsigned int VTapsChroma,
211 
212 		/* output */
213 		double *PSCL_THROUGHPUT,
214 		double *PSCL_THROUGHPUT_CHROMA,
215 		double *DPPCLKUsingSingleDPP)
216 {
217 	double DPPCLKUsingSingleDPPLuma;
218 	double DPPCLKUsingSingleDPPChroma;
219 
220 	if (HRatio > 1) {
221 		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
222 				dml_ceil((double) HTaps / 6.0, 1.0));
223 	} else {
224 		*PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
225 	}
226 
227 	DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
228 			*PSCL_THROUGHPUT, 1);
229 
230 	if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
231 		DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
232 
233 	if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
234 			SourcePixelFormat != dm_rgbe_alpha)) {
235 		*PSCL_THROUGHPUT_CHROMA = 0;
236 		*DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
237 	} else {
238 		if (HRatioChroma > 1) {
239 			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
240 					HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
241 		} else {
242 			*PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
243 		}
244 		DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
245 				HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
246 		if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
247 			DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
248 		*DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
249 	}
250 }
251 
dml32_CalculateBytePerPixelAndBlockSizes(enum source_format_class SourcePixelFormat,enum dm_swizzle_mode SurfaceTiling,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC,unsigned int * MacroTileHeightY,unsigned int * MacroTileHeightC,unsigned int * MacroTileWidthY,unsigned int * MacroTileWidthC)252 void dml32_CalculateBytePerPixelAndBlockSizes(
253 		enum source_format_class SourcePixelFormat,
254 		enum dm_swizzle_mode SurfaceTiling,
255 
256 		/* Output */
257 		unsigned int *BytePerPixelY,
258 		unsigned int *BytePerPixelC,
259 		double  *BytePerPixelDETY,
260 		double  *BytePerPixelDETC,
261 		unsigned int *BlockHeight256BytesY,
262 		unsigned int *BlockHeight256BytesC,
263 		unsigned int *BlockWidth256BytesY,
264 		unsigned int *BlockWidth256BytesC,
265 		unsigned int *MacroTileHeightY,
266 		unsigned int *MacroTileHeightC,
267 		unsigned int *MacroTileWidthY,
268 		unsigned int *MacroTileWidthC)
269 {
270 	if (SourcePixelFormat == dm_444_64) {
271 		*BytePerPixelDETY = 8;
272 		*BytePerPixelDETC = 0;
273 		*BytePerPixelY = 8;
274 		*BytePerPixelC = 0;
275 	} else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
276 		*BytePerPixelDETY = 4;
277 		*BytePerPixelDETC = 0;
278 		*BytePerPixelY = 4;
279 		*BytePerPixelC = 0;
280 	} else if (SourcePixelFormat == dm_444_16) {
281 		*BytePerPixelDETY = 2;
282 		*BytePerPixelDETC = 0;
283 		*BytePerPixelY = 2;
284 		*BytePerPixelC = 0;
285 	} else if (SourcePixelFormat == dm_444_8) {
286 		*BytePerPixelDETY = 1;
287 		*BytePerPixelDETC = 0;
288 		*BytePerPixelY = 1;
289 		*BytePerPixelC = 0;
290 	} else if (SourcePixelFormat == dm_rgbe_alpha) {
291 		*BytePerPixelDETY = 4;
292 		*BytePerPixelDETC = 1;
293 		*BytePerPixelY = 4;
294 		*BytePerPixelC = 1;
295 	} else if (SourcePixelFormat == dm_420_8) {
296 		*BytePerPixelDETY = 1;
297 		*BytePerPixelDETC = 2;
298 		*BytePerPixelY = 1;
299 		*BytePerPixelC = 2;
300 	} else if (SourcePixelFormat == dm_420_12) {
301 		*BytePerPixelDETY = 2;
302 		*BytePerPixelDETC = 4;
303 		*BytePerPixelY = 2;
304 		*BytePerPixelC = 4;
305 	} else {
306 		*BytePerPixelDETY = 4.0 / 3;
307 		*BytePerPixelDETC = 8.0 / 3;
308 		*BytePerPixelY = 2;
309 		*BytePerPixelC = 4;
310 	}
311 #ifdef __DML_VBA_DEBUG__
312 	dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
313 	dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
314 	dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
315 	dml_print("DML::%s: BytePerPixelY    = %d\n", __func__, *BytePerPixelY);
316 	dml_print("DML::%s: BytePerPixelC    = %d\n", __func__, *BytePerPixelC);
317 #endif
318 	if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
319 			|| SourcePixelFormat == dm_444_16
320 			|| SourcePixelFormat == dm_444_8
321 			|| SourcePixelFormat == dm_mono_16
322 			|| SourcePixelFormat == dm_mono_8
323 			|| SourcePixelFormat == dm_rgbe)) {
324 		if (SurfaceTiling == dm_sw_linear)
325 			*BlockHeight256BytesY = 1;
326 		else if (SourcePixelFormat == dm_444_64)
327 			*BlockHeight256BytesY = 4;
328 		else if (SourcePixelFormat == dm_444_8)
329 			*BlockHeight256BytesY = 16;
330 		else
331 			*BlockHeight256BytesY = 8;
332 
333 		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
334 		*BlockHeight256BytesC = 0;
335 		*BlockWidth256BytesC = 0;
336 	} else {
337 		if (SurfaceTiling == dm_sw_linear) {
338 			*BlockHeight256BytesY = 1;
339 			*BlockHeight256BytesC = 1;
340 		} else if (SourcePixelFormat == dm_rgbe_alpha) {
341 			*BlockHeight256BytesY = 8;
342 			*BlockHeight256BytesC = 16;
343 		} else if (SourcePixelFormat == dm_420_8) {
344 			*BlockHeight256BytesY = 16;
345 			*BlockHeight256BytesC = 8;
346 		} else {
347 			*BlockHeight256BytesY = 8;
348 			*BlockHeight256BytesC = 8;
349 		}
350 		*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
351 		*BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
352 	}
353 #ifdef __DML_VBA_DEBUG__
354 	dml_print("DML::%s: BlockWidth256BytesY  = %d\n", __func__, *BlockWidth256BytesY);
355 	dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
356 	dml_print("DML::%s: BlockWidth256BytesC  = %d\n", __func__, *BlockWidth256BytesC);
357 	dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
358 #endif
359 
360 	if (SurfaceTiling == dm_sw_linear) {
361 		*MacroTileHeightY = *BlockHeight256BytesY;
362 		*MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
363 		*MacroTileHeightC = *BlockHeight256BytesC;
364 		if (*MacroTileHeightC == 0)
365 			*MacroTileWidthC = 0;
366 		else
367 			*MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
368 	} else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
369 			SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
370 		*MacroTileHeightY = 16 * *BlockHeight256BytesY;
371 		*MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
372 		*MacroTileHeightC = 16 * *BlockHeight256BytesC;
373 		if (*MacroTileHeightC == 0)
374 			*MacroTileWidthC = 0;
375 		else
376 			*MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
377 	} else {
378 		*MacroTileHeightY = 32 * *BlockHeight256BytesY;
379 		*MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
380 		*MacroTileHeightC = 32 * *BlockHeight256BytesC;
381 		if (*MacroTileHeightC == 0)
382 			*MacroTileWidthC = 0;
383 		else
384 			*MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385 	}
386 
387 #ifdef __DML_VBA_DEBUG__
388 	dml_print("DML::%s: MacroTileWidthY  = %d\n", __func__, *MacroTileWidthY);
389 	dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
390 	dml_print("DML::%s: MacroTileWidthC  = %d\n", __func__, *MacroTileWidthC);
391 	dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
392 #endif
393 } // CalculateBytePerPixelAndBlockSizes
394 
dml32_CalculateSwathAndDETConfiguration(unsigned int DETSizeOverride[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],unsigned int ConfigReturnBufferSizeInKByte,unsigned int MaxTotalDETInKByte,unsigned int MinCompressedBufferSizeInKByte,double ForceSingleDPP,unsigned int NumberOfActiveSurfaces,unsigned int nomDETInKByte,enum unbounded_requesting_policy UseUnboundedRequestingFinal,bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,unsigned int PixelChunkSizeKBytes,unsigned int ROBSizeKBytes,unsigned int CompressedBufferSegmentSizeInkByteFinal,enum output_encoder_class Output[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double MaximumSwathWidthLuma[],double MaximumSwathWidthChroma[],enum dm_rotation_angle SourceRotation[],bool ViewportStationary[],enum source_format_class SourcePixelFormat[],enum dm_swizzle_mode SurfaceTiling[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int ViewportXStart[],unsigned int ViewportYStart[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],enum odm_combine_mode ODMMode[],unsigned int BlendingAndTiming[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],double BytePerPixDETY[],double BytePerPixDETC[],unsigned int HActive[],double HRatio[],double HRatioChroma[],unsigned int DPPPerSurface[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],double SwathWidth[],double SwathWidthChroma[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int DETBufferSizeInKByte[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],bool * UnboundedRequestEnabled,unsigned int * CompressedBufferSizeInkByte,unsigned int * CompBufReservedSpaceKBytes,bool * 
CompBufReservedSpaceNeedAdjustment,bool ViewportSizeSupportPerSurface[],bool * ViewportSizeSupport)395 void dml32_CalculateSwathAndDETConfiguration(
396 		unsigned int DETSizeOverride[],
397 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
398 		unsigned int ConfigReturnBufferSizeInKByte,
399 		unsigned int MaxTotalDETInKByte,
400 		unsigned int MinCompressedBufferSizeInKByte,
401 		double ForceSingleDPP,
402 		unsigned int NumberOfActiveSurfaces,
403 		unsigned int nomDETInKByte,
404 		enum unbounded_requesting_policy UseUnboundedRequestingFinal,
405 		bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
406 		unsigned int PixelChunkSizeKBytes,
407 		unsigned int ROBSizeKBytes,
408 		unsigned int CompressedBufferSegmentSizeInkByteFinal,
409 		enum output_encoder_class Output[],
410 		double ReadBandwidthLuma[],
411 		double ReadBandwidthChroma[],
412 		double MaximumSwathWidthLuma[],
413 		double MaximumSwathWidthChroma[],
414 		enum dm_rotation_angle SourceRotation[],
415 		bool ViewportStationary[],
416 		enum source_format_class SourcePixelFormat[],
417 		enum dm_swizzle_mode SurfaceTiling[],
418 		unsigned int ViewportWidth[],
419 		unsigned int ViewportHeight[],
420 		unsigned int ViewportXStart[],
421 		unsigned int ViewportYStart[],
422 		unsigned int ViewportXStartC[],
423 		unsigned int ViewportYStartC[],
424 		unsigned int SurfaceWidthY[],
425 		unsigned int SurfaceWidthC[],
426 		unsigned int SurfaceHeightY[],
427 		unsigned int SurfaceHeightC[],
428 		unsigned int Read256BytesBlockHeightY[],
429 		unsigned int Read256BytesBlockHeightC[],
430 		unsigned int Read256BytesBlockWidthY[],
431 		unsigned int Read256BytesBlockWidthC[],
432 		enum odm_combine_mode ODMMode[],
433 		unsigned int BlendingAndTiming[],
434 		unsigned int BytePerPixY[],
435 		unsigned int BytePerPixC[],
436 		double BytePerPixDETY[],
437 		double BytePerPixDETC[],
438 		unsigned int HActive[],
439 		double HRatio[],
440 		double HRatioChroma[],
441 		unsigned int DPPPerSurface[],
442 
443 		/* Output */
444 		unsigned int swath_width_luma_ub[],
445 		unsigned int swath_width_chroma_ub[],
446 		double SwathWidth[],
447 		double SwathWidthChroma[],
448 		unsigned int SwathHeightY[],
449 		unsigned int SwathHeightC[],
450 		unsigned int DETBufferSizeInKByte[],
451 		unsigned int DETBufferSizeY[],
452 		unsigned int DETBufferSizeC[],
453 		bool *UnboundedRequestEnabled,
454 		unsigned int *CompressedBufferSizeInkByte,
455 		unsigned int *CompBufReservedSpaceKBytes,
456 		bool *CompBufReservedSpaceNeedAdjustment,
457 		bool ViewportSizeSupportPerSurface[],
458 		bool *ViewportSizeSupport)
459 {
460 	unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
461 	unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
462 	unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
463 	unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
464 	unsigned int RoundedUpSwathSizeBytesY;
465 	unsigned int RoundedUpSwathSizeBytesC;
466 	double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
467 	double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
468 	unsigned int k;
469 	unsigned int TotalActiveDPP = 0;
470 	bool NoChromaSurfaces = true;
471 	unsigned int DETBufferSizeInKByteForSwathCalculation;
472 
473 #ifdef __DML_VBA_DEBUG__
474 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
475 	dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
476 	dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
477 #endif
478 	dml32_CalculateSwathWidth(ForceSingleDPP,
479 			NumberOfActiveSurfaces,
480 			SourcePixelFormat,
481 			SourceRotation,
482 			ViewportStationary,
483 			ViewportWidth,
484 			ViewportHeight,
485 			ViewportXStart,
486 			ViewportYStart,
487 			ViewportXStartC,
488 			ViewportYStartC,
489 			SurfaceWidthY,
490 			SurfaceWidthC,
491 			SurfaceHeightY,
492 			SurfaceHeightC,
493 			ODMMode,
494 			BytePerPixY,
495 			BytePerPixC,
496 			Read256BytesBlockHeightY,
497 			Read256BytesBlockHeightC,
498 			Read256BytesBlockWidthY,
499 			Read256BytesBlockWidthC,
500 			BlendingAndTiming,
501 			HActive,
502 			HRatio,
503 			DPPPerSurface,
504 
505 			/* Output */
506 			SwathWidthdoubleDPP,
507 			SwathWidthdoubleDPPChroma,
508 			SwathWidth,
509 			SwathWidthChroma,
510 			MaximumSwathHeightY,
511 			MaximumSwathHeightC,
512 			swath_width_luma_ub,
513 			swath_width_chroma_ub);
514 
515 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
516 		RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
517 		RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
518 #ifdef __DML_VBA_DEBUG__
519 		dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
520 		dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
521 		dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
522 		dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
523 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
524 				RoundedUpMaxSwathSizeBytesY[k]);
525 		dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
526 		dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
527 		dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
528 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
529 				RoundedUpMaxSwathSizeBytesC[k]);
530 #endif
531 
532 		if (SourcePixelFormat[k] == dm_420_10) {
533 			RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
534 			RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
535 		}
536 	}
537 
538 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
539 		TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
540 		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
541 				SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
542 			NoChromaSurfaces = false;
543 		}
544 	}
545 
546 	// By default, just set the reserved space to 2 pixel chunks size
547 	*CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
548 
549 	// if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
550 	// - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
551 	// - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
552 	*CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
553 
554 	if (*CompBufReservedSpaceNeedAdjustment == 1) {
555 		*CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
556 	}
557 
558 	#ifdef __DML_VBA_DEBUG__
559 		dml_print("DML::%s: CompBufReservedSpaceKBytes          = %d\n",  __func__, *CompBufReservedSpaceKBytes);
560 		dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, *CompBufReservedSpaceNeedAdjustment);
561 	#endif
562 
563 	*UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
564 
565 	dml32_CalculateDETBufferSize(DETSizeOverride,
566 			UseMALLForPStateChange,
567 			ForceSingleDPP,
568 			NumberOfActiveSurfaces,
569 			*UnboundedRequestEnabled,
570 			nomDETInKByte,
571 			MaxTotalDETInKByte,
572 			ConfigReturnBufferSizeInKByte,
573 			MinCompressedBufferSizeInKByte,
574 			CompressedBufferSegmentSizeInkByteFinal,
575 			SourcePixelFormat,
576 			ReadBandwidthLuma,
577 			ReadBandwidthChroma,
578 			RoundedUpMaxSwathSizeBytesY,
579 			RoundedUpMaxSwathSizeBytesC,
580 			DPPPerSurface,
581 
582 			/* Output */
583 			DETBufferSizeInKByte,    // per hubp pipe
584 			CompressedBufferSizeInkByte);
585 
586 #ifdef __DML_VBA_DEBUG__
587 	dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
588 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
589 	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
590 	dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
591 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
592 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
593 #endif
594 
595 	*ViewportSizeSupport = true;
596 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
597 
598 		DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
599 				dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
600 #ifdef __DML_VBA_DEBUG__
601 		dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
602 				DETBufferSizeInKByteForSwathCalculation);
603 #endif
604 
605 		if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
606 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
607 			SwathHeightY[k] = MaximumSwathHeightY[k];
608 			SwathHeightC[k] = MaximumSwathHeightC[k];
609 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
610 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
611 		} else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
612 				RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
613 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
614 			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
615 			SwathHeightC[k] = MaximumSwathHeightC[k];
616 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
617 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
618 		} else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
619 				RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
620 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
621 			SwathHeightY[k] = MaximumSwathHeightY[k];
622 			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
623 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
624 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
625 		} else {
626 			SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
627 			SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
628 			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
629 			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
630 		}
631 
632 		if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
633 				DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
634 				|| SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
635 						SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
636 			*ViewportSizeSupport = false;
637 			ViewportSizeSupportPerSurface[k] = false;
638 		} else {
639 			ViewportSizeSupportPerSurface[k] = true;
640 		}
641 
642 		if (SwathHeightC[k] == 0) {
643 #ifdef __DML_VBA_DEBUG__
644 			dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
645 #endif
646 			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
647 			DETBufferSizeC[k] = 0;
648 		} else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
649 #ifdef __DML_VBA_DEBUG__
650 			dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
651 #endif
652 			DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
653 			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
654 		} else {
655 #ifdef __DML_VBA_DEBUG__
656 			dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
657 #endif
658 			DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
659 			DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
660 		}
661 
662 #ifdef __DML_VBA_DEBUG__
663 		dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
664 		dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
665 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
666 				k, RoundedUpMaxSwathSizeBytesY[k]);
667 		dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
668 				k, RoundedUpMaxSwathSizeBytesC[k]);
669 		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
670 		dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
671 		dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
672 		dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
673 		dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
674 		dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
675 				ViewportSizeSupportPerSurface[k]);
676 #endif
677 
678 	}
679 } // CalculateSwathAndDETConfiguration
680 
dml32_CalculateSwathWidth(bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,enum source_format_class SourcePixelFormat[],enum dm_rotation_angle SourceRotation[],bool ViewportStationary[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int ViewportXStart[],unsigned int ViewportYStart[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],enum odm_combine_mode ODMMode[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int BlendingAndTiming[],unsigned int HActive[],double HRatio[],unsigned int DPPPerSurface[],double SwathWidthdoubleDPPY[],double SwathWidthdoubleDPPC[],double SwathWidthY[],double SwathWidthC[],unsigned int MaximumSwathHeightY[],unsigned int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])681 void dml32_CalculateSwathWidth(
682 		bool				ForceSingleDPP,
683 		unsigned int			NumberOfActiveSurfaces,
684 		enum source_format_class	SourcePixelFormat[],
685 		enum dm_rotation_angle		SourceRotation[],
686 		bool				ViewportStationary[],
687 		unsigned int			ViewportWidth[],
688 		unsigned int			ViewportHeight[],
689 		unsigned int			ViewportXStart[],
690 		unsigned int			ViewportYStart[],
691 		unsigned int			ViewportXStartC[],
692 		unsigned int			ViewportYStartC[],
693 		unsigned int			SurfaceWidthY[],
694 		unsigned int			SurfaceWidthC[],
695 		unsigned int			SurfaceHeightY[],
696 		unsigned int			SurfaceHeightC[],
697 		enum odm_combine_mode		ODMMode[],
698 		unsigned int			BytePerPixY[],
699 		unsigned int			BytePerPixC[],
700 		unsigned int			Read256BytesBlockHeightY[],
701 		unsigned int			Read256BytesBlockHeightC[],
702 		unsigned int			Read256BytesBlockWidthY[],
703 		unsigned int			Read256BytesBlockWidthC[],
704 		unsigned int			BlendingAndTiming[],
705 		unsigned int			HActive[],
706 		double				HRatio[],
707 		unsigned int			DPPPerSurface[],
708 
709 		/* Output */
710 		double				SwathWidthdoubleDPPY[],
711 		double				SwathWidthdoubleDPPC[],
712 		double				SwathWidthY[], // per-pipe
713 		double				SwathWidthC[], // per-pipe
714 		unsigned int			MaximumSwathHeightY[],
715 		unsigned int			MaximumSwathHeightC[],
716 		unsigned int			swath_width_luma_ub[], // per-pipe
717 		unsigned int			swath_width_chroma_ub[]) // per-pipe
718 {
719 	unsigned int k, j;
720 	enum odm_combine_mode MainSurfaceODMMode;
721 
722 	unsigned int surface_width_ub_l;
723 	unsigned int surface_height_ub_l;
724 	unsigned int surface_width_ub_c = 0;
725 	unsigned int surface_height_ub_c = 0;
726 
727 #ifdef __DML_VBA_DEBUG__
728 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
729 	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
730 #endif
731 
732 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
733 		if (!IsVertical(SourceRotation[k]))
734 			SwathWidthdoubleDPPY[k] = ViewportWidth[k];
735 		else
736 			SwathWidthdoubleDPPY[k] = ViewportHeight[k];
737 
738 #ifdef __DML_VBA_DEBUG__
739 		dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
740 		dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
741 #endif
742 
743 		MainSurfaceODMMode = ODMMode[k];
744 		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
745 			if (BlendingAndTiming[k] == j)
746 				MainSurfaceODMMode = ODMMode[j];
747 		}
748 
749 		if (ForceSingleDPP) {
750 			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
751 		} else {
752 			if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
753 				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
754 						dml_round(HActive[k] / 4.0 * HRatio[k]));
755 			} else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
756 				SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
757 						dml_round(HActive[k] / 2.0 * HRatio[k]));
758 			} else if (DPPPerSurface[k] == 2) {
759 				SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
760 			} else {
761 				SwathWidthY[k] = SwathWidthdoubleDPPY[k];
762 			}
763 		}
764 
765 #ifdef __DML_VBA_DEBUG__
766 		dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
767 		dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
768 		dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
769 		dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
770 		dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
771 #endif
772 
773 		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
774 				SourcePixelFormat[k] == dm_420_12) {
775 			SwathWidthC[k] = SwathWidthY[k] / 2;
776 			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
777 		} else {
778 			SwathWidthC[k] = SwathWidthY[k];
779 			SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
780 		}
781 
782 		if (ForceSingleDPP == true) {
783 			SwathWidthY[k] = SwathWidthdoubleDPPY[k];
784 			SwathWidthC[k] = SwathWidthdoubleDPPC[k];
785 		}
786 
787 		surface_width_ub_l  = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
788 		surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
789 
790 		if (!IsVertical(SourceRotation[k])) {
791 			MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
792 			MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
793 			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
794 				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
795 						dml_floor(ViewportXStart[k] +
796 								SwathWidthY[k] +
797 								Read256BytesBlockWidthY[k] - 1,
798 								Read256BytesBlockWidthY[k]) -
799 								dml_floor(ViewportXStart[k],
800 								Read256BytesBlockWidthY[k]));
801 			} else {
802 				swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
803 						dml_ceil(SwathWidthY[k] - 1,
804 								Read256BytesBlockWidthY[k]) +
805 								Read256BytesBlockWidthY[k]);
806 			}
807 			if (BytePerPixC[k] > 0) {
808 				surface_width_ub_c  = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
809 				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
810 					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
811 							dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
812 									Read256BytesBlockWidthC[k] - 1,
813 									Read256BytesBlockWidthC[k]) -
814 									dml_floor(ViewportXStartC[k],
815 									Read256BytesBlockWidthC[k]));
816 				} else {
817 					swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
818 							dml_ceil(SwathWidthC[k] - 1,
819 								Read256BytesBlockWidthC[k]) +
820 								Read256BytesBlockWidthC[k]);
821 				}
822 			} else {
823 				swath_width_chroma_ub[k] = 0;
824 			}
825 		} else {
826 			MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
827 			MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
828 
829 			if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
830 				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
831 						SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
832 						Read256BytesBlockHeightY[k]) -
833 						dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
834 			} else {
835 				swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
836 						Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
837 			}
838 			if (BytePerPixC[k] > 0) {
839 				surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
840 				if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
841 					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
842 							dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
843 									Read256BytesBlockHeightC[k] - 1,
844 									Read256BytesBlockHeightC[k]) -
845 									dml_floor(ViewportYStartC[k],
846 											Read256BytesBlockHeightC[k]));
847 				} else {
848 					swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
849 							dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
850 							Read256BytesBlockHeightC[k]);
851 				}
852 			} else {
853 				swath_width_chroma_ub[k] = 0;
854 			}
855 		}
856 
857 #ifdef __DML_VBA_DEBUG__
858 		dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
859 		dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
860 		dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
861 		dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
862 		dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
863 		dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
864 		dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
865 		dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
866 		dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
867 		dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
868 		dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
869 		dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
870 		dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
871 		dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
872 #endif
873 
874 	}
875 } // CalculateSwathWidth
876 
dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,unsigned int TotalNumberOfActiveDPP,bool NoChroma,enum output_encoder_class Output,enum dm_swizzle_mode SurfaceTiling,bool CompBufReservedSpaceNeedAdjustment,bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)877 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
878 			unsigned int TotalNumberOfActiveDPP,
879 			bool NoChroma,
880 			enum output_encoder_class Output,
881 			enum dm_swizzle_mode SurfaceTiling,
882 			bool CompBufReservedSpaceNeedAdjustment,
883 			bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
884 {
885 	bool ret_val = false;
886 
887 	ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
888 			TotalNumberOfActiveDPP == 1 && NoChroma);
889 	if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
890 		ret_val = false;
891 
892 	if (SurfaceTiling == dm_sw_linear)
893 		ret_val = false;
894 
895 	if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
896 		ret_val = false;
897 
898 #ifdef __DML_VBA_DEBUG__
899 	dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, CompBufReservedSpaceNeedAdjustment);
900 	dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment  = %d\n",  __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
901 	dml_print("DML::%s: ret_val = %d\n",  __func__, ret_val);
902 #endif
903 
904 	return (ret_val);
905 }
906 
dml32_CalculateDETBufferSize(unsigned int DETSizeOverride[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,bool UnboundedRequestEnabled,unsigned int nomDETInKByte,unsigned int MaxTotalDETInKByte,unsigned int ConfigReturnBufferSizeInKByte,unsigned int MinCompressedBufferSizeInKByte,unsigned int CompressedBufferSegmentSizeInkByteFinal,enum source_format_class SourcePixelFormat[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int RoundedUpMaxSwathSizeBytesY[],unsigned int RoundedUpMaxSwathSizeBytesC[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeInKByte[],unsigned int * CompressedBufferSizeInkByte)907 void dml32_CalculateDETBufferSize(
908 		unsigned int DETSizeOverride[],
909 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
910 		bool ForceSingleDPP,
911 		unsigned int NumberOfActiveSurfaces,
912 		bool UnboundedRequestEnabled,
913 		unsigned int nomDETInKByte,
914 		unsigned int MaxTotalDETInKByte,
915 		unsigned int ConfigReturnBufferSizeInKByte,
916 		unsigned int MinCompressedBufferSizeInKByte,
917 		unsigned int CompressedBufferSegmentSizeInkByteFinal,
918 		enum source_format_class SourcePixelFormat[],
919 		double ReadBandwidthLuma[],
920 		double ReadBandwidthChroma[],
921 		unsigned int RoundedUpMaxSwathSizeBytesY[],
922 		unsigned int RoundedUpMaxSwathSizeBytesC[],
923 		unsigned int DPPPerSurface[],
924 		/* Output */
925 		unsigned int DETBufferSizeInKByte[],
926 		unsigned int *CompressedBufferSizeInkByte)
927 {
928 	unsigned int DETBufferSizePoolInKByte;
929 	unsigned int NextDETBufferPieceInKByte;
930 	bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
931 	bool NextPotentialSurfaceToAssignDETPieceFound;
932 	unsigned int NextSurfaceToAssignDETPiece;
933 	double TotalBandwidth;
934 	double BandwidthOfSurfacesNotAssignedDETPiece;
935 	unsigned int max_minDET;
936 	unsigned int minDET;
937 	unsigned int minDET_pipe;
938 	unsigned int j, k;
939 
940 #ifdef __DML_VBA_DEBUG__
941 	dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
942 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
943 	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
944 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
945 	dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
946 	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
947 	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
948 	dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
949 			CompressedBufferSegmentSizeInkByteFinal);
950 #endif
951 
952 	// Note: Will use default det size if that fits 2 swaths
953 	if (UnboundedRequestEnabled) {
954 		if (DETSizeOverride[0] > 0) {
955 			DETBufferSizeInKByte[0] = DETSizeOverride[0];
956 		} else {
957 			DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
958 					((double) RoundedUpMaxSwathSizeBytesY[0] +
959 							(double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
960 		}
961 		*CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
962 	} else {
963 		DETBufferSizePoolInKByte = MaxTotalDETInKByte;
964 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
965 			DETBufferSizeInKByte[k] = nomDETInKByte;
966 			if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
967 					SourcePixelFormat[k] == dm_420_12) {
968 				max_minDET = nomDETInKByte - 64;
969 			} else {
970 				max_minDET = nomDETInKByte;
971 			}
972 			minDET = 128;
973 			minDET_pipe = 0;
974 
975 			// add DET resource until can hold 2 full swaths
976 			while (minDET <= max_minDET && minDET_pipe == 0) {
977 				if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
978 						(double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
979 					minDET_pipe = minDET;
980 				minDET = minDET + 64;
981 			}
982 
983 #ifdef __DML_VBA_DEBUG__
984 			dml_print("DML::%s: k=%0d minDET        = %d\n", __func__, k, minDET);
985 			dml_print("DML::%s: k=%0d max_minDET    = %d\n", __func__, k, max_minDET);
986 			dml_print("DML::%s: k=%0d minDET_pipe   = %d\n", __func__, k, minDET_pipe);
987 			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
988 					RoundedUpMaxSwathSizeBytesY[k]);
989 			dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
990 					RoundedUpMaxSwathSizeBytesC[k]);
991 #endif
992 
993 			if (minDET_pipe == 0) {
994 				minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
995 						(double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
996 #ifdef __DML_VBA_DEBUG__
997 				dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
998 						__func__, k, minDET_pipe);
999 #endif
1000 			}
1001 
1002 			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1003 				DETBufferSizeInKByte[k] = 0;
1004 			} else if (DETSizeOverride[k] > 0) {
1005 				DETBufferSizeInKByte[k] = DETSizeOverride[k];
1006 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1007 						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1008 			} else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1009 				DETBufferSizeInKByte[k] = minDET_pipe;
1010 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1011 						(ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1012 			}
1013 
1014 #ifdef __DML_VBA_DEBUG__
1015 			dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1016 			dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1017 			dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1018 			dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
1019 #endif
1020 		}
1021 
1022 		TotalBandwidth = 0;
1023 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1024 			if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1025 				TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1026 		}
1027 #ifdef __DML_VBA_DEBUG__
1028 		dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1029 		for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1030 			dml_print("DML::%s: k=%d DETBufferSizeInKByte   = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1031 		dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1032 		dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
1033 #endif
1034 		BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1035 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1036 
1037 			if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1038 				DETPieceAssignedToThisSurfaceAlready[k] = true;
1039 			} else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1040 					(double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1041 					((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1042 				DETPieceAssignedToThisSurfaceAlready[k] = true;
1043 				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1044 						ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1045 			} else {
1046 				DETPieceAssignedToThisSurfaceAlready[k] = false;
1047 			}
1048 #ifdef __DML_VBA_DEBUG__
1049 			dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1050 					DETPieceAssignedToThisSurfaceAlready[k]);
1051 			dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1052 					BandwidthOfSurfacesNotAssignedDETPiece);
1053 #endif
1054 		}
1055 
1056 		for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1057 			NextPotentialSurfaceToAssignDETPieceFound = false;
1058 			NextSurfaceToAssignDETPiece = 0;
1059 
1060 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1061 #ifdef __DML_VBA_DEBUG__
1062 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1063 						ReadBandwidthLuma[k]);
1064 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1065 						ReadBandwidthChroma[k]);
1066 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1067 						ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1068 				dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1069 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1070 				dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1071 						NextSurfaceToAssignDETPiece);
1072 #endif
1073 				if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1074 						(!NextPotentialSurfaceToAssignDETPieceFound ||
1075 						ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1076 						ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1077 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1078 					NextSurfaceToAssignDETPiece = k;
1079 					NextPotentialSurfaceToAssignDETPieceFound = true;
1080 				}
1081 #ifdef __DML_VBA_DEBUG__
1082 				dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1083 						__func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1084 				dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1085 						__func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1086 #endif
1087 			}
1088 
1089 			if (NextPotentialSurfaceToAssignDETPieceFound) {
1090 				// Note: To show the banker's rounding behavior in VBA and also the fact
1091 				// that the DET buffer size varies due to precision issue
1092 				//
1093 				//double tmp1 =  ((double) DETBufferSizePoolInKByte *
1094 				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1095 				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1096 				// BandwidthOfSurfacesNotAssignedDETPiece /
1097 				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1098 				//double tmp2 =  dml_round((double) DETBufferSizePoolInKByte *
1099 				// (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1100 				// ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1101 				 //BandwidthOfSurfacesNotAssignedDETPiece /
1102 				// ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1103 				//
1104 				//dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1105 				//dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1106 
1107 				NextDETBufferPieceInKByte = dml_min(
1108 					dml_round((double) DETBufferSizePoolInKByte *
1109 						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1110 						ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1111 						BandwidthOfSurfacesNotAssignedDETPiece /
1112 						((ForceSingleDPP ? 1 :
1113 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1114 						(ForceSingleDPP ? 1 :
1115 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1116 						dml_floor((double) DETBufferSizePoolInKByte,
1117 						(ForceSingleDPP ? 1 :
1118 								DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1119 
1120 				// Above calculation can assign the entire DET buffer allocation to a single pipe.
1121 				// We should limit the per-pipe DET size to the nominal / max per pipe.
1122 				if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1123 					if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1124 							nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1125 						NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1126 								DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1127 					} else {
1128 						// Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1129 						// already has the max per-pipe value
1130 						NextDETBufferPieceInKByte = 0;
1131 					}
1132 				}
1133 
1134 #ifdef __DML_VBA_DEBUG__
1135 				dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1136 					DETBufferSizePoolInKByte);
1137 				dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1138 					NextSurfaceToAssignDETPiece);
1139 				dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1140 					NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1141 				dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1142 					NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1143 				dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1144 					__func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1145 				dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1146 					NextDETBufferPieceInKByte);
1147 				dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1148 					__func__, j, NextSurfaceToAssignDETPiece,
1149 					DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1150 #endif
1151 
1152 				DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1153 						DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1154 						+ NextDETBufferPieceInKByte
1155 						/ (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1156 #ifdef __DML_VBA_DEBUG__
1157 				dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1158 #endif
1159 
1160 				DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1161 				DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1162 				BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1163 						(ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1164 								ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1165 			}
1166 		}
1167 		*CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1168 	}
1169 	*CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1170 
1171 #ifdef __DML_VBA_DEBUG__
1172 	dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1173 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1174 	for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1175 		dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1176 				__func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1177 	}
1178 #endif
1179 } // CalculateDETBufferSize
1180 
dml32_CalculateODMMode(unsigned int MaximumPixelsPerLinePerDSCUnit,unsigned int HActive,enum output_format_class OutFormat,enum output_encoder_class Output,enum odm_combine_policy ODMUse,double StateDispclk,double MaxDispclk,bool DSCEnable,unsigned int TotalNumberOfActiveDPP,unsigned int MaxNumDPP,double PixelClock,double DISPCLKDPPCLKDSCCLKDownSpreading,double DISPCLKRampingMargin,double DISPCLKDPPCLKVCOSpeed,unsigned int NumberOfDSCSlices,bool * TotalAvailablePipesSupport,unsigned int * NumberOfDPP,enum odm_combine_mode * ODMMode,double * RequiredDISPCLKPerSurface)1181 void dml32_CalculateODMMode(
1182 		unsigned int MaximumPixelsPerLinePerDSCUnit,
1183 		unsigned int HActive,
1184 		enum output_format_class OutFormat,
1185 		enum output_encoder_class Output,
1186 		enum odm_combine_policy ODMUse,
1187 		double StateDispclk,
1188 		double MaxDispclk,
1189 		bool DSCEnable,
1190 		unsigned int TotalNumberOfActiveDPP,
1191 		unsigned int MaxNumDPP,
1192 		double PixelClock,
1193 		double DISPCLKDPPCLKDSCCLKDownSpreading,
1194 		double DISPCLKRampingMargin,
1195 		double DISPCLKDPPCLKVCOSpeed,
1196 		unsigned int NumberOfDSCSlices,
1197 
1198 		/* Output */
1199 		bool *TotalAvailablePipesSupport,
1200 		unsigned int *NumberOfDPP,
1201 		enum odm_combine_mode *ODMMode,
1202 		double *RequiredDISPCLKPerSurface)
1203 {
1204 
1205 	double SurfaceRequiredDISPCLKWithoutODMCombine;
1206 	double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1207 	double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1208 
1209 	SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1210 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1211 			MaxDispclk);
1212 	SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1213 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1214 			MaxDispclk);
1215 	SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1216 			PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1217 			MaxDispclk);
1218 	*TotalAvailablePipesSupport = true;
1219 	*ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1220 
1221 	if (ODMUse == dm_odm_combine_policy_none)
1222 		*ODMMode = dm_odm_combine_mode_disabled;
1223 
1224 	*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1225 	*NumberOfDPP = 0;
1226 
1227 	// FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1228 	// (ODMUse == "" || ODMUse == "CombineAsNeeded")
1229 
1230 	if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1231 			((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1232 					(DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
1233 					|| NumberOfDSCSlices > 8)))) {
1234 		if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1235 			*ODMMode = dm_odm_combine_mode_4to1;
1236 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1237 			*NumberOfDPP = 4;
1238 		} else {
1239 			*TotalAvailablePipesSupport = false;
1240 		}
1241 	} else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1242 			(((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1243 					SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1244 					(DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
1245 					|| (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
1246 		if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1247 			*ODMMode = dm_odm_combine_mode_2to1;
1248 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1249 			*NumberOfDPP = 2;
1250 		} else {
1251 			*TotalAvailablePipesSupport = false;
1252 		}
1253 	} else {
1254 		if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1255 			*NumberOfDPP = 1;
1256 		else
1257 			*TotalAvailablePipesSupport = false;
1258 	}
1259 	if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH &&
1260 			ODMUse != dm_odm_combine_policy_4to1) {
1261 		if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) {
1262 			*ODMMode = dm_odm_combine_mode_disabled;
1263 			*NumberOfDPP = 0;
1264 			*TotalAvailablePipesSupport = false;
1265 		} else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 ||
1266 				*ODMMode == dm_odm_combine_mode_4to1) {
1267 			*ODMMode = dm_odm_combine_mode_4to1;
1268 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1269 			*NumberOfDPP = 4;
1270 		} else {
1271 			*ODMMode = dm_odm_combine_mode_2to1;
1272 			*RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1273 			*NumberOfDPP = 2;
1274 		}
1275 	}
1276 	if (Output == dm_hdmi && OutFormat == dm_420 &&
1277 			HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) {
1278 		*ODMMode = dm_odm_combine_mode_disabled;
1279 		*NumberOfDPP = 0;
1280 		*TotalAvailablePipesSupport = false;
1281 	}
1282 }
1283 
dml32_CalculateRequiredDispclk(enum odm_combine_mode ODMMode,double PixelClock,double DISPCLKDPPCLKDSCCLKDownSpreading,double DISPCLKRampingMargin,double DISPCLKDPPCLKVCOSpeed,double MaxDispclk)1284 double dml32_CalculateRequiredDispclk(
1285 		enum odm_combine_mode ODMMode,
1286 		double PixelClock,
1287 		double DISPCLKDPPCLKDSCCLKDownSpreading,
1288 		double DISPCLKRampingMargin,
1289 		double DISPCLKDPPCLKVCOSpeed,
1290 		double MaxDispclk)
1291 {
1292 	double RequiredDispclk = 0.;
1293 	double PixelClockAfterODM;
1294 	double DISPCLKWithRampingRoundedToDFSGranularity;
1295 	double DISPCLKWithoutRampingRoundedToDFSGranularity;
1296 	double MaxDispclkRoundedDownToDFSGranularity;
1297 
1298 	if (ODMMode == dm_odm_combine_mode_4to1)
1299 		PixelClockAfterODM = PixelClock / 4;
1300 	else if (ODMMode == dm_odm_combine_mode_2to1)
1301 		PixelClockAfterODM = PixelClock / 2;
1302 	else
1303 		PixelClockAfterODM = PixelClock;
1304 
1305 
1306 	DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1307 			PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1308 					* (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1309 
1310 	DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1311 			PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1312 
1313 	MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1314 
1315 	if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1316 		RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1317 	else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1318 		RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1319 	else
1320 		RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1321 
1322 	return RequiredDispclk;
1323 }
1324 
/*
 * dml32_RoundToDFSGranularity - snap a clock to a value of the form
 * (4 * VCOSpeed) / divider.
 *
 * The divider is (4 * VCOSpeed / Clock) passed through dml_floor() when
 * round_up is set (smaller divider, so a clock not below the request) or
 * through dml_ceil() otherwise, both with a granularity of 1.0.
 *
 * Returns 0.0 for a non-positive Clock.
 * NOTE(review): with round_up set and Clock above 4 * VCOSpeed the divider
 * presumably becomes 0 and the result is a division by zero - confirm that
 * callers never pass such a clock.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double Divider;

	if (Clock <= 0.0)
		return 0.0;

	Divider = round_up ? dml_floor(VCOSpeed * 4.0 / Clock, 1.0)
			   : dml_ceil(VCOSpeed * 4.0 / Clock, 1.0);

	return VCOSpeed * 4.0 / Divider;
}
1335 
dml32_CalculateOutputLink(double PHYCLKPerState,double PHYCLKD18PerState,double PHYCLKD32PerState,double Downspreading,bool IsMainSurfaceUsingTheIndicatedTiming,enum output_encoder_class Output,enum output_format_class OutputFormat,unsigned int HTotal,unsigned int HActive,double PixelClockBackEnd,double ForcedOutputLinkBPP,unsigned int DSCInputBitPerComponent,unsigned int NumberOfDSCSlices,double AudioSampleRate,unsigned int AudioSampleLayout,enum odm_combine_mode ODMModeNoDSC,enum odm_combine_mode ODMModeDSC,bool DSCEnable,unsigned int OutputLinkDPLanes,enum dm_output_link_dp_rate OutputLinkDPRate,bool * RequiresDSC,double * RequiresFEC,double * OutBpp,enum dm_output_type * OutputType,enum dm_output_rate * OutputRate,unsigned int * RequiredSlots)1336 void dml32_CalculateOutputLink(
1337 		double PHYCLKPerState,
1338 		double PHYCLKD18PerState,
1339 		double PHYCLKD32PerState,
1340 		double Downspreading,
1341 		bool IsMainSurfaceUsingTheIndicatedTiming,
1342 		enum output_encoder_class Output,
1343 		enum output_format_class OutputFormat,
1344 		unsigned int HTotal,
1345 		unsigned int HActive,
1346 		double PixelClockBackEnd,
1347 		double ForcedOutputLinkBPP,
1348 		unsigned int DSCInputBitPerComponent,
1349 		unsigned int NumberOfDSCSlices,
1350 		double AudioSampleRate,
1351 		unsigned int AudioSampleLayout,
1352 		enum odm_combine_mode ODMModeNoDSC,
1353 		enum odm_combine_mode ODMModeDSC,
1354 		bool DSCEnable,
1355 		unsigned int OutputLinkDPLanes,
1356 		enum dm_output_link_dp_rate OutputLinkDPRate,
1357 
1358 		/* Output */
1359 		bool *RequiresDSC,
1360 		double *RequiresFEC,
1361 		double  *OutBpp,
1362 		enum dm_output_type *OutputType,
1363 		enum dm_output_rate *OutputRate,
1364 		unsigned int *RequiredSlots)
1365 {
1366 	bool LinkDSCEnable;
1367 	unsigned int dummy;
1368 	*RequiresDSC = false;
1369 	*RequiresFEC = false;
1370 	*OutBpp = 0;
1371 	*OutputType = dm_output_type_unknown;
1372 	*OutputRate = dm_output_rate_unknown;
1373 
1374 	if (IsMainSurfaceUsingTheIndicatedTiming) {
1375 		if (Output == dm_hdmi) {
1376 			*RequiresDSC = false;
1377 			*RequiresFEC = false;
1378 			*OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1379 					PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1380 					DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1381 					ODMModeNoDSC, ODMModeDSC, &dummy);
1382 			//OutputTypeAndRate = "HDMI";
1383 			*OutputType = dm_output_type_hdmi;
1384 
1385 		} else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1386 			if (DSCEnable == true) {
1387 				*RequiresDSC = true;
1388 				LinkDSCEnable = true;
1389 				if (Output == dm_dp || Output == dm_dp2p0)
1390 					*RequiresFEC = true;
1391 				else
1392 					*RequiresFEC = false;
1393 			} else {
1394 				*RequiresDSC = false;
1395 				LinkDSCEnable = false;
1396 				if (Output == dm_dp2p0)
1397 					*RequiresFEC = true;
1398 				else
1399 					*RequiresFEC = false;
1400 			}
1401 			if (Output == dm_dp2p0) {
1402 				*OutBpp = 0;
1403 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1404 						PHYCLKD32PerState >= 10000.0 / 32) {
1405 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1406 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1407 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1408 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1409 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1410 					if (*OutBpp == 0 && PHYCLKD32PerState < 13500.0 / 32 && DSCEnable == true &&
1411 							ForcedOutputLinkBPP == 0) {
1412 						*RequiresDSC = true;
1413 						LinkDSCEnable = true;
1414 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1415 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1416 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1417 								OutputFormat, DSCInputBitPerComponent,
1418 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1419 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1420 					}
1421 					//OutputTypeAndRate = Output & " UHBR10";
1422 					*OutputType = dm_output_type_dp2p0;
1423 					*OutputRate = dm_output_rate_dp_rate_uhbr10;
1424 				}
1425 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1426 						*OutBpp == 0 && PHYCLKD32PerState >= 13500.0 / 32) {
1427 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1428 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1429 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1430 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1431 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1432 
1433 					if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1434 							ForcedOutputLinkBPP == 0) {
1435 						*RequiresDSC = true;
1436 						LinkDSCEnable = true;
1437 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1438 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1439 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1440 								OutputFormat, DSCInputBitPerComponent,
1441 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1442 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1443 					}
1444 					//OutputTypeAndRate = Output & " UHBR13p5";
1445 					*OutputType = dm_output_type_dp2p0;
1446 					*OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1447 				}
1448 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1449 						*OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1450 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1451 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1452 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1453 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1454 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1455 					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1456 						*RequiresDSC = true;
1457 						LinkDSCEnable = true;
1458 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1459 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1460 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1461 								OutputFormat, DSCInputBitPerComponent,
1462 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1463 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1464 					}
1465 					//OutputTypeAndRate = Output & " UHBR20";
1466 					*OutputType = dm_output_type_dp2p0;
1467 					*OutputRate = dm_output_rate_dp_rate_uhbr20;
1468 				}
1469 			} else {
1470 				*OutBpp = 0;
1471 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1472 						PHYCLKPerState >= 270) {
1473 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1474 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1475 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1476 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1477 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1478 					if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1479 							ForcedOutputLinkBPP == 0) {
1480 						*RequiresDSC = true;
1481 						LinkDSCEnable = true;
1482 						if (Output == dm_dp)
1483 							*RequiresFEC = true;
1484 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1485 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1486 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1487 								OutputFormat, DSCInputBitPerComponent,
1488 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1489 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1490 					}
1491 					//OutputTypeAndRate = Output & " HBR";
1492 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1493 					*OutputRate = dm_output_rate_dp_rate_hbr;
1494 				}
1495 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1496 						*OutBpp == 0 && PHYCLKPerState >= 540) {
1497 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1498 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1499 							ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1500 							DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1501 							AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1502 
1503 					if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1504 							ForcedOutputLinkBPP == 0) {
1505 						*RequiresDSC = true;
1506 						LinkDSCEnable = true;
1507 						if (Output == dm_dp)
1508 							*RequiresFEC = true;
1509 
1510 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1511 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1512 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1513 								OutputFormat, DSCInputBitPerComponent,
1514 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1515 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1516 					}
1517 					//OutputTypeAndRate = Output & " HBR2";
1518 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1519 					*OutputRate = dm_output_rate_dp_rate_hbr2;
1520 				}
1521 				if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1522 					*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1523 							OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1524 							ForcedOutputLinkBPP, LinkDSCEnable, Output,
1525 							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1526 							AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1527 							RequiredSlots);
1528 
1529 					if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1530 						*RequiresDSC = true;
1531 						LinkDSCEnable = true;
1532 						if (Output == dm_dp)
1533 							*RequiresFEC = true;
1534 
1535 						*OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1536 								OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1537 								ForcedOutputLinkBPP, LinkDSCEnable, Output,
1538 								OutputFormat, DSCInputBitPerComponent,
1539 								NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1540 								ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1541 					}
1542 					//OutputTypeAndRate = Output & " HBR3";
1543 					*OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1544 					*OutputRate = dm_output_rate_dp_rate_hbr3;
1545 				}
1546 			}
1547 		}
1548 	}
1549 }
1550 
/*
 * Compute the per-surface DPPCLK values and the global DPPCLK they derive from.
 *
 * Pass 1: each surface's clock is its single-DPP clock divided by the number
 * of DPPs it uses, scaled up by the down-spread percentage; *GlobalDPPCLK
 * tracks the largest of them.
 * *GlobalDPPCLK is then rounded up by dml32_RoundToDFSGranularity().
 * Pass 2: each surface's clock is re-expressed as a whole number of 1/255
 * steps of *GlobalDPPCLK, rounding the step count up.
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	unsigned int i;

	*GlobalDPPCLK = 0;

	for (i = 0; i < NumberOfActiveSurfaces; ++i) {
		double PerDPPClock = DPPCLKUsingSingleDPP[i] / DPPPerSurface[i];

		Dppclk[i] = PerDPPClock * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100);
		*GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[i]);
	}

	*GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);

	for (i = 0; i < NumberOfActiveSurfaces; ++i) {
		/* Number of 1/255 steps of the global clock needed to cover this surface. */
		double Steps = dml_ceil(Dppclk[i] * 255.0 / *GlobalDPPCLK, 1.0);

		Dppclk[i] = *GlobalDPPCLK / 255 * Steps;
	}
}
1572 
dml32_TruncToValidBPP(double LinkBitRate,unsigned int Lanes,unsigned int HTotal,unsigned int HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum output_encoder_class Output,enum output_format_class Format,unsigned int DSCInputBitPerComponent,unsigned int DSCSlices,unsigned int AudioRate,unsigned int AudioLayout,enum odm_combine_mode ODMModeNoDSC,enum odm_combine_mode ODMModeDSC,unsigned int * RequiredSlots)1573 double dml32_TruncToValidBPP(
1574 		double LinkBitRate,
1575 		unsigned int Lanes,
1576 		unsigned int HTotal,
1577 		unsigned int HActive,
1578 		double PixelClock,
1579 		double DesiredBPP,
1580 		bool DSCEnable,
1581 		enum output_encoder_class Output,
1582 		enum output_format_class Format,
1583 		unsigned int DSCInputBitPerComponent,
1584 		unsigned int DSCSlices,
1585 		unsigned int AudioRate,
1586 		unsigned int AudioLayout,
1587 		enum odm_combine_mode ODMModeNoDSC,
1588 		enum odm_combine_mode ODMModeDSC,
1589 		/* Output */
1590 		unsigned int *RequiredSlots)
1591 {
1592 	double    MaxLinkBPP;
1593 	unsigned int   MinDSCBPP;
1594 	double    MaxDSCBPP;
1595 	unsigned int   NonDSCBPP0;
1596 	unsigned int   NonDSCBPP1;
1597 	unsigned int   NonDSCBPP2;
1598 
1599 	if (Format == dm_420) {
1600 		NonDSCBPP0 = 12;
1601 		NonDSCBPP1 = 15;
1602 		NonDSCBPP2 = 18;
1603 		MinDSCBPP = 6;
1604 		MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
1605 	} else if (Format == dm_444) {
1606 		NonDSCBPP0 = 24;
1607 		NonDSCBPP1 = 30;
1608 		NonDSCBPP2 = 36;
1609 		MinDSCBPP = 8;
1610 		MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1611 	} else {
1612 		if (Output == dm_hdmi) {
1613 			NonDSCBPP0 = 24;
1614 			NonDSCBPP1 = 24;
1615 			NonDSCBPP2 = 24;
1616 		} else {
1617 			NonDSCBPP0 = 16;
1618 			NonDSCBPP1 = 20;
1619 			NonDSCBPP2 = 24;
1620 		}
1621 		if (Format == dm_n422) {
1622 			MinDSCBPP = 7;
1623 			MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1624 		} else {
1625 			MinDSCBPP = 8;
1626 			MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1627 		}
1628 	}
1629 	if (Output == dm_dp2p0) {
1630 		MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1631 	} else if (DSCEnable && Output == dm_dp) {
1632 		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1633 	} else {
1634 		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1635 	}
1636 
1637 	if (DSCEnable) {
1638 		if (ODMModeDSC == dm_odm_combine_mode_4to1)
1639 			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1640 		else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1641 			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1642 		else if (ODMModeDSC == dm_odm_split_mode_1to2)
1643 			MaxLinkBPP = 2 * MaxLinkBPP;
1644 	} else {
1645 		if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1646 			MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1647 		else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1648 			MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1649 		else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1650 			MaxLinkBPP = 2 * MaxLinkBPP;
1651 	}
1652 
1653 	*RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1654 
1655 	if (DesiredBPP == 0) {
1656 		if (DSCEnable) {
1657 			if (MaxLinkBPP < MinDSCBPP)
1658 				return BPP_INVALID;
1659 			else if (MaxLinkBPP >= MaxDSCBPP)
1660 				return MaxDSCBPP;
1661 			else
1662 				return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1663 		} else {
1664 			if (MaxLinkBPP >= NonDSCBPP2)
1665 				return NonDSCBPP2;
1666 			else if (MaxLinkBPP >= NonDSCBPP1)
1667 				return NonDSCBPP1;
1668 			else if (MaxLinkBPP >= NonDSCBPP0)
1669 				return 16.0;
1670 			else
1671 				return BPP_INVALID;
1672 		}
1673 	} else {
1674 		if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1675 				DesiredBPP <= NonDSCBPP0)) ||
1676 				(DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1677 			return BPP_INVALID;
1678 		else
1679 			return DesiredBPP;
1680 	}
1681 } // TruncToValidBPP
1682 
dml32_RequiredDTBCLK(bool DSCEnable,double PixelClock,enum output_format_class OutputFormat,double OutputBpp,unsigned int DSCSlices,unsigned int HTotal,unsigned int HActive,unsigned int AudioRate,unsigned int AudioLayout)1683 double dml32_RequiredDTBCLK(
1684 		bool              DSCEnable,
1685 		double               PixelClock,
1686 		enum output_format_class  OutputFormat,
1687 		double               OutputBpp,
1688 		unsigned int              DSCSlices,
1689 		unsigned int                 HTotal,
1690 		unsigned int                 HActive,
1691 		unsigned int              AudioRate,
1692 		unsigned int              AudioLayout)
1693 {
1694 	double PixelWordRate;
1695 	double HCActive;
1696 	double HCBlank;
1697 	double AverageTribyteRate;
1698 	double HActiveTribyteRate;
1699 
1700 	if (DSCEnable != true)
1701 		return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1702 
1703 	PixelWordRate = PixelClock /  (OutputFormat == dm_444 ? 1 : 2);
1704 	HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1705 			dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1706 	HCBlank = 64 + 32 *
1707 			dml_ceil(AudioRate *  (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1708 	AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1709 	HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1710 	return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
1711 }
1712 
dml32_DSCDelayRequirement(bool DSCEnabled,enum odm_combine_mode ODMMode,unsigned int DSCInputBitPerComponent,double OutputBpp,unsigned int HActive,unsigned int HTotal,unsigned int NumberOfDSCSlices,enum output_format_class OutputFormat,enum output_encoder_class Output,double PixelClock,double PixelClockBackEnd,double dsc_delay_factor_wa)1713 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1714 		enum odm_combine_mode ODMMode,
1715 		unsigned int DSCInputBitPerComponent,
1716 		double OutputBpp,
1717 		unsigned int HActive,
1718 		unsigned int HTotal,
1719 		unsigned int NumberOfDSCSlices,
1720 		enum output_format_class  OutputFormat,
1721 		enum output_encoder_class Output,
1722 		double PixelClock,
1723 		double PixelClockBackEnd,
1724 		double dsc_delay_factor_wa)
1725 {
1726 	unsigned int DSCDelayRequirement_val;
1727 
1728 	if (DSCEnabled == true && OutputBpp != 0) {
1729 		if (ODMMode == dm_odm_combine_mode_4to1) {
1730 			DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1731 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1732 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1733 		} else if (ODMMode == dm_odm_combine_mode_2to1) {
1734 			DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1735 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1736 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1737 		} else {
1738 			DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1739 					dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1740 					OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
1741 		}
1742 
1743 		DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1744 				dml_ceil((double)DSCDelayRequirement_val / HActive, 1);
1745 
1746 		DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1747 
1748 	} else {
1749 		DSCDelayRequirement_val = 0;
1750 	}
1751 
1752 #ifdef __DML_VBA_DEBUG__
1753 	dml_print("DML::%s: DSCEnabled              = %d\n", __func__, DSCEnabled);
1754 	dml_print("DML::%s: OutputBpp               = %f\n", __func__, OutputBpp);
1755 	dml_print("DML::%s: HActive                 = %d\n", __func__, HActive);
1756 	dml_print("DML::%s: OutputFormat            = %d\n", __func__, OutputFormat);
1757 	dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1758 	dml_print("DML::%s: NumberOfDSCSlices       = %d\n", __func__, NumberOfDSCSlices);
1759 	dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1760 #endif
1761 
1762 	return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1);
1763 }
1764 
dml32_CalculateSurfaceSizeInMall(unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCN,enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],bool DCCEnable[],bool ViewportStationary[],unsigned int ViewportXStartY[],unsigned int ViewportYStartY[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int ViewportWidthY[],unsigned int ViewportHeightY[],unsigned int BytesPerPixelY[],unsigned int ViewportWidthC[],unsigned int ViewportHeightC[],unsigned int BytesPerPixelC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int ReadBlockWidthY[],unsigned int ReadBlockWidthC[],unsigned int ReadBlockHeightY[],unsigned int ReadBlockHeightC[],unsigned int DCCMetaPitchY[],unsigned int DCCMetaPitchC[],unsigned int SurfaceSizeInMALL[],bool * ExceededMALLSize)1765 void dml32_CalculateSurfaceSizeInMall(
1766 		unsigned int NumberOfActiveSurfaces,
1767 		unsigned int MALLAllocatedForDCN,
1768 		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1769 		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
1770 		bool DCCEnable[],
1771 		bool ViewportStationary[],
1772 		unsigned int ViewportXStartY[],
1773 		unsigned int ViewportYStartY[],
1774 		unsigned int ViewportXStartC[],
1775 		unsigned int ViewportYStartC[],
1776 		unsigned int ViewportWidthY[],
1777 		unsigned int ViewportHeightY[],
1778 		unsigned int BytesPerPixelY[],
1779 		unsigned int ViewportWidthC[],
1780 		unsigned int ViewportHeightC[],
1781 		unsigned int BytesPerPixelC[],
1782 		unsigned int SurfaceWidthY[],
1783 		unsigned int SurfaceWidthC[],
1784 		unsigned int SurfaceHeightY[],
1785 		unsigned int SurfaceHeightC[],
1786 		unsigned int Read256BytesBlockWidthY[],
1787 		unsigned int Read256BytesBlockWidthC[],
1788 		unsigned int Read256BytesBlockHeightY[],
1789 		unsigned int Read256BytesBlockHeightC[],
1790 		unsigned int ReadBlockWidthY[],
1791 		unsigned int ReadBlockWidthC[],
1792 		unsigned int ReadBlockHeightY[],
1793 		unsigned int ReadBlockHeightC[],
1794 		unsigned int DCCMetaPitchY[],
1795 		unsigned int DCCMetaPitchC[],
1796 
1797 		/* Output */
1798 		unsigned int    SurfaceSizeInMALL[],
1799 		bool *ExceededMALLSize)
1800 {
1801 	unsigned int k;
1802 	unsigned int TotalSurfaceSizeInMALLForSS = 0;
1803 	unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
1804 	unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;
1805 
1806 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1807 		if (ViewportStationary[k]) {
1808 			SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1809 					dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1810 						ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1811 						ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1812 						ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1813 						ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1814 						dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
1815 
1816 			if (ReadBlockWidthC[k] > 0) {
1817 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1818 						dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1819 							dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1820 							ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1821 							dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1822 							dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1823 							dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1824 							ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1825 							dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
1826 							BytesPerPixelC[k];
1827 			}
1828 			if (DCCEnable[k] == true) {
1829 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1830 						(dml_min(dml_ceil(DCCMetaPitchY[k], 8 * Read256BytesBlockWidthY[k]),
1831 							dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1832 							Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1833 							- dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1834 							* dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1835 							Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1836 							ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1837 							Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 *
1838 							Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256) + (64 * 1024);
1839 				if (Read256BytesBlockWidthC[k] > 0) {
1840 					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1841 							dml_min(dml_ceil(DCCMetaPitchC[k], 8 *
1842 								Read256BytesBlockWidthC[k]),
1843 								dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1844 								* Read256BytesBlockWidthC[k] - 1, 8 *
1845 								Read256BytesBlockWidthC[k]) -
1846 								dml_floor(ViewportXStartC[k], 8 *
1847 								Read256BytesBlockWidthC[k])) *
1848 								dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1849 								Read256BytesBlockHeightC[k]),
1850 								dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1851 								8 * Read256BytesBlockHeightC[k] - 1, 8 *
1852 								Read256BytesBlockHeightC[k]) -
1853 								dml_floor(ViewportYStartC[k], 8 *
1854 								Read256BytesBlockHeightC[k])) *
1855 								BytesPerPixelC[k] / 256;
1856 				}
1857 			}
1858 		} else {
1859 			SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1860 					ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1861 					dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1862 							ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1863 							BytesPerPixelY[k];
1864 			if (ReadBlockWidthC[k] > 0) {
1865 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1866 						dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1867 								ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1868 						dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1869 								ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
1870 								BytesPerPixelC[k];
1871 			}
1872 			if (DCCEnable[k] == true) {
1873 				SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1874 						(dml_ceil(dml_min(DCCMetaPitchY[k], ViewportWidthY[k] + 8 *
1875 								Read256BytesBlockWidthY[k] - 1), 8 *
1876 								Read256BytesBlockWidthY[k]) *
1877 						dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1878 								Read256BytesBlockHeightY[k] - 1), 8 *
1879 								Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024);
1880 
1881 				if (Read256BytesBlockWidthC[k] > 0) {
1882 					SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1883 							dml_ceil(dml_min(DCCMetaPitchC[k], ViewportWidthC[k] + 8 *
1884 									Read256BytesBlockWidthC[k] - 1), 8 *
1885 									Read256BytesBlockWidthC[k]) *
1886 							dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1887 									Read256BytesBlockHeightC[k] - 1), 8 *
1888 									Read256BytesBlockHeightC[k]) *
1889 									BytesPerPixelC[k] / 256;
1890 				}
1891 			}
1892 		}
1893 	}
1894 
1895 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1896 		/* SS and Subvp counted separate as they are never used at the same time */
1897 		if (UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)
1898 			TotalSurfaceSizeInMALLForSubVP = TotalSurfaceSizeInMALLForSubVP + SurfaceSizeInMALL[k];
1899 		else if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1900 			TotalSurfaceSizeInMALLForSS = TotalSurfaceSizeInMALLForSS + SurfaceSizeInMALL[k];
1901 	}
1902 	*ExceededMALLSize =  (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
1903 							(TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
1904 } // CalculateSurfaceSizeInMall
1905 
/*
 * dml32_CalculateVMRowAndSwath - per-surface VM / PTE row sizing and prefetch
 * source line accounting.
 *
 * For every surface k in [0, NumberOfActiveSurfaces):
 *  1. Picks vm_group_bytes[k] / dpte_group_bytes[k] from HostVMEnable,
 *     GPUVMEnable, GPUVMMinPageSizeKBytes[k] and the source rotation.
 *  2. Splits the PTE request buffer between luma and chroma. Surfaces whose
 *     format is dm_420_8, dm_420_10, dm_420_12 or dm_rgbe_alpha get a chroma
 *     pass through dml32_CalculateVMAndRowBytes() and
 *     dml32_CalculatePrefetchSourceLines(); all other formats give the whole
 *     buffer to luma and zero the chroma intermediates.
 *  3. Runs the same two helpers for luma, sums the luma and chroma results into
 *     PDEAndMetaPTEBytesFrame[k] and MetaRowByte[k], and records whether the
 *     PTE rows fit the buffer (normal and one-row-per-frame variants).
 *
 * dml32_CalculateMALLUseForStaticScreen() is then called once for all surfaces,
 * after which PTE_BUFFER_MODE[], BIGK_FRAGMENT_SIZE[], use_one_row_for_frame[]
 * and use_one_row_for_frame_flip[] are derived. When one-row-per-frame is used
 * the row height/width/bytes outputs are replaced by their one-row-per-frame
 * values and PixelPTEBytesPerRow[k] is halved. Finally
 * dml32_CalculateRowBandwidth() fills meta_row_bw[k] and dpte_row_bw[k].
 *
 * All array parameters are indexed by surface and must hold at least
 * NumberOfActiveSurfaces entries; the local scratch arrays are sized
 * DC__NUM_DPP__MAX.
 *
 * NOTE(review): on the non-chroma path the chroma outputs that are only written
 * by the helpers (e.g. dpte_row_height_chroma[k], VInitPreFillC[k]) are not
 * assigned in this function before the one-row-per-frame override.
 */
void dml32_CalculateVMRowAndSwath(
		unsigned int NumberOfActiveSurfaces,
		DmlPipe myPipe[],
		unsigned int SurfaceSizeInMALL[],
		unsigned int PTEBufferSizeInRequestsLuma,
		unsigned int PTEBufferSizeInRequestsChroma,
		unsigned int DCCMetaBufferSizeBytes,
		enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
		unsigned int MALLAllocatedForDCN,
		double SwathWidthY[],
		double SwathWidthC[],
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int HostVMMaxNonCachedPageTableLevels,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int GPUVMMinPageSizeKBytes[],
		unsigned int HostVMMinPageSize,

		/* Output */
		bool PTEBufferSizeNotExceeded[],
		bool DCCMetaBufferSizeNotExceeded[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],
		unsigned int dpte_row_height_luma[],
		unsigned int dpte_row_height_chroma[],
		unsigned int dpte_row_height_linear_luma[],     // VBA_DELTA
		unsigned int dpte_row_height_linear_chroma[],   // VBA_DELTA
		unsigned int meta_req_width[],
		unsigned int meta_req_width_chroma[],
		unsigned int meta_req_height[],
		unsigned int meta_req_height_chroma[],
		unsigned int meta_row_width[],
		unsigned int meta_row_width_chroma[],
		unsigned int meta_row_height[],
		unsigned int meta_row_height_chroma[],
		unsigned int vm_group_bytes[],
		unsigned int dpte_group_bytes[],
		unsigned int PixelPTEReqWidthY[],
		unsigned int PixelPTEReqHeightY[],
		unsigned int PTERequestSizeY[],
		unsigned int PixelPTEReqWidthC[],
		unsigned int PixelPTEReqHeightC[],
		unsigned int PTERequestSizeC[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int meta_pte_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		unsigned int meta_pte_bytes_per_frame_ub_c[],
		double PrefetchSourceLinesY[],
		double PrefetchSourceLinesC[],
		double VInitPreFillY[],
		double VInitPreFillC[],
		unsigned int MaxNumSwathY[],
		unsigned int MaxNumSwathC[],
		double meta_row_bw[],
		double dpte_row_bw[],
		double PixelPTEBytesPerRow[],
		double PDEAndMetaPTEBytesFrame[],
		double MetaRowByte[],
		bool use_one_row_for_frame[],
		bool use_one_row_for_frame_flip[],
		bool UsesMALLForStaticScreen[],
		bool PTE_BUFFER_MODE[],
		unsigned int BIGK_FRAGMENT_SIZE[])
{
	unsigned int k;
	unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
	unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
	unsigned int PDEAndMetaPTEBytesFrameY;
	unsigned int PDEAndMetaPTEBytesFrameC;
	/*
	 * Zero-initialised because dml32_CalculateVMAndRowBytes() does not write
	 * *MetaRowByte on its linear-tiling path when DCC is enabled.
	 */
	unsigned int MetaRowByteY[DC__NUM_DPP__MAX] = {0};
	unsigned int MetaRowByteC[DC__NUM_DPP__MAX] = {0};
	unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
	unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
	unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
	unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
	bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		/* Group sizes: HostVM takes precedence over plain GPUVM. */
		if (HostVMEnable == true) {
			vm_group_bytes[k] = 512;
			dpte_group_bytes[k] = 512;
		} else if (GPUVMEnable == true) {
			vm_group_bytes[k] = 2048;
			if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
				dpte_group_bytes[k] = 512;
			else
				dpte_group_bytes[k] = 2048;
		} else {
			vm_group_bytes[k] = 0;
			dpte_group_bytes[k] = 0;
		}

		/* Formats with a second (chroma) plane. */
		if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
				myPipe[k].SourcePixelFormat == dm_420_12 ||
				myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
			/*
			 * 10/12-bit 4:2:0 without vertical rotation shares the combined
			 * PTE buffer evenly between the two planes.
			 */
			if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
					!IsVertical(myPipe[k].SourceRotation)) {
				PTEBufferSizeInRequestsForLuma[k] =
						(PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
			} else {
				PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
				PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
			}

			PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
					myPipe[k].ViewportStationary,
					myPipe[k].DCCEnable,
					myPipe[k].DPPPerSurface,
					myPipe[k].BlockHeight256BytesC,
					myPipe[k].BlockWidth256BytesC,
					myPipe[k].SourcePixelFormat,
					myPipe[k].SurfaceTiling,
					myPipe[k].BytePerPixelC,
					myPipe[k].SourceRotation,
					SwathWidthC[k],
					myPipe[k].ViewportHeightChroma,
					myPipe[k].ViewportXStartC,
					myPipe[k].ViewportYStartC,
					GPUVMEnable,
					HostVMEnable,
					HostVMMaxNonCachedPageTableLevels,
					GPUVMMaxPageTableLevels,
					GPUVMMinPageSizeKBytes[k],
					HostVMMinPageSize,
					PTEBufferSizeInRequestsForChroma[k],
					myPipe[k].PitchC,
					myPipe[k].DCCMetaPitchC,
					myPipe[k].BlockWidthC,
					myPipe[k].BlockHeightC,

					/* Output */
					&MetaRowByteC[k],
					&PixelPTEBytesPerRowC[k],
					&dpte_row_width_chroma_ub[k],
					&dpte_row_height_chroma[k],
					&dpte_row_height_linear_chroma[k],
					&PixelPTEBytesPerRowC_one_row_per_frame[k],
					&dpte_row_width_chroma_ub_one_row_per_frame[k],
					&dpte_row_height_chroma_one_row_per_frame[k],
					&meta_req_width_chroma[k],
					&meta_req_height_chroma[k],
					&meta_row_width_chroma[k],
					&meta_row_height_chroma[k],
					&PixelPTEReqWidthC[k],
					&PixelPTEReqHeightC[k],
					&PTERequestSizeC[k],
					&dpde0_bytes_per_frame_ub_c[k],
					&meta_pte_bytes_per_frame_ub_c[k]);

			PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
					myPipe[k].VRatioChroma,
					myPipe[k].VTapsChroma,
					myPipe[k].InterlaceEnable,
					myPipe[k].ProgressiveToInterlaceUnitInOPP,
					myPipe[k].SwathHeightC,
					myPipe[k].SourceRotation,
					myPipe[k].ViewportStationary,
					SwathWidthC[k],
					myPipe[k].ViewportHeightChroma,
					myPipe[k].ViewportXStartC,
					myPipe[k].ViewportYStartC,

					/* Output */
					&VInitPreFillC[k],
					&MaxNumSwathC[k]);
		} else {
			/* Single-plane format: luma owns the whole PTE buffer. */
			PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
			PTEBufferSizeInRequestsForChroma[k] = 0;
			PixelPTEBytesPerRowC[k] = 0;
			PDEAndMetaPTEBytesFrameC = 0;
			MetaRowByteC[k] = 0;
			MaxNumSwathC[k] = 0;
			PrefetchSourceLinesC[k] = 0;
			dpte_row_height_chroma_one_row_per_frame[k] = 0;
			dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
			PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
		}

		PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
				myPipe[k].ViewportStationary,
				myPipe[k].DCCEnable,
				myPipe[k].DPPPerSurface,
				myPipe[k].BlockHeight256BytesY,
				myPipe[k].BlockWidth256BytesY,
				myPipe[k].SourcePixelFormat,
				myPipe[k].SurfaceTiling,
				myPipe[k].BytePerPixelY,
				myPipe[k].SourceRotation,
				SwathWidthY[k],
				myPipe[k].ViewportHeight,
				myPipe[k].ViewportXStart,
				myPipe[k].ViewportYStart,
				GPUVMEnable,
				HostVMEnable,
				HostVMMaxNonCachedPageTableLevels,
				GPUVMMaxPageTableLevels,
				GPUVMMinPageSizeKBytes[k],
				HostVMMinPageSize,
				PTEBufferSizeInRequestsForLuma[k],
				myPipe[k].PitchY,
				myPipe[k].DCCMetaPitchY,
				myPipe[k].BlockWidthY,
				myPipe[k].BlockHeightY,

				/* Output */
				&MetaRowByteY[k],
				&PixelPTEBytesPerRowY[k],
				&dpte_row_width_luma_ub[k],
				&dpte_row_height_luma[k],
				&dpte_row_height_linear_luma[k],
				&PixelPTEBytesPerRowY_one_row_per_frame[k],
				&dpte_row_width_luma_ub_one_row_per_frame[k],
				&dpte_row_height_luma_one_row_per_frame[k],
				&meta_req_width[k],
				&meta_req_height[k],
				&meta_row_width[k],
				&meta_row_height[k],
				&PixelPTEReqWidthY[k],
				&PixelPTEReqHeightY[k],
				&PTERequestSizeY[k],
				&dpde0_bytes_per_frame_ub_l[k],
				&meta_pte_bytes_per_frame_ub_l[k]);

		PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
				myPipe[k].VRatio,
				myPipe[k].VTaps,
				myPipe[k].InterlaceEnable,
				myPipe[k].ProgressiveToInterlaceUnitInOPP,
				myPipe[k].SwathHeightY,
				myPipe[k].SourceRotation,
				myPipe[k].ViewportStationary,
				SwathWidthY[k],
				myPipe[k].ViewportHeight,
				myPipe[k].ViewportXStart,
				myPipe[k].ViewportYStart,

				/* Output */
				&VInitPreFillY[k],
				&MaxNumSwathY[k]);

		PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
		MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];

		/* The row fits when it needs no more than 64 bytes per buffered request. */
		if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
				PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
			PTEBufferSizeNotExceeded[k] = true;
		} else {
			PTEBufferSizeNotExceeded[k] = false;
		}

		/* The one-row-per-frame variant is checked against twice that budget. */
		one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
			PTEBufferSizeInRequestsForLuma[k] &&
			PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
	}

	dml32_CalculateMALLUseForStaticScreen(
			NumberOfActiveSurfaces,
			MALLAllocatedForDCN,
			UseMALLForStaticScreen,   // mode
			SurfaceSizeInMALL,
			one_row_per_frame_fits_in_buffer,
			/* Output */
			UsesMALLForStaticScreen); // boolean

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
				(GPUVMMinPageSizeKBytes[k] > 64);
		/* log2(page size in bytes) - 12, i.e. relative to a 4 KiB page. */
		BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
	}

	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n",  __func__, k, SurfaceSizeInMALL[k]);
		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
#endif
		/*
		 * Same conditions as PTE_BUFFER_MODE, except the large-page case
		 * additionally requires a vertical rotation.
		 */
		use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
				(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
				(GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));

		use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
				!(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);

		if (use_one_row_for_frame[k]) {
			dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
			dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
			PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
			dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
			dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
			PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
			PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
		}

		if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
			DCCMetaBufferSizeNotExceeded[k] = true;
		else
			DCCMetaBufferSizeNotExceeded[k] = false;

		PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
		if (use_one_row_for_frame[k])
			PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;

		dml32_CalculateRowBandwidth(
				GPUVMEnable,
				myPipe[k].SourcePixelFormat,
				myPipe[k].VRatio,
				myPipe[k].VRatioChroma,
				myPipe[k].DCCEnable,
				myPipe[k].HTotal / myPipe[k].PixelClock,
				MetaRowByteY[k], MetaRowByteC[k],
				meta_row_height[k],
				meta_row_height_chroma[k],
				PixelPTEBytesPerRowY[k],
				PixelPTEBytesPerRowC[k],
				dpte_row_height_luma[k],
				dpte_row_height_chroma[k],

				/* Output */
				&meta_row_bw[k],
				&dpte_row_bw[k]);
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, use_one_row_for_frame        = %d\n",  __func__, k, use_one_row_for_frame[k]);
		dml_print("DML::%s: k=%d, use_one_row_for_frame_flip   = %d\n",
				__func__, k, use_one_row_for_frame_flip[k]);
		dml_print("DML::%s: k=%d, UseMALLForPStateChange       = %d\n",
				__func__, k, UseMALLForPStateChange[k]);
		dml_print("DML::%s: k=%d, dpte_row_height_luma         = %d\n",  __func__, k, dpte_row_height_luma[k]);
		dml_print("DML::%s: k=%d, dpte_row_width_luma_ub       = %d\n",
				__func__, k, dpte_row_width_luma_ub[k]);
		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY         = %d\n",  __func__, k, PixelPTEBytesPerRowY[k]);
		dml_print("DML::%s: k=%d, dpte_row_height_chroma       = %d\n",
				__func__, k, dpte_row_height_chroma[k]);
		dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub     = %d\n",
				__func__, k, dpte_row_width_chroma_ub[k]);
		dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC         = %d\n",  __func__, k, PixelPTEBytesPerRowC[k]);
		/* PixelPTEBytesPerRow[] is double, so it must be printed with %f. */
		dml_print("DML::%s: k=%d, PixelPTEBytesPerRow          = %f\n",  __func__, k, PixelPTEBytesPerRow[k]);
		dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded     = %d\n",
				__func__, k, PTEBufferSizeNotExceeded[k]);
		dml_print("DML::%s: k=%d, PTE_BUFFER_MODE              = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
		dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE           = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
#endif
	}
} // CalculateVMRowAndSwath
2257 
dml32_CalculateVMAndRowBytes(bool ViewportStationary,bool DCCEnable,unsigned int NumberOfDPPs,unsigned int BlockHeight256Bytes,unsigned int BlockWidth256Bytes,enum source_format_class SourcePixelFormat,unsigned int SurfaceTiling,unsigned int BytePerPixel,enum dm_rotation_angle SourceRotation,double SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMaxPageTableLevels,unsigned int GPUVMMinPageSizeKBytes,unsigned int HostVMMinPageSize,unsigned int PTEBufferSizeInRequests,unsigned int Pitch,unsigned int DCCMetaPitch,unsigned int MacroTileWidth,unsigned int MacroTileHeight,unsigned int * MetaRowByte,unsigned int * PixelPTEBytesPerRow,unsigned int * dpte_row_width_ub,unsigned int * dpte_row_height,unsigned int * dpte_row_height_linear,unsigned int * PixelPTEBytesPerRow_one_row_per_frame,unsigned int * dpte_row_width_ub_one_row_per_frame,unsigned int * dpte_row_height_one_row_per_frame,unsigned int * MetaRequestWidth,unsigned int * MetaRequestHeight,unsigned int * meta_row_width,unsigned int * meta_row_height,unsigned int * PixelPTEReqWidth,unsigned int * PixelPTEReqHeight,unsigned int * PTERequestSize,unsigned int * DPDE0BytesFrame,unsigned int * MetaPTEBytesFrame)2258 unsigned int dml32_CalculateVMAndRowBytes(
2259 		bool ViewportStationary,
2260 		bool DCCEnable,
2261 		unsigned int NumberOfDPPs,
2262 		unsigned int BlockHeight256Bytes,
2263 		unsigned int BlockWidth256Bytes,
2264 		enum source_format_class SourcePixelFormat,
2265 		unsigned int SurfaceTiling,
2266 		unsigned int BytePerPixel,
2267 		enum dm_rotation_angle SourceRotation,
2268 		double SwathWidth,
2269 		unsigned int ViewportHeight,
2270 		unsigned int    ViewportXStart,
2271 		unsigned int    ViewportYStart,
2272 		bool GPUVMEnable,
2273 		bool HostVMEnable,
2274 		unsigned int HostVMMaxNonCachedPageTableLevels,
2275 		unsigned int GPUVMMaxPageTableLevels,
2276 		unsigned int GPUVMMinPageSizeKBytes,
2277 		unsigned int HostVMMinPageSize,
2278 		unsigned int PTEBufferSizeInRequests,
2279 		unsigned int Pitch,
2280 		unsigned int DCCMetaPitch,
2281 		unsigned int MacroTileWidth,
2282 		unsigned int MacroTileHeight,
2283 
2284 		/* Output */
2285 		unsigned int *MetaRowByte,
2286 		unsigned int *PixelPTEBytesPerRow,
2287 		unsigned int    *dpte_row_width_ub,
2288 		unsigned int *dpte_row_height,
2289 		unsigned int *dpte_row_height_linear,
2290 		unsigned int    *PixelPTEBytesPerRow_one_row_per_frame,
2291 		unsigned int    *dpte_row_width_ub_one_row_per_frame,
2292 		unsigned int    *dpte_row_height_one_row_per_frame,
2293 		unsigned int *MetaRequestWidth,
2294 		unsigned int *MetaRequestHeight,
2295 		unsigned int *meta_row_width,
2296 		unsigned int *meta_row_height,
2297 		unsigned int *PixelPTEReqWidth,
2298 		unsigned int *PixelPTEReqHeight,
2299 		unsigned int *PTERequestSize,
2300 		unsigned int    *DPDE0BytesFrame,
2301 		unsigned int    *MetaPTEBytesFrame)
2302 {
2303 	unsigned int MPDEBytesFrame;
2304 	unsigned int DCCMetaSurfaceBytes;
2305 	unsigned int ExtraDPDEBytesFrame;
2306 	unsigned int PDEAndMetaPTEBytesFrame;
2307 	unsigned int HostVMDynamicLevels = 0;
2308 	unsigned int    MacroTileSizeBytes;
2309 	unsigned int    vp_height_meta_ub;
2310 	unsigned int    vp_height_dpte_ub;
2311 	unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2312 
2313 	if (GPUVMEnable == true && HostVMEnable == true) {
2314 		if (HostVMMinPageSize < 2048)
2315 			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2316 		else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2317 			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2318 		else
2319 			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2320 	}
2321 
2322 	*MetaRequestHeight = 8 * BlockHeight256Bytes;
2323 	*MetaRequestWidth = 8 * BlockWidth256Bytes;
2324 	if (SurfaceTiling == dm_sw_linear) {
2325 		*meta_row_height = 32;
2326 		*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2327 				- dml_floor(ViewportXStart, *MetaRequestWidth);
2328 	} else if (!IsVertical(SourceRotation)) {
2329 		*meta_row_height = *MetaRequestHeight;
2330 		if (ViewportStationary && NumberOfDPPs == 1) {
2331 			*meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2332 					*MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2333 		} else {
2334 			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2335 		}
2336 		*MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2337 	} else {
2338 		*meta_row_height = *MetaRequestWidth;
2339 		if (ViewportStationary && NumberOfDPPs == 1) {
2340 			*meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2341 					*MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2342 		} else {
2343 			*meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2344 		}
2345 		*MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2346 	}
2347 
2348 	if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2349 		vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2350 				64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2351 	} else if (!IsVertical(SourceRotation)) {
2352 		vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2353 	} else {
2354 		vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2355 	}
2356 
2357 	DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2358 
2359 	if (GPUVMEnable == true) {
2360 		*MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2361 				(8 * 4.0 * 1024), 1) + 1) * 64;
2362 		MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2363 	} else {
2364 		*MetaPTEBytesFrame = 0;
2365 		MPDEBytesFrame = 0;
2366 	}
2367 
2368 	if (DCCEnable != true) {
2369 		*MetaPTEBytesFrame = 0;
2370 		MPDEBytesFrame = 0;
2371 		*MetaRowByte = 0;
2372 	}
2373 
2374 	MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2375 
2376 	if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2377 		if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2378 			vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2379 					MacroTileHeight - 1, MacroTileHeight) -
2380 					dml_floor(ViewportYStart, MacroTileHeight);
2381 		} else if (!IsVertical(SourceRotation)) {
2382 			vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2383 		} else {
2384 			vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2385 		}
2386 		*DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2387 				(8 * 2097152), 1) + 1);
2388 		ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2389 	} else {
2390 		*DPDE0BytesFrame = 0;
2391 		ExtraDPDEBytesFrame = 0;
2392 		vp_height_dpte_ub = 0;
2393 	}
2394 
2395 	PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2396 
2397 #ifdef __DML_VBA_DEBUG__
2398 	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2399 	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2400 	dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2401 	dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2402 	dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2403 	dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2404 	dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2405 	dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2406 	dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2407 	dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2408 	dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2409 	dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2410 	dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2411 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2412 	dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2413 	dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2414 	dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2415 #endif
2416 
2417 	if (HostVMEnable == true)
2418 		PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2419 
2420 	if (SurfaceTiling == dm_sw_linear) {
2421 		*PixelPTEReqHeight = 1;
2422 		*PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2423 		PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2424 		*PTERequestSize = 64;
2425 	} else if (GPUVMMinPageSizeKBytes == 4) {
2426 		*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2427 		*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2428 		*PTERequestSize = 128;
2429 	} else {
2430 		*PixelPTEReqHeight = MacroTileHeight;
2431 		*PixelPTEReqWidth = 8 *  1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2432 		*PTERequestSize = 64;
2433 	}
2434 #ifdef __DML_VBA_DEBUG__
2435 	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2436 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2437 	dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2438 	dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2439 	dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2440 	dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2441 	dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2442 #endif
2443 
2444 	*dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2445 	*dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2446 			(double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2447 					(double) *PixelPTEReqWidth;
2448 	*PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2449 			*PTERequestSize;
2450 
2451 	if (SurfaceTiling == dm_sw_linear) {
2452 		*dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2453 				*PixelPTEReqWidth / Pitch), 1));
2454 #ifdef __DML_VBA_DEBUG__
2455 		dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2456 				PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2457 		dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2458 				dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2459 		dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2460 				dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2461 		dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2462 				1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2463 						*PixelPTEReqWidth / Pitch), 1));
2464 		dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2465 #endif
2466 		*dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2467 				(double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2468 		*PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2469 
2470 		// VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2471 		*dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2472 				PixelPTEReqWidth_linear / Pitch), 1);
2473 		if (*dpte_row_height_linear > 128)
2474 			*dpte_row_height_linear = 128;
2475 
2476 	} else if (!IsVertical(SourceRotation)) {
2477 		*dpte_row_height = *PixelPTEReqHeight;
2478 
2479 		if (GPUVMMinPageSizeKBytes > 64) {
2480 			*dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2481 					*PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2482 		} else if (ViewportStationary && (NumberOfDPPs == 1)) {
2483 			*dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2484 					*PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2485 					dml_floor(ViewportXStart, *PixelPTEReqWidth);
2486 		} else {
2487 			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2488 					*PixelPTEReqWidth;
2489 		}
2490 
2491 		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2492 	} else {
2493 		*dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2494 
2495 		if (ViewportStationary && (NumberOfDPPs == 1)) {
2496 			*dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2497 					*PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2498 		} else {
2499 			*dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2500 					* *PixelPTEReqHeight;
2501 		}
2502 
2503 		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2504 	}
2505 
2506 	if (GPUVMEnable != true)
2507 		*PixelPTEBytesPerRow = 0;
2508 	if (HostVMEnable == true)
2509 		*PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2510 
2511 #ifdef __DML_VBA_DEBUG__
2512 	dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2513 	dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2514 	dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2515 	dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2516 	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2517 	dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2518 	dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2519 	dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2520 			__func__, *dpte_row_width_ub_one_row_per_frame);
2521 	dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2522 			__func__, *PixelPTEBytesPerRow_one_row_per_frame);
2523 	dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2524 			*MetaPTEBytesFrame);
2525 #endif
2526 
2527 	return PDEAndMetaPTEBytesFrame;
2528 } // CalculateVMAndRowBytes
2529 
dml32_CalculatePrefetchSourceLines(double VRatio,unsigned int VTaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,enum dm_rotation_angle SourceRotation,bool ViewportStationary,double SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,double * VInitPreFill,unsigned int * MaxNumSwath)2530 double dml32_CalculatePrefetchSourceLines(
2531 		double VRatio,
2532 		unsigned int VTaps,
2533 		bool Interlace,
2534 		bool ProgressiveToInterlaceUnitInOPP,
2535 		unsigned int SwathHeight,
2536 		enum dm_rotation_angle SourceRotation,
2537 		bool ViewportStationary,
2538 		double SwathWidth,
2539 		unsigned int ViewportHeight,
2540 		unsigned int ViewportXStart,
2541 		unsigned int ViewportYStart,
2542 
2543 		/* Output */
2544 		double *VInitPreFill,
2545 		unsigned int *MaxNumSwath)
2546 {
2547 
2548 	unsigned int vp_start_rot;
2549 	unsigned int sw0_tmp;
2550 	unsigned int MaxPartialSwath;
2551 	double numLines;
2552 
2553 #ifdef __DML_VBA_DEBUG__
2554 	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2555 	dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2556 	dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2557 	dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2558 	dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2559 	dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2560 #endif
2561 	if (ProgressiveToInterlaceUnitInOPP)
2562 		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2563 	else
2564 		*VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2565 
2566 	if (ViewportStationary) {
2567 		if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2568 			vp_start_rot = SwathHeight -
2569 					(((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2570 		} else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2571 			vp_start_rot = ViewportXStart;
2572 		} else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2573 			vp_start_rot = SwathHeight -
2574 					(((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2575 		} else {
2576 			vp_start_rot = ViewportYStart;
2577 		}
2578 		sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2579 		if (sw0_tmp < *VInitPreFill)
2580 			*MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2581 		else
2582 			*MaxNumSwath = 1;
2583 		MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2584 	} else {
2585 		*MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2586 		if (*VInitPreFill > 1)
2587 			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2588 		else
2589 			MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2590 	}
2591 	numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2592 
2593 #ifdef __DML_VBA_DEBUG__
2594 	dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2595 	dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2596 	dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2597 	dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2598 	dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2599 #endif
2600 	return numLines;
2601 
2602 } // CalculatePrefetchSourceLines
2603 
dml32_CalculateMALLUseForStaticScreen(unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCNFinal,enum dm_use_mall_for_static_screen_mode * UseMALLForStaticScreen,unsigned int SurfaceSizeInMALL[],bool one_row_per_frame_fits_in_buffer[],bool UsesMALLForStaticScreen[])2604 void dml32_CalculateMALLUseForStaticScreen(
2605 		unsigned int NumberOfActiveSurfaces,
2606 		unsigned int MALLAllocatedForDCNFinal,
2607 		enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2608 		unsigned int SurfaceSizeInMALL[],
2609 		bool one_row_per_frame_fits_in_buffer[],
2610 
2611 		/* output */
2612 		bool UsesMALLForStaticScreen[])
2613 {
2614 	unsigned int k;
2615 	unsigned int SurfaceToAddToMALL;
2616 	bool CanAddAnotherSurfaceToMALL;
2617 	unsigned int TotalSurfaceSizeInMALL;
2618 
2619 	TotalSurfaceSizeInMALL = 0;
2620 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2621 		UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2622 		if (UsesMALLForStaticScreen[k])
2623 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2624 #ifdef __DML_VBA_DEBUG__
2625 		dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n",  __func__, k, UsesMALLForStaticScreen[k]);
2626 		dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n",  __func__, k, TotalSurfaceSizeInMALL);
2627 #endif
2628 	}
2629 
2630 	SurfaceToAddToMALL = 0;
2631 	CanAddAnotherSurfaceToMALL = true;
2632 	while (CanAddAnotherSurfaceToMALL) {
2633 		CanAddAnotherSurfaceToMALL = false;
2634 		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2635 			if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2636 					!UsesMALLForStaticScreen[k] &&
2637 					UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2638 					one_row_per_frame_fits_in_buffer[k] &&
2639 					(!CanAddAnotherSurfaceToMALL ||
2640 					SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2641 				CanAddAnotherSurfaceToMALL = true;
2642 				SurfaceToAddToMALL = k;
2643 #ifdef __DML_VBA_DEBUG__
2644 				dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2645 						__func__, k, UseMALLForStaticScreen[k]);
2646 #endif
2647 			}
2648 		}
2649 		if (CanAddAnotherSurfaceToMALL) {
2650 			UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2651 			TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2652 
2653 #ifdef __DML_VBA_DEBUG__
2654 			dml_print("DML::%s: SurfaceToAddToMALL       = %d\n",  __func__, SurfaceToAddToMALL);
2655 			dml_print("DML::%s: TotalSurfaceSizeInMALL   = %d\n",  __func__, TotalSurfaceSizeInMALL);
2656 #endif
2657 
2658 		}
2659 	}
2660 }
2661 
dml32_CalculateRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)2662 void dml32_CalculateRowBandwidth(
2663 		bool GPUVMEnable,
2664 		enum source_format_class SourcePixelFormat,
2665 		double VRatio,
2666 		double VRatioChroma,
2667 		bool DCCEnable,
2668 		double LineTime,
2669 		unsigned int MetaRowByteLuma,
2670 		unsigned int MetaRowByteChroma,
2671 		unsigned int meta_row_height_luma,
2672 		unsigned int meta_row_height_chroma,
2673 		unsigned int PixelPTEBytesPerRowLuma,
2674 		unsigned int PixelPTEBytesPerRowChroma,
2675 		unsigned int dpte_row_height_luma,
2676 		unsigned int dpte_row_height_chroma,
2677 		/* Output */
2678 		double *meta_row_bw,
2679 		double *dpte_row_bw)
2680 {
2681 	if (DCCEnable != true) {
2682 		*meta_row_bw = 0;
2683 	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2684 			SourcePixelFormat == dm_rgbe_alpha) {
2685 		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2686 				MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2687 	} else {
2688 		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2689 	}
2690 
2691 	if (GPUVMEnable != true) {
2692 		*dpte_row_bw = 0;
2693 	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2694 			SourcePixelFormat == dm_rgbe_alpha) {
2695 		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2696 				VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2697 	} else {
2698 		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2699 	}
2700 }
2701 
/*
 * Return the largest of the three urgent latency inputs.  When
 * DoUrgentLatencyAdjustment is set, the result is additionally offset by
 *   UrgentLatencyAdjustmentFabricClockComponent *
 *   (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool   DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double WorstCaseLatency = dml_max3(UrgentLatencyPixelDataOnly,
			UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return WorstCaseLatency;

	return WorstCaseLatency + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2720 
dml32_CalculateUrgentBurstFactor(enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,unsigned int swath_width_luma_ub,unsigned int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double LineTime,double UrgentLatency,double CursorBufferSize,unsigned int CursorWidth,unsigned int CursorBPP,double VRatio,double VRatioC,double BytePerPixelInDETY,double BytePerPixelInDETC,unsigned int DETBufferSizeY,unsigned int DETBufferSizeC,double * UrgentBurstFactorCursor,double * UrgentBurstFactorLuma,double * UrgentBurstFactorChroma,bool * NotEnoughUrgentLatencyHiding)2721 void dml32_CalculateUrgentBurstFactor(
2722 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2723 		unsigned int    swath_width_luma_ub,
2724 		unsigned int    swath_width_chroma_ub,
2725 		unsigned int SwathHeightY,
2726 		unsigned int SwathHeightC,
2727 		double  LineTime,
2728 		double  UrgentLatency,
2729 		double  CursorBufferSize,
2730 		unsigned int CursorWidth,
2731 		unsigned int CursorBPP,
2732 		double  VRatio,
2733 		double  VRatioC,
2734 		double  BytePerPixelInDETY,
2735 		double  BytePerPixelInDETC,
2736 		unsigned int    DETBufferSizeY,
2737 		unsigned int    DETBufferSizeC,
2738 		/* Output */
2739 		double *UrgentBurstFactorCursor,
2740 		double *UrgentBurstFactorLuma,
2741 		double *UrgentBurstFactorChroma,
2742 		bool   *NotEnoughUrgentLatencyHiding)
2743 {
2744 	double       LinesInDETLuma;
2745 	double       LinesInDETChroma;
2746 	unsigned int LinesInCursorBuffer;
2747 	double       CursorBufferSizeInTime;
2748 	double       DETBufferSizeInTimeLuma;
2749 	double       DETBufferSizeInTimeChroma;
2750 
2751 	*NotEnoughUrgentLatencyHiding = 0;
2752 
2753 	if (CursorWidth > 0) {
2754 		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2755 				(CursorWidth * CursorBPP / 8.0)), 1.0);
2756 		if (VRatio > 0) {
2757 			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2758 			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2759 				*NotEnoughUrgentLatencyHiding = 1;
2760 				*UrgentBurstFactorCursor = 0;
2761 			} else {
2762 				*UrgentBurstFactorCursor = CursorBufferSizeInTime /
2763 						(CursorBufferSizeInTime - UrgentLatency);
2764 			}
2765 		} else {
2766 			*UrgentBurstFactorCursor = 1;
2767 		}
2768 	}
2769 
2770 	LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2771 			DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2772 
2773 	if (VRatio > 0) {
2774 		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2775 		if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2776 			*NotEnoughUrgentLatencyHiding = 1;
2777 			*UrgentBurstFactorLuma = 0;
2778 		} else {
2779 			*UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2780 		}
2781 	} else {
2782 		*UrgentBurstFactorLuma = 1;
2783 	}
2784 
2785 	if (BytePerPixelInDETC > 0) {
2786 		LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2787 					1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2788 					/ swath_width_chroma_ub;
2789 
2790 		if (VRatio > 0) {
2791 			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2792 			if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2793 				*NotEnoughUrgentLatencyHiding = 1;
2794 				*UrgentBurstFactorChroma = 0;
2795 			} else {
2796 				*UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2797 						/ (DETBufferSizeInTimeChroma - UrgentLatency);
2798 			}
2799 		} else {
2800 			*UrgentBurstFactorChroma = 1;
2801 		}
2802 	}
2803 } // CalculateUrgentBurstFactor
2804 
dml32_CalculateDCFCLKDeepSleep(unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],double VRatio[],double VRatioChroma[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerSurface[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int ReturnBusWidth,double * DCFClkDeepSleep)2805 void dml32_CalculateDCFCLKDeepSleep(
2806 		unsigned int NumberOfActiveSurfaces,
2807 		unsigned int BytePerPixelY[],
2808 		unsigned int BytePerPixelC[],
2809 		double VRatio[],
2810 		double VRatioChroma[],
2811 		double SwathWidthY[],
2812 		double SwathWidthC[],
2813 		unsigned int DPPPerSurface[],
2814 		double HRatio[],
2815 		double HRatioChroma[],
2816 		double PixelClock[],
2817 		double PSCL_THROUGHPUT[],
2818 		double PSCL_THROUGHPUT_CHROMA[],
2819 		double Dppclk[],
2820 		double ReadBandwidthLuma[],
2821 		double ReadBandwidthChroma[],
2822 		unsigned int ReturnBusWidth,
2823 
2824 		/* Output */
2825 		double *DCFClkDeepSleep)
2826 {
2827 	unsigned int k;
2828 	double   DisplayPipeLineDeliveryTimeLuma;
2829 	double   DisplayPipeLineDeliveryTimeChroma;
2830 	double   DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2831 	double ReadBandwidth = 0.0;
2832 
2833 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2834 
2835 		if (VRatio[k] <= 1) {
2836 			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2837 					/ PixelClock[k];
2838 		} else {
2839 			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2840 		}
2841 		if (BytePerPixelC[k] == 0) {
2842 			DisplayPipeLineDeliveryTimeChroma = 0;
2843 		} else {
2844 			if (VRatioChroma[k] <= 1) {
2845 				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2846 						DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2847 			} else {
2848 				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2849 						/ Dppclk[k];
2850 			}
2851 		}
2852 
2853 		if (BytePerPixelC[k] > 0) {
2854 			DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2855 					BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2856 					__DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2857 					32.0 / DisplayPipeLineDeliveryTimeChroma);
2858 		} else {
2859 			DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2860 					64.0 / DisplayPipeLineDeliveryTimeLuma;
2861 		}
2862 		DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2863 
2864 #ifdef __DML_VBA_DEBUG__
2865 		dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2866 		dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2867 #endif
2868 	}
2869 
2870 	for (k = 0; k < NumberOfActiveSurfaces; ++k)
2871 		ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2872 
2873 	*DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2874 
2875 #ifdef __DML_VBA_DEBUG__
2876 	dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2877 	dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2878 	dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2879 	dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2880 #endif
2881 
2882 	for (k = 0; k < NumberOfActiveSurfaces; ++k)
2883 		*DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2884 #ifdef __DML_VBA_DEBUG__
2885 	dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2886 #endif
2887 } // CalculateDCFCLKDeepSleep
2888 
/*
 * Return the writeback delay derived from the last (unclamped) output line.
 *
 * The result is 0 when the unclamped last output line index is negative,
 * otherwise
 *   last_line * line_length + (HTotal - WritebackDestinationWidth) + 80.
 *
 * WritebackPixelFormat and WritebackHRatio are not read by this function.
 */
double dml32_CalculateWriteBackDelay(
		enum source_format_class WritebackPixelFormat,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackVTaps,
		unsigned int         WritebackDestinationWidth,
		unsigned int         WritebackDestinationHeight,
		unsigned int         WritebackSourceHeight,
		unsigned int HTotal)
{
	const double VInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
	/* The longer of the destination width and ceil(width / 6) * taps. */
	const double LineLength = dml_max((double) WritebackDestinationWidth,
			dml_ceil((double) WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
	const double LastOutputLine = WritebackDestinationHeight - 1 -
			dml_ceil(((double) WritebackSourceHeight - VInit) / WritebackVRatio, 1.0);

	if (LastOutputLine < 0)
		return 0;

	return LastOutputLine * LineLength + (HTotal - WritebackDestinationWidth) + 80;
}
2918 
dml32_UseMinimumDCFCLK(enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool DRRDisplay[],bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,unsigned int MaxInterDCNTileRepeaters,unsigned int MaxPrefetchMode,double DRAMClockChangeLatencyFinal,double FCLKChangeLatency,double SREnterPlusExitTime,unsigned int ReturnBusWidth,unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,unsigned int PixelChunkSizeInKByte,unsigned int MetaChunkSize,bool GPUVMEnable,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int NumberOfActiveSurfaces,double HostVMMinPageSize,unsigned int HostVMMaxNonCachedPageTableLevels,bool DynamicMetadataVMEnabled,bool ImmediateFlipRequirement,bool ProgressiveToInterlaceUnitInOPP,double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,unsigned int VTotal[],unsigned int VActive[],unsigned int DynamicMetadataTransmittedBytes[],unsigned int DynamicMetadataLinesBeforeActiveRequired[],bool Interlace[],double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],double RequiredDISPCLK[][2],double UrgLatency[],unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],double ProjectedDCFClkDeepSleep[][2],double MaximumVStartup[][2][DC__NUM_DPP__MAX],unsigned int TotalNumberOfActiveDPP[][2],unsigned int TotalNumberOfDCCActiveDPP[][2],unsigned int dpte_group_bytes[],double PrefetchLinesY[][2][DC__NUM_DPP__MAX],double PrefetchLinesC[][2][DC__NUM_DPP__MAX],unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],unsigned int HTotal[],double PixelClock[],double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],double MetaRowBytes[][2][DC__NUM_DPP__MAX],bool DynamicMetadataEnable[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double DCFCLKPerState[],double 
DCFCLKState[][2])2919 void dml32_UseMinimumDCFCLK(
2920 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2921 		bool DRRDisplay[],
2922 		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2923 		unsigned int MaxInterDCNTileRepeaters,
2924 		unsigned int MaxPrefetchMode,
2925 		double DRAMClockChangeLatencyFinal,
2926 		double FCLKChangeLatency,
2927 		double SREnterPlusExitTime,
2928 		unsigned int ReturnBusWidth,
2929 		unsigned int RoundTripPingLatencyCycles,
2930 		unsigned int ReorderingBytes,
2931 		unsigned int PixelChunkSizeInKByte,
2932 		unsigned int MetaChunkSize,
2933 		bool GPUVMEnable,
2934 		unsigned int GPUVMMaxPageTableLevels,
2935 		bool HostVMEnable,
2936 		unsigned int NumberOfActiveSurfaces,
2937 		double HostVMMinPageSize,
2938 		unsigned int HostVMMaxNonCachedPageTableLevels,
2939 		bool DynamicMetadataVMEnabled,
2940 		bool ImmediateFlipRequirement,
2941 		bool ProgressiveToInterlaceUnitInOPP,
2942 		double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2943 		double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2944 		unsigned int VTotal[],
2945 		unsigned int VActive[],
2946 		unsigned int DynamicMetadataTransmittedBytes[],
2947 		unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2948 		bool Interlace[],
2949 		double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2950 		double RequiredDISPCLK[][2],
2951 		double UrgLatency[],
2952 		unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2953 		double ProjectedDCFClkDeepSleep[][2],
2954 		double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2955 		unsigned int TotalNumberOfActiveDPP[][2],
2956 		unsigned int TotalNumberOfDCCActiveDPP[][2],
2957 		unsigned int dpte_group_bytes[],
2958 		double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2959 		double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2960 		unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2961 		unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2962 		unsigned int BytePerPixelY[],
2963 		unsigned int BytePerPixelC[],
2964 		unsigned int HTotal[],
2965 		double PixelClock[],
2966 		double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2967 		double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2968 		double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2969 		bool DynamicMetadataEnable[],
2970 		double ReadBandwidthLuma[],
2971 		double ReadBandwidthChroma[],
2972 		double DCFCLKPerState[],
2973 		/* Output */
2974 		double DCFCLKState[][2])
2975 {
2976 	unsigned int i, j, k;
2977 	unsigned int     dummy1;
2978 	double dummy2, dummy3;
2979 	double   NormalEfficiency;
2980 	double   TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2981 
2982 	NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2983 	for  (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2984 		for  (j = 0; j <= 1; ++j) {
2985 			double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2986 			double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2987 			double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2988 			double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2989 			double MinimumTWait = 0.0;
2990 			double DPTEBandwidth;
2991 			double DCFCLKRequiredForAverageBandwidth;
2992 			unsigned int ExtraLatencyBytes;
2993 			double ExtraLatencyCycles;
2994 			double DCFCLKRequiredForPeakBandwidth;
2995 			unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2996 			double MinimumTvmPlus2Tr0;
2997 
2998 			TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
2999 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3000 				TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
3001 						+ NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
3002 								/ (15.75 * HTotal[k] / PixelClock[k]);
3003 			}
3004 
3005 			for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
3006 				NoOfDPPState[k] = NoOfDPP[i][j][k];
3007 
3008 			DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
3009 			DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
3010 
3011 			ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
3012 					TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
3013 					TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
3014 					NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
3015 					HostVMMaxNonCachedPageTableLevels);
3016 			ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
3017 					+ ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
3018 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3019 				double DCFCLKCyclesRequiredInPrefetch;
3020 				double PrefetchTime;
3021 
3022 				PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
3023 						* swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
3024 						+ PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
3025 								* BytePerPixelC[k]) / NormalEfficiency
3026 						/ ReturnBusWidth;
3027 				DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
3028 						+ PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
3029 								/ NormalEfficiency / ReturnBusWidth
3030 								* (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
3031 						+ 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
3032 								/ ReturnBusWidth
3033 						+ 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
3034 						+ PixelDCFCLKCyclesRequiredInPrefetch[k];
3035 				PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
3036 						* HTotal[k] / PixelClock[k];
3037 				DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
3038 						DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
3039 						UrgLatency[i] * GPUVMMaxPageTableLevels *
3040 						(HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
3041 
3042 				MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
3043 						UseMALLForPStateChange[k],
3044 						SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3045 						DRRDisplay[k],
3046 						DRAMClockChangeLatencyFinal,
3047 						FCLKChangeLatency,
3048 						UrgLatency[i],
3049 						SREnterPlusExitTime);
3050 
3051 				PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3052 						MinimumTWait - UrgLatency[i] *
3053 						((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3054 						GPUVMMaxPageTableLevels - 2) *  (HostVMEnable == true ?
3055 						HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3056 						DynamicMetadataVMExtraLatency[k];
3057 
3058 				if (PrefetchTime > 0) {
3059 					double ExpectedVRatioPrefetch;
3060 
3061 					ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3062 							PixelDCFCLKCyclesRequiredInPrefetch[k] /
3063 							DCFCLKCyclesRequiredInPrefetch);
3064 					DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3065 							PixelDCFCLKCyclesRequiredInPrefetch[k] /
3066 							PrefetchPixelLinesTime[k] *
3067 							dml_max(1.0, ExpectedVRatioPrefetch) *
3068 							dml_max(1.0, ExpectedVRatioPrefetch / 4);
3069 					if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3070 						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3071 								DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3072 								NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3073 								NormalEfficiency / ReturnBusWidth;
3074 					}
3075 				} else {
3076 					DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3077 				}
3078 				if (DynamicMetadataEnable[k] == true) {
3079 					double TSetupPipe;
3080 					double TdmbfPipe;
3081 					double TdmsksPipe;
3082 					double TdmecPipe;
3083 					double AllowedTimeForUrgentExtraLatency;
3084 
3085 					dml32_CalculateVUpdateAndDynamicMetadataParameters(
3086 							MaxInterDCNTileRepeaters,
3087 							RequiredDPPCLKPerSurface[i][j][k],
3088 							RequiredDISPCLK[i][j],
3089 							ProjectedDCFClkDeepSleep[i][j],
3090 							PixelClock[k],
3091 							HTotal[k],
3092 							VTotal[k] - VActive[k],
3093 							DynamicMetadataTransmittedBytes[k],
3094 							DynamicMetadataLinesBeforeActiveRequired[k],
3095 							Interlace[k],
3096 							ProgressiveToInterlaceUnitInOPP,
3097 
3098 							/* output */
3099 							&TSetupPipe,
3100 							&TdmbfPipe,
3101 							&TdmecPipe,
3102 							&TdmsksPipe,
3103 							&dummy1,
3104 							&dummy2,
3105 							&dummy3);
3106 					AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3107 							PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3108 							TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3109 					if (AllowedTimeForUrgentExtraLatency > 0)
3110 						DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3111 								dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3112 								ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3113 					else
3114 						DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3115 				}
3116 			}
3117 			DCFCLKRequiredForPeakBandwidth = 0;
3118 			for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3119 				DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3120 						DCFCLKRequiredForPeakBandwidthPerSurface[k];
3121 			}
3122 			MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3123 					(HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3124 					(HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3125 			for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3126 				double MaximumTvmPlus2Tr0PlusTsw;
3127 
3128 				MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3129 						PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3130 				if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3131 					DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3132 				} else {
3133 					DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3134 							2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3135 								MinimumTvmPlus2Tr0 -
3136 								PrefetchPixelLinesTime[k] / 4),
3137 							(2 * ExtraLatencyCycles +
3138 								PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3139 								(MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3140 				}
3141 			}
3142 			DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3143 					dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3144 		}
3145 	}
3146 }
3147 
/*
 * Return the extra latency byte count:
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 *   + when GPUVMEnable, for each surface:
 *       NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels)
 *       * HostVMInefficiencyFactor
 *
 * HostVMDynamicLevels is 0 unless both GPUVMEnable and HostVMEnable are set;
 * then it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (not
 * below 0) for HostVMMinPageSize < 2048, < 1048576, or larger.
 *
 * The sum is accumulated as a double and converted to unsigned int on return.
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int HostVMDynamicLevels = 0;
	unsigned int ChunkKBytes;
	unsigned int LevelMultiplier;
	double ExtraBytes;
	unsigned int k;

	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048)
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	ChunkKBytes = TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize;
	ExtraBytes = ReorderingBytes + ChunkKBytes * 1024.0;

	if (GPUVMEnable != true)
		return ExtraBytes;

	/* Loop-invariant factor applied to every surface's dpte group bytes. */
	LevelMultiplier = 1 + 8 * HostVMDynamicLevels;
	for (k = 0; k < NumberOfActiveSurfaces; ++k)
		ExtraBytes += NumberOfDPP[k] * dpte_group_bytes[k] * LevelMultiplier *
				HostVMInefficiencyFactor;

	return ExtraBytes;
}
3188 
/*
 * Compute the VUpdate / VReady pixel offsets and the dynamic metadata timing
 * terms.
 *
 * Outputs:
 *   *VUpdateWidthPix  = ceil((14 / DCFClkDeepSleep + 12 / Dppclk
 *                             + repeater delay) * PixelClock)
 *   *VReadyOffsetPix  = ceil(max(150 / Dppclk, repeater delay
 *                             + 20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *   *VUpdateOffsetPix = ceil(HTotal / 4)
 *   *TSetup           = (sum of the three values above) / PixelClock
 *   *Tdmbf            = DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   *Tdmec            = HTotal / PixelClock
 *   *Tdmsks           = half of the VBlank time when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0,
 *                       otherwise that many line times; halved again when
 *                       InterlaceEnable == 1 and
 *                       ProgressiveToInterlaceUnitInOPP is false.
 *
 * where repeater delay = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix  =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix  = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	/* VUpdateWidthPix and VReadyOffsetPix are doubles, so they are printed with %f. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3243 
dml32_CalculateTWait(unsigned int PrefetchMode,enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,bool DRRDisplay,double DRAMClockChangeLatency,double FCLKChangeLatency,double UrgentLatency,double SREnterPlusExitTime)3244 double dml32_CalculateTWait(
3245 		unsigned int PrefetchMode,
3246 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3247 		bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3248 		bool DRRDisplay,
3249 		double DRAMClockChangeLatency,
3250 		double FCLKChangeLatency,
3251 		double UrgentLatency,
3252 		double SREnterPlusExitTime)
3253 {
3254 	double TWait = 0.0;
3255 
3256 	if (PrefetchMode == 0 &&
3257 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3258 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3259 			!(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3260 			!(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3261 		TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3262 	} else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3263 		TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3264 	} else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3265 		TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3266 	} else {
3267 		TWait = UrgentLatency;
3268 	}
3269 
3270 #ifdef __DML_VBA_DEBUG__
3271 	dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3272 	dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3273 #endif
3274 	return TWait;
3275 } // CalculateTWait
3276 
3277 // Function: get_return_bw_mbps
3278 // Megabyte per second
dml32_get_return_bw_mbps(const soc_bounding_box_st * soc,const int VoltageLevel,const bool HostVMEnable,const double DCFCLK,const double FabricClock,const double DRAMSpeed)3279 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3280 		const int VoltageLevel,
3281 		const bool HostVMEnable,
3282 		const double DCFCLK,
3283 		const double FabricClock,
3284 		const double DRAMSpeed)
3285 {
3286 	double ReturnBW = 0.;
3287 	double IdealSDPPortBandwidth    = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3288 	double IdealFabricBandwidth     = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3289 	double IdealDRAMBandwidth       = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3290 	double PixelDataOnlyReturnBW    = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3291 			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3292 			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe  :
3293 					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3294 	double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3295 			IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3296 			IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3297 					soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3298 
3299 	if (HostVMEnable != true)
3300 		ReturnBW = PixelDataOnlyReturnBW;
3301 	else
3302 		ReturnBW = PixelMixedWithVMDataReturnBW;
3303 
3304 #ifdef __DML_VBA_DEBUG__
3305 	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3306 	dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3307 	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3308 	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3309 	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3310 	dml_print("DML::%s: IdealSDPPortBandwidth        = %f\n", __func__, IdealSDPPortBandwidth);
3311 	dml_print("DML::%s: IdealFabricBandwidth         = %f\n", __func__, IdealFabricBandwidth);
3312 	dml_print("DML::%s: IdealDRAMBandwidth           = %f\n", __func__, IdealDRAMBandwidth);
3313 	dml_print("DML::%s: PixelDataOnlyReturnBW        = %f\n", __func__, PixelDataOnlyReturnBW);
3314 	dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3315 	dml_print("DML::%s: ReturnBW                     = %f MBps\n", __func__, ReturnBW);
3316 #endif
3317 	return ReturnBW;
3318 }
3319 
3320 // Function: get_return_bw_mbps_vm_only
3321 // Megabyte per second
dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st * soc,const int VoltageLevel,const double DCFCLK,const double FabricClock,const double DRAMSpeed)3322 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3323 		const int VoltageLevel,
3324 		const double DCFCLK,
3325 		const double FabricClock,
3326 		const double DRAMSpeed)
3327 {
3328 	double VMDataOnlyReturnBW = dml_min3(
3329 			soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3330 			FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3331 					* soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3332 			DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3333 					* (VoltageLevel < 2 ?
3334 							soc->pct_ideal_dram_bw_after_urgent_strobe :
3335 							soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3336 #ifdef __DML_VBA_DEBUG__
3337 	dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3338 	dml_print("DML::%s: DCFCLK       = %f\n", __func__, DCFCLK);
3339 	dml_print("DML::%s: FabricClock  = %f\n", __func__, FabricClock);
3340 	dml_print("DML::%s: DRAMSpeed    = %f\n", __func__, DRAMSpeed);
3341 	dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3342 #endif
3343 	return VMDataOnlyReturnBW;
3344 }
3345 
dml32_CalculateExtraLatency(unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,double DCFCLK,unsigned int TotalNumberOfActiveDPP,unsigned int PixelChunkSizeInKByte,unsigned int TotalNumberOfDCCActiveDPP,unsigned int MetaChunkSize,double ReturnBW,bool GPUVMEnable,bool HostVMEnable,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],double HostVMInefficiencyFactor,double HostVMMinPageSize,unsigned int HostVMMaxNonCachedPageTableLevels)3346 double dml32_CalculateExtraLatency(
3347 		unsigned int RoundTripPingLatencyCycles,
3348 		unsigned int ReorderingBytes,
3349 		double DCFCLK,
3350 		unsigned int TotalNumberOfActiveDPP,
3351 		unsigned int PixelChunkSizeInKByte,
3352 		unsigned int TotalNumberOfDCCActiveDPP,
3353 		unsigned int MetaChunkSize,
3354 		double ReturnBW,
3355 		bool GPUVMEnable,
3356 		bool HostVMEnable,
3357 		unsigned int NumberOfActiveSurfaces,
3358 		unsigned int NumberOfDPP[],
3359 		unsigned int dpte_group_bytes[],
3360 		double HostVMInefficiencyFactor,
3361 		double HostVMMinPageSize,
3362 		unsigned int HostVMMaxNonCachedPageTableLevels)
3363 {
3364 	double ExtraLatencyBytes;
3365 	double ExtraLatency;
3366 
3367 	ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3368 			ReorderingBytes,
3369 			TotalNumberOfActiveDPP,
3370 			PixelChunkSizeInKByte,
3371 			TotalNumberOfDCCActiveDPP,
3372 			MetaChunkSize,
3373 			GPUVMEnable,
3374 			HostVMEnable,
3375 			NumberOfActiveSurfaces,
3376 			NumberOfDPP,
3377 			dpte_group_bytes,
3378 			HostVMInefficiencyFactor,
3379 			HostVMMinPageSize,
3380 			HostVMMaxNonCachedPageTableLevels);
3381 
3382 	ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3383 
3384 #ifdef __DML_VBA_DEBUG__
3385 	dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3386 	dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3387 	dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3388 	dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3389 	dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3390 #endif
3391 
3392 	return ExtraLatency;
3393 } // CalculateExtraLatency
3394 
dml32_CalculatePrefetchSchedule(struct vba_vars_st * v,unsigned int k,double HostVMInefficiencyFactor,DmlPipe * myPipe,unsigned int DSCDelay,unsigned int DPP_RECOUT_WIDTH,unsigned int VStartup,unsigned int MaxVStartup,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,unsigned int VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,unsigned int VInitPreFillC,unsigned int MaxNumSwathC,unsigned int swath_width_luma_ub,unsigned int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,double TPreReq,bool ExtendPrefetchIfPossible,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,double * TSetup,unsigned int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)3395 bool dml32_CalculatePrefetchSchedule(
3396 		struct vba_vars_st *v,
3397 		unsigned int k,
3398 		double HostVMInefficiencyFactor,
3399 		DmlPipe *myPipe,
3400 		unsigned int DSCDelay,
3401 		unsigned int DPP_RECOUT_WIDTH,
3402 		unsigned int VStartup,
3403 		unsigned int MaxVStartup,
3404 		double UrgentLatency,
3405 		double UrgentExtraLatency,
3406 		double TCalc,
3407 		unsigned int PDEAndMetaPTEBytesFrame,
3408 		unsigned int MetaRowByte,
3409 		unsigned int PixelPTEBytesPerRow,
3410 		double PrefetchSourceLinesY,
3411 		unsigned int SwathWidthY,
3412 		unsigned int VInitPreFillY,
3413 		unsigned int MaxNumSwathY,
3414 		double PrefetchSourceLinesC,
3415 		unsigned int SwathWidthC,
3416 		unsigned int VInitPreFillC,
3417 		unsigned int MaxNumSwathC,
3418 		unsigned int swath_width_luma_ub,
3419 		unsigned int swath_width_chroma_ub,
3420 		unsigned int SwathHeightY,
3421 		unsigned int SwathHeightC,
3422 		double TWait,
3423 		double TPreReq,
3424 		bool ExtendPrefetchIfPossible,
3425 		/* Output */
3426 		double   *DSTXAfterScaler,
3427 		double   *DSTYAfterScaler,
3428 		double *DestinationLinesForPrefetch,
3429 		double *PrefetchBandwidth,
3430 		double *DestinationLinesToRequestVMInVBlank,
3431 		double *DestinationLinesToRequestRowInVBlank,
3432 		double *VRatioPrefetchY,
3433 		double *VRatioPrefetchC,
3434 		double *RequiredPrefetchPixDataBWLuma,
3435 		double *RequiredPrefetchPixDataBWChroma,
3436 		bool   *NotEnoughTimeForDynamicMetadata,
3437 		double *Tno_bw,
3438 		double *prefetch_vmrow_bw,
3439 		double *Tdmdl_vm,
3440 		double *Tdmdl,
3441 		double *TSetup,
3442 		unsigned int   *VUpdateOffsetPix,
3443 		double   *VUpdateWidthPix,
3444 		double   *VReadyOffsetPix)
3445 {
3446 	double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
3447 	bool MyError = false;
3448 	unsigned int DPPCycles, DISPCLKCycles;
3449 	double DSTTotalPixelsAfterScaler;
3450 	double LineTime;
3451 	double dst_y_prefetch_equ;
3452 	double prefetch_bw_oto;
3453 	double Tvm_oto;
3454 	double Tr0_oto;
3455 	double Tvm_oto_lines;
3456 	double Tr0_oto_lines;
3457 	double dst_y_prefetch_oto;
3458 	double TimeForFetchingMetaPTE = 0;
3459 	double TimeForFetchingRowInVBlank = 0;
3460 	double LinesToRequestPrefetchPixelData = 0;
3461 	double LinesForPrefetchBandwidth = 0;
3462 	unsigned int HostVMDynamicLevelsTrips;
3463 	double  trip_to_mem;
3464 	double  Tvm_trips;
3465 	double  Tr0_trips;
3466 	double  Tvm_trips_rounded;
3467 	double  Tr0_trips_rounded;
3468 	double  Lsw_oto;
3469 	double  Tpre_rounded;
3470 	double  prefetch_bw_equ;
3471 	double  Tvm_equ;
3472 	double  Tr0_equ;
3473 	double  Tdmbf;
3474 	double  Tdmec;
3475 	double  Tdmsks;
3476 	double  prefetch_sw_bytes;
3477 	double  bytes_pp;
3478 	double  dep_bytes;
3479 	unsigned int max_vratio_pre = v->MaxVRatioPre;
3480 	double  min_Lsw;
3481 	double  Tsw_est1 = 0;
3482 	double  Tsw_est3 = 0;
3483 
3484 	if (v->GPUVMEnable == true && v->HostVMEnable == true)
3485 		HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3486 	else
3487 		HostVMDynamicLevelsTrips = 0;
3488 #ifdef __DML_VBA_DEBUG__
3489 	dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable);
3490 	dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels);
3491 	dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
3492 	dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
3493 			__func__, v->HostVMEnable, HostVMInefficiencyFactor);
3494 #endif
3495 	dml32_CalculateVUpdateAndDynamicMetadataParameters(
3496 			v->MaxInterDCNTileRepeaters,
3497 			myPipe->Dppclk,
3498 			myPipe->Dispclk,
3499 			myPipe->DCFClkDeepSleep,
3500 			myPipe->PixelClock,
3501 			myPipe->HTotal,
3502 			myPipe->VBlank,
3503 			v->DynamicMetadataTransmittedBytes[k],
3504 			v->DynamicMetadataLinesBeforeActiveRequired[k],
3505 			myPipe->InterlaceEnable,
3506 			myPipe->ProgressiveToInterlaceUnitInOPP,
3507 			TSetup,
3508 
3509 			/* output */
3510 			&Tdmbf,
3511 			&Tdmec,
3512 			&Tdmsks,
3513 			VUpdateOffsetPix,
3514 			VUpdateWidthPix,
3515 			VReadyOffsetPix);
3516 
3517 	LineTime = myPipe->HTotal / myPipe->PixelClock;
3518 	trip_to_mem = UrgentLatency;
3519 	Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
3520 
3521 	if (v->DynamicMetadataVMEnabled == true)
3522 		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
3523 	else
3524 		*Tdmdl = TWait + UrgentExtraLatency;
3525 
3526 #ifdef __DML_VBA_ALLOW_DELTA__
3527 	if (v->DynamicMetadataEnable[k] == false)
3528 		*Tdmdl = 0.0;
3529 #endif
3530 
3531 	if (v->DynamicMetadataEnable[k] == true) {
3532 		if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
3533 			*NotEnoughTimeForDynamicMetadata = true;
3534 #ifdef __DML_VBA_DEBUG__
3535 			dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
3536 			dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
3537 					__func__, Tdmbf);
3538 			dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3539 			dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
3540 					__func__, Tdmsks);
3541 			dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
3542 					__func__, *Tdmdl);
3543 #endif
3544 		} else {
3545 			*NotEnoughTimeForDynamicMetadata = false;
3546 		}
3547 	} else {
3548 		*NotEnoughTimeForDynamicMetadata = false;
3549 	}
3550 
3551 	*Tdmdl_vm =  (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true &&
3552 			v->GPUVMEnable == true ? TWait + Tvm_trips : 0);
3553 
3554 	if (myPipe->ScalerEnabled)
3555 		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
3556 	else
3557 		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;
3558 
3559 	DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;
3560 
3561 	DISPCLKCycles = v->DISPCLKDelaySubtotal;
3562 
3563 	if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
3564 		return true;
3565 
3566 	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
3567 			myPipe->PixelClock / myPipe->Dispclk + DSCDelay;
3568 
3569 	*DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
3570 			+ (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
3571 			+ ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
3572 					myPipe->HActive / 2 : 0)
3573 			+ ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);
3574 
3575 #ifdef __DML_VBA_DEBUG__
3576 	dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
3577 	dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
3578 	dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
3579 	dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
3580 	dml_print("DML::%s: DISPCLK: %f\n", __func__,  myPipe->Dispclk);
3581 	dml_print("DML::%s: DSCDelay: %d\n", __func__,  DSCDelay);
3582 	dml_print("DML::%s: ODMMode: %d\n", __func__,  myPipe->ODMMode);
3583 	dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
3584 	dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__,  *DSTXAfterScaler);
3585 #endif
3586 
3587 	if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
3588 		*DSTYAfterScaler = 1;
3589 	else
3590 		*DSTYAfterScaler = 0;
3591 
3592 	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
3593 	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
3594 	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
3595 #ifdef __DML_VBA_DEBUG__
3596 	dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__,  *DSTXAfterScaler);
3597 	dml_print("DML::%s: DSTYAfterScaler: %d (final)\n", __func__, *DSTYAfterScaler);
3598 #endif
3599 
3600 	MyError = false;
3601 
3602 	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
3603 
3604 	if (v->GPUVMEnable == true) {
3605 		Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
3606 		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3607 		if (v->GPUVMMaxPageTableLevels >= 3) {
3608 			*Tno_bw = UrgentExtraLatency + trip_to_mem *
3609 					(double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
3610 		} else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) {
3611 			Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
3612 					4.0 * LineTime; // VBA_ERROR
3613 			*Tno_bw = UrgentExtraLatency;
3614 		} else {
3615 			*Tno_bw = 0;
3616 		}
3617 	} else if (myPipe->DCCEnable == true) {
3618 		Tvm_trips_rounded = LineTime / 4.0;
3619 		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
3620 		*Tno_bw = 0;
3621 	} else {
3622 		Tvm_trips_rounded = LineTime / 4.0;
3623 		Tr0_trips_rounded = LineTime / 2.0;
3624 		*Tno_bw = 0;
3625 	}
3626 	Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
3627 	Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);
3628 
3629 	if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
3630 			|| myPipe->SourcePixelFormat == dm_420_12) {
3631 		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
3632 	} else {
3633 		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
3634 	}
3635 
3636 	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
3637 			+ PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
3638 	prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
3639 			prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
3640 
3641 	min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
3642 	min_Lsw = dml_max(min_Lsw, 1.0);
3643 	Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;
3644 
3645 	if (v->GPUVMEnable == true) {
3646 		Tvm_oto = dml_max3(
3647 				Tvm_trips,
3648 				*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
3649 				LineTime / 4.0);
3650 	} else
3651 		Tvm_oto = LineTime / 4.0;
3652 
3653 	if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3654 		Tr0_oto = dml_max4(
3655 				Tr0_trips,
3656 				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
3657 				(LineTime - Tvm_oto)/2.0,
3658 				LineTime / 4.0);
3659 #ifdef __DML_VBA_DEBUG__
3660 		dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
3661 				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
3662 		dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
3663 		dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, LineTime - Tvm_oto);
3664 		dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
3665 #endif
3666 	} else
3667 		Tr0_oto = (LineTime - Tvm_oto) / 2.0;
3668 
3669 	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
3670 	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
3671 	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
3672 
3673 	dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
3674 			(*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
3675 
3676 	dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__);
3677 #ifdef __DML_VBA_DEBUG__
3678 	dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
3679 	dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
3680 	dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
3681 	dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
3682 	dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
3683 	dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
3684 	dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3685 	dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
3686 	dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
3687 	dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3688 	dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
3689 	dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
3690 	dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
3691 	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
3692 	dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
3693 	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
3694 	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
3695 	dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
3696 	dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
3697 	dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
3698 	dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
3699 	dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
3700 	dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
3701 	dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
3702 	dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
3703 	dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
3704 	dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
3705 #endif
3706 
3707 	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
3708 	Tpre_rounded = dst_y_prefetch_equ * LineTime;
3709 #ifdef __DML_VBA_DEBUG__
3710 	dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
3711 	dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
3712 	dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
3713 	dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
3714 			__func__, VStartup * LineTime);
3715 	dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
3716 	dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
3717 	dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
3718 	dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
3719 	dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
3720 	dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
3721 	dml_print("DML::%s: DSTYAfterScaler: %d lines - number of lines of pipeline and buffer delay after scaler\n",
3722 			__func__, *DSTYAfterScaler);
3723 #endif
3724 	dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
3725 			MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
3726 
3727 	if (prefetch_sw_bytes < dep_bytes)
3728 		prefetch_sw_bytes = 2 * dep_bytes;
3729 
3730 	*PrefetchBandwidth = 0;
3731 	*DestinationLinesToRequestVMInVBlank = 0;
3732 	*DestinationLinesToRequestRowInVBlank = 0;
3733 	*VRatioPrefetchY = 0;
3734 	*VRatioPrefetchC = 0;
3735 	*RequiredPrefetchPixDataBWLuma = 0;
3736 	if (dst_y_prefetch_equ > 1 &&
3737 			(Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) {
3738 		double PrefetchBandwidth1;
3739 		double PrefetchBandwidth2;
3740 		double PrefetchBandwidth3;
3741 		double PrefetchBandwidth4;
3742 
3743 		if (Tpre_rounded - *Tno_bw > 0) {
3744 			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3745 					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3746 					+ prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
3747 			Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
3748 		} else
3749 			PrefetchBandwidth1 = 0;
3750 
3751 		if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
3752 				&& Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
3753 			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
3754 					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3755 					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
3756 		}
3757 
3758 		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
3759 			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
3760 			(Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
3761 		else
3762 			PrefetchBandwidth2 = 0;
3763 
3764 		if (Tpre_rounded - Tvm_trips_rounded > 0) {
3765 			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
3766 					+ prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
3767 			Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
3768 		} else
3769 			PrefetchBandwidth3 = 0;
3770 
3771 
3772 		if (VStartup == MaxVStartup &&
3773 				(Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
3774 				LineTime - Tvm_trips_rounded > 0) {
3775 			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3776 					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
3777 		}
3778 
3779 		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
3780 			PrefetchBandwidth4 = prefetch_sw_bytes /
3781 					(Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
3782 		} else {
3783 			PrefetchBandwidth4 = 0;
3784 		}
3785 
3786 #ifdef __DML_VBA_DEBUG__
3787 		dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
3788 		dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
3789 		dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
3790 		dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
3791 		dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
3792 		dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
3793 		dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
3794 		dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
3795 		dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
3796 #endif
3797 		{
3798 			bool Case1OK;
3799 			bool Case2OK;
3800 			bool Case3OK;
3801 
3802 			if (PrefetchBandwidth1 > 0) {
3803 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
3804 						>= Tvm_trips_rounded
3805 						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3806 								/ PrefetchBandwidth1 >= Tr0_trips_rounded) {
3807 					Case1OK = true;
3808 				} else {
3809 					Case1OK = false;
3810 				}
3811 			} else {
3812 				Case1OK = false;
3813 			}
3814 
3815 			if (PrefetchBandwidth2 > 0) {
3816 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
3817 						>= Tvm_trips_rounded
3818 						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
3819 						/ PrefetchBandwidth2 < Tr0_trips_rounded) {
3820 					Case2OK = true;
3821 				} else {
3822 					Case2OK = false;
3823 				}
3824 			} else {
3825 				Case2OK = false;
3826 			}
3827 
3828 			if (PrefetchBandwidth3 > 0) {
3829 				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
3830 						Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
3831 								HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
3832 								Tr0_trips_rounded) {
3833 					Case3OK = true;
3834 				} else {
3835 					Case3OK = false;
3836 				}
3837 			} else {
3838 				Case3OK = false;
3839 			}
3840 
3841 			if (Case1OK)
3842 				prefetch_bw_equ = PrefetchBandwidth1;
3843 			else if (Case2OK)
3844 				prefetch_bw_equ = PrefetchBandwidth2;
3845 			else if (Case3OK)
3846 				prefetch_bw_equ = PrefetchBandwidth3;
3847 			else
3848 				prefetch_bw_equ = PrefetchBandwidth4;
3849 
3850 #ifdef __DML_VBA_DEBUG__
3851 			dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
3852 			dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
3853 			dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
3854 			dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
3855 #endif
3856 
3857 			if (prefetch_bw_equ > 0) {
3858 				if (v->GPUVMEnable == true) {
3859 					Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
3860 							HostVMInefficiencyFactor / prefetch_bw_equ,
3861 							Tvm_trips, LineTime / 4);
3862 				} else {
3863 					Tvm_equ = LineTime / 4;
3864 				}
3865 
3866 				if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
3867 					Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
3868 							HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
3869 							(LineTime - Tvm_equ) / 2, LineTime / 4);
3870 				} else {
3871 					Tr0_equ = (LineTime - Tvm_equ) / 2;
3872 				}
3873 			} else {
3874 				Tvm_equ = 0;
3875 				Tr0_equ = 0;
3876 #ifdef __DML_VBA_DEBUG__
3877 				dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
3878 #endif
3879 			}
3880 		}
3881 
3882 		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
3883 			if (dst_y_prefetch_oto * LineTime < TPreReq) {
3884 				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
3885 			} else {
3886 				*DestinationLinesForPrefetch = dst_y_prefetch_oto;
3887 			}
3888 			TimeForFetchingMetaPTE = Tvm_oto;
3889 			TimeForFetchingRowInVBlank = Tr0_oto;
3890 			*PrefetchBandwidth = prefetch_bw_oto;
3891 			/* Clamp to oto for bandwidth calculation */
3892 			LinesForPrefetchBandwidth = dst_y_prefetch_oto;
3893 		} else {
3894 			/* For mode programming we want to extend the prefetch as much as possible
3895 			 * (up to oto, or as long as we can for equ) if we're not already applying
3896 			 * the 60us prefetch requirement. This is to avoid intermittent underflow
3897 			 * issues during prefetch.
3898 			 *
3899 			 * The prefetch extension is applied under the following scenarios:
3900 			 * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank)
3901 			 * 2. We're using subvp or drr methods of p-state switch, in which case we
3902 			 *    we don't care if prefetch takes up more of the blanking time
3903 			 *
3904 			 * Mode programming typically chooses the smallest prefetch time possible
3905 			 * (i.e. highest bandwidth during prefetch) presumably to create margin between
3906 			 * p-states / c-states that happen in vblank and prefetch. Therefore we only
3907 			 * apply this prefetch extension when p-state in vblank is not required (UCLK
3908 			 * p-states take up the most vblank time).
3909 			 */
3910 			if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) {
3911 				MyError = true;
3912 			} else {
3913 				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
3914 				TimeForFetchingMetaPTE = Tvm_equ;
3915 				TimeForFetchingRowInVBlank = Tr0_equ;
3916 				*PrefetchBandwidth = prefetch_bw_equ;
3917 				/* Clamp to equ for bandwidth calculation */
3918 				LinesForPrefetchBandwidth = dst_y_prefetch_equ;
3919 			}
3920 		}
3921 
3922 		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
3923 
3924 		*DestinationLinesToRequestRowInVBlank =
3925 				dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
3926 
3927 		LinesToRequestPrefetchPixelData = LinesForPrefetchBandwidth -
3928 				*DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
3929 
3930 #ifdef __DML_VBA_DEBUG__
3931 		dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
3932 		dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
3933 				__func__, *DestinationLinesToRequestVMInVBlank);
3934 		dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
3935 		dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
3936 		dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
3937 				__func__, *DestinationLinesToRequestRowInVBlank);
3938 		dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3939 		dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
3940 #endif
3941 
3942 		if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
3943 			*VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
3944 			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3945 #ifdef __DML_VBA_DEBUG__
3946 			dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3947 			dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
3948 			dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
3949 #endif
3950 			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
3951 				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
3952 					*VRatioPrefetchY =
3953 							dml_max((double) PrefetchSourceLinesY /
3954 									LinesToRequestPrefetchPixelData,
3955 									(double) MaxNumSwathY * SwathHeightY /
3956 									(LinesToRequestPrefetchPixelData -
3957 									(VInitPreFillY - 3.0) / 2.0));
3958 					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
3959 				} else {
3960 					MyError = true;
3961 					*VRatioPrefetchY = 0;
3962 				}
3963 #ifdef __DML_VBA_DEBUG__
3964 				dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
3965 				dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
3966 				dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
3967 #endif
3968 			}
3969 
3970 			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
3971 			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3972 
3973 #ifdef __DML_VBA_DEBUG__
3974 			dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3975 			dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
3976 			dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
3977 #endif
3978 			if ((SwathHeightC > 4)) {
3979 				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
3980 					*VRatioPrefetchC =
3981 						dml_max(*VRatioPrefetchC,
3982 							(double) MaxNumSwathC * SwathHeightC /
3983 							(LinesToRequestPrefetchPixelData -
3984 							(VInitPreFillC - 3.0) / 2.0));
3985 					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
3986 				} else {
3987 					MyError = true;
3988 					*VRatioPrefetchC = 0;
3989 				}
3990 #ifdef __DML_VBA_DEBUG__
3991 				dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
3992 				dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
3993 				dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
3994 #endif
3995 			}
3996 
3997 			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
3998 					/ LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
3999 					/ LineTime;
4000 
4001 #ifdef __DML_VBA_DEBUG__
4002 			dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
4003 			dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
4004 			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
4005 			dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
4006 					__func__, *RequiredPrefetchPixDataBWLuma);
4007 #endif
4008 			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
4009 					LinesToRequestPrefetchPixelData
4010 					* myPipe->BytePerPixelC
4011 					* swath_width_chroma_ub / LineTime;
4012 		} else {
4013 			MyError = true;
4014 #ifdef __DML_VBA_DEBUG__
4015 			dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
4016 					__func__, LinesToRequestPrefetchPixelData);
4017 #endif
4018 			*VRatioPrefetchY = 0;
4019 			*VRatioPrefetchC = 0;
4020 			*RequiredPrefetchPixDataBWLuma = 0;
4021 			*RequiredPrefetchPixDataBWChroma = 0;
4022 		}
4023 #ifdef __DML_VBA_DEBUG__
4024 		dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
4025 			(double)LinesToRequestPrefetchPixelData * LineTime +
4026 			2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
4027 		dml_print("DML:  Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
4028 		dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
4029 			(*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
4030 		dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
4031 		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
4032 			TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
4033 			((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
4034 		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
4035 				PixelPTEBytesPerRow);
4036 #endif
4037 	} else {
4038 		MyError = true;
4039 #ifdef __DML_VBA_DEBUG__
4040 		dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
4041 				__func__, dst_y_prefetch_equ);
4042 #endif
4043 	}
4044 
4045 	{
4046 		double prefetch_vm_bw;
4047 		double prefetch_row_bw;
4048 
4049 		if (PDEAndMetaPTEBytesFrame == 0) {
4050 			prefetch_vm_bw = 0;
4051 		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
4052 #ifdef __DML_VBA_DEBUG__
4053 			dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
4054 			dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
4055 			dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
4056 					__func__, *DestinationLinesToRequestVMInVBlank);
4057 			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
4058 #endif
4059 			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
4060 					(*DestinationLinesToRequestVMInVBlank * LineTime);
4061 #ifdef __DML_VBA_DEBUG__
4062 			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
4063 #endif
4064 		} else {
4065 			prefetch_vm_bw = 0;
4066 			MyError = true;
4067 #ifdef __DML_VBA_DEBUG__
4068 			dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
4069 					__func__, *DestinationLinesToRequestVMInVBlank);
4070 #endif
4071 		}
4072 
4073 		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
4074 			prefetch_row_bw = 0;
4075 		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
4076 			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
4077 					(*DestinationLinesToRequestRowInVBlank * LineTime);
4078 
4079 #ifdef __DML_VBA_DEBUG__
4080 			dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
4081 			dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
4082 			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
4083 					__func__, *DestinationLinesToRequestRowInVBlank);
4084 			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
4085 #endif
4086 		} else {
4087 			prefetch_row_bw = 0;
4088 			MyError = true;
4089 #ifdef __DML_VBA_DEBUG__
4090 			dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
4091 					__func__, *DestinationLinesToRequestRowInVBlank);
4092 #endif
4093 		}
4094 
4095 		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
4096 	}
4097 
4098 	if (MyError) {
4099 		*PrefetchBandwidth = 0;
4100 		TimeForFetchingMetaPTE = 0;
4101 		TimeForFetchingRowInVBlank = 0;
4102 		*DestinationLinesToRequestVMInVBlank = 0;
4103 		*DestinationLinesToRequestRowInVBlank = 0;
4104 		*DestinationLinesForPrefetch = 0;
4105 		LinesToRequestPrefetchPixelData = 0;
4106 		*VRatioPrefetchY = 0;
4107 		*VRatioPrefetchC = 0;
4108 		*RequiredPrefetchPixDataBWLuma = 0;
4109 		*RequiredPrefetchPixDataBWChroma = 0;
4110 	}
4111 
4112 	return MyError;
4113 } // CalculatePrefetchSchedule
4114 
dml32_CalculateFlipSchedule(double HostVMInefficiencyFactor,double UrgentExtraLatency,double UrgentLatency,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,bool GPUVMEnable,double HostVMMinPageSize,double PDEAndMetaPTEBytesPerFrame,double MetaRowBytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw,bool DCCEnable,unsigned int dpte_row_height,unsigned int meta_row_height,unsigned int dpte_row_height_chroma,unsigned int meta_row_height_chroma,bool use_one_row_for_frame_flip,double * DestinationLinesToRequestVMInImmediateFlip,double * DestinationLinesToRequestRowInImmediateFlip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)4115 void dml32_CalculateFlipSchedule(
4116 		double HostVMInefficiencyFactor,
4117 		double UrgentExtraLatency,
4118 		double UrgentLatency,
4119 		unsigned int GPUVMMaxPageTableLevels,
4120 		bool HostVMEnable,
4121 		unsigned int HostVMMaxNonCachedPageTableLevels,
4122 		bool GPUVMEnable,
4123 		double HostVMMinPageSize,
4124 		double PDEAndMetaPTEBytesPerFrame,
4125 		double MetaRowBytes,
4126 		double DPTEBytesPerRow,
4127 		double BandwidthAvailableForImmediateFlip,
4128 		unsigned int TotImmediateFlipBytes,
4129 		enum source_format_class SourcePixelFormat,
4130 		double LineTime,
4131 		double VRatio,
4132 		double VRatioChroma,
4133 		double Tno_bw,
4134 		bool DCCEnable,
4135 		unsigned int dpte_row_height,
4136 		unsigned int meta_row_height,
4137 		unsigned int dpte_row_height_chroma,
4138 		unsigned int meta_row_height_chroma,
4139 		bool    use_one_row_for_frame_flip,
4140 
4141 		/* Output */
4142 		double *DestinationLinesToRequestVMInImmediateFlip,
4143 		double *DestinationLinesToRequestRowInImmediateFlip,
4144 		double *final_flip_bw,
4145 		bool *ImmediateFlipSupportedForPipe)
4146 {
4147 	double min_row_time = 0.0;
4148 	unsigned int HostVMDynamicLevelsTrips;
4149 	double TimeForFetchingMetaPTEImmediateFlip;
4150 	double TimeForFetchingRowInVBlankImmediateFlip;
4151 	double ImmediateFlipBW = 1.0;
4152 
4153 	if (GPUVMEnable == true && HostVMEnable == true)
4154 		HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4155 	else
4156 		HostVMDynamicLevelsTrips = 0;
4157 
4158 #ifdef __DML_VBA_DEBUG__
4159 	dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4160 	dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4161 #endif
4162 
4163 	if (TotImmediateFlipBytes > 0) {
4164 		if (use_one_row_for_frame_flip) {
4165 			ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4166 					BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4167 		} else {
4168 			ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4169 					BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4170 		}
4171 		if (GPUVMEnable == true) {
4172 			TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4173 					HostVMInefficiencyFactor / ImmediateFlipBW,
4174 					UrgentExtraLatency + UrgentLatency *
4175 					(GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4176 					LineTime / 4.0);
4177 		} else {
4178 			TimeForFetchingMetaPTEImmediateFlip = 0;
4179 		}
4180 		if ((GPUVMEnable == true || DCCEnable == true)) {
4181 			TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4182 					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4183 					UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4184 		} else {
4185 			TimeForFetchingRowInVBlankImmediateFlip = 0;
4186 		}
4187 
4188 		*DestinationLinesToRequestVMInImmediateFlip =
4189 				dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4190 		*DestinationLinesToRequestRowInImmediateFlip =
4191 				dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4192 
4193 		if (GPUVMEnable == true) {
4194 			*final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4195 					(*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4196 					(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4197 					(*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4198 		} else if ((GPUVMEnable == true || DCCEnable == true)) {
4199 			*final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4200 					(*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4201 		} else {
4202 			*final_flip_bw = 0;
4203 		}
4204 	} else {
4205 		TimeForFetchingMetaPTEImmediateFlip = 0;
4206 		TimeForFetchingRowInVBlankImmediateFlip = 0;
4207 		*DestinationLinesToRequestVMInImmediateFlip = 0;
4208 		*DestinationLinesToRequestRowInImmediateFlip = 0;
4209 		*final_flip_bw = 0;
4210 	}
4211 
4212 	if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4213 		if (GPUVMEnable == true && DCCEnable != true) {
4214 			min_row_time = dml_min(dpte_row_height *
4215 					LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4216 		} else if (GPUVMEnable != true && DCCEnable == true) {
4217 			min_row_time = dml_min(meta_row_height *
4218 					LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4219 		} else {
4220 			min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4221 					LineTime / VRatio, dpte_row_height_chroma * LineTime /
4222 					VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4223 		}
4224 	} else {
4225 		if (GPUVMEnable == true && DCCEnable != true) {
4226 			min_row_time = dpte_row_height * LineTime / VRatio;
4227 		} else if (GPUVMEnable != true && DCCEnable == true) {
4228 			min_row_time = meta_row_height * LineTime / VRatio;
4229 		} else {
4230 			min_row_time =
4231 				dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4232 		}
4233 	}
4234 
4235 	if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4236 			|| TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4237 					> min_row_time) {
4238 		*ImmediateFlipSupportedForPipe = false;
4239 	} else {
4240 		*ImmediateFlipSupportedForPipe = true;
4241 	}
4242 
4243 #ifdef __DML_VBA_DEBUG__
4244 	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4245 	dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4246 	dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4247 			__func__, *DestinationLinesToRequestVMInImmediateFlip);
4248 	dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4249 			__func__, *DestinationLinesToRequestRowInImmediateFlip);
4250 	dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4251 	dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4252 			__func__, TimeForFetchingRowInVBlankImmediateFlip);
4253 	dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4254 	dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4255 #endif
4256 } // CalculateFlipSchedule
4257 
dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(struct vba_vars_st * v,unsigned int PrefetchMode,double DCFCLK,double ReturnBW,SOCParametersList mmSOCParameters,double SOCCLK,double DCFClkDeepSleep,unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerSurface[],double BytePerPixelDETY[],double BytePerPixelDETC[],double DSTXAfterScaler[],double DSTYAfterScaler[],bool UnboundedRequestEnabled,unsigned int CompressedBufferSizeInkByte,enum clock_change_support * DRAMClockChangeSupport,double MaxActiveDRAMClockChangeLatencySupported[],unsigned int SubViewportLinesNeededInMALL[],enum dm_fclock_change_support * FCLKChangeSupport,double * MinActiveFCLKChangeLatencySupported,bool * USRRetrainingSupport,double ActiveDRAMClockChangeLatencyMargin[])4258 void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
4259 		struct vba_vars_st *v,
4260 		unsigned int PrefetchMode,
4261 		double DCFCLK,
4262 		double ReturnBW,
4263 		SOCParametersList mmSOCParameters,
4264 		double SOCCLK,
4265 		double DCFClkDeepSleep,
4266 		unsigned int DETBufferSizeY[],
4267 		unsigned int DETBufferSizeC[],
4268 		unsigned int SwathHeightY[],
4269 		unsigned int SwathHeightC[],
4270 		double SwathWidthY[],
4271 		double SwathWidthC[],
4272 		unsigned int DPPPerSurface[],
4273 		double BytePerPixelDETY[],
4274 		double BytePerPixelDETC[],
4275 		double DSTXAfterScaler[],
4276 		double DSTYAfterScaler[],
4277 		bool UnboundedRequestEnabled,
4278 		unsigned int CompressedBufferSizeInkByte,
4279 
4280 		/* Output */
4281 		enum clock_change_support *DRAMClockChangeSupport,
4282 		double MaxActiveDRAMClockChangeLatencySupported[],
4283 		unsigned int SubViewportLinesNeededInMALL[],
4284 		enum dm_fclock_change_support *FCLKChangeSupport,
4285 		double *MinActiveFCLKChangeLatencySupported,
4286 		bool *USRRetrainingSupport,
4287 		double ActiveDRAMClockChangeLatencyMargin[])
4288 {
4289 	unsigned int i, j, k;
4290 	unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
4291 	unsigned int DRAMClockChangeSupportNumber = 0;
4292 	unsigned int LastSurfaceWithoutMargin = 0;
4293 	unsigned int DRAMClockChangeMethod = 0;
4294 	bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
4295 	double MinActiveFCLKChangeMargin = 0.;
4296 	double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
4297 	double ActiveClockChangeLatencyHidingY;
4298 	double ActiveClockChangeLatencyHidingC;
4299 	double ActiveClockChangeLatencyHiding;
4300 	double EffectiveDETBufferSizeY;
4301 	double     ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
4302 	double     USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
4303 	double TotalPixelBW = 0.0;
4304 	bool    SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
4305 	double     EffectiveLBLatencyHidingY;
4306 	double     EffectiveLBLatencyHidingC;
4307 	double     LinesInDETY[DC__NUM_DPP__MAX];
4308 	double     LinesInDETC[DC__NUM_DPP__MAX];
4309 	unsigned int    LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
4310 	unsigned int    LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
4311 	double     FullDETBufferingTimeY;
4312 	double     FullDETBufferingTimeC;
4313 	double     WritebackDRAMClockChangeLatencyMargin;
4314 	double     WritebackFCLKChangeLatencyMargin;
4315 	double     WritebackLatencyHiding;
4316 	bool    SameTimingForFCLKChange;
4317 
4318 	unsigned int    TotalActiveWriteback = 0;
4319 	unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
4320 	unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];
4321 
4322 	v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
4323 	v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
4324 			+ mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
4325 	v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
4326 	v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
4327 	v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
4328 			+ 10 / DCFClkDeepSleep;
4329 	v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
4330 			+ 10 / DCFClkDeepSleep;
4331 	v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
4332 			+ 10 / DCFClkDeepSleep;
4333 	v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
4334 			+ mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;
4335 
4336 #ifdef __DML_VBA_DEBUG__
4337 	dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
4338 	dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
4339 	dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
4340 	dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark);
4341 	dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark);
4342 	dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark);
4343 	dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark);
4344 	dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark);
4345 	dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark);
4346 	dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark);
4347 	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
4348 			__func__, v->Watermark.Z8StutterEnterPlusExitWatermark);
4349 #endif
4350 
4351 
4352 	TotalActiveWriteback = 0;
4353 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4354 		if (v->WritebackEnable[k] == true)
4355 			TotalActiveWriteback = TotalActiveWriteback + 1;
4356 	}
4357 
4358 	if (TotalActiveWriteback <= 1) {
4359 		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
4360 	} else {
4361 		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
4362 				+ v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4363 	}
4364 	if (v->USRRetrainingRequiredFinal)
4365 		v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
4366 				+ mmSOCParameters.USRRetrainingLatency;
4367 
4368 	if (TotalActiveWriteback <= 1) {
4369 		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4370 				+ mmSOCParameters.WritebackLatency;
4371 		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4372 				+ mmSOCParameters.WritebackLatency;
4373 	} else {
4374 		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
4375 				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
4376 		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
4377 				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024 / 32 / SOCCLK;
4378 	}
4379 
4380 	if (v->USRRetrainingRequiredFinal)
4381 		v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
4382 				+ mmSOCParameters.USRRetrainingLatency;
4383 
4384 	if (v->USRRetrainingRequiredFinal)
4385 		v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
4386 				+ mmSOCParameters.USRRetrainingLatency;
4387 
4388 #ifdef __DML_VBA_DEBUG__
4389 	dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
4390 			__func__, v->Watermark.WritebackDRAMClockChangeWatermark);
4391 	dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark);
4392 	dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark);
4393 	dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal);
4394 	dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
4395 #endif
4396 
4397 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4398 		TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
4399 				SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
4400 	}
4401 
4402 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4403 
4404 		LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
4405 		LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
4406 
4407 
4408 #ifdef __DML_VBA_DEBUG__
4409 		dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines);
4410 		dml_print("DML::%s: k=%d, v->LineBufferSizeFinal     = %d\n", __func__, k, v->LineBufferSizeFinal);
4411 		dml_print("DML::%s: k=%d, v->LBBitPerPixel      = %d\n", __func__, k, v->LBBitPerPixel[k]);
4412 		dml_print("DML::%s: k=%d, v->HRatio             = %f\n", __func__, k, v->HRatio[k]);
4413 		dml_print("DML::%s: k=%d, v->vtaps              = %d\n", __func__, k, v->vtaps[k]);
4414 #endif
4415 
4416 		EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
4417 		EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
4418 		EffectiveDETBufferSizeY = DETBufferSizeY[k];
4419 
4420 		if (UnboundedRequestEnabled) {
4421 			EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
4422 					+ CompressedBufferSizeInkByte * 1024
4423 							* (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
4424 							/ (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
4425 		}
4426 
4427 		LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
4428 		LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
4429 		FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
4430 
4431 		ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
4432 				- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];
4433 
4434 		if (v->NumberOfActiveSurfaces > 1) {
4435 			ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
4436 					- (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
4437 							/ v->PixelClock[k] / v->VRatio[k];
4438 		}
4439 
4440 		if (BytePerPixelDETC[k] > 0) {
4441 			LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
4442 			LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
4443 			FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
4444 					/ v->VRatioChroma[k];
4445 			ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
4446 					- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
4447 							/ v->PixelClock[k];
4448 			if (v->NumberOfActiveSurfaces > 1) {
4449 				ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
4450 						- (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
4451 								/ v->PixelClock[k] / v->VRatioChroma[k];
4452 			}
4453 			ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
4454 					ActiveClockChangeLatencyHidingC);
4455 		} else {
4456 			ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
4457 		}
4458 
4459 		ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4460 				- v->Watermark.DRAMClockChangeWatermark;
4461 		ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
4462 				- v->Watermark.FCLKChangeWatermark;
4463 		USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;
4464 
4465 		if (v->WritebackEnable[k]) {
4466 			WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
4467 					/ (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
4468 							/ (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
4469 			if (v->WritebackPixelFormat[k] == dm_444_64)
4470 				WritebackLatencyHiding = WritebackLatencyHiding / 2;
4471 
4472 			WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
4473 					- v->Watermark.WritebackDRAMClockChangeWatermark;
4474 
4475 			WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
4476 					- v->Watermark.WritebackFCLKChangeWatermark;
4477 
4478 			ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
4479 					WritebackFCLKChangeLatencyMargin);
4480 			ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
4481 					WritebackDRAMClockChangeLatencyMargin);
4482 		}
4483 		MaxActiveDRAMClockChangeLatencySupported[k] =
4484 				(v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
4485 						0 :
4486 						(ActiveDRAMClockChangeLatencyMargin[k]
4487 								+ mmSOCParameters.DRAMClockChangeLatency);
4488 	}
4489 
4490 	for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
4491 		for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
4492 			if (i == j ||
4493 					(v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
4494 					(v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
4495 					(v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
4496 					(v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
4497 					v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
4498 					v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
4499 					(v->DRRDisplay[i] || v->DRRDisplay[j]))) {
4500 				SynchronizedSurfaces[i][j] = true;
4501 			} else {
4502 				SynchronizedSurfaces[i][j] = false;
4503 			}
4504 		}
4505 	}
4506 
4507 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4508 		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4509 				(!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
4510 				ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
4511 			FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
4512 			MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
4513 			SurfaceWithMinActiveFCLKChangeMargin = k;
4514 		}
4515 	}
4516 
4517 	*MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;
4518 
4519 	SameTimingForFCLKChange = true;
4520 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4521 		if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
4522 			if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4523 					(SameTimingForFCLKChange ||
4524 					ActiveFCLKChangeLatencyMargin[k] <
4525 					SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
4526 				SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
4527 			}
4528 			SameTimingForFCLKChange = false;
4529 		}
4530 	}
4531 
4532 	if (MinActiveFCLKChangeMargin > 0) {
4533 		*FCLKChangeSupport = dm_fclock_change_vactive;
4534 	} else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
4535 			(PrefetchMode <= 1)) {
4536 		*FCLKChangeSupport = dm_fclock_change_vblank;
4537 	} else {
4538 		*FCLKChangeSupport = dm_fclock_change_unsupported;
4539 	}
4540 
4541 	*USRRetrainingSupport = true;
4542 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4543 		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
4544 				(USRRetrainingLatencyMargin[k] < 0)) {
4545 			*USRRetrainingSupport = false;
4546 		}
4547 	}
4548 
4549 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4550 		if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
4551 				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
4552 				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
4553 				ActiveDRAMClockChangeLatencyMargin[k] < 0) {
4554 			if (PrefetchMode > 0) {
4555 				DRAMClockChangeSupportNumber = 2;
4556 			} else if (DRAMClockChangeSupportNumber == 0) {
4557 				DRAMClockChangeSupportNumber = 1;
4558 				LastSurfaceWithoutMargin = k;
4559 			} else if (DRAMClockChangeSupportNumber == 1 &&
4560 					!SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
4561 				DRAMClockChangeSupportNumber = 2;
4562 			}
4563 		}
4564 	}
4565 
4566 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4567 		if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
4568 			DRAMClockChangeMethod = 1;
4569 		else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
4570 			DRAMClockChangeMethod = 2;
4571 	}
4572 
4573 	if (DRAMClockChangeMethod == 0) {
4574 		if (DRAMClockChangeSupportNumber == 0)
4575 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
4576 		else if (DRAMClockChangeSupportNumber == 1)
4577 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
4578 		else
4579 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4580 	} else if (DRAMClockChangeMethod == 1) {
4581 		if (DRAMClockChangeSupportNumber == 0)
4582 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
4583 		else if (DRAMClockChangeSupportNumber == 1)
4584 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
4585 		else
4586 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4587 	} else {
4588 		if (DRAMClockChangeSupportNumber == 0)
4589 			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
4590 		else if (DRAMClockChangeSupportNumber == 1)
4591 			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
4592 		else
4593 			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
4594 	}
4595 
4596 	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
4597 		unsigned int dst_y_pstate;
4598 		unsigned int src_y_pstate_l;
4599 		unsigned int src_y_pstate_c;
4600 		unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;
4601 
4602 		dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1);
4603 		src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]);
4604 		src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
4605 		sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];
4606 
4607 #ifdef __DML_VBA_DEBUG__
4608 dml_print("DML::%s: k=%d, DETBufferSizeY               = %d\n", __func__, k, DETBufferSizeY[k]);
4609 dml_print("DML::%s: k=%d, BytePerPixelDETY             = %f\n", __func__, k, BytePerPixelDETY[k]);
4610 dml_print("DML::%s: k=%d, SwathWidthY                  = %d\n", __func__, k, SwathWidthY[k]);
4611 dml_print("DML::%s: k=%d, SwathHeightY                 = %d\n", __func__, k, SwathHeightY[k]);
4612 dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
4613 dml_print("DML::%s: k=%d, dst_y_pstate      = %d\n", __func__, k, dst_y_pstate);
4614 dml_print("DML::%s: k=%d, src_y_pstate_l    = %d\n", __func__, k, src_y_pstate_l);
4615 dml_print("DML::%s: k=%d, src_y_ahead_l     = %d\n", __func__, k, src_y_ahead_l);
4616 dml_print("DML::%s: k=%d, v->meta_row_height   = %d\n", __func__, k, v->meta_row_height[k]);
4617 dml_print("DML::%s: k=%d, sub_vp_lines_l    = %d\n", __func__, k, sub_vp_lines_l);
4618 #endif
4619 		SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;
4620 
4621 		if (BytePerPixelDETC[k] > 0) {
4622 			src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]);
4623 			src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
4624 			sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
4625 			SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);
4626 
4627 #ifdef __DML_VBA_DEBUG__
4628 dml_print("DML::%s: k=%d, src_y_pstate_c            = %d\n", __func__, k, src_y_pstate_c);
4629 dml_print("DML::%s: k=%d, src_y_ahead_c             = %d\n", __func__, k, src_y_ahead_c);
4630 dml_print("DML::%s: k=%d, v->meta_row_height_chroma    = %d\n", __func__, k, v->meta_row_height_chroma[k]);
4631 dml_print("DML::%s: k=%d, sub_vp_lines_c            = %d\n", __func__, k, sub_vp_lines_c);
4632 #endif
4633 		}
4634 	}
4635 #ifdef __DML_VBA_DEBUG__
4636 	dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
4637 	dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
4638 	dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
4639 			__func__, *MinActiveFCLKChangeLatencySupported);
4640 	dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
4641 #endif
4642 } // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4643 
/*
 * Compute the DISPCLK needed by one writeback pipe.
 *
 * Three candidate clocks are derived from PixelClock:
 *   - a horizontal one, from the horizontal tap count and WritebackHRatio;
 *   - a vertical one, from the vertical tap count and the destination width,
 *     spread over HTotal;
 *   - a third one, from the vertical tap count, the destination width and
 *     WritebackLineBufferSize, spread over WritebackSourceWidth.
 * The largest candidate is handed to dml32_RoundToDFSGranularity() together
 * with DISPCLKDPPCLKVCOSpeed, and that result is returned.
 *
 * WritebackPixelFormat and WritebackVRatio are part of the signature but are
 * not read by this function.
 */
double dml32_CalculateWriteBackDISPCLK(
		enum source_format_class WritebackPixelFormat,
		double PixelClock,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackHTaps,
		unsigned int WritebackVTaps,
		unsigned int   WritebackSourceWidth,
		unsigned int   WritebackDestinationWidth,
		unsigned int HTotal,
		unsigned int WritebackLineBufferSize,
		double DISPCLKDPPCLKVCOSpeed)
{
	double h_tap_groups, v_term, hb_term;
	double clk_h, clk_v, clk_hb;

	/* Horizontal taps are counted in groups of eight, rounded up. */
	h_tap_groups = dml_ceil(WritebackHTaps / 8.0, 1);
	clk_h = PixelClock * h_tap_groups / WritebackHRatio;

	/* Destination width is counted in groups of six, rounded up. */
	v_term = WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0;
	clk_v = PixelClock * v_term / HTotal;

	/*
	 * The width * taps product is done in unsigned int arithmetic before
	 * the line buffer term (a double) is subtracted from it.
	 */
	hb_term = WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0;
	clk_hb = PixelClock * WritebackVTaps * hb_term / 6.0 / WritebackSourceWidth;

	/* NOTE(review): the literal 1 presumably selects round-up - confirm in the helper. */
	return dml32_RoundToDFSGranularity(dml_max3(clk_h, clk_v, clk_hb), 1, DISPCLKDPPCLKVCOSpeed);
}
4665 
dml32_CalculateMinAndMaxPrefetchMode(enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,unsigned int * MinPrefetchMode,unsigned int * MaxPrefetchMode)4666 void dml32_CalculateMinAndMaxPrefetchMode(
4667 		enum dm_prefetch_modes   AllowForPStateChangeOrStutterInVBlankFinal,
4668 		unsigned int             *MinPrefetchMode,
4669 		unsigned int             *MaxPrefetchMode)
4670 {
4671 	if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4672 		*MinPrefetchMode = 3;
4673 		*MaxPrefetchMode = 3;
4674 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4675 		*MinPrefetchMode = 2;
4676 		*MaxPrefetchMode = 2;
4677 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4678 		*MinPrefetchMode = 1;
4679 		*MaxPrefetchMode = 1;
4680 	} else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4681 		*MinPrefetchMode = 0;
4682 		*MaxPrefetchMode = 0;
4683 	} else {
4684 		*MinPrefetchMode = 0;
4685 		*MaxPrefetchMode = 3;
4686 	}
4687 } // CalculateMinAndMaxPrefetchMode
4688 
/*
 * Fill the per-surface line, request and cursor delivery time arrays.
 *
 * The work is done in three passes over the first NumberOfActiveSurfaces
 * entries of every array:
 *
 *  1. Line delivery time, for luma and chroma, in the active and prefetch
 *     cases. When the relevant vertical ratio is <= 1 the time is
 *     swath_width_ub * DPPPerSurface / HRatio / PixelClock, otherwise it is
 *     swath_width_ub / PSCL_THROUGHPUT / Dppclk. The active case tests
 *     VRatio / VRatioChroma, the prefetch case VRatioPrefetchY /
 *     VRatioPrefetchC. Chroma outputs are 0 when BytePerPixelC is 0.
 *
 *  2. Request delivery time: each line delivery time divided by the number
 *     of requests per swath, which is the swath width divided by the 256-byte
 *     block width, or by the block height when IsVertical(SourceRotation)
 *     holds. Chroma outputs are 0 when BytePerPixelC is 0.
 *
 *  3. Cursor request delivery time, active and prefetch, using only cursor
 *     index 0 of each surface. Both outputs are 0 when NumberOfCursors is 0.
 *
 * NOTE(review): the time unit of the outputs follows from the units of
 * PixelClock and Dppclk, which are not visible here - confirm with callers.
 */
void dml32_CalculatePixelDeliveryTimes(
		unsigned int             NumberOfActiveSurfaces,
		double              VRatio[],
		double              VRatioChroma[],
		double              VRatioPrefetchY[],
		double              VRatioPrefetchC[],
		unsigned int             swath_width_luma_ub[],
		unsigned int             swath_width_chroma_ub[],
		unsigned int             DPPPerSurface[],
		double              HRatio[],
		double              HRatioChroma[],
		double              PixelClock[],
		double              PSCL_THROUGHPUT[],
		double              PSCL_THROUGHPUT_CHROMA[],
		double              Dppclk[],
		unsigned int             BytePerPixelC[],
		enum dm_rotation_angle   SourceRotation[],
		unsigned int             NumberOfCursors[],
		unsigned int             CursorWidth[][DC__NUM_CURSOR__MAX],
		unsigned int             CursorBPP[][DC__NUM_CURSOR__MAX],
		unsigned int             BlockWidth256BytesY[],
		unsigned int             BlockHeight256BytesY[],
		unsigned int             BlockWidth256BytesC[],
		unsigned int             BlockHeight256BytesC[],

		/* Output */
		double              DisplayPipeLineDeliveryTimeLuma[],
		double              DisplayPipeLineDeliveryTimeChroma[],
		double              DisplayPipeLineDeliveryTimeLumaPrefetch[],
		double              DisplayPipeLineDeliveryTimeChromaPrefetch[],
		double              DisplayPipeRequestDeliveryTimeLuma[],
		double              DisplayPipeRequestDeliveryTimeChroma[],
		double              DisplayPipeRequestDeliveryTimeLumaPrefetch[],
		double              DisplayPipeRequestDeliveryTimeChromaPrefetch[],
		double              CursorRequestDeliveryTime[],
		double              CursorRequestDeliveryTimePrefetch[])
{
	unsigned int k;

	/* Pass 1: time to deliver one line, per plane, active and prefetch. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
		dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
		dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
		dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
		dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
		dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
		dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
		dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
		dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
#endif

		/*
		 * In the "<= 1" arm the swath width * DPP count product is an
		 * unsigned int multiplication; it only becomes a double at the
		 * first division.
		 */
		DisplayPipeLineDeliveryTimeLuma[k] = (VRatio[k] <= 1) ?
				swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k] :
				swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];

		if (BytePerPixelC[k] != 0) {
			DisplayPipeLineDeliveryTimeChroma[k] = (VRatioChroma[k] <= 1) ?
					swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k] :
					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
		} else {
			/* No chroma plane. */
			DisplayPipeLineDeliveryTimeChroma[k] = 0;
		}

		/* Same formulas, selected by the prefetch vertical ratios. */
		DisplayPipeLineDeliveryTimeLumaPrefetch[k] = (VRatioPrefetchY[k] <= 1) ?
				swath_width_luma_ub[k] * DPPPerSurface[k] / HRatio[k] / PixelClock[k] :
				swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];

		if (BytePerPixelC[k] != 0) {
			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = (VRatioPrefetchC[k] <= 1) ?
					swath_width_chroma_ub[k] * DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k] :
					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
		} else {
			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
#endif
	}

	/* Pass 2: split each line time evenly across the requests of a swath. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		double luma_reqs_per_swath;

		/*
		 * Both operands are unsigned int, so this is an integer
		 * division; the truncated quotient is then stored as a double.
		 */
		luma_reqs_per_swath = swath_width_luma_ub[k] /
				(IsVertical(SourceRotation[k]) ? BlockHeight256BytesY[k] : BlockWidth256BytesY[k]);
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, luma_reqs_per_swath);
#endif

		DisplayPipeRequestDeliveryTimeLuma[k] =
				DisplayPipeLineDeliveryTimeLuma[k] / luma_reqs_per_swath;
		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
				DisplayPipeLineDeliveryTimeLumaPrefetch[k] / luma_reqs_per_swath;

		if (BytePerPixelC[k] != 0) {
			double chroma_reqs_per_swath;

			/* Integer division, as for luma. */
			chroma_reqs_per_swath = swath_width_chroma_ub[k] /
					(IsVertical(SourceRotation[k]) ?
							BlockHeight256BytesC[k] : BlockWidth256BytesC[k]);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, chroma_reqs_per_swath);
#endif
			DisplayPipeRequestDeliveryTimeChroma[k] =
					DisplayPipeLineDeliveryTimeChroma[k] / chroma_reqs_per_swath;
			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
					DisplayPipeLineDeliveryTimeChromaPrefetch[k] / chroma_reqs_per_swath;
		} else {
			/* No chroma plane. */
			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
#endif
	}

	/* Pass 3: cursor request delivery time; only cursor 0 is looked at. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		const double cursor_width = (double) CursorWidth[k][0];
		/*
		 * width * bpp / 256 / 8, rounded up to a whole number.
		 * NOTE(review): presumably the count of 256-byte requests per
		 * cursor line with CursorBPP in bits - confirm.
		 * Evaluated even when the surface has no cursor.
		 */
		const unsigned int cursor_reqs = dml_ceil(cursor_width * (double) CursorBPP[k][0] /
				256.0 / 8.0, 1.0);

		if (NumberOfCursors[k] == 0) {
			CursorRequestDeliveryTime[k] = 0;
			CursorRequestDeliveryTimePrefetch[k] = 0;
		} else {
			CursorRequestDeliveryTime[k] = (VRatio[k] <= 1) ?
					cursor_width / HRatio[k] / PixelClock[k] / cursor_reqs :
					cursor_width / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_reqs;

			CursorRequestDeliveryTimePrefetch[k] = (VRatioPrefetchY[k] <= 1) ?
					cursor_width / HRatio[k] / PixelClock[k] / cursor_reqs :
					cursor_width / PSCL_THROUGHPUT[k] / Dppclk[k] / cursor_reqs;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
				__func__, k, NumberOfCursors[k]);
		dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
				__func__, k, CursorRequestDeliveryTime[k]);
		dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
				__func__, k, CursorRequestDeliveryTimePrefetch[k]);
#endif
	}
} // CalculatePixelDeliveryTimes
4869 
dml32_CalculateMetaAndPTETimes(bool use_one_row_for_frame[],unsigned int NumberOfActiveSurfaces,bool GPUVMEnable,unsigned int MetaChunkSize,unsigned int MinMetaChunkSizeBytes,unsigned int HTotal[],double VRatio[],double VRatioChroma[],double DestinationLinesToRequestRowInVBlank[],double DestinationLinesToRequestRowInImmediateFlip[],bool DCCEnable[],double PixelClock[],unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],enum dm_rotation_angle SourceRotation[],unsigned int dpte_row_height[],unsigned int dpte_row_height_chroma[],unsigned int meta_row_width[],unsigned int meta_row_width_chroma[],unsigned int meta_row_height[],unsigned int meta_row_height_chroma[],unsigned int meta_req_width[],unsigned int meta_req_width_chroma[],unsigned int meta_req_height[],unsigned int meta_req_height_chroma[],unsigned int dpte_group_bytes[],unsigned int PTERequestSizeY[],unsigned int PTERequestSizeC[],unsigned int PixelPTEReqWidthY[],unsigned int PixelPTEReqHeightY[],unsigned int PixelPTEReqWidthC[],unsigned int PixelPTEReqHeightC[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],double DST_Y_PER_PTE_ROW_NOM_L[],double DST_Y_PER_PTE_ROW_NOM_C[],double DST_Y_PER_META_ROW_NOM_L[],double DST_Y_PER_META_ROW_NOM_C[],double TimePerMetaChunkNominal[],double TimePerChromaMetaChunkNominal[],double TimePerMetaChunkVBlank[],double TimePerChromaMetaChunkVBlank[],double TimePerMetaChunkFlip[],double TimePerChromaMetaChunkFlip[],double time_per_pte_group_nom_luma[],double time_per_pte_group_vblank_luma[],double time_per_pte_group_flip_luma[],double time_per_pte_group_nom_chroma[],double time_per_pte_group_vblank_chroma[],double time_per_pte_group_flip_chroma[])4870 void dml32_CalculateMetaAndPTETimes(
4871 		bool use_one_row_for_frame[],
4872 		unsigned int NumberOfActiveSurfaces,
4873 		bool GPUVMEnable,
4874 		unsigned int MetaChunkSize,
4875 		unsigned int MinMetaChunkSizeBytes,
4876 		unsigned int    HTotal[],
4877 		double  VRatio[],
4878 		double  VRatioChroma[],
4879 		double  DestinationLinesToRequestRowInVBlank[],
4880 		double  DestinationLinesToRequestRowInImmediateFlip[],
4881 		bool DCCEnable[],
4882 		double  PixelClock[],
4883 		unsigned int BytePerPixelY[],
4884 		unsigned int BytePerPixelC[],
4885 		enum dm_rotation_angle SourceRotation[],
4886 		unsigned int dpte_row_height[],
4887 		unsigned int dpte_row_height_chroma[],
4888 		unsigned int meta_row_width[],
4889 		unsigned int meta_row_width_chroma[],
4890 		unsigned int meta_row_height[],
4891 		unsigned int meta_row_height_chroma[],
4892 		unsigned int meta_req_width[],
4893 		unsigned int meta_req_width_chroma[],
4894 		unsigned int meta_req_height[],
4895 		unsigned int meta_req_height_chroma[],
4896 		unsigned int dpte_group_bytes[],
4897 		unsigned int    PTERequestSizeY[],
4898 		unsigned int    PTERequestSizeC[],
4899 		unsigned int    PixelPTEReqWidthY[],
4900 		unsigned int    PixelPTEReqHeightY[],
4901 		unsigned int    PixelPTEReqWidthC[],
4902 		unsigned int    PixelPTEReqHeightC[],
4903 		unsigned int    dpte_row_width_luma_ub[],
4904 		unsigned int    dpte_row_width_chroma_ub[],
4905 
4906 		/* Output */
4907 		double DST_Y_PER_PTE_ROW_NOM_L[],
4908 		double DST_Y_PER_PTE_ROW_NOM_C[],
4909 		double DST_Y_PER_META_ROW_NOM_L[],
4910 		double DST_Y_PER_META_ROW_NOM_C[],
4911 		double TimePerMetaChunkNominal[],
4912 		double TimePerChromaMetaChunkNominal[],
4913 		double TimePerMetaChunkVBlank[],
4914 		double TimePerChromaMetaChunkVBlank[],
4915 		double TimePerMetaChunkFlip[],
4916 		double TimePerChromaMetaChunkFlip[],
4917 		double time_per_pte_group_nom_luma[],
4918 		double time_per_pte_group_vblank_luma[],
4919 		double time_per_pte_group_flip_luma[],
4920 		double time_per_pte_group_nom_chroma[],
4921 		double time_per_pte_group_vblank_chroma[],
4922 		double time_per_pte_group_flip_chroma[])
4923 {
4924 	unsigned int   meta_chunk_width;
4925 	unsigned int   min_meta_chunk_width;
4926 	unsigned int   meta_chunk_per_row_int;
4927 	unsigned int   meta_row_remainder;
4928 	unsigned int   meta_chunk_threshold;
4929 	unsigned int   meta_chunks_per_row_ub;
4930 	unsigned int   meta_chunk_width_chroma;
4931 	unsigned int   min_meta_chunk_width_chroma;
4932 	unsigned int   meta_chunk_per_row_int_chroma;
4933 	unsigned int   meta_row_remainder_chroma;
4934 	unsigned int   meta_chunk_threshold_chroma;
4935 	unsigned int   meta_chunks_per_row_ub_chroma;
4936 	unsigned int   dpte_group_width_luma;
4937 	unsigned int   dpte_groups_per_row_luma_ub;
4938 	unsigned int   dpte_group_width_chroma;
4939 	unsigned int   dpte_groups_per_row_chroma_ub;
4940 	unsigned int k;
4941 
4942 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4943 		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
4944 		if (BytePerPixelC[k] == 0)
4945 			DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
4946 		else
4947 			DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
4948 		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
4949 		if (BytePerPixelC[k] == 0)
4950 			DST_Y_PER_META_ROW_NOM_C[k] = 0;
4951 		else
4952 			DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
4953 	}
4954 
4955 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
4956 		if (DCCEnable[k] == true) {
4957 			meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
4958 			min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
4959 			meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
4960 			meta_row_remainder = meta_row_width[k] % meta_chunk_width;
4961 			if (!IsVertical(SourceRotation[k]))
4962 				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
4963 			else
4964 				meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
4965 
4966 			if (meta_row_remainder <= meta_chunk_threshold)
4967 				meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
4968 			else
4969 				meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
4970 
4971 			TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
4972 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4973 			TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
4974 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4975 			TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
4976 					HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
4977 			if (BytePerPixelC[k] == 0) {
4978 				TimePerChromaMetaChunkNominal[k] = 0;
4979 				TimePerChromaMetaChunkVBlank[k] = 0;
4980 				TimePerChromaMetaChunkFlip[k] = 0;
4981 			} else {
4982 				meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
4983 						meta_row_height_chroma[k];
4984 				min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
4985 						meta_row_height_chroma[k];
4986 				meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] /
4987 						meta_chunk_width_chroma;
4988 				meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
4989 				if (!IsVertical(SourceRotation[k])) {
4990 					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4991 							meta_req_width_chroma[k];
4992 				} else {
4993 					meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma -
4994 							meta_req_height_chroma[k];
4995 				}
4996 				if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma)
4997 					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
4998 				else
4999 					meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
5000 
5001 				TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
5002 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5003 				TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] *
5004 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5005 				TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5006 						HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
5007 			}
5008 		} else {
5009 			TimePerMetaChunkNominal[k] = 0;
5010 			TimePerMetaChunkVBlank[k] = 0;
5011 			TimePerMetaChunkFlip[k] = 0;
5012 			TimePerChromaMetaChunkNominal[k] = 0;
5013 			TimePerChromaMetaChunkVBlank[k] = 0;
5014 			TimePerChromaMetaChunkFlip[k] = 0;
5015 		}
5016 	}
5017 
5018 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5019 		if (GPUVMEnable == true) {
5020 			if (!IsVertical(SourceRotation[k])) {
5021 				dpte_group_width_luma = (double) dpte_group_bytes[k] /
5022 						(double) PTERequestSizeY[k] * PixelPTEReqWidthY[k];
5023 			} else {
5024 				dpte_group_width_luma = (double) dpte_group_bytes[k] /
5025 						(double) PTERequestSizeY[k] * PixelPTEReqHeightY[k];
5026 			}
5027 
5028 			if (use_one_row_for_frame[k]) {
5029 				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5030 						(double) dpte_group_width_luma / 2.0, 1.0);
5031 			} else {
5032 				dpte_groups_per_row_luma_ub = dml_ceil((double) dpte_row_width_luma_ub[k] /
5033 						(double) dpte_group_width_luma, 1.0);
5034 			}
5035 #ifdef __DML_VBA_DEBUG__
5036 			dml_print("DML::%s: k=%0d, use_one_row_for_frame        = %d\n",
5037 					__func__, k, use_one_row_for_frame[k]);
5038 			dml_print("DML::%s: k=%0d, dpte_group_bytes             = %d\n",
5039 					__func__, k, dpte_group_bytes[k]);
5040 			dml_print("DML::%s: k=%0d, PTERequestSizeY              = %d\n",
5041 					__func__, k, PTERequestSizeY[k]);
5042 			dml_print("DML::%s: k=%0d, PixelPTEReqWidthY            = %d\n",
5043 					__func__, k, PixelPTEReqWidthY[k]);
5044 			dml_print("DML::%s: k=%0d, PixelPTEReqHeightY           = %d\n",
5045 					__func__, k, PixelPTEReqHeightY[k]);
5046 			dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub       = %d\n",
5047 					__func__, k, dpte_row_width_luma_ub[k]);
5048 			dml_print("DML::%s: k=%0d, dpte_group_width_luma        = %d\n",
5049 					__func__, k, dpte_group_width_luma);
5050 			dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub  = %d\n",
5051 					__func__, k, dpte_groups_per_row_luma_ub);
5052 #endif
5053 
5054 			time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
5055 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5056 			time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] *
5057 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5058 			time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5059 					HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
5060 			if (BytePerPixelC[k] == 0) {
5061 				time_per_pte_group_nom_chroma[k] = 0;
5062 				time_per_pte_group_vblank_chroma[k] = 0;
5063 				time_per_pte_group_flip_chroma[k] = 0;
5064 			} else {
5065 				if (!IsVertical(SourceRotation[k])) {
5066 					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5067 							(double) PTERequestSizeC[k] * PixelPTEReqWidthC[k];
5068 				} else {
5069 					dpte_group_width_chroma = (double) dpte_group_bytes[k] /
5070 							(double) PTERequestSizeC[k] * PixelPTEReqHeightC[k];
5071 				}
5072 
5073 				if (use_one_row_for_frame[k]) {
5074 					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5075 							(double) dpte_group_width_chroma / 2.0, 1.0);
5076 				} else {
5077 					dpte_groups_per_row_chroma_ub = dml_ceil((double) dpte_row_width_chroma_ub[k] /
5078 							(double) dpte_group_width_chroma, 1.0);
5079 				}
5080 #ifdef __DML_VBA_DEBUG__
5081 				dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub        = %d\n",
5082 						__func__, k, dpte_row_width_chroma_ub[k]);
5083 				dml_print("DML::%s: k=%0d, dpte_group_width_chroma        = %d\n",
5084 						__func__, k, dpte_group_width_chroma);
5085 				dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub  = %d\n",
5086 						__func__, k, dpte_groups_per_row_chroma_ub);
5087 #endif
5088 				time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
5089 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5090 				time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] *
5091 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5092 				time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] *
5093 						HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
5094 			}
5095 		} else {
5096 			time_per_pte_group_nom_luma[k] = 0;
5097 			time_per_pte_group_vblank_luma[k] = 0;
5098 			time_per_pte_group_flip_luma[k] = 0;
5099 			time_per_pte_group_nom_chroma[k] = 0;
5100 			time_per_pte_group_vblank_chroma[k] = 0;
5101 			time_per_pte_group_flip_chroma[k] = 0;
5102 		}
5103 #ifdef __DML_VBA_DEBUG__
5104 		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank         = %f\n",
5105 				__func__, k, DestinationLinesToRequestRowInVBlank[k]);
5106 		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip  = %f\n",
5107 				__func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
5108 		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L                      = %f\n",
5109 				__func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
5110 		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C                      = %f\n",
5111 				__func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
5112 		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L                     = %f\n",
5113 				__func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
5114 		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C                     = %f\n",
5115 				__func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
5116 		dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal          = %f\n",
5117 				__func__, k, TimePerMetaChunkNominal[k]);
5118 		dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank           = %f\n",
5119 				__func__, k, TimePerMetaChunkVBlank[k]);
5120 		dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip             = %f\n",
5121 				__func__, k, TimePerMetaChunkFlip[k]);
5122 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal    = %f\n",
5123 				__func__, k, TimePerChromaMetaChunkNominal[k]);
5124 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank     = %f\n",
5125 				__func__, k, TimePerChromaMetaChunkVBlank[k]);
5126 		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip       = %f\n",
5127 				__func__, k, TimePerChromaMetaChunkFlip[k]);
5128 		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma      = %f\n",
5129 				__func__, k, time_per_pte_group_nom_luma[k]);
5130 		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma   = %f\n",
5131 				__func__, k, time_per_pte_group_vblank_luma[k]);
5132 		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma     = %f\n",
5133 				__func__, k, time_per_pte_group_flip_luma[k]);
5134 		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma    = %f\n",
5135 				__func__, k, time_per_pte_group_nom_chroma[k]);
5136 		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
5137 				__func__, k, time_per_pte_group_vblank_chroma[k]);
5138 		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma   = %f\n",
5139 				__func__, k, time_per_pte_group_flip_chroma[k]);
5140 #endif
5141 	}
5142 } // CalculateMetaAndPTETimes
5143 
/*
 * For each active surface, work out how much time may be spent per VM group
 * and per VM request, both for the vblank fetch and for the immediate-flip
 * fetch.
 *
 * A surface only gets non-zero times when GPUVM is enabled and either DCC is
 * enabled for it or more than one page table level is in use.  In that case:
 *  - the group count is the sum of the per-table byte totals divided by
 *    vm_group_bytes[k], each rounded with dml_ceil(..., 1);
 *  - the request count is the sum of the per-table byte totals divided by 64
 *    (integer division);
 *  - which tables contribute depends on DCCEnable[k] and
 *    GPUVMMaxPageTableLevels: dpde0 only without DCC, meta PTE only with DCC
 *    and exactly one level, both (plus one extra group per plane) otherwise;
 *  - chroma tables are only counted when BytePerPixelC[k] is non-zero.
 *
 * Each output is DestinationLinesToRequestVM* x HTotal / PixelClock divided
 * by the relevant count, and is halved when GPUVMMaxPageTableLevels > 2.
 *
 * Outputs (indexed by surface): TimePerVMGroupVBlank, TimePerVMGroupFlip,
 * TimePerVMRequestVBlank, TimePerVMRequestFlip.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int     NumberOfActiveSurfaces,
		bool     GPUVMEnable,
		unsigned int     GPUVMMaxPageTableLevels,
		unsigned int     HTotal[],
		unsigned int     BytePerPixelC[],
		double      DestinationLinesToRequestVMInVBlank[],
		double      DestinationLinesToRequestVMInImmediateFlip[],
		bool     DCCEnable[],
		double      PixelClock[],
		unsigned int        dpte_row_width_luma_ub[],
		unsigned int        dpte_row_width_chroma_ub[],
		unsigned int     vm_group_bytes[],
		unsigned int     dpde0_bytes_per_frame_ub_l[],
		unsigned int     dpde0_bytes_per_frame_ub_c[],
		unsigned int        meta_pte_bytes_per_frame_ub_l[],
		unsigned int        meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double      TimePerVMGroupVBlank[],
		double      TimePerVMGroupFlip[],
		double      TimePerVMRequestVBlank[],
		double      TimePerVMRequestFlip[])
{
	unsigned int k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (!(GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1))) {
			/* No lower VM stage to fetch for this surface. */
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		} else {
			const bool has_chroma = BytePerPixelC[k] > 0;
			/* dpde0 tables count unless DCC is on with exactly one level. */
			const bool count_dpde0 = DCCEnable[k] == false || GPUVMMaxPageTableLevels != 1;
			/* meta PTE tables count whenever DCC is on. */
			const bool count_meta = DCCEnable[k] == true;
			const double group_bytes = (double) (vm_group_bytes[k]);
			double group_total = 0.0;
			unsigned int num_group_per_lower_vm_stage;
			unsigned int num_req_per_lower_vm_stage = 0;
			double vblank_budget;
			double flip_budget;

			if (count_dpde0) {
				group_total += dml_ceil(
						(double) (dpde0_bytes_per_frame_ub_l[k]) / group_bytes, 1.0);
				num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_total += dml_ceil(
							(double) (dpde0_bytes_per_frame_ub_c[k]) / group_bytes, 1.0);
					num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_c[k] / 64;
				}
			}

			if (count_meta) {
				group_total += dml_ceil(
						(double) (meta_pte_bytes_per_frame_ub_l[k]) / group_bytes, 1.0);
				num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_total += dml_ceil(
							(double) (meta_pte_bytes_per_frame_ub_c[k]) / group_bytes, 1.0);
					num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64;
				}
			}

			/* When both table kinds are walked, one extra group per plane. */
			if (count_dpde0 && count_meta)
				group_total += has_chroma ? 2 : 1;

			num_group_per_lower_vm_stage = group_total;

			vblank_budget = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k];
			flip_budget = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k];

			TimePerVMGroupVBlank[k] = vblank_budget / num_group_per_lower_vm_stage;
			TimePerVMGroupFlip[k] = flip_budget / num_group_per_lower_vm_stage;
			TimePerVMRequestVBlank[k] = vblank_budget / num_req_per_lower_vm_stage;
			TimePerVMRequestFlip[k] = flip_budget / num_req_per_lower_vm_stage;

			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] /= 2;
				TimePerVMGroupFlip[k] /= 2;
				TimePerVMRequestVBlank[k] /= 2;
				TimePerVMRequestFlip[k] /= 2;
			}
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
5297 
dml32_CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,unsigned int nomDETInKByte,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dm_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum dm_rotation_angle SourceRotation,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)5298 void dml32_CalculateDCCConfiguration(
5299 		bool             DCCEnabled,
5300 		bool             DCCProgrammingAssumesScanDirectionUnknown,
5301 		enum source_format_class SourcePixelFormat,
5302 		unsigned int             SurfaceWidthLuma,
5303 		unsigned int             SurfaceWidthChroma,
5304 		unsigned int             SurfaceHeightLuma,
5305 		unsigned int             SurfaceHeightChroma,
5306 		unsigned int                nomDETInKByte,
5307 		unsigned int             RequestHeight256ByteLuma,
5308 		unsigned int             RequestHeight256ByteChroma,
5309 		enum dm_swizzle_mode     TilingFormat,
5310 		unsigned int             BytePerPixelY,
5311 		unsigned int             BytePerPixelC,
5312 		double              BytePerPixelDETY,
5313 		double              BytePerPixelDETC,
5314 		enum dm_rotation_angle   SourceRotation,
5315 		/* Output */
5316 		unsigned int        *MaxUncompressedBlockLuma,
5317 		unsigned int        *MaxUncompressedBlockChroma,
5318 		unsigned int        *MaxCompressedBlockLuma,
5319 		unsigned int        *MaxCompressedBlockChroma,
5320 		unsigned int        *IndependentBlockLuma,
5321 		unsigned int        *IndependentBlockChroma)
5322 {
5323 	typedef enum {
5324 		REQ_256Bytes,
5325 		REQ_128BytesNonContiguous,
5326 		REQ_128BytesContiguous,
5327 		REQ_NA
5328 	} RequestType;
5329 
5330 	RequestType   RequestLuma;
5331 	RequestType   RequestChroma;
5332 
5333 	unsigned int   segment_order_horz_contiguous_luma;
5334 	unsigned int   segment_order_horz_contiguous_chroma;
5335 	unsigned int   segment_order_vert_contiguous_luma;
5336 	unsigned int   segment_order_vert_contiguous_chroma;
5337 	unsigned int req128_horz_wc_l;
5338 	unsigned int req128_horz_wc_c;
5339 	unsigned int req128_vert_wc_l;
5340 	unsigned int req128_vert_wc_c;
5341 	unsigned int MAS_vp_horz_limit;
5342 	unsigned int MAS_vp_vert_limit;
5343 	unsigned int max_vp_horz_width;
5344 	unsigned int max_vp_vert_height;
5345 	unsigned int eff_surf_width_l;
5346 	unsigned int eff_surf_width_c;
5347 	unsigned int eff_surf_height_l;
5348 	unsigned int eff_surf_height_c;
5349 	unsigned int full_swath_bytes_horz_wc_l;
5350 	unsigned int full_swath_bytes_horz_wc_c;
5351 	unsigned int full_swath_bytes_vert_wc_l;
5352 	unsigned int full_swath_bytes_vert_wc_c;
5353 	unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5354 
5355 	unsigned int   yuv420;
5356 	unsigned int   horz_div_l;
5357 	unsigned int   horz_div_c;
5358 	unsigned int   vert_div_l;
5359 	unsigned int   vert_div_c;
5360 
5361 	unsigned int     swath_buf_size;
5362 	double   detile_buf_vp_horz_limit;
5363 	double   detile_buf_vp_vert_limit;
5364 
5365 	yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5366 			SourcePixelFormat == dm_420_12) ? 1 : 0);
5367 	horz_div_l = 1;
5368 	horz_div_c = 1;
5369 	vert_div_l = 1;
5370 	vert_div_c = 1;
5371 
5372 	if (BytePerPixelY == 1)
5373 		vert_div_l = 0;
5374 	if (BytePerPixelC == 1)
5375 		vert_div_c = 0;
5376 
5377 	if (BytePerPixelC == 0) {
5378 		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5379 		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5380 				BytePerPixelY / (1 + horz_div_l));
5381 		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5382 				(1 + vert_div_l));
5383 	} else {
5384 		swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5385 		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5386 				BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5387 				BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5388 		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5389 				(1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5390 				(1 + vert_div_c) / (1 + yuv420));
5391 	}
5392 
5393 	if (SourcePixelFormat == dm_420_10) {
5394 		detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5395 		detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5396 	}
5397 
5398 	detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5399 	detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5400 
5401 	MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5402 	MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5403 	max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5404 	max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5405 	eff_surf_width_l =  (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5406 	eff_surf_width_c =  eff_surf_width_l / (1 + yuv420);
5407 	eff_surf_height_l =  (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5408 	eff_surf_height_c =  eff_surf_height_l / (1 + yuv420);
5409 
5410 	full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5411 	full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5412 	if (BytePerPixelC > 0) {
5413 		full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5414 		full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5415 	} else {
5416 		full_swath_bytes_horz_wc_c = 0;
5417 		full_swath_bytes_vert_wc_c = 0;
5418 	}
5419 
5420 	if (SourcePixelFormat == dm_420_10) {
5421 		full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5422 		full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5423 		full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5424 		full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5425 	}
5426 
5427 	if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5428 		req128_horz_wc_l = 0;
5429 		req128_horz_wc_c = 0;
5430 	} else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5431 			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5432 		req128_horz_wc_l = 0;
5433 		req128_horz_wc_c = 1;
5434 	} else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5435 			full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5436 		req128_horz_wc_l = 1;
5437 		req128_horz_wc_c = 0;
5438 	} else {
5439 		req128_horz_wc_l = 1;
5440 		req128_horz_wc_c = 1;
5441 	}
5442 
5443 	if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5444 		req128_vert_wc_l = 0;
5445 		req128_vert_wc_c = 0;
5446 	} else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5447 			full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5448 		req128_vert_wc_l = 0;
5449 		req128_vert_wc_c = 1;
5450 	} else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5451 			full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5452 		req128_vert_wc_l = 1;
5453 		req128_vert_wc_c = 0;
5454 	} else {
5455 		req128_vert_wc_l = 1;
5456 		req128_vert_wc_c = 1;
5457 	}
5458 
5459 	if (BytePerPixelY == 2) {
5460 		segment_order_horz_contiguous_luma = 0;
5461 		segment_order_vert_contiguous_luma = 1;
5462 	} else {
5463 		segment_order_horz_contiguous_luma = 1;
5464 		segment_order_vert_contiguous_luma = 0;
5465 	}
5466 
5467 	if (BytePerPixelC == 2) {
5468 		segment_order_horz_contiguous_chroma = 0;
5469 		segment_order_vert_contiguous_chroma = 1;
5470 	} else {
5471 		segment_order_horz_contiguous_chroma = 1;
5472 		segment_order_vert_contiguous_chroma = 0;
5473 	}
5474 #ifdef __DML_VBA_DEBUG__
5475 	dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5476 	dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5477 	dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5478 	dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5479 	dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5480 	dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5481 	dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5482 	dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5483 	dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5484 			__func__, segment_order_horz_contiguous_chroma);
5485 #endif
5486 
5487 	if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5488 		if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5489 			RequestLuma = REQ_256Bytes;
5490 		else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5491 				(req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5492 			RequestLuma = REQ_128BytesNonContiguous;
5493 		else
5494 			RequestLuma = REQ_128BytesContiguous;
5495 
5496 		if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5497 			RequestChroma = REQ_256Bytes;
5498 		else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5499 				(req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5500 			RequestChroma = REQ_128BytesNonContiguous;
5501 		else
5502 			RequestChroma = REQ_128BytesContiguous;
5503 
5504 	} else if (!IsVertical(SourceRotation)) {
5505 		if (req128_horz_wc_l == 0)
5506 			RequestLuma = REQ_256Bytes;
5507 		else if (segment_order_horz_contiguous_luma == 0)
5508 			RequestLuma = REQ_128BytesNonContiguous;
5509 		else
5510 			RequestLuma = REQ_128BytesContiguous;
5511 
5512 		if (req128_horz_wc_c == 0)
5513 			RequestChroma = REQ_256Bytes;
5514 		else if (segment_order_horz_contiguous_chroma == 0)
5515 			RequestChroma = REQ_128BytesNonContiguous;
5516 		else
5517 			RequestChroma = REQ_128BytesContiguous;
5518 
5519 	} else {
5520 		if (req128_vert_wc_l == 0)
5521 			RequestLuma = REQ_256Bytes;
5522 		else if (segment_order_vert_contiguous_luma == 0)
5523 			RequestLuma = REQ_128BytesNonContiguous;
5524 		else
5525 			RequestLuma = REQ_128BytesContiguous;
5526 
5527 		if (req128_vert_wc_c == 0)
5528 			RequestChroma = REQ_256Bytes;
5529 		else if (segment_order_vert_contiguous_chroma == 0)
5530 			RequestChroma = REQ_128BytesNonContiguous;
5531 		else
5532 			RequestChroma = REQ_128BytesContiguous;
5533 	}
5534 
5535 	if (RequestLuma == REQ_256Bytes) {
5536 		*MaxUncompressedBlockLuma = 256;
5537 		*MaxCompressedBlockLuma = 256;
5538 		*IndependentBlockLuma = 0;
5539 	} else if (RequestLuma == REQ_128BytesContiguous) {
5540 		*MaxUncompressedBlockLuma = 256;
5541 		*MaxCompressedBlockLuma = 128;
5542 		*IndependentBlockLuma = 128;
5543 	} else {
5544 		*MaxUncompressedBlockLuma = 256;
5545 		*MaxCompressedBlockLuma = 64;
5546 		*IndependentBlockLuma = 64;
5547 	}
5548 
5549 	if (RequestChroma == REQ_256Bytes) {
5550 		*MaxUncompressedBlockChroma = 256;
5551 		*MaxCompressedBlockChroma = 256;
5552 		*IndependentBlockChroma = 0;
5553 	} else if (RequestChroma == REQ_128BytesContiguous) {
5554 		*MaxUncompressedBlockChroma = 256;
5555 		*MaxCompressedBlockChroma = 128;
5556 		*IndependentBlockChroma = 128;
5557 	} else {
5558 		*MaxUncompressedBlockChroma = 256;
5559 		*MaxCompressedBlockChroma = 64;
5560 		*IndependentBlockChroma = 64;
5561 	}
5562 
5563 	if (DCCEnabled != true || BytePerPixelC == 0) {
5564 		*MaxUncompressedBlockChroma = 0;
5565 		*MaxCompressedBlockChroma = 0;
5566 		*IndependentBlockChroma = 0;
5567 	}
5568 
5569 	if (DCCEnabled != true) {
5570 		*MaxUncompressedBlockLuma = 0;
5571 		*MaxCompressedBlockLuma = 0;
5572 		*IndependentBlockLuma = 0;
5573 	}
5574 
5575 #ifdef __DML_VBA_DEBUG__
5576 	dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5577 	dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5578 	dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5579 	dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5580 	dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5581 	dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5582 #endif
5583 
5584 } // CalculateDCCConfiguration
5585 
dml32_CalculateStutterEfficiency(unsigned int CompressedBufferSizeInkByte,enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool UnboundedRequestEnabled,unsigned int MetaFIFOSizeInKEntries,unsigned int ZeroSizeBufferEntries,unsigned int PixelChunkSizeInKByte,unsigned int NumberOfActiveSurfaces,unsigned int ROBBufferSizeInKByte,double TotalDataReadBandwidth,double DCFCLK,double ReturnBW,unsigned int CompbufReservedSpace64B,unsigned int CompbufReservedSpaceZs,double SRExitTime,double SRExitZ8Time,bool SynchronizeTimingsFinal,unsigned int BlendingAndTiming[],double StutterEnterPlusExitWatermark,double Z8StutterEnterPlusExitWatermark,bool ProgressiveToInterlaceUnitInOPP,bool Interlace[],double MinTTUVBlank[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeY[],unsigned int BytePerPixelY[],double BytePerPixelDETY[],double SwathWidthY[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],double NetDCCRateLuma[],double NetDCCRateChroma[],double DCCFractionOfZeroSizeRequestsLuma[],double DCCFractionOfZeroSizeRequestsChroma[],unsigned int HTotal[],unsigned int VTotal[],double PixelClock[],double VRatio[],enum dm_rotation_angle SourceRotation[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesC[],unsigned int BlockWidth256BytesC[],unsigned int DCCYMaxUncompressedBlock[],unsigned int DCCCMaxUncompressedBlock[],unsigned int VActive[],bool DCCEnable[],bool WritebackEnable[],double ReadBandwidthSurfaceLuma[],double ReadBandwidthSurfaceChroma[],double meta_row_bw[],double dpte_row_bw[],double * StutterEfficiencyNotIncludingVBlank,double * StutterEfficiency,unsigned int * NumberOfStutterBurstsPerFrame,double * Z8StutterEfficiencyNotIncludingVBlank,double * Z8StutterEfficiency,unsigned int * Z8NumberOfStutterBurstsPerFrame,double * StutterPeriod,bool * DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)5586 void dml32_CalculateStutterEfficiency(
5587 		unsigned int      CompressedBufferSizeInkByte,
5588 		enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5589 		bool   UnboundedRequestEnabled,
5590 		unsigned int      MetaFIFOSizeInKEntries,
5591 		unsigned int      ZeroSizeBufferEntries,
5592 		unsigned int      PixelChunkSizeInKByte,
5593 		unsigned int   NumberOfActiveSurfaces,
5594 		unsigned int      ROBBufferSizeInKByte,
5595 		double    TotalDataReadBandwidth,
5596 		double    DCFCLK,
5597 		double    ReturnBW,
5598 		unsigned int      CompbufReservedSpace64B,
5599 		unsigned int      CompbufReservedSpaceZs,
5600 		double    SRExitTime,
5601 		double    SRExitZ8Time,
5602 		bool   SynchronizeTimingsFinal,
5603 		unsigned int   BlendingAndTiming[],
5604 		double    StutterEnterPlusExitWatermark,
5605 		double    Z8StutterEnterPlusExitWatermark,
5606 		bool   ProgressiveToInterlaceUnitInOPP,
5607 		bool   Interlace[],
5608 		double    MinTTUVBlank[],
5609 		unsigned int   DPPPerSurface[],
5610 		unsigned int      DETBufferSizeY[],
5611 		unsigned int   BytePerPixelY[],
5612 		double    BytePerPixelDETY[],
5613 		double      SwathWidthY[],
5614 		unsigned int   SwathHeightY[],
5615 		unsigned int   SwathHeightC[],
5616 		double    NetDCCRateLuma[],
5617 		double    NetDCCRateChroma[],
5618 		double    DCCFractionOfZeroSizeRequestsLuma[],
5619 		double    DCCFractionOfZeroSizeRequestsChroma[],
5620 		unsigned int      HTotal[],
5621 		unsigned int      VTotal[],
5622 		double    PixelClock[],
5623 		double    VRatio[],
5624 		enum dm_rotation_angle SourceRotation[],
5625 		unsigned int   BlockHeight256BytesY[],
5626 		unsigned int   BlockWidth256BytesY[],
5627 		unsigned int   BlockHeight256BytesC[],
5628 		unsigned int   BlockWidth256BytesC[],
5629 		unsigned int   DCCYMaxUncompressedBlock[],
5630 		unsigned int   DCCCMaxUncompressedBlock[],
5631 		unsigned int      VActive[],
5632 		bool   DCCEnable[],
5633 		bool   WritebackEnable[],
5634 		double    ReadBandwidthSurfaceLuma[],
5635 		double    ReadBandwidthSurfaceChroma[],
5636 		double    meta_row_bw[],
5637 		double    dpte_row_bw[],
5638 
5639 		/* Output */
5640 		double   *StutterEfficiencyNotIncludingVBlank,
5641 		double   *StutterEfficiency,
5642 		unsigned int     *NumberOfStutterBurstsPerFrame,
5643 		double   *Z8StutterEfficiencyNotIncludingVBlank,
5644 		double   *Z8StutterEfficiency,
5645 		unsigned int     *Z8NumberOfStutterBurstsPerFrame,
5646 		double   *StutterPeriod,
5647 		bool  *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5648 {
5649 
5650 	bool FoundCriticalSurface = false;
5651 	unsigned int SwathSizeCriticalSurface = 0;
5652 	unsigned int LastChunkOfSwathSize;
5653 	unsigned int MissingPartOfLastSwathOfDETSize;
5654 	double LastZ8StutterPeriod = 0.0;
5655 	double LastStutterPeriod = 0.0;
5656 	unsigned int TotalNumberOfActiveOTG = 0;
5657 	double doublePixelClock = 0;
5658 	unsigned int doubleHTotal = 0;
5659 	unsigned int doubleVTotal = 0;
5660 	bool SameTiming = true;
5661 	double DETBufferingTimeY;
5662 	double SwathWidthYCriticalSurface = 0.0;
5663 	double SwathHeightYCriticalSurface = 0.0;
5664 	double VActiveTimeCriticalSurface = 0.0;
5665 	double FrameTimeCriticalSurface = 0.0;
5666 	unsigned int BytePerPixelYCriticalSurface = 0;
5667 	double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5668 	unsigned int DETBufferSizeYCriticalSurface = 0;
5669 	double MinTTUVBlankCriticalSurface = 0.0;
5670 	unsigned int BlockWidth256BytesYCriticalSurface = 0;
5671 	bool doublePlaneCriticalSurface = 0;
5672 	bool doublePipeCriticalSurface = 0;
5673 	double TotalCompressedReadBandwidth;
5674 	double TotalRowReadBandwidth;
5675 	double AverageDCCCompressionRate;
5676 	double EffectiveCompressedBufferSize;
5677 	double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5678 	double StutterBurstTime;
5679 	unsigned int TotalActiveWriteback;
5680 	double LinesInDETY;
5681 	double LinesInDETYRoundedDownToSwath;
5682 	double MaximumEffectiveCompressionLuma;
5683 	double MaximumEffectiveCompressionChroma;
5684 	double TotalZeroSizeRequestReadBandwidth;
5685 	double TotalZeroSizeCompressedReadBandwidth;
5686 	double AverageDCCZeroSizeFraction;
5687 	double AverageZeroSizeCompressionRate;
5688 	unsigned int k;
5689 
5690 	TotalZeroSizeRequestReadBandwidth = 0;
5691 	TotalZeroSizeCompressedReadBandwidth = 0;
5692 	TotalRowReadBandwidth = 0;
5693 	TotalCompressedReadBandwidth = 0;
5694 
5695 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5696 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5697 			if (DCCEnable[k] == true) {
5698 				if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5699 						|| (!IsVertical(SourceRotation[k])
5700 								&& BlockHeight256BytesY[k] > SwathHeightY[k])
5701 						|| DCCYMaxUncompressedBlock[k] < 256) {
5702 					MaximumEffectiveCompressionLuma = 2;
5703 				} else {
5704 					MaximumEffectiveCompressionLuma = 4;
5705 				}
5706 				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5707 						+ ReadBandwidthSurfaceLuma[k]
5708 								/ dml_min(NetDCCRateLuma[k],
5709 										MaximumEffectiveCompressionLuma);
5710 #ifdef __DML_VBA_DEBUG__
5711 				dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5712 						__func__, k, ReadBandwidthSurfaceLuma[k]);
5713 				dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5714 						__func__, k, NetDCCRateLuma[k]);
5715 				dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5716 						__func__, k, MaximumEffectiveCompressionLuma);
5717 #endif
5718 				TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5719 						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5720 				TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5721 						+ ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5722 								/ MaximumEffectiveCompressionLuma;
5723 
5724 				if (ReadBandwidthSurfaceChroma[k] > 0) {
5725 					if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5726 							|| (!IsVertical(SourceRotation[k])
5727 									&& BlockHeight256BytesC[k] > SwathHeightC[k])
5728 							|| DCCCMaxUncompressedBlock[k] < 256) {
5729 						MaximumEffectiveCompressionChroma = 2;
5730 					} else {
5731 						MaximumEffectiveCompressionChroma = 4;
5732 					}
5733 					TotalCompressedReadBandwidth =
5734 							TotalCompressedReadBandwidth
5735 							+ ReadBandwidthSurfaceChroma[k]
5736 							/ dml_min(NetDCCRateChroma[k],
5737 							MaximumEffectiveCompressionChroma);
5738 #ifdef __DML_VBA_DEBUG__
5739 					dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5740 							__func__, k, ReadBandwidthSurfaceChroma[k]);
5741 					dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5742 							__func__, k, NetDCCRateChroma[k]);
5743 					dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5744 							__func__, k, MaximumEffectiveCompressionChroma);
5745 #endif
5746 					TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5747 							+ ReadBandwidthSurfaceChroma[k]
5748 									* DCCFractionOfZeroSizeRequestsChroma[k];
5749 					TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5750 							+ ReadBandwidthSurfaceChroma[k]
5751 									* DCCFractionOfZeroSizeRequestsChroma[k]
5752 									/ MaximumEffectiveCompressionChroma;
5753 				}
5754 			} else {
5755 				TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5756 						+ ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5757 			}
5758 			TotalRowReadBandwidth = TotalRowReadBandwidth
5759 					+ DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5760 		}
5761 	}
5762 
5763 	AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5764 	AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5765 
5766 #ifdef __DML_VBA_DEBUG__
5767 	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5768 	dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5769 	dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5770 	dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5771 			__func__, TotalZeroSizeCompressedReadBandwidth);
5772 	dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5773 	dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5774 	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5775 	dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5776 	dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5777 	dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5778 	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5779 #endif
5780 	if (AverageDCCZeroSizeFraction == 1) {
5781 		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5782 				/ TotalZeroSizeCompressedReadBandwidth;
5783 		EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5784 				* AverageZeroSizeCompressionRate
5785 				+ ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5786 						* AverageZeroSizeCompressionRate;
5787 	} else if (AverageDCCZeroSizeFraction > 0) {
5788 		AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5789 				/ TotalZeroSizeCompressedReadBandwidth;
5790 		EffectiveCompressedBufferSize = dml_min(
5791 				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5792 				(double) MetaFIFOSizeInKEntries * 1024 * 64
5793 					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5794 					+ 1 / AverageDCCCompressionRate))
5795 					+ dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5796 					* AverageDCCCompressionRate,
5797 					((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5798 					/ (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5799 
5800 #ifdef __DML_VBA_DEBUG__
5801 		dml_print("DML::%s: min 1 = %f\n", __func__,
5802 				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5803 		dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5804 				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5805 						AverageDCCCompressionRate));
5806 		dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5807 				CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5808 		dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5809 				(AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5810 #endif
5811 	} else {
5812 		EffectiveCompressedBufferSize = dml_min(
5813 				(double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5814 				(double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5815 				+ ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5816 						* AverageDCCCompressionRate;
5817 
5818 #ifdef __DML_VBA_DEBUG__
5819 		dml_print("DML::%s: min 1 = %f\n", __func__,
5820 				CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5821 		dml_print("DML::%s: min 2 = %f\n", __func__,
5822 				MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5823 #endif
5824 	}
5825 
5826 #ifdef __DML_VBA_DEBUG__
5827 	dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5828 	dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5829 	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5830 #endif
5831 
5832 	*StutterPeriod = 0;
5833 
5834 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5835 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5836 			LinesInDETY = ((double) DETBufferSizeY[k]
5837 					+ (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5838 							* ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5839 					/ BytePerPixelDETY[k] / SwathWidthY[k];
5840 			LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5841 			DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5842 					/ VRatio[k];
5843 #ifdef __DML_VBA_DEBUG__
5844 			dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5845 			dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5846 			dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5847 			dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5848 					__func__, k, ReadBandwidthSurfaceLuma[k]);
5849 			dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5850 			dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5851 			dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5852 					__func__, k, LinesInDETYRoundedDownToSwath);
5853 			dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5854 			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5855 			dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5856 			dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5857 			dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5858 #endif
5859 
5860 			if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5861 				bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5862 
5863 				FoundCriticalSurface = true;
5864 				*StutterPeriod = DETBufferingTimeY;
5865 				FrameTimeCriticalSurface = (
5866 						isInterlaceTiming ?
5867 								dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5868 						* (double) HTotal[k] / PixelClock[k];
5869 				VActiveTimeCriticalSurface = (
5870 						isInterlaceTiming ?
5871 								dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5872 						* (double) HTotal[k] / PixelClock[k];
5873 				BytePerPixelYCriticalSurface = BytePerPixelY[k];
5874 				SwathWidthYCriticalSurface = SwathWidthY[k];
5875 				SwathHeightYCriticalSurface = SwathHeightY[k];
5876 				BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5877 				LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5878 						- (LinesInDETY - LinesInDETYRoundedDownToSwath);
5879 				DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5880 				MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5881 				doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5882 				doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5883 
5884 #ifdef __DML_VBA_DEBUG__
5885 				dml_print("DML::%s: k=%0d, FoundCriticalSurface                = %d\n",
5886 						__func__, k, FoundCriticalSurface);
5887 				dml_print("DML::%s: k=%0d, StutterPeriod                       = %f\n",
5888 						__func__, k, *StutterPeriod);
5889 				dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface         = %f\n",
5890 						__func__, k, MinTTUVBlankCriticalSurface);
5891 				dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface            = %f\n",
5892 						__func__, k, FrameTimeCriticalSurface);
5893 				dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface          = %f\n",
5894 						__func__, k, VActiveTimeCriticalSurface);
5895 				dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface        = %d\n",
5896 						__func__, k, BytePerPixelYCriticalSurface);
5897 				dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface          = %f\n",
5898 						__func__, k, SwathWidthYCriticalSurface);
5899 				dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface         = %f\n",
5900 						__func__, k, SwathHeightYCriticalSurface);
5901 				dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface  = %d\n",
5902 						__func__, k, BlockWidth256BytesYCriticalSurface);
5903 				dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface          = %d\n",
5904 						__func__, k, doublePlaneCriticalSurface);
5905 				dml_print("DML::%s: k=%0d, doublePipeCriticalSurface           = %d\n",
5906 						__func__, k, doublePipeCriticalSurface);
5907 				dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5908 						__func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5909 #endif
5910 			}
5911 		}
5912 	}
5913 
5914 	PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5915 			EffectiveCompressedBufferSize);
5916 #ifdef __DML_VBA_DEBUG__
5917 	dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5918 	dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5919 	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5920 			__func__, *StutterPeriod * TotalDataReadBandwidth);
5921 	dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5922 	dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5923 			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5924 	dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5925 	dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5926 	dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5927 	dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5928 #endif
5929 
5930 	StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5931 			/ ReturnBW
5932 			+ (*StutterPeriod * TotalDataReadBandwidth
5933 					- PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5934 			+ *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5935 #ifdef __DML_VBA_DEBUG__
5936 	dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5937 			AverageDCCCompressionRate / ReturnBW);
5938 	dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5939 			__func__, (*StutterPeriod * TotalDataReadBandwidth));
5940 	dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5941 			PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5942 	dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5943 	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5944 #endif
5945 	StutterBurstTime = dml_max(StutterBurstTime,
5946 			LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5947 					* SwathWidthYCriticalSurface / ReturnBW);
5948 
5949 #ifdef __DML_VBA_DEBUG__
5950 	dml_print("DML::%s: Time to finish residue swath=%f\n",
5951 			__func__,
5952 			LinesToFinishSwathTransferStutterCriticalSurface *
5953 			BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5954 #endif
5955 
5956 	TotalActiveWriteback = 0;
5957 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5958 		if (WritebackEnable[k])
5959 			TotalActiveWriteback = TotalActiveWriteback + 1;
5960 	}
5961 
5962 	if (TotalActiveWriteback == 0) {
5963 #ifdef __DML_VBA_DEBUG__
5964 		dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5965 		dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5966 		dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5967 		dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5968 #endif
5969 		*StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5970 				1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5971 		*Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5972 				1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5973 		*NumberOfStutterBurstsPerFrame = (
5974 				*StutterEfficiencyNotIncludingVBlank > 0 ?
5975 						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5976 		*Z8NumberOfStutterBurstsPerFrame = (
5977 				*Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5978 						dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5979 	} else {
5980 		*StutterEfficiencyNotIncludingVBlank = 0.;
5981 		*Z8StutterEfficiencyNotIncludingVBlank = 0.;
5982 		*NumberOfStutterBurstsPerFrame = 0;
5983 		*Z8NumberOfStutterBurstsPerFrame = 0;
5984 	}
5985 #ifdef __DML_VBA_DEBUG__
5986 	dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5987 	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5988 			__func__, *StutterEfficiencyNotIncludingVBlank);
5989 	dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5990 			__func__, *Z8StutterEfficiencyNotIncludingVBlank);
5991 	dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5992 	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5993 #endif
5994 
5995 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5996 		if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5997 			if (BlendingAndTiming[k] == k) {
5998 				if (TotalNumberOfActiveOTG == 0) {
5999 					doublePixelClock = PixelClock[k];
6000 					doubleHTotal = HTotal[k];
6001 					doubleVTotal = VTotal[k];
6002 				} else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
6003 						|| doubleVTotal != VTotal[k]) {
6004 					SameTiming = false;
6005 				}
6006 				TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6007 			}
6008 		}
6009 	}
6010 
6011 	if (*StutterEfficiencyNotIncludingVBlank > 0) {
6012 		LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6013 
6014 		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
6015 				&& LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
6016 			*StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
6017 						+ StutterBurstTime * VActiveTimeCriticalSurface
6018 						/ *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6019 		} else {
6020 			*StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6021 		}
6022 	} else {
6023 		*StutterEfficiency = 0;
6024 	}
6025 
6026 	if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6027 		LastZ8StutterPeriod = VActiveTimeCriticalSurface
6028 				- (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6029 		if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
6030 				MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
6031 			*Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
6032 				* VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6033 		} else {
6034 			*Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6035 		}
6036 	} else {
6037 		*Z8StutterEfficiency = 0.;
6038 	}
6039 
6040 #ifdef __DML_VBA_DEBUG__
6041 	dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6042 	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6043 	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6044 	dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6045 	dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6046 	dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6047 	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
6048 			__func__, *StutterEfficiencyNotIncludingVBlank);
6049 	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6050 #endif
6051 
6052 	SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
6053 			* dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
6054 	LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6055 	MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
6056 			- DETBufferSizeYCriticalSurface;
6057 
6058 	*DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6059 			&& doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6060 			&& (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6061 			&& (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6062 
6063 #ifdef __DML_VBA_DEBUG__
6064 	dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6065 	dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6066 	dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6067 	dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6068 #endif
6069 } // CalculateStutterEfficiency
6070 
/*
 * Derive the DET sizing values from the return buffer and ROB sizes.
 *
 * Outputs:
 *   MaxTotalDETInKByte             - 4/5 of (ConfigReturnBufferSizeInKByte +
 *                                    ROBBufferSizeInKByte), passed through
 *                                    dml_ceil() with a granularity of 64.
 *   nomDETInKByte                  - MaxTotalDETInKByte / MaxNumDPP passed
 *                                    through dml_floor() with a granularity
 *                                    of 64, unless the override is enabled,
 *                                    in which case the override value is
 *                                    stored as-is.
 *   MinCompressedBufferSizeInKByte - ConfigReturnBufferSizeInKByte minus
 *                                    MaxTotalDETInKByte.
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int    ConfigReturnBufferSizeInKByte,
		unsigned int    ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue,  // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	const double buffer_total_kbyte = (double) ConfigReturnBufferSizeInKByte
			+ (double) ROBBufferSizeInKByte;

	/* Each later output is derived from the value just stored through the pointer. */
	*MaxTotalDETInKByte = dml_ceil(buffer_total_kbyte * 4.0 / 5.0, 64);
	*nomDETInKByte = dml_floor((double) *MaxTotalDETInKByte / (double) MaxNumDPP, 64);
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* Nothing more to do unless the caller asked for an explicit DET size. */
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} // CalculateMaxDETAndMinCompressedBufferSize
6107 
/*
 * Check whether the summed active bandwidth of all surfaces fits in ReturnBW.
 *
 * For every surface the luma, chroma and cursor bandwidths are scaled by their
 * urgent burst factors, and the meta-row and dpte-row bandwidths are scaled by
 * the surface's DPP count; all of these are added into one running total.
 *
 * Returns true only when that total is <= ReturnBW and no entry of
 * NotUrgentLatencyHiding[] is set.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int surf;
	bool hiding_shortfall = false;
	bool bandwidth_ok;
	double demand = 0;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		if (NotUrgentLatencyHiding[surf])
			hiding_shortfall = true;

		/* Terms are accumulated one at a time, left to right. */
		demand += ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf];
		demand += ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf];
		demand += cursor_bw[surf] * UrgentBurstFactorCursor[surf];
		demand += NumberOfDPP[surf] * meta_row_bandwidth[surf];
		demand += NumberOfDPP[surf] * dpte_row_bandwidth[surf];
	}

	bandwidth_ok = (demand <= ReturnBW) && !hiding_shortfall;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding        = %d\n", __func__, hiding_shortfall);
	dml_print("DML::%s: VActiveBandwith                     = %f\n", __func__, demand);
	dml_print("DML::%s: ReturnBW                            = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, bandwidth_ok);
#endif
	return bandwidth_ok;
}
6146 
dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,double ReturnBW,bool NotUrgentLatencyHiding[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double cursor_bw[],double meta_row_bandwidth[],double dpte_row_bandwidth[],double cursor_bw_pre[],double prefetch_vmrow_bw[],unsigned int NumberOfDPP[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double PrefetchBW[],double VRatio[],double MaxVRatioPre,double * MaxPrefetchBandwidth,double * FractionOfUrgentBandwidth,bool * PrefetchBandwidthSupport)6147 void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
6148 		double ReturnBW,
6149 		bool NotUrgentLatencyHiding[],
6150 		double ReadBandwidthLuma[],
6151 		double ReadBandwidthChroma[],
6152 		double PrefetchBandwidthLuma[],
6153 		double PrefetchBandwidthChroma[],
6154 		double cursor_bw[],
6155 		double meta_row_bandwidth[],
6156 		double dpte_row_bandwidth[],
6157 		double cursor_bw_pre[],
6158 		double prefetch_vmrow_bw[],
6159 		unsigned int NumberOfDPP[],
6160 		double UrgentBurstFactorLuma[],
6161 		double UrgentBurstFactorChroma[],
6162 		double UrgentBurstFactorCursor[],
6163 		double UrgentBurstFactorLumaPre[],
6164 		double UrgentBurstFactorChromaPre[],
6165 		double UrgentBurstFactorCursorPre[],
6166 		double PrefetchBW[],
6167 		double VRatio[],
6168 		double MaxVRatioPre,
6169 
6170 		/* output */
6171 		double  *MaxPrefetchBandwidth,
6172 		double  *FractionOfUrgentBandwidth,
6173 		bool *PrefetchBandwidthSupport)
6174 {
6175 	unsigned int k;
6176 	double ActiveBandwidthPerSurface;
6177 	bool NotEnoughUrgentLatencyHiding = false;
6178 	double TotalActiveBandwidth = 0;
6179 	double TotalPrefetchBandwidth = 0;
6180 
6181 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6182 		if (NotUrgentLatencyHiding[k]) {
6183 			NotEnoughUrgentLatencyHiding = true;
6184 		}
6185 	}
6186 
6187 	*MaxPrefetchBandwidth = 0;
6188 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6189 		ActiveBandwidthPerSurface = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]);
6190 
6191 		TotalActiveBandwidth += ActiveBandwidthPerSurface;
6192 
6193 		TotalPrefetchBandwidth = TotalPrefetchBandwidth + PrefetchBW[k] * VRatio[k];
6194 
6195 		*MaxPrefetchBandwidth = *MaxPrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6196 				ActiveBandwidthPerSurface,
6197 				NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6198 	}
6199 
6200 	if (MaxVRatioPre == __DML_MAX_VRATIO_PRE__)
6201 		*PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && (TotalPrefetchBandwidth <= TotalActiveBandwidth * __DML_MAX_BW_RATIO_PRE__) && !NotEnoughUrgentLatencyHiding;
6202 	else
6203 		*PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
6204 
6205 	*FractionOfUrgentBandwidth = *MaxPrefetchBandwidth / ReturnBW;
6206 }
6207 
/*
 * Compute the bandwidth left over from ReturnBW once every surface's demand
 * has been subtracted.
 *
 * For each surface the demand is the larger (dml_max) of:
 *   - luma, chroma and cursor bandwidth, each scaled by its urgent burst factor
 *   - NumberOfDPP * (prefetch luma and chroma scaled by their "Pre" burst
 *     factors) plus the pre-scaled cursor bandwidth
 *
 * Returns ReturnBW minus the sum of those per-surface demands.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	unsigned int surf;
	double remaining_bw = ReturnBW;

	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		double active_demand;
		double prefetch_demand;

		active_demand = ReadBandwidthLuma[surf] * UrgentBurstFactorLuma[surf]
				+ ReadBandwidthChroma[surf] * UrgentBurstFactorChroma[surf]
				+ cursor_bw[surf] * UrgentBurstFactorCursor[surf];

		prefetch_demand = NumberOfDPP[surf]
				* (PrefetchBandwidthLuma[surf] * UrgentBurstFactorLumaPre[surf]
					+ PrefetchBandwidthChroma[surf] * UrgentBurstFactorChromaPre[surf])
				+ cursor_bw_pre[surf] * UrgentBurstFactorCursorPre[surf];

		remaining_bw -= dml_max(active_demand, prefetch_demand);
	}

	return remaining_bw;
}
6234 
dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,double ReturnBW,enum immediate_flip_requirement ImmediateFlipRequirement[],double final_flip_bw[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double cursor_bw[],double meta_row_bandwidth[],double dpte_row_bandwidth[],double cursor_bw_pre[],double prefetch_vmrow_bw[],unsigned int NumberOfDPP[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double * TotalBandwidth,double * FractionOfUrgentBandwidth,bool * ImmediateFlipBandwidthSupport)6235 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6236 		double ReturnBW,
6237 		enum immediate_flip_requirement ImmediateFlipRequirement[],
6238 		double final_flip_bw[],
6239 		double ReadBandwidthLuma[],
6240 		double ReadBandwidthChroma[],
6241 		double PrefetchBandwidthLuma[],
6242 		double PrefetchBandwidthChroma[],
6243 		double cursor_bw[],
6244 		double meta_row_bandwidth[],
6245 		double dpte_row_bandwidth[],
6246 		double cursor_bw_pre[],
6247 		double prefetch_vmrow_bw[],
6248 		unsigned int NumberOfDPP[],
6249 		double UrgentBurstFactorLuma[],
6250 		double UrgentBurstFactorChroma[],
6251 		double UrgentBurstFactorCursor[],
6252 		double UrgentBurstFactorLumaPre[],
6253 		double UrgentBurstFactorChromaPre[],
6254 		double UrgentBurstFactorCursorPre[],
6255 
6256 		/* output */
6257 		double  *TotalBandwidth,
6258 		double  *FractionOfUrgentBandwidth,
6259 		bool *ImmediateFlipBandwidthSupport)
6260 {
6261 	unsigned int k;
6262 	*TotalBandwidth = 0;
6263 	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6264 		if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6265 			*TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6266 					NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6267 					NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6268 		} else {
6269 			*TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6270 					NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6271 					NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6272 		}
6273 	}
6274 	*ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6275 	*FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6276 }
6277 
dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,double ReturnBW,double UrgentLatency,unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],double BytePerPixelInDETY[],double BytePerPixelInDETC[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int NumOfDPP[],unsigned int HTotal[],double PixelClock[],double VRatioY[],double VRatioC[],enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],enum unbounded_requesting_policy UseUnboundedRequesting)6278 bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
6279 		double ReturnBW,
6280 		double UrgentLatency,
6281 		unsigned int SwathHeightY[],
6282 		unsigned int SwathHeightC[],
6283 		unsigned int SwathWidthY[],
6284 		unsigned int SwathWidthC[],
6285 		double  BytePerPixelInDETY[],
6286 		double  BytePerPixelInDETC[],
6287 		unsigned int    DETBufferSizeY[],
6288 		unsigned int    DETBufferSizeC[],
6289 		unsigned int	NumOfDPP[],
6290 		unsigned int	HTotal[],
6291 		double	PixelClock[],
6292 		double	VRatioY[],
6293 		double	VRatioC[],
6294 		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
6295 		enum unbounded_requesting_policy UseUnboundedRequesting)
6296 {
6297 	int k;
6298 	double SwathSizeAllSurfaces = 0;
6299 	double SwathSizeAllSurfacesInFetchTimeUs;
6300 	double DETSwathLatencyHidingUs;
6301 	double DETSwathLatencyHidingYUs;
6302 	double DETSwathLatencyHidingCUs;
6303 	double SwathSizePerSurfaceY[DC__NUM_DPP__MAX];
6304 	double SwathSizePerSurfaceC[DC__NUM_DPP__MAX];
6305 	bool NotEnoughDETSwathFillLatencyHiding = false;
6306 
6307 	if (UseUnboundedRequesting == dm_unbounded_requesting)
6308 		return false;
6309 
6310 	/* calculate sum of single swath size for all pipes in bytes */
6311 	for (k = 0; k < NumberOfActiveSurfaces; k++) {
6312 		SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k];
6313 
6314 		if (SwathHeightC[k] != 0)
6315 			SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k];
6316 		else
6317 			SwathSizePerSurfaceC[k] = 0;
6318 
6319 		SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k];
6320 	}
6321 
6322 	SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency;
6323 
6324 	/* ensure all DET - 1 swath can hide a fetch for all surfaces */
6325 	for (k = 0; k < NumberOfActiveSurfaces; k++) {
6326 		double LineTime = HTotal[k] / PixelClock[k];
6327 
6328 		/* only care if surface is not phantom */
6329 		if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
6330 			DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime;
6331 
6332 			if (SwathHeightC[k] != 0) {
6333 				DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime;
6334 
6335 				DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs);
6336 			} else {
6337 				DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs;
6338 			}
6339 
6340 			/* DET must be able to hide time to fetch 1 swath for each surface */
6341 			if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) {
6342 				NotEnoughDETSwathFillLatencyHiding = true;
6343 				break;
6344 			}
6345 		}
6346 	}
6347 
6348 	return NotEnoughDETSwathFillLatencyHiding;
6349 }
6350