1 /*
2 * Copyright 2022 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25 #include "display_mode_vba_util_32.h"
26 #include "../dml_inline_defs.h"
27 #include "display_mode_vba_32.h"
28 #include "../display_mode_lib.h"
29
30 #define DCN32_MAX_FMT_420_BUFFER_WIDTH 4096
31
dml32_dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)32 unsigned int dml32_dscceComputeDelay(
33 unsigned int bpc,
34 double BPP,
35 unsigned int sliceWidth,
36 unsigned int numSlices,
37 enum output_format_class pixelFormat,
38 enum output_encoder_class Output)
39 {
40 // valid bpc = source bits per component in the set of {8, 10, 12}
41 // valid bpp = increments of 1/16 of a bit
42 // min = 6/7/8 in N420/N422/444, respectively
43 // max = such that compression is 1:1
44 //valid sliceWidth = number of pixels per slice line,
45 // must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
46 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
47 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
48
49 // fixed value
50 unsigned int rcModelSize = 8192;
51
52 // N422/N420 operate at 2 pixels per clock
53 unsigned int pixelsPerClock, lstall, D, initalXmitDelay, w, s, ix, wx, p, l0, a, ax, L,
54 Delay, pixels;
55
56 if (pixelFormat == dm_420)
57 pixelsPerClock = 2;
58 else if (pixelFormat == dm_n422)
59 pixelsPerClock = 2;
60 // #all other modes operate at 1 pixel per clock
61 else
62 pixelsPerClock = 1;
63
64 //initial transmit delay as per PPS
65 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
66
67 //compute ssm delay
68 if (bpc == 8)
69 D = 81;
70 else if (bpc == 10)
71 D = 89;
72 else
73 D = 113;
74
75 //divide by pixel per cycle to compute slice width as seen by DSC
76 w = sliceWidth / pixelsPerClock;
77
78 //422 mode has an additional cycle of delay
79 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
80 s = 0;
81 else
82 s = 1;
83
84 //main calculation for the dscce
85 ix = initalXmitDelay + 45;
86 wx = (w + 2) / 3;
87 p = 3 * wx - w;
88 l0 = ix / w;
89 a = ix + p * l0;
90 ax = (a + 2) / 3 + D + 6 + 1;
91 L = (ax + wx - 1) / wx;
92 if ((ix % w) == 0 && p != 0)
93 lstall = 1;
94 else
95 lstall = 0;
96 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
97
98 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
99 pixels = Delay * 3 * pixelsPerClock;
100
101 #ifdef __DML_VBA_DEBUG__
102 dml_print("DML::%s: bpc: %d\n", __func__, bpc);
103 dml_print("DML::%s: BPP: %f\n", __func__, BPP);
104 dml_print("DML::%s: sliceWidth: %d\n", __func__, sliceWidth);
105 dml_print("DML::%s: numSlices: %d\n", __func__, numSlices);
106 dml_print("DML::%s: pixelFormat: %d\n", __func__, pixelFormat);
107 dml_print("DML::%s: Output: %d\n", __func__, Output);
108 dml_print("DML::%s: pixels: %d\n", __func__, pixels);
109 #endif
110
111 return pixels;
112 }
113
dml32_dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)114 unsigned int dml32_dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
115 {
116 unsigned int Delay = 0;
117
118 if (pixelFormat == dm_420) {
119 // sfr
120 Delay = Delay + 2;
121 // dsccif
122 Delay = Delay + 0;
123 // dscc - input deserializer
124 Delay = Delay + 3;
125 // dscc gets pixels every other cycle
126 Delay = Delay + 2;
127 // dscc - input cdc fifo
128 Delay = Delay + 12;
129 // dscc gets pixels every other cycle
130 Delay = Delay + 13;
131 // dscc - cdc uncertainty
132 Delay = Delay + 2;
133 // dscc - output cdc fifo
134 Delay = Delay + 7;
135 // dscc gets pixels every other cycle
136 Delay = Delay + 3;
137 // dscc - cdc uncertainty
138 Delay = Delay + 2;
139 // dscc - output serializer
140 Delay = Delay + 1;
141 // sft
142 Delay = Delay + 1;
143 } else if (pixelFormat == dm_n422 || (pixelFormat != dm_444)) {
144 // sfr
145 Delay = Delay + 2;
146 // dsccif
147 Delay = Delay + 1;
148 // dscc - input deserializer
149 Delay = Delay + 5;
150 // dscc - input cdc fifo
151 Delay = Delay + 25;
152 // dscc - cdc uncertainty
153 Delay = Delay + 2;
154 // dscc - output cdc fifo
155 Delay = Delay + 10;
156 // dscc - cdc uncertainty
157 Delay = Delay + 2;
158 // dscc - output serializer
159 Delay = Delay + 1;
160 // sft
161 Delay = Delay + 1;
162 } else {
163 // sfr
164 Delay = Delay + 2;
165 // dsccif
166 Delay = Delay + 0;
167 // dscc - input deserializer
168 Delay = Delay + 3;
169 // dscc - input cdc fifo
170 Delay = Delay + 12;
171 // dscc - cdc uncertainty
172 Delay = Delay + 2;
173 // dscc - output cdc fifo
174 Delay = Delay + 7;
175 // dscc - output serializer
176 Delay = Delay + 1;
177 // dscc - cdc uncertainty
178 Delay = Delay + 2;
179 // sft
180 Delay = Delay + 1;
181 }
182
183 return Delay;
184 }
185
186
IsVertical(enum dm_rotation_angle Scan)187 bool IsVertical(enum dm_rotation_angle Scan)
188 {
189 bool is_vert = false;
190
191 if (Scan == dm_rotation_90 || Scan == dm_rotation_90m || Scan == dm_rotation_270 || Scan == dm_rotation_270m)
192 is_vert = true;
193 else
194 is_vert = false;
195 return is_vert;
196 }
197
dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(double HRatio,double HRatioChroma,double VRatio,double VRatioChroma,double MaxDCHUBToPSCLThroughput,double MaxPSCLToLBThroughput,double PixelClock,enum source_format_class SourcePixelFormat,unsigned int HTaps,unsigned int HTapsChroma,unsigned int VTaps,unsigned int VTapsChroma,double * PSCL_THROUGHPUT,double * PSCL_THROUGHPUT_CHROMA,double * DPPCLKUsingSingleDPP)198 void dml32_CalculateSinglePipeDPPCLKAndSCLThroughput(
199 double HRatio,
200 double HRatioChroma,
201 double VRatio,
202 double VRatioChroma,
203 double MaxDCHUBToPSCLThroughput,
204 double MaxPSCLToLBThroughput,
205 double PixelClock,
206 enum source_format_class SourcePixelFormat,
207 unsigned int HTaps,
208 unsigned int HTapsChroma,
209 unsigned int VTaps,
210 unsigned int VTapsChroma,
211
212 /* output */
213 double *PSCL_THROUGHPUT,
214 double *PSCL_THROUGHPUT_CHROMA,
215 double *DPPCLKUsingSingleDPP)
216 {
217 double DPPCLKUsingSingleDPPLuma;
218 double DPPCLKUsingSingleDPPChroma;
219
220 if (HRatio > 1) {
221 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio /
222 dml_ceil((double) HTaps / 6.0, 1.0));
223 } else {
224 *PSCL_THROUGHPUT = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
225 }
226
227 DPPCLKUsingSingleDPPLuma = PixelClock * dml_max3(VTaps / 6 * dml_min(1, HRatio), HRatio * VRatio /
228 *PSCL_THROUGHPUT, 1);
229
230 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
231 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
232
233 if ((SourcePixelFormat != dm_420_8 && SourcePixelFormat != dm_420_10 && SourcePixelFormat != dm_420_12 &&
234 SourcePixelFormat != dm_rgbe_alpha)) {
235 *PSCL_THROUGHPUT_CHROMA = 0;
236 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
237 } else {
238 if (HRatioChroma > 1) {
239 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput *
240 HRatioChroma / dml_ceil((double) HTapsChroma / 6.0, 1.0));
241 } else {
242 *PSCL_THROUGHPUT_CHROMA = dml_min(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
243 }
244 DPPCLKUsingSingleDPPChroma = PixelClock * dml_max3(VTapsChroma / 6 * dml_min(1, HRatioChroma),
245 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
246 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
247 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
248 *DPPCLKUsingSingleDPP = dml_max(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
249 }
250 }
251
dml32_CalculateBytePerPixelAndBlockSizes(enum source_format_class SourcePixelFormat,enum dm_swizzle_mode SurfaceTiling,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC,unsigned int * MacroTileHeightY,unsigned int * MacroTileHeightC,unsigned int * MacroTileWidthY,unsigned int * MacroTileWidthC)252 void dml32_CalculateBytePerPixelAndBlockSizes(
253 enum source_format_class SourcePixelFormat,
254 enum dm_swizzle_mode SurfaceTiling,
255
256 /* Output */
257 unsigned int *BytePerPixelY,
258 unsigned int *BytePerPixelC,
259 double *BytePerPixelDETY,
260 double *BytePerPixelDETC,
261 unsigned int *BlockHeight256BytesY,
262 unsigned int *BlockHeight256BytesC,
263 unsigned int *BlockWidth256BytesY,
264 unsigned int *BlockWidth256BytesC,
265 unsigned int *MacroTileHeightY,
266 unsigned int *MacroTileHeightC,
267 unsigned int *MacroTileWidthY,
268 unsigned int *MacroTileWidthC)
269 {
270 if (SourcePixelFormat == dm_444_64) {
271 *BytePerPixelDETY = 8;
272 *BytePerPixelDETC = 0;
273 *BytePerPixelY = 8;
274 *BytePerPixelC = 0;
275 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) {
276 *BytePerPixelDETY = 4;
277 *BytePerPixelDETC = 0;
278 *BytePerPixelY = 4;
279 *BytePerPixelC = 0;
280 } else if (SourcePixelFormat == dm_444_16) {
281 *BytePerPixelDETY = 2;
282 *BytePerPixelDETC = 0;
283 *BytePerPixelY = 2;
284 *BytePerPixelC = 0;
285 } else if (SourcePixelFormat == dm_444_8) {
286 *BytePerPixelDETY = 1;
287 *BytePerPixelDETC = 0;
288 *BytePerPixelY = 1;
289 *BytePerPixelC = 0;
290 } else if (SourcePixelFormat == dm_rgbe_alpha) {
291 *BytePerPixelDETY = 4;
292 *BytePerPixelDETC = 1;
293 *BytePerPixelY = 4;
294 *BytePerPixelC = 1;
295 } else if (SourcePixelFormat == dm_420_8) {
296 *BytePerPixelDETY = 1;
297 *BytePerPixelDETC = 2;
298 *BytePerPixelY = 1;
299 *BytePerPixelC = 2;
300 } else if (SourcePixelFormat == dm_420_12) {
301 *BytePerPixelDETY = 2;
302 *BytePerPixelDETC = 4;
303 *BytePerPixelY = 2;
304 *BytePerPixelC = 4;
305 } else {
306 *BytePerPixelDETY = 4.0 / 3;
307 *BytePerPixelDETC = 8.0 / 3;
308 *BytePerPixelY = 2;
309 *BytePerPixelC = 4;
310 }
311 #ifdef __DML_VBA_DEBUG__
312 dml_print("DML::%s: SourcePixelFormat = %d\n", __func__, SourcePixelFormat);
313 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
314 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
315 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, *BytePerPixelY);
316 dml_print("DML::%s: BytePerPixelC = %d\n", __func__, *BytePerPixelC);
317 #endif
318 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32
319 || SourcePixelFormat == dm_444_16
320 || SourcePixelFormat == dm_444_8
321 || SourcePixelFormat == dm_mono_16
322 || SourcePixelFormat == dm_mono_8
323 || SourcePixelFormat == dm_rgbe)) {
324 if (SurfaceTiling == dm_sw_linear)
325 *BlockHeight256BytesY = 1;
326 else if (SourcePixelFormat == dm_444_64)
327 *BlockHeight256BytesY = 4;
328 else if (SourcePixelFormat == dm_444_8)
329 *BlockHeight256BytesY = 16;
330 else
331 *BlockHeight256BytesY = 8;
332
333 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
334 *BlockHeight256BytesC = 0;
335 *BlockWidth256BytesC = 0;
336 } else {
337 if (SurfaceTiling == dm_sw_linear) {
338 *BlockHeight256BytesY = 1;
339 *BlockHeight256BytesC = 1;
340 } else if (SourcePixelFormat == dm_rgbe_alpha) {
341 *BlockHeight256BytesY = 8;
342 *BlockHeight256BytesC = 16;
343 } else if (SourcePixelFormat == dm_420_8) {
344 *BlockHeight256BytesY = 16;
345 *BlockHeight256BytesC = 8;
346 } else {
347 *BlockHeight256BytesY = 8;
348 *BlockHeight256BytesC = 8;
349 }
350 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
351 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
352 }
353 #ifdef __DML_VBA_DEBUG__
354 dml_print("DML::%s: BlockWidth256BytesY = %d\n", __func__, *BlockWidth256BytesY);
355 dml_print("DML::%s: BlockHeight256BytesY = %d\n", __func__, *BlockHeight256BytesY);
356 dml_print("DML::%s: BlockWidth256BytesC = %d\n", __func__, *BlockWidth256BytesC);
357 dml_print("DML::%s: BlockHeight256BytesC = %d\n", __func__, *BlockHeight256BytesC);
358 #endif
359
360 if (SurfaceTiling == dm_sw_linear) {
361 *MacroTileHeightY = *BlockHeight256BytesY;
362 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
363 *MacroTileHeightC = *BlockHeight256BytesC;
364 if (*MacroTileHeightC == 0)
365 *MacroTileWidthC = 0;
366 else
367 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
368 } else if (SurfaceTiling == dm_sw_64kb_d || SurfaceTiling == dm_sw_64kb_d_t ||
369 SurfaceTiling == dm_sw_64kb_d_x || SurfaceTiling == dm_sw_64kb_r_x) {
370 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
371 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
372 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
373 if (*MacroTileHeightC == 0)
374 *MacroTileWidthC = 0;
375 else
376 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
377 } else {
378 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
379 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
380 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
381 if (*MacroTileHeightC == 0)
382 *MacroTileWidthC = 0;
383 else
384 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
385 }
386
387 #ifdef __DML_VBA_DEBUG__
388 dml_print("DML::%s: MacroTileWidthY = %d\n", __func__, *MacroTileWidthY);
389 dml_print("DML::%s: MacroTileHeightY = %d\n", __func__, *MacroTileHeightY);
390 dml_print("DML::%s: MacroTileWidthC = %d\n", __func__, *MacroTileWidthC);
391 dml_print("DML::%s: MacroTileHeightC = %d\n", __func__, *MacroTileHeightC);
392 #endif
393 } // CalculateBytePerPixelAndBlockSizes
394
dml32_CalculateSwathAndDETConfiguration(unsigned int DETSizeOverride[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],unsigned int ConfigReturnBufferSizeInKByte,unsigned int MaxTotalDETInKByte,unsigned int MinCompressedBufferSizeInKByte,double ForceSingleDPP,unsigned int NumberOfActiveSurfaces,unsigned int nomDETInKByte,enum unbounded_requesting_policy UseUnboundedRequestingFinal,bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,unsigned int PixelChunkSizeKBytes,unsigned int ROBSizeKBytes,unsigned int CompressedBufferSegmentSizeInkByteFinal,enum output_encoder_class Output[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double MaximumSwathWidthLuma[],double MaximumSwathWidthChroma[],enum dm_rotation_angle SourceRotation[],bool ViewportStationary[],enum source_format_class SourcePixelFormat[],enum dm_swizzle_mode SurfaceTiling[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int ViewportXStart[],unsigned int ViewportYStart[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],enum odm_combine_mode ODMMode[],unsigned int BlendingAndTiming[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],double BytePerPixDETY[],double BytePerPixDETC[],unsigned int HActive[],double HRatio[],double HRatioChroma[],unsigned int DPPPerSurface[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],double SwathWidth[],double SwathWidthChroma[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int DETBufferSizeInKByte[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],bool * UnboundedRequestEnabled,unsigned int * CompressedBufferSizeInkByte,unsigned int * CompBufReservedSpaceKBytes,bool * 
CompBufReservedSpaceNeedAdjustment,bool ViewportSizeSupportPerSurface[],bool * ViewportSizeSupport)395 void dml32_CalculateSwathAndDETConfiguration(
396 unsigned int DETSizeOverride[],
397 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
398 unsigned int ConfigReturnBufferSizeInKByte,
399 unsigned int MaxTotalDETInKByte,
400 unsigned int MinCompressedBufferSizeInKByte,
401 double ForceSingleDPP,
402 unsigned int NumberOfActiveSurfaces,
403 unsigned int nomDETInKByte,
404 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
405 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment,
406 unsigned int PixelChunkSizeKBytes,
407 unsigned int ROBSizeKBytes,
408 unsigned int CompressedBufferSegmentSizeInkByteFinal,
409 enum output_encoder_class Output[],
410 double ReadBandwidthLuma[],
411 double ReadBandwidthChroma[],
412 double MaximumSwathWidthLuma[],
413 double MaximumSwathWidthChroma[],
414 enum dm_rotation_angle SourceRotation[],
415 bool ViewportStationary[],
416 enum source_format_class SourcePixelFormat[],
417 enum dm_swizzle_mode SurfaceTiling[],
418 unsigned int ViewportWidth[],
419 unsigned int ViewportHeight[],
420 unsigned int ViewportXStart[],
421 unsigned int ViewportYStart[],
422 unsigned int ViewportXStartC[],
423 unsigned int ViewportYStartC[],
424 unsigned int SurfaceWidthY[],
425 unsigned int SurfaceWidthC[],
426 unsigned int SurfaceHeightY[],
427 unsigned int SurfaceHeightC[],
428 unsigned int Read256BytesBlockHeightY[],
429 unsigned int Read256BytesBlockHeightC[],
430 unsigned int Read256BytesBlockWidthY[],
431 unsigned int Read256BytesBlockWidthC[],
432 enum odm_combine_mode ODMMode[],
433 unsigned int BlendingAndTiming[],
434 unsigned int BytePerPixY[],
435 unsigned int BytePerPixC[],
436 double BytePerPixDETY[],
437 double BytePerPixDETC[],
438 unsigned int HActive[],
439 double HRatio[],
440 double HRatioChroma[],
441 unsigned int DPPPerSurface[],
442
443 /* Output */
444 unsigned int swath_width_luma_ub[],
445 unsigned int swath_width_chroma_ub[],
446 double SwathWidth[],
447 double SwathWidthChroma[],
448 unsigned int SwathHeightY[],
449 unsigned int SwathHeightC[],
450 unsigned int DETBufferSizeInKByte[],
451 unsigned int DETBufferSizeY[],
452 unsigned int DETBufferSizeC[],
453 bool *UnboundedRequestEnabled,
454 unsigned int *CompressedBufferSizeInkByte,
455 unsigned int *CompBufReservedSpaceKBytes,
456 bool *CompBufReservedSpaceNeedAdjustment,
457 bool ViewportSizeSupportPerSurface[],
458 bool *ViewportSizeSupport)
459 {
460 unsigned int MaximumSwathHeightY[DC__NUM_DPP__MAX];
461 unsigned int MaximumSwathHeightC[DC__NUM_DPP__MAX];
462 unsigned int RoundedUpMaxSwathSizeBytesY[DC__NUM_DPP__MAX];
463 unsigned int RoundedUpMaxSwathSizeBytesC[DC__NUM_DPP__MAX];
464 unsigned int RoundedUpSwathSizeBytesY;
465 unsigned int RoundedUpSwathSizeBytesC;
466 double SwathWidthdoubleDPP[DC__NUM_DPP__MAX];
467 double SwathWidthdoubleDPPChroma[DC__NUM_DPP__MAX];
468 unsigned int k;
469 unsigned int TotalActiveDPP = 0;
470 bool NoChromaSurfaces = true;
471 unsigned int DETBufferSizeInKByteForSwathCalculation;
472
473 #ifdef __DML_VBA_DEBUG__
474 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
475 dml_print("DML::%s: ROBSizeKBytes = %d\n", __func__, ROBSizeKBytes);
476 dml_print("DML::%s: PixelChunkSizeKBytes = %d\n", __func__, PixelChunkSizeKBytes);
477 #endif
478 dml32_CalculateSwathWidth(ForceSingleDPP,
479 NumberOfActiveSurfaces,
480 SourcePixelFormat,
481 SourceRotation,
482 ViewportStationary,
483 ViewportWidth,
484 ViewportHeight,
485 ViewportXStart,
486 ViewportYStart,
487 ViewportXStartC,
488 ViewportYStartC,
489 SurfaceWidthY,
490 SurfaceWidthC,
491 SurfaceHeightY,
492 SurfaceHeightC,
493 ODMMode,
494 BytePerPixY,
495 BytePerPixC,
496 Read256BytesBlockHeightY,
497 Read256BytesBlockHeightC,
498 Read256BytesBlockWidthY,
499 Read256BytesBlockWidthC,
500 BlendingAndTiming,
501 HActive,
502 HRatio,
503 DPPPerSurface,
504
505 /* Output */
506 SwathWidthdoubleDPP,
507 SwathWidthdoubleDPPChroma,
508 SwathWidth,
509 SwathWidthChroma,
510 MaximumSwathHeightY,
511 MaximumSwathHeightC,
512 swath_width_luma_ub,
513 swath_width_chroma_ub);
514
515 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
516 RoundedUpMaxSwathSizeBytesY[k] = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
517 RoundedUpMaxSwathSizeBytesC[k] = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
518 #ifdef __DML_VBA_DEBUG__
519 dml_print("DML::%s: k=%0d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
520 dml_print("DML::%s: k=%0d swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
521 dml_print("DML::%s: k=%0d BytePerPixDETY = %f\n", __func__, k, BytePerPixDETY[k]);
522 dml_print("DML::%s: k=%0d MaximumSwathHeightY = %d\n", __func__, k, MaximumSwathHeightY[k]);
523 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
524 RoundedUpMaxSwathSizeBytesY[k]);
525 dml_print("DML::%s: k=%0d swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
526 dml_print("DML::%s: k=%0d BytePerPixDETC = %f\n", __func__, k, BytePerPixDETC[k]);
527 dml_print("DML::%s: k=%0d MaximumSwathHeightC = %d\n", __func__, k, MaximumSwathHeightC[k]);
528 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
529 RoundedUpMaxSwathSizeBytesC[k]);
530 #endif
531
532 if (SourcePixelFormat[k] == dm_420_10) {
533 RoundedUpMaxSwathSizeBytesY[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesY[k], 256);
534 RoundedUpMaxSwathSizeBytesC[k] = dml_ceil((unsigned int) RoundedUpMaxSwathSizeBytesC[k], 256);
535 }
536 }
537
538 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
539 TotalActiveDPP = TotalActiveDPP + (ForceSingleDPP ? 1 : DPPPerSurface[k]);
540 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
541 SourcePixelFormat[k] == dm_420_12 || SourcePixelFormat[k] == dm_rgbe_alpha) {
542 NoChromaSurfaces = false;
543 }
544 }
545
546 // By default, just set the reserved space to 2 pixel chunks size
547 *CompBufReservedSpaceKBytes = PixelChunkSizeKBytes * 2;
548
549 // if unbounded req is enabled, program reserved space such that the ROB will not hold more than 8 swaths worth of data
550 // - assume worst-case compression rate of 4. [ROB size - 8 * swath_size / max_compression ratio]
551 // - assume for "narrow" vp case in which the ROB can fit 8 swaths, the DET should be big enough to do full size req
552 *CompBufReservedSpaceNeedAdjustment = ((int) ROBSizeKBytes - (int) *CompBufReservedSpaceKBytes) > (int) (RoundedUpMaxSwathSizeBytesY[0]/512);
553
554 if (*CompBufReservedSpaceNeedAdjustment == 1) {
555 *CompBufReservedSpaceKBytes = ROBSizeKBytes - RoundedUpMaxSwathSizeBytesY[0]/512;
556 }
557
558 #ifdef __DML_VBA_DEBUG__
559 dml_print("DML::%s: CompBufReservedSpaceKBytes = %d\n", __func__, *CompBufReservedSpaceKBytes);
560 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, *CompBufReservedSpaceNeedAdjustment);
561 #endif
562
563 *UnboundedRequestEnabled = dml32_UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaSurfaces, Output[0], SurfaceTiling[0], *CompBufReservedSpaceNeedAdjustment, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
564
565 dml32_CalculateDETBufferSize(DETSizeOverride,
566 UseMALLForPStateChange,
567 ForceSingleDPP,
568 NumberOfActiveSurfaces,
569 *UnboundedRequestEnabled,
570 nomDETInKByte,
571 MaxTotalDETInKByte,
572 ConfigReturnBufferSizeInKByte,
573 MinCompressedBufferSizeInKByte,
574 CompressedBufferSegmentSizeInkByteFinal,
575 SourcePixelFormat,
576 ReadBandwidthLuma,
577 ReadBandwidthChroma,
578 RoundedUpMaxSwathSizeBytesY,
579 RoundedUpMaxSwathSizeBytesC,
580 DPPPerSurface,
581
582 /* Output */
583 DETBufferSizeInKByte, // per hubp pipe
584 CompressedBufferSizeInkByte);
585
586 #ifdef __DML_VBA_DEBUG__
587 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
588 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
589 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
590 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
591 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
592 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
593 #endif
594
595 *ViewportSizeSupport = true;
596 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
597
598 DETBufferSizeInKByteForSwathCalculation = (UseMALLForPStateChange[k] ==
599 dm_use_mall_pstate_change_phantom_pipe ? 1024 : DETBufferSizeInKByte[k]);
600 #ifdef __DML_VBA_DEBUG__
601 dml_print("DML::%s: k=%0d DETBufferSizeInKByteForSwathCalculation = %d\n", __func__, k,
602 DETBufferSizeInKByteForSwathCalculation);
603 #endif
604
605 if (RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] <=
606 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
607 SwathHeightY[k] = MaximumSwathHeightY[k];
608 SwathHeightC[k] = MaximumSwathHeightC[k];
609 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
610 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
611 } else if (RoundedUpMaxSwathSizeBytesY[k] >= 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
612 RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] <=
613 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
614 SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
615 SwathHeightC[k] = MaximumSwathHeightC[k];
616 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
617 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k];
618 } else if (RoundedUpMaxSwathSizeBytesY[k] < 1.5 * RoundedUpMaxSwathSizeBytesC[k] &&
619 RoundedUpMaxSwathSizeBytesY[k] + RoundedUpMaxSwathSizeBytesC[k] / 2 <=
620 DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
621 SwathHeightY[k] = MaximumSwathHeightY[k];
622 SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
623 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k];
624 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
625 } else {
626 SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
627 SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
628 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY[k] / 2;
629 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC[k] / 2;
630 }
631
632 if ((RoundedUpMaxSwathSizeBytesY[k] / 2 + RoundedUpMaxSwathSizeBytesC[k] / 2 >
633 DETBufferSizeInKByteForSwathCalculation * 1024 / 2)
634 || SwathWidth[k] > MaximumSwathWidthLuma[k] || (SwathHeightC[k] > 0 &&
635 SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
636 *ViewportSizeSupport = false;
637 ViewportSizeSupportPerSurface[k] = false;
638 } else {
639 ViewportSizeSupportPerSurface[k] = true;
640 }
641
642 if (SwathHeightC[k] == 0) {
643 #ifdef __DML_VBA_DEBUG__
644 dml_print("DML::%s: k=%0d All DET for plane0\n", __func__, k);
645 #endif
646 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024;
647 DETBufferSizeC[k] = 0;
648 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
649 #ifdef __DML_VBA_DEBUG__
650 dml_print("DML::%s: k=%0d Half DET for plane0, half for plane1\n", __func__, k);
651 #endif
652 DETBufferSizeY[k] = DETBufferSizeInKByte[k] * 1024 / 2;
653 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 / 2;
654 } else {
655 #ifdef __DML_VBA_DEBUG__
656 dml_print("DML::%s: k=%0d 2/3 DET for plane0, 1/3 for plane1\n", __func__, k);
657 #endif
658 DETBufferSizeY[k] = dml_floor(DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024);
659 DETBufferSizeC[k] = DETBufferSizeInKByte[k] * 1024 - DETBufferSizeY[k];
660 }
661
662 #ifdef __DML_VBA_DEBUG__
663 dml_print("DML::%s: k=%0d SwathHeightY = %d\n", __func__, k, SwathHeightY[k]);
664 dml_print("DML::%s: k=%0d SwathHeightC = %d\n", __func__, k, SwathHeightC[k]);
665 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__,
666 k, RoundedUpMaxSwathSizeBytesY[k]);
667 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__,
668 k, RoundedUpMaxSwathSizeBytesC[k]);
669 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesY = %d\n", __func__, k, RoundedUpSwathSizeBytesY);
670 dml_print("DML::%s: k=%0d RoundedUpSwathSizeBytesC = %d\n", __func__, k, RoundedUpSwathSizeBytesC);
671 dml_print("DML::%s: k=%0d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
672 dml_print("DML::%s: k=%0d DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
673 dml_print("DML::%s: k=%0d DETBufferSizeC = %d\n", __func__, k, DETBufferSizeC[k]);
674 dml_print("DML::%s: k=%0d ViewportSizeSupportPerSurface = %d\n", __func__, k,
675 ViewportSizeSupportPerSurface[k]);
676 #endif
677
678 }
679 } // CalculateSwathAndDETConfiguration
680
dml32_CalculateSwathWidth(bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,enum source_format_class SourcePixelFormat[],enum dm_rotation_angle SourceRotation[],bool ViewportStationary[],unsigned int ViewportWidth[],unsigned int ViewportHeight[],unsigned int ViewportXStart[],unsigned int ViewportYStart[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],enum odm_combine_mode ODMMode[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int BlendingAndTiming[],unsigned int HActive[],double HRatio[],unsigned int DPPPerSurface[],double SwathWidthdoubleDPPY[],double SwathWidthdoubleDPPC[],double SwathWidthY[],double SwathWidthC[],unsigned int MaximumSwathHeightY[],unsigned int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])681 void dml32_CalculateSwathWidth(
682 bool ForceSingleDPP,
683 unsigned int NumberOfActiveSurfaces,
684 enum source_format_class SourcePixelFormat[],
685 enum dm_rotation_angle SourceRotation[],
686 bool ViewportStationary[],
687 unsigned int ViewportWidth[],
688 unsigned int ViewportHeight[],
689 unsigned int ViewportXStart[],
690 unsigned int ViewportYStart[],
691 unsigned int ViewportXStartC[],
692 unsigned int ViewportYStartC[],
693 unsigned int SurfaceWidthY[],
694 unsigned int SurfaceWidthC[],
695 unsigned int SurfaceHeightY[],
696 unsigned int SurfaceHeightC[],
697 enum odm_combine_mode ODMMode[],
698 unsigned int BytePerPixY[],
699 unsigned int BytePerPixC[],
700 unsigned int Read256BytesBlockHeightY[],
701 unsigned int Read256BytesBlockHeightC[],
702 unsigned int Read256BytesBlockWidthY[],
703 unsigned int Read256BytesBlockWidthC[],
704 unsigned int BlendingAndTiming[],
705 unsigned int HActive[],
706 double HRatio[],
707 unsigned int DPPPerSurface[],
708
709 /* Output */
710 double SwathWidthdoubleDPPY[],
711 double SwathWidthdoubleDPPC[],
712 double SwathWidthY[], // per-pipe
713 double SwathWidthC[], // per-pipe
714 unsigned int MaximumSwathHeightY[],
715 unsigned int MaximumSwathHeightC[],
716 unsigned int swath_width_luma_ub[], // per-pipe
717 unsigned int swath_width_chroma_ub[]) // per-pipe
718 {
719 unsigned int k, j;
720 enum odm_combine_mode MainSurfaceODMMode;
721
722 unsigned int surface_width_ub_l;
723 unsigned int surface_height_ub_l;
724 unsigned int surface_width_ub_c = 0;
725 unsigned int surface_height_ub_c = 0;
726
727 #ifdef __DML_VBA_DEBUG__
728 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
729 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
730 #endif
731
732 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
733 if (!IsVertical(SourceRotation[k]))
734 SwathWidthdoubleDPPY[k] = ViewportWidth[k];
735 else
736 SwathWidthdoubleDPPY[k] = ViewportHeight[k];
737
738 #ifdef __DML_VBA_DEBUG__
739 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
740 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
741 #endif
742
743 MainSurfaceODMMode = ODMMode[k];
744 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
745 if (BlendingAndTiming[k] == j)
746 MainSurfaceODMMode = ODMMode[j];
747 }
748
749 if (ForceSingleDPP) {
750 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
751 } else {
752 if (MainSurfaceODMMode == dm_odm_combine_mode_4to1) {
753 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
754 dml_round(HActive[k] / 4.0 * HRatio[k]));
755 } else if (MainSurfaceODMMode == dm_odm_combine_mode_2to1) {
756 SwathWidthY[k] = dml_min(SwathWidthdoubleDPPY[k],
757 dml_round(HActive[k] / 2.0 * HRatio[k]));
758 } else if (DPPPerSurface[k] == 2) {
759 SwathWidthY[k] = SwathWidthdoubleDPPY[k] / 2;
760 } else {
761 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
762 }
763 }
764
765 #ifdef __DML_VBA_DEBUG__
766 dml_print("DML::%s: k=%d HActive=%d\n", __func__, k, HActive[k]);
767 dml_print("DML::%s: k=%d HRatio=%f\n", __func__, k, HRatio[k]);
768 dml_print("DML::%s: k=%d MainSurfaceODMMode=%d\n", __func__, k, MainSurfaceODMMode);
769 dml_print("DML::%s: k=%d SwathWidthdoubleDPPY=%d\n", __func__, k, SwathWidthdoubleDPPY[k]);
770 dml_print("DML::%s: k=%d SwathWidthY=%d\n", __func__, k, SwathWidthY[k]);
771 #endif
772
773 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
774 SourcePixelFormat[k] == dm_420_12) {
775 SwathWidthC[k] = SwathWidthY[k] / 2;
776 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k] / 2;
777 } else {
778 SwathWidthC[k] = SwathWidthY[k];
779 SwathWidthdoubleDPPC[k] = SwathWidthdoubleDPPY[k];
780 }
781
782 if (ForceSingleDPP == true) {
783 SwathWidthY[k] = SwathWidthdoubleDPPY[k];
784 SwathWidthC[k] = SwathWidthdoubleDPPC[k];
785 }
786
787 surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
788 surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
789
790 if (!IsVertical(SourceRotation[k])) {
791 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
792 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
793 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
794 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
795 dml_floor(ViewportXStart[k] +
796 SwathWidthY[k] +
797 Read256BytesBlockWidthY[k] - 1,
798 Read256BytesBlockWidthY[k]) -
799 dml_floor(ViewportXStart[k],
800 Read256BytesBlockWidthY[k]));
801 } else {
802 swath_width_luma_ub[k] = dml_min(surface_width_ub_l,
803 dml_ceil(SwathWidthY[k] - 1,
804 Read256BytesBlockWidthY[k]) +
805 Read256BytesBlockWidthY[k]);
806 }
807 if (BytePerPixC[k] > 0) {
808 surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
809 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
810 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
811 dml_floor(ViewportXStartC[k] + SwathWidthC[k] +
812 Read256BytesBlockWidthC[k] - 1,
813 Read256BytesBlockWidthC[k]) -
814 dml_floor(ViewportXStartC[k],
815 Read256BytesBlockWidthC[k]));
816 } else {
817 swath_width_chroma_ub[k] = dml_min(surface_width_ub_c,
818 dml_ceil(SwathWidthC[k] - 1,
819 Read256BytesBlockWidthC[k]) +
820 Read256BytesBlockWidthC[k]);
821 }
822 } else {
823 swath_width_chroma_ub[k] = 0;
824 }
825 } else {
826 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
827 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
828
829 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
830 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_floor(ViewportYStart[k] +
831 SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1,
832 Read256BytesBlockHeightY[k]) -
833 dml_floor(ViewportYStart[k], Read256BytesBlockHeightY[k]));
834 } else {
835 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, dml_ceil(SwathWidthY[k] - 1,
836 Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
837 }
838 if (BytePerPixC[k] > 0) {
839 surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
840 if (ViewportStationary[k] && DPPPerSurface[k] == 1) {
841 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
842 dml_floor(ViewportYStartC[k] + SwathWidthC[k] +
843 Read256BytesBlockHeightC[k] - 1,
844 Read256BytesBlockHeightC[k]) -
845 dml_floor(ViewportYStartC[k],
846 Read256BytesBlockHeightC[k]));
847 } else {
848 swath_width_chroma_ub[k] = dml_min(surface_height_ub_c,
849 dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) +
850 Read256BytesBlockHeightC[k]);
851 }
852 } else {
853 swath_width_chroma_ub[k] = 0;
854 }
855 }
856
857 #ifdef __DML_VBA_DEBUG__
858 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
859 dml_print("DML::%s: k=%d surface_height_ub_l=%0d\n", __func__, k, surface_height_ub_l);
860 dml_print("DML::%s: k=%d surface_width_ub_c=%0d\n", __func__, k, surface_width_ub_c);
861 dml_print("DML::%s: k=%d surface_height_ub_c=%0d\n", __func__, k, surface_height_ub_c);
862 dml_print("DML::%s: k=%d Read256BytesBlockWidthY=%0d\n", __func__, k, Read256BytesBlockWidthY[k]);
863 dml_print("DML::%s: k=%d Read256BytesBlockHeightY=%0d\n", __func__, k, Read256BytesBlockHeightY[k]);
864 dml_print("DML::%s: k=%d Read256BytesBlockWidthC=%0d\n", __func__, k, Read256BytesBlockWidthC[k]);
865 dml_print("DML::%s: k=%d Read256BytesBlockHeightC=%0d\n", __func__, k, Read256BytesBlockHeightC[k]);
866 dml_print("DML::%s: k=%d ViewportStationary=%0d\n", __func__, k, ViewportStationary[k]);
867 dml_print("DML::%s: k=%d DPPPerSurface=%0d\n", __func__, k, DPPPerSurface[k]);
868 dml_print("DML::%s: k=%d swath_width_luma_ub=%0d\n", __func__, k, swath_width_luma_ub[k]);
869 dml_print("DML::%s: k=%d swath_width_chroma_ub=%0d\n", __func__, k, swath_width_chroma_ub[k]);
870 dml_print("DML::%s: k=%d MaximumSwathHeightY=%0d\n", __func__, k, MaximumSwathHeightY[k]);
871 dml_print("DML::%s: k=%d MaximumSwathHeightC=%0d\n", __func__, k, MaximumSwathHeightC[k]);
872 #endif
873
874 }
875 } // CalculateSwathWidth
876
dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,unsigned int TotalNumberOfActiveDPP,bool NoChroma,enum output_encoder_class Output,enum dm_swizzle_mode SurfaceTiling,bool CompBufReservedSpaceNeedAdjustment,bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)877 bool dml32_UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal,
878 unsigned int TotalNumberOfActiveDPP,
879 bool NoChroma,
880 enum output_encoder_class Output,
881 enum dm_swizzle_mode SurfaceTiling,
882 bool CompBufReservedSpaceNeedAdjustment,
883 bool DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
884 {
885 bool ret_val = false;
886
887 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable &&
888 TotalNumberOfActiveDPP == 1 && NoChroma);
889 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp)
890 ret_val = false;
891
892 if (SurfaceTiling == dm_sw_linear)
893 ret_val = false;
894
895 if (CompBufReservedSpaceNeedAdjustment == 1 && DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment)
896 ret_val = false;
897
898 #ifdef __DML_VBA_DEBUG__
899 dml_print("DML::%s: CompBufReservedSpaceNeedAdjustment = %d\n", __func__, CompBufReservedSpaceNeedAdjustment);
900 dml_print("DML::%s: DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment = %d\n", __func__, DisableUnboundRequestIfCompBufReservedSpaceNeedAdjustment);
901 dml_print("DML::%s: ret_val = %d\n", __func__, ret_val);
902 #endif
903
904 return (ret_val);
905 }
906
dml32_CalculateDETBufferSize(unsigned int DETSizeOverride[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,bool UnboundedRequestEnabled,unsigned int nomDETInKByte,unsigned int MaxTotalDETInKByte,unsigned int ConfigReturnBufferSizeInKByte,unsigned int MinCompressedBufferSizeInKByte,unsigned int CompressedBufferSegmentSizeInkByteFinal,enum source_format_class SourcePixelFormat[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int RoundedUpMaxSwathSizeBytesY[],unsigned int RoundedUpMaxSwathSizeBytesC[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeInKByte[],unsigned int * CompressedBufferSizeInkByte)907 void dml32_CalculateDETBufferSize(
908 unsigned int DETSizeOverride[],
909 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
910 bool ForceSingleDPP,
911 unsigned int NumberOfActiveSurfaces,
912 bool UnboundedRequestEnabled,
913 unsigned int nomDETInKByte,
914 unsigned int MaxTotalDETInKByte,
915 unsigned int ConfigReturnBufferSizeInKByte,
916 unsigned int MinCompressedBufferSizeInKByte,
917 unsigned int CompressedBufferSegmentSizeInkByteFinal,
918 enum source_format_class SourcePixelFormat[],
919 double ReadBandwidthLuma[],
920 double ReadBandwidthChroma[],
921 unsigned int RoundedUpMaxSwathSizeBytesY[],
922 unsigned int RoundedUpMaxSwathSizeBytesC[],
923 unsigned int DPPPerSurface[],
924 /* Output */
925 unsigned int DETBufferSizeInKByte[],
926 unsigned int *CompressedBufferSizeInkByte)
927 {
928 unsigned int DETBufferSizePoolInKByte;
929 unsigned int NextDETBufferPieceInKByte;
930 bool DETPieceAssignedToThisSurfaceAlready[DC__NUM_DPP__MAX];
931 bool NextPotentialSurfaceToAssignDETPieceFound;
932 unsigned int NextSurfaceToAssignDETPiece;
933 double TotalBandwidth;
934 double BandwidthOfSurfacesNotAssignedDETPiece;
935 unsigned int max_minDET;
936 unsigned int minDET;
937 unsigned int minDET_pipe;
938 unsigned int j, k;
939
940 #ifdef __DML_VBA_DEBUG__
941 dml_print("DML::%s: ForceSingleDPP = %d\n", __func__, ForceSingleDPP);
942 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
943 dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
944 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
945 dml_print("DML::%s: MaxTotalDETInKByte = %d\n", __func__, MaxTotalDETInKByte);
946 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
947 dml_print("DML::%s: MinCompressedBufferSizeInKByte = %d\n", __func__, MinCompressedBufferSizeInKByte);
948 dml_print("DML::%s: CompressedBufferSegmentSizeInkByteFinal = %d\n", __func__,
949 CompressedBufferSegmentSizeInkByteFinal);
950 #endif
951
952 // Note: Will use default det size if that fits 2 swaths
953 if (UnboundedRequestEnabled) {
954 if (DETSizeOverride[0] > 0) {
955 DETBufferSizeInKByte[0] = DETSizeOverride[0];
956 } else {
957 DETBufferSizeInKByte[0] = dml_max(nomDETInKByte, dml_ceil(2.0 *
958 ((double) RoundedUpMaxSwathSizeBytesY[0] +
959 (double) RoundedUpMaxSwathSizeBytesC[0]) / 1024.0, 64.0));
960 }
961 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
962 } else {
963 DETBufferSizePoolInKByte = MaxTotalDETInKByte;
964 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
965 DETBufferSizeInKByte[k] = nomDETInKByte;
966 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 ||
967 SourcePixelFormat[k] == dm_420_12) {
968 max_minDET = nomDETInKByte - 64;
969 } else {
970 max_minDET = nomDETInKByte;
971 }
972 minDET = 128;
973 minDET_pipe = 0;
974
975 // add DET resource until can hold 2 full swaths
976 while (minDET <= max_minDET && minDET_pipe == 0) {
977 if (2.0 * ((double) RoundedUpMaxSwathSizeBytesY[k] +
978 (double) RoundedUpMaxSwathSizeBytesC[k]) / 1024.0 <= minDET)
979 minDET_pipe = minDET;
980 minDET = minDET + 64;
981 }
982
983 #ifdef __DML_VBA_DEBUG__
984 dml_print("DML::%s: k=%0d minDET = %d\n", __func__, k, minDET);
985 dml_print("DML::%s: k=%0d max_minDET = %d\n", __func__, k, max_minDET);
986 dml_print("DML::%s: k=%0d minDET_pipe = %d\n", __func__, k, minDET_pipe);
987 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesY = %d\n", __func__, k,
988 RoundedUpMaxSwathSizeBytesY[k]);
989 dml_print("DML::%s: k=%0d RoundedUpMaxSwathSizeBytesC = %d\n", __func__, k,
990 RoundedUpMaxSwathSizeBytesC[k]);
991 #endif
992
993 if (minDET_pipe == 0) {
994 minDET_pipe = dml_max(128, dml_ceil(((double)RoundedUpMaxSwathSizeBytesY[k] +
995 (double)RoundedUpMaxSwathSizeBytesC[k]) / 1024.0, 64));
996 #ifdef __DML_VBA_DEBUG__
997 dml_print("DML::%s: k=%0d minDET_pipe = %d (assume each plane take half DET)\n",
998 __func__, k, minDET_pipe);
999 #endif
1000 }
1001
1002 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1003 DETBufferSizeInKByte[k] = 0;
1004 } else if (DETSizeOverride[k] > 0) {
1005 DETBufferSizeInKByte[k] = DETSizeOverride[k];
1006 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1007 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * DETSizeOverride[k];
1008 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe <= DETBufferSizePoolInKByte) {
1009 DETBufferSizeInKByte[k] = minDET_pipe;
1010 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte -
1011 (ForceSingleDPP ? 1 : DPPPerSurface[k]) * minDET_pipe;
1012 }
1013
1014 #ifdef __DML_VBA_DEBUG__
1015 dml_print("DML::%s: k=%d DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
1016 dml_print("DML::%s: k=%d DETSizeOverride = %d\n", __func__, k, DETSizeOverride[k]);
1017 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1018 dml_print("DML::%s: DETBufferSizePoolInKByte = %d\n", __func__, DETBufferSizePoolInKByte);
1019 #endif
1020 }
1021
1022 TotalBandwidth = 0;
1023 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1024 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe)
1025 TotalBandwidth = TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1026 }
1027 #ifdef __DML_VBA_DEBUG__
1028 dml_print("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1029 for (uint k = 0; k < NumberOfActiveSurfaces; ++k)
1030 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d\n", __func__, k, DETBufferSizeInKByte[k]);
1031 dml_print("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1032 dml_print("DML::%s: TotalBandwidth = %f\n", __func__, TotalBandwidth);
1033 #endif
1034 BandwidthOfSurfacesNotAssignedDETPiece = TotalBandwidth;
1035 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1036
1037 if (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) {
1038 DETPieceAssignedToThisSurfaceAlready[k] = true;
1039 } else if (DETSizeOverride[k] > 0 || (((double) (ForceSingleDPP ? 1 : DPPPerSurface[k]) *
1040 (double) DETBufferSizeInKByte[k] / (double) MaxTotalDETInKByte) >=
1041 ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / TotalBandwidth))) {
1042 DETPieceAssignedToThisSurfaceAlready[k] = true;
1043 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1044 ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1045 } else {
1046 DETPieceAssignedToThisSurfaceAlready[k] = false;
1047 }
1048 #ifdef __DML_VBA_DEBUG__
1049 dml_print("DML::%s: k=%d DETPieceAssignedToThisSurfaceAlready = %d\n", __func__, k,
1050 DETPieceAssignedToThisSurfaceAlready[k]);
1051 dml_print("DML::%s: k=%d BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k,
1052 BandwidthOfSurfacesNotAssignedDETPiece);
1053 #endif
1054 }
1055
1056 for (j = 0; j < NumberOfActiveSurfaces; ++j) {
1057 NextPotentialSurfaceToAssignDETPieceFound = false;
1058 NextSurfaceToAssignDETPiece = 0;
1059
1060 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1061 #ifdef __DML_VBA_DEBUG__
1062 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[k] = %f\n", __func__, j, k,
1063 ReadBandwidthLuma[k]);
1064 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[k] = %f\n", __func__, j, k,
1065 ReadBandwidthChroma[k]);
1066 dml_print("DML::%s: j=%d k=%d, ReadBandwidthLuma[Next] = %f\n", __func__, j, k,
1067 ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1068 dml_print("DML::%s: j=%d k=%d, ReadBandwidthChroma[Next] = %f\n", __func__, j, k,
1069 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1070 dml_print("DML::%s: j=%d k=%d, NextSurfaceToAssignDETPiece = %d\n", __func__, j, k,
1071 NextSurfaceToAssignDETPiece);
1072 #endif
1073 if (!DETPieceAssignedToThisSurfaceAlready[k] &&
1074 (!NextPotentialSurfaceToAssignDETPieceFound ||
1075 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] <
1076 ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1077 ReadBandwidthChroma[NextSurfaceToAssignDETPiece])) {
1078 NextSurfaceToAssignDETPiece = k;
1079 NextPotentialSurfaceToAssignDETPieceFound = true;
1080 }
1081 #ifdef __DML_VBA_DEBUG__
1082 dml_print("DML::%s: j=%d k=%d, DETPieceAssignedToThisSurfaceAlready = %d\n",
1083 __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1084 dml_print("DML::%s: j=%d k=%d, NextPotentialSurfaceToAssignDETPieceFound = %d\n",
1085 __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1086 #endif
1087 }
1088
1089 if (NextPotentialSurfaceToAssignDETPieceFound) {
1090 // Note: To show the banker's rounding behavior in VBA and also the fact
1091 // that the DET buffer size varies due to precision issue
1092 //
1093 //double tmp1 = ((double) DETBufferSizePoolInKByte *
1094 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1095 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1096 // BandwidthOfSurfacesNotAssignedDETPiece /
1097 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1098 //double tmp2 = dml_round((double) DETBufferSizePoolInKByte *
1099 // (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1100 // ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1101 //BandwidthOfSurfacesNotAssignedDETPiece /
1102 // ((ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1103 //
1104 //dml_print("DML::%s: j=%d, tmp1 = %f\n", __func__, j, tmp1);
1105 //dml_print("DML::%s: j=%d, tmp2 = %f\n", __func__, j, tmp2);
1106
1107 NextDETBufferPieceInKByte = dml_min(
1108 dml_round((double) DETBufferSizePoolInKByte *
1109 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1110 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]) /
1111 BandwidthOfSurfacesNotAssignedDETPiece /
1112 ((ForceSingleDPP ? 1 :
1113 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0)) *
1114 (ForceSingleDPP ? 1 :
1115 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0,
1116 dml_floor((double) DETBufferSizePoolInKByte,
1117 (ForceSingleDPP ? 1 :
1118 DPPPerSurface[NextSurfaceToAssignDETPiece]) * 64.0));
1119
1120 // Above calculation can assign the entire DET buffer allocation to a single pipe.
1121 // We should limit the per-pipe DET size to the nominal / max per pipe.
1122 if (NextDETBufferPieceInKByte > nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1123 if (DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] <
1124 nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1125 NextDETBufferPieceInKByte = nomDETInKByte * (ForceSingleDPP ? 1 : DPPPerSurface[k]) -
1126 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece];
1127 } else {
1128 // Case where DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1129 // already has the max per-pipe value
1130 NextDETBufferPieceInKByte = 0;
1131 }
1132 }
1133
1134 #ifdef __DML_VBA_DEBUG__
1135 dml_print("DML::%s: j=%0d, DETBufferSizePoolInKByte = %d\n", __func__, j,
1136 DETBufferSizePoolInKByte);
1137 dml_print("DML::%s: j=%0d, NextSurfaceToAssignDETPiece = %d\n", __func__, j,
1138 NextSurfaceToAssignDETPiece);
1139 dml_print("DML::%s: j=%0d, ReadBandwidthLuma[%0d] = %f\n", __func__, j,
1140 NextSurfaceToAssignDETPiece, ReadBandwidthLuma[NextSurfaceToAssignDETPiece]);
1141 dml_print("DML::%s: j=%0d, ReadBandwidthChroma[%0d] = %f\n", __func__, j,
1142 NextSurfaceToAssignDETPiece, ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1143 dml_print("DML::%s: j=%0d, BandwidthOfSurfacesNotAssignedDETPiece = %f\n",
1144 __func__, j, BandwidthOfSurfacesNotAssignedDETPiece);
1145 dml_print("DML::%s: j=%0d, NextDETBufferPieceInKByte = %d\n", __func__, j,
1146 NextDETBufferPieceInKByte);
1147 dml_print("DML::%s: j=%0d, DETBufferSizeInKByte[%0d] increases from %0d ",
1148 __func__, j, NextSurfaceToAssignDETPiece,
1149 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1150 #endif
1151
1152 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece] =
1153 DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]
1154 + NextDETBufferPieceInKByte
1155 / (ForceSingleDPP ? 1 : DPPPerSurface[NextSurfaceToAssignDETPiece]);
1156 #ifdef __DML_VBA_DEBUG__
1157 dml_print("to %0d\n", DETBufferSizeInKByte[NextSurfaceToAssignDETPiece]);
1158 #endif
1159
1160 DETBufferSizePoolInKByte = DETBufferSizePoolInKByte - NextDETBufferPieceInKByte;
1161 DETPieceAssignedToThisSurfaceAlready[NextSurfaceToAssignDETPiece] = true;
1162 BandwidthOfSurfacesNotAssignedDETPiece = BandwidthOfSurfacesNotAssignedDETPiece -
1163 (ReadBandwidthLuma[NextSurfaceToAssignDETPiece] +
1164 ReadBandwidthChroma[NextSurfaceToAssignDETPiece]);
1165 }
1166 }
1167 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1168 }
1169 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
1170
1171 #ifdef __DML_VBA_DEBUG__
1172 dml_print("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1173 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
1174 for (uint k = 0; k < NumberOfActiveSurfaces; ++k) {
1175 dml_print("DML::%s: k=%d DETBufferSizeInKByte = %d (TotalReadBandWidth=%f)\n",
1176 __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1177 }
1178 #endif
1179 } // CalculateDETBufferSize
1180
dml32_CalculateODMMode(unsigned int MaximumPixelsPerLinePerDSCUnit,unsigned int HActive,enum output_format_class OutFormat,enum output_encoder_class Output,enum odm_combine_policy ODMUse,double StateDispclk,double MaxDispclk,bool DSCEnable,unsigned int TotalNumberOfActiveDPP,unsigned int MaxNumDPP,double PixelClock,double DISPCLKDPPCLKDSCCLKDownSpreading,double DISPCLKRampingMargin,double DISPCLKDPPCLKVCOSpeed,unsigned int NumberOfDSCSlices,bool * TotalAvailablePipesSupport,unsigned int * NumberOfDPP,enum odm_combine_mode * ODMMode,double * RequiredDISPCLKPerSurface)1181 void dml32_CalculateODMMode(
1182 unsigned int MaximumPixelsPerLinePerDSCUnit,
1183 unsigned int HActive,
1184 enum output_format_class OutFormat,
1185 enum output_encoder_class Output,
1186 enum odm_combine_policy ODMUse,
1187 double StateDispclk,
1188 double MaxDispclk,
1189 bool DSCEnable,
1190 unsigned int TotalNumberOfActiveDPP,
1191 unsigned int MaxNumDPP,
1192 double PixelClock,
1193 double DISPCLKDPPCLKDSCCLKDownSpreading,
1194 double DISPCLKRampingMargin,
1195 double DISPCLKDPPCLKVCOSpeed,
1196 unsigned int NumberOfDSCSlices,
1197
1198 /* Output */
1199 bool *TotalAvailablePipesSupport,
1200 unsigned int *NumberOfDPP,
1201 enum odm_combine_mode *ODMMode,
1202 double *RequiredDISPCLKPerSurface)
1203 {
1204
1205 double SurfaceRequiredDISPCLKWithoutODMCombine;
1206 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1207 double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1208
1209 SurfaceRequiredDISPCLKWithoutODMCombine = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_disabled,
1210 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1211 MaxDispclk);
1212 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_2to1,
1213 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1214 MaxDispclk);
1215 SurfaceRequiredDISPCLKWithODMCombineFourToOne = dml32_CalculateRequiredDispclk(dm_odm_combine_mode_4to1,
1216 PixelClock, DISPCLKDPPCLKDSCCLKDownSpreading, DISPCLKRampingMargin, DISPCLKDPPCLKVCOSpeed,
1217 MaxDispclk);
1218 *TotalAvailablePipesSupport = true;
1219 *ODMMode = dm_odm_combine_mode_disabled; // initialize as disable
1220
1221 if (ODMUse == dm_odm_combine_policy_none)
1222 *ODMMode = dm_odm_combine_mode_disabled;
1223
1224 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithoutODMCombine;
1225 *NumberOfDPP = 0;
1226
1227 // FIXME check ODMUse == "" condition does it mean bypass or Gabriel means something like don't care??
1228 // (ODMUse == "" || ODMUse == "CombineAsNeeded")
1229
1230 if (!(Output == dm_hdmi || Output == dm_dp || Output == dm_edp) && (ODMUse == dm_odm_combine_policy_4to1 ||
1231 ((SurfaceRequiredDISPCLKWithODMCombineTwoToOne > StateDispclk ||
1232 (DSCEnable && (HActive > 2 * MaximumPixelsPerLinePerDSCUnit))
1233 || NumberOfDSCSlices > 8)))) {
1234 if (TotalNumberOfActiveDPP + 4 <= MaxNumDPP) {
1235 *ODMMode = dm_odm_combine_mode_4to1;
1236 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1237 *NumberOfDPP = 4;
1238 } else {
1239 *TotalAvailablePipesSupport = false;
1240 }
1241 } else if (Output != dm_hdmi && (ODMUse == dm_odm_combine_policy_2to1 ||
1242 (((SurfaceRequiredDISPCLKWithoutODMCombine > StateDispclk &&
1243 SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= StateDispclk) ||
1244 (DSCEnable && (HActive > MaximumPixelsPerLinePerDSCUnit))
1245 || (NumberOfDSCSlices <= 8 && NumberOfDSCSlices > 4))))) {
1246 if (TotalNumberOfActiveDPP + 2 <= MaxNumDPP) {
1247 *ODMMode = dm_odm_combine_mode_2to1;
1248 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1249 *NumberOfDPP = 2;
1250 } else {
1251 *TotalAvailablePipesSupport = false;
1252 }
1253 } else {
1254 if (TotalNumberOfActiveDPP + 1 <= MaxNumDPP)
1255 *NumberOfDPP = 1;
1256 else
1257 *TotalAvailablePipesSupport = false;
1258 }
1259 if (OutFormat == dm_420 && HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH &&
1260 ODMUse != dm_odm_combine_policy_4to1) {
1261 if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 4) {
1262 *ODMMode = dm_odm_combine_mode_disabled;
1263 *NumberOfDPP = 0;
1264 *TotalAvailablePipesSupport = false;
1265 } else if (HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH * 2 ||
1266 *ODMMode == dm_odm_combine_mode_4to1) {
1267 *ODMMode = dm_odm_combine_mode_4to1;
1268 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
1269 *NumberOfDPP = 4;
1270 } else {
1271 *ODMMode = dm_odm_combine_mode_2to1;
1272 *RequiredDISPCLKPerSurface = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
1273 *NumberOfDPP = 2;
1274 }
1275 }
1276 if (Output == dm_hdmi && OutFormat == dm_420 &&
1277 HActive > DCN32_MAX_FMT_420_BUFFER_WIDTH) {
1278 *ODMMode = dm_odm_combine_mode_disabled;
1279 *NumberOfDPP = 0;
1280 *TotalAvailablePipesSupport = false;
1281 }
1282 }
1283
dml32_CalculateRequiredDispclk(enum odm_combine_mode ODMMode,double PixelClock,double DISPCLKDPPCLKDSCCLKDownSpreading,double DISPCLKRampingMargin,double DISPCLKDPPCLKVCOSpeed,double MaxDispclk)1284 double dml32_CalculateRequiredDispclk(
1285 enum odm_combine_mode ODMMode,
1286 double PixelClock,
1287 double DISPCLKDPPCLKDSCCLKDownSpreading,
1288 double DISPCLKRampingMargin,
1289 double DISPCLKDPPCLKVCOSpeed,
1290 double MaxDispclk)
1291 {
1292 double RequiredDispclk = 0.;
1293 double PixelClockAfterODM;
1294 double DISPCLKWithRampingRoundedToDFSGranularity;
1295 double DISPCLKWithoutRampingRoundedToDFSGranularity;
1296 double MaxDispclkRoundedDownToDFSGranularity;
1297
1298 if (ODMMode == dm_odm_combine_mode_4to1)
1299 PixelClockAfterODM = PixelClock / 4;
1300 else if (ODMMode == dm_odm_combine_mode_2to1)
1301 PixelClockAfterODM = PixelClock / 2;
1302 else
1303 PixelClockAfterODM = PixelClock;
1304
1305
1306 DISPCLKWithRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1307 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100)
1308 * (1 + DISPCLKRampingMargin / 100), 1, DISPCLKDPPCLKVCOSpeed);
1309
1310 DISPCLKWithoutRampingRoundedToDFSGranularity = dml32_RoundToDFSGranularity(
1311 PixelClockAfterODM * (1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100), 1, DISPCLKDPPCLKVCOSpeed);
1312
1313 MaxDispclkRoundedDownToDFSGranularity = dml32_RoundToDFSGranularity(MaxDispclk, 0, DISPCLKDPPCLKVCOSpeed);
1314
1315 if (DISPCLKWithoutRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1316 RequiredDispclk = DISPCLKWithoutRampingRoundedToDFSGranularity;
1317 else if (DISPCLKWithRampingRoundedToDFSGranularity > MaxDispclkRoundedDownToDFSGranularity)
1318 RequiredDispclk = MaxDispclkRoundedDownToDFSGranularity;
1319 else
1320 RequiredDispclk = DISPCLKWithRampingRoundedToDFSGranularity;
1321
1322 return RequiredDispclk;
1323 }
1324
/*
 * dml32_RoundToDFSGranularity - round a clock to a value of the form
 * (VCOSpeed * 4) / N, where N is a whole number.
 *
 * The ratio VCOSpeed * 4 / Clock is rounded down to a whole number when
 * round_up is set (which gives a result >= Clock) and rounded up otherwise
 * (which gives a result <= Clock).
 *
 * Returns 0.0 for a non-positive Clock.
 * NOTE(review): with round_up set and Clock > VCOSpeed * 4 the ratio rounds
 * down to 0 and the division yields infinity - confirm callers never pass
 * such a clock.
 */
double dml32_RoundToDFSGranularity(double Clock, bool round_up, double VCOSpeed)
{
	double vco_times_four;
	double whole_ratio;

	if (Clock <= 0.0)
		return 0.0;

	vco_times_four = VCOSpeed * 4.0;
	whole_ratio = round_up ? dml_floor(vco_times_four / Clock, 1.0)
			       : dml_ceil(vco_times_four / Clock, 1.0);

	return vco_times_four / whole_ratio;
}
1335
dml32_CalculateOutputLink(double PHYCLKPerState,double PHYCLKD18PerState,double PHYCLKD32PerState,double Downspreading,bool IsMainSurfaceUsingTheIndicatedTiming,enum output_encoder_class Output,enum output_format_class OutputFormat,unsigned int HTotal,unsigned int HActive,double PixelClockBackEnd,double ForcedOutputLinkBPP,unsigned int DSCInputBitPerComponent,unsigned int NumberOfDSCSlices,double AudioSampleRate,unsigned int AudioSampleLayout,enum odm_combine_mode ODMModeNoDSC,enum odm_combine_mode ODMModeDSC,bool DSCEnable,unsigned int OutputLinkDPLanes,enum dm_output_link_dp_rate OutputLinkDPRate,bool * RequiresDSC,double * RequiresFEC,double * OutBpp,enum dm_output_type * OutputType,enum dm_output_rate * OutputRate,unsigned int * RequiredSlots)1336 void dml32_CalculateOutputLink(
1337 double PHYCLKPerState,
1338 double PHYCLKD18PerState,
1339 double PHYCLKD32PerState,
1340 double Downspreading,
1341 bool IsMainSurfaceUsingTheIndicatedTiming,
1342 enum output_encoder_class Output,
1343 enum output_format_class OutputFormat,
1344 unsigned int HTotal,
1345 unsigned int HActive,
1346 double PixelClockBackEnd,
1347 double ForcedOutputLinkBPP,
1348 unsigned int DSCInputBitPerComponent,
1349 unsigned int NumberOfDSCSlices,
1350 double AudioSampleRate,
1351 unsigned int AudioSampleLayout,
1352 enum odm_combine_mode ODMModeNoDSC,
1353 enum odm_combine_mode ODMModeDSC,
1354 bool DSCEnable,
1355 unsigned int OutputLinkDPLanes,
1356 enum dm_output_link_dp_rate OutputLinkDPRate,
1357
1358 /* Output */
1359 bool *RequiresDSC,
1360 double *RequiresFEC,
1361 double *OutBpp,
1362 enum dm_output_type *OutputType,
1363 enum dm_output_rate *OutputRate,
1364 unsigned int *RequiredSlots)
1365 {
1366 bool LinkDSCEnable;
1367 unsigned int dummy;
1368 *RequiresDSC = false;
1369 *RequiresFEC = false;
1370 *OutBpp = 0;
1371 *OutputType = dm_output_type_unknown;
1372 *OutputRate = dm_output_rate_unknown;
1373
1374 if (IsMainSurfaceUsingTheIndicatedTiming) {
1375 if (Output == dm_hdmi) {
1376 *RequiresDSC = false;
1377 *RequiresFEC = false;
1378 *OutBpp = dml32_TruncToValidBPP(dml_min(600, PHYCLKPerState) * 10, 3, HTotal, HActive,
1379 PixelClockBackEnd, ForcedOutputLinkBPP, false, Output, OutputFormat,
1380 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1381 ODMModeNoDSC, ODMModeDSC, &dummy);
1382 //OutputTypeAndRate = "HDMI";
1383 *OutputType = dm_output_type_hdmi;
1384
1385 } else if (Output == dm_dp || Output == dm_dp2p0 || Output == dm_edp) {
1386 if (DSCEnable == true) {
1387 *RequiresDSC = true;
1388 LinkDSCEnable = true;
1389 if (Output == dm_dp || Output == dm_dp2p0)
1390 *RequiresFEC = true;
1391 else
1392 *RequiresFEC = false;
1393 } else {
1394 *RequiresDSC = false;
1395 LinkDSCEnable = false;
1396 if (Output == dm_dp2p0)
1397 *RequiresFEC = true;
1398 else
1399 *RequiresFEC = false;
1400 }
1401 if (Output == dm_dp2p0) {
1402 *OutBpp = 0;
1403 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr10) &&
1404 PHYCLKD32PerState >= 10000.0 / 32) {
1405 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1406 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1407 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1408 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1409 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1410 if (*OutBpp == 0 && PHYCLKD32PerState < 13500.0 / 32 && DSCEnable == true &&
1411 ForcedOutputLinkBPP == 0) {
1412 *RequiresDSC = true;
1413 LinkDSCEnable = true;
1414 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 10000,
1415 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1416 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1417 OutputFormat, DSCInputBitPerComponent,
1418 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1419 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1420 }
1421 //OutputTypeAndRate = Output & " UHBR10";
1422 *OutputType = dm_output_type_dp2p0;
1423 *OutputRate = dm_output_rate_dp_rate_uhbr10;
1424 }
1425 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr13p5) &&
1426 *OutBpp == 0 && PHYCLKD32PerState >= 13500.0 / 32) {
1427 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1428 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1429 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1430 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1431 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1432
1433 if (*OutBpp == 0 && PHYCLKD32PerState < 20000 / 32 && DSCEnable == true &&
1434 ForcedOutputLinkBPP == 0) {
1435 *RequiresDSC = true;
1436 LinkDSCEnable = true;
1437 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 13500,
1438 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1439 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1440 OutputFormat, DSCInputBitPerComponent,
1441 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1442 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1443 }
1444 //OutputTypeAndRate = Output & " UHBR13p5";
1445 *OutputType = dm_output_type_dp2p0;
1446 *OutputRate = dm_output_rate_dp_rate_uhbr13p5;
1447 }
1448 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_uhbr20) &&
1449 *OutBpp == 0 && PHYCLKD32PerState >= 20000 / 32) {
1450 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1451 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1452 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1453 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1454 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1455 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1456 *RequiresDSC = true;
1457 LinkDSCEnable = true;
1458 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 20000,
1459 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1460 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1461 OutputFormat, DSCInputBitPerComponent,
1462 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1463 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1464 }
1465 //OutputTypeAndRate = Output & " UHBR20";
1466 *OutputType = dm_output_type_dp2p0;
1467 *OutputRate = dm_output_rate_dp_rate_uhbr20;
1468 }
1469 } else {
1470 *OutBpp = 0;
1471 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr) &&
1472 PHYCLKPerState >= 270) {
1473 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1474 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1475 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1476 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1477 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1478 if (*OutBpp == 0 && PHYCLKPerState < 540 && DSCEnable == true &&
1479 ForcedOutputLinkBPP == 0) {
1480 *RequiresDSC = true;
1481 LinkDSCEnable = true;
1482 if (Output == dm_dp)
1483 *RequiresFEC = true;
1484 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 2700,
1485 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1486 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1487 OutputFormat, DSCInputBitPerComponent,
1488 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1489 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1490 }
1491 //OutputTypeAndRate = Output & " HBR";
1492 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1493 *OutputRate = dm_output_rate_dp_rate_hbr;
1494 }
1495 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr2) &&
1496 *OutBpp == 0 && PHYCLKPerState >= 540) {
1497 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1498 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1499 ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat,
1500 DSCInputBitPerComponent, NumberOfDSCSlices, AudioSampleRate,
1501 AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1502
1503 if (*OutBpp == 0 && PHYCLKPerState < 810 && DSCEnable == true &&
1504 ForcedOutputLinkBPP == 0) {
1505 *RequiresDSC = true;
1506 LinkDSCEnable = true;
1507 if (Output == dm_dp)
1508 *RequiresFEC = true;
1509
1510 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 5400,
1511 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1512 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1513 OutputFormat, DSCInputBitPerComponent,
1514 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1515 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1516 }
1517 //OutputTypeAndRate = Output & " HBR2";
1518 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1519 *OutputRate = dm_output_rate_dp_rate_hbr2;
1520 }
1521 if ((OutputLinkDPRate == dm_dp_rate_na || OutputLinkDPRate == dm_dp_rate_hbr3) && *OutBpp == 0 && PHYCLKPerState >= 810) {
1522 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1523 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1524 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1525 OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices,
1526 AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC,
1527 RequiredSlots);
1528
1529 if (*OutBpp == 0 && DSCEnable == true && ForcedOutputLinkBPP == 0) {
1530 *RequiresDSC = true;
1531 LinkDSCEnable = true;
1532 if (Output == dm_dp)
1533 *RequiresFEC = true;
1534
1535 *OutBpp = dml32_TruncToValidBPP((1 - Downspreading / 100) * 8100,
1536 OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd,
1537 ForcedOutputLinkBPP, LinkDSCEnable, Output,
1538 OutputFormat, DSCInputBitPerComponent,
1539 NumberOfDSCSlices, AudioSampleRate, AudioSampleLayout,
1540 ODMModeNoDSC, ODMModeDSC, RequiredSlots);
1541 }
1542 //OutputTypeAndRate = Output & " HBR3";
1543 *OutputType = (Output == dm_dp) ? dm_output_type_dp : dm_output_type_edp;
1544 *OutputRate = dm_output_rate_dp_rate_hbr3;
1545 }
1546 }
1547 }
1548 }
1549 }
1550
/*
 * dml32_CalculateDPPCLK - derive the per-surface and global DPP clocks.
 *
 * @NumberOfActiveSurfaces: number of valid entries in the per-surface arrays.
 * @DISPCLKDPPCLKDSCCLKDownSpreading: down-spread, in percent, added on top of
 *	each per-surface clock.
 * @DISPCLKDPPCLKVCOSpeed: forwarded to dml32_RoundToDFSGranularity().
 * @DPPCLKUsingSingleDPP: per-surface clock assuming a single DPP.
 * @DPPPerSurface: per-surface divisor applied to DPPCLKUsingSingleDPP.
 * @GlobalDPPCLK: (out) largest per-surface clock after it has been passed
 *	through dml32_RoundToDFSGranularity().
 * @Dppclk: (out) per-surface clock, rounded up to a whole number of 1/255
 *	steps of *GlobalDPPCLK.
 */
void dml32_CalculateDPPCLK(
		unsigned int NumberOfActiveSurfaces,
		double DISPCLKDPPCLKDSCCLKDownSpreading,
		double DISPCLKDPPCLKVCOSpeed,
		double DPPCLKUsingSingleDPP[],
		unsigned int DPPPerSurface[],

		/* output */
		double *GlobalDPPCLK,
		double Dppclk[])
{
	const double downspread_scale = 1 + DISPCLKDPPCLKDSCCLKDownSpreading / 100;
	unsigned int surf;

	/* Pass 1: raw per-surface clocks and their running maximum. */
	*GlobalDPPCLK = 0;
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		Dppclk[surf] = DPPCLKUsingSingleDPP[surf] / DPPPerSurface[surf] * downspread_scale;
		*GlobalDPPCLK = dml_max(*GlobalDPPCLK, Dppclk[surf]);
	}

	*GlobalDPPCLK = dml32_RoundToDFSGranularity(*GlobalDPPCLK, 1, DISPCLKDPPCLKVCOSpeed);

	/* Pass 2: snap every surface clock up to a multiple of GlobalDPPCLK / 255. */
	for (surf = 0; surf < NumberOfActiveSurfaces; ++surf) {
		const double steps = dml_ceil(Dppclk[surf] * 255.0 / *GlobalDPPCLK, 1.0);

		Dppclk[surf] = *GlobalDPPCLK / 255 * steps;
	}
}
1572
dml32_TruncToValidBPP(double LinkBitRate,unsigned int Lanes,unsigned int HTotal,unsigned int HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum output_encoder_class Output,enum output_format_class Format,unsigned int DSCInputBitPerComponent,unsigned int DSCSlices,unsigned int AudioRate,unsigned int AudioLayout,enum odm_combine_mode ODMModeNoDSC,enum odm_combine_mode ODMModeDSC,unsigned int * RequiredSlots)1573 double dml32_TruncToValidBPP(
1574 double LinkBitRate,
1575 unsigned int Lanes,
1576 unsigned int HTotal,
1577 unsigned int HActive,
1578 double PixelClock,
1579 double DesiredBPP,
1580 bool DSCEnable,
1581 enum output_encoder_class Output,
1582 enum output_format_class Format,
1583 unsigned int DSCInputBitPerComponent,
1584 unsigned int DSCSlices,
1585 unsigned int AudioRate,
1586 unsigned int AudioLayout,
1587 enum odm_combine_mode ODMModeNoDSC,
1588 enum odm_combine_mode ODMModeDSC,
1589 /* Output */
1590 unsigned int *RequiredSlots)
1591 {
1592 double MaxLinkBPP;
1593 unsigned int MinDSCBPP;
1594 double MaxDSCBPP;
1595 unsigned int NonDSCBPP0;
1596 unsigned int NonDSCBPP1;
1597 unsigned int NonDSCBPP2;
1598
1599 if (Format == dm_420) {
1600 NonDSCBPP0 = 12;
1601 NonDSCBPP1 = 15;
1602 NonDSCBPP2 = 18;
1603 MinDSCBPP = 6;
1604 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
1605 } else if (Format == dm_444) {
1606 NonDSCBPP0 = 24;
1607 NonDSCBPP1 = 30;
1608 NonDSCBPP2 = 36;
1609 MinDSCBPP = 8;
1610 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
1611 } else {
1612 if (Output == dm_hdmi) {
1613 NonDSCBPP0 = 24;
1614 NonDSCBPP1 = 24;
1615 NonDSCBPP2 = 24;
1616 } else {
1617 NonDSCBPP0 = 16;
1618 NonDSCBPP1 = 20;
1619 NonDSCBPP2 = 24;
1620 }
1621 if (Format == dm_n422) {
1622 MinDSCBPP = 7;
1623 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
1624 } else {
1625 MinDSCBPP = 8;
1626 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
1627 }
1628 }
1629 if (Output == dm_dp2p0) {
1630 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128 / 132 * 383 / 384 * 65536 / 65540;
1631 } else if (DSCEnable && Output == dm_dp) {
1632 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
1633 } else {
1634 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
1635 }
1636
1637 if (DSCEnable) {
1638 if (ODMModeDSC == dm_odm_combine_mode_4to1)
1639 MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1640 else if (ODMModeDSC == dm_odm_combine_mode_2to1)
1641 MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1642 else if (ODMModeDSC == dm_odm_split_mode_1to2)
1643 MaxLinkBPP = 2 * MaxLinkBPP;
1644 } else {
1645 if (ODMModeNoDSC == dm_odm_combine_mode_4to1)
1646 MaxLinkBPP = dml_min(MaxLinkBPP, 16);
1647 else if (ODMModeNoDSC == dm_odm_combine_mode_2to1)
1648 MaxLinkBPP = dml_min(MaxLinkBPP, 32);
1649 else if (ODMModeNoDSC == dm_odm_split_mode_1to2)
1650 MaxLinkBPP = 2 * MaxLinkBPP;
1651 }
1652
1653 *RequiredSlots = dml_ceil(DesiredBPP / MaxLinkBPP * 64, 1);
1654
1655 if (DesiredBPP == 0) {
1656 if (DSCEnable) {
1657 if (MaxLinkBPP < MinDSCBPP)
1658 return BPP_INVALID;
1659 else if (MaxLinkBPP >= MaxDSCBPP)
1660 return MaxDSCBPP;
1661 else
1662 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
1663 } else {
1664 if (MaxLinkBPP >= NonDSCBPP2)
1665 return NonDSCBPP2;
1666 else if (MaxLinkBPP >= NonDSCBPP1)
1667 return NonDSCBPP1;
1668 else if (MaxLinkBPP >= NonDSCBPP0)
1669 return 16.0;
1670 else
1671 return BPP_INVALID;
1672 }
1673 } else {
1674 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 ||
1675 DesiredBPP <= NonDSCBPP0)) ||
1676 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP)))
1677 return BPP_INVALID;
1678 else
1679 return DesiredBPP;
1680 }
1681 } // TruncToValidBPP
1682
dml32_RequiredDTBCLK(bool DSCEnable,double PixelClock,enum output_format_class OutputFormat,double OutputBpp,unsigned int DSCSlices,unsigned int HTotal,unsigned int HActive,unsigned int AudioRate,unsigned int AudioLayout)1683 double dml32_RequiredDTBCLK(
1684 bool DSCEnable,
1685 double PixelClock,
1686 enum output_format_class OutputFormat,
1687 double OutputBpp,
1688 unsigned int DSCSlices,
1689 unsigned int HTotal,
1690 unsigned int HActive,
1691 unsigned int AudioRate,
1692 unsigned int AudioLayout)
1693 {
1694 double PixelWordRate;
1695 double HCActive;
1696 double HCBlank;
1697 double AverageTribyteRate;
1698 double HActiveTribyteRate;
1699
1700 if (DSCEnable != true)
1701 return dml_max(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
1702
1703 PixelWordRate = PixelClock / (OutputFormat == dm_444 ? 1 : 2);
1704 HCActive = dml_ceil(DSCSlices * dml_ceil(OutputBpp *
1705 dml_ceil(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
1706 HCBlank = 64 + 32 *
1707 dml_ceil(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
1708 AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
1709 HActiveTribyteRate = PixelWordRate * HCActive / HActive;
1710 return dml_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
1711 }
1712
dml32_DSCDelayRequirement(bool DSCEnabled,enum odm_combine_mode ODMMode,unsigned int DSCInputBitPerComponent,double OutputBpp,unsigned int HActive,unsigned int HTotal,unsigned int NumberOfDSCSlices,enum output_format_class OutputFormat,enum output_encoder_class Output,double PixelClock,double PixelClockBackEnd,double dsc_delay_factor_wa)1713 unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
1714 enum odm_combine_mode ODMMode,
1715 unsigned int DSCInputBitPerComponent,
1716 double OutputBpp,
1717 unsigned int HActive,
1718 unsigned int HTotal,
1719 unsigned int NumberOfDSCSlices,
1720 enum output_format_class OutputFormat,
1721 enum output_encoder_class Output,
1722 double PixelClock,
1723 double PixelClockBackEnd,
1724 double dsc_delay_factor_wa)
1725 {
1726 unsigned int DSCDelayRequirement_val;
1727
1728 if (DSCEnabled == true && OutputBpp != 0) {
1729 if (ODMMode == dm_odm_combine_mode_4to1) {
1730 DSCDelayRequirement_val = 4 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1731 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 4,
1732 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1733 } else if (ODMMode == dm_odm_combine_mode_2to1) {
1734 DSCDelayRequirement_val = 2 * (dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1735 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices / 2,
1736 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output));
1737 } else {
1738 DSCDelayRequirement_val = dml32_dscceComputeDelay(DSCInputBitPerComponent, OutputBpp,
1739 dml_ceil(HActive / NumberOfDSCSlices, 1), NumberOfDSCSlices,
1740 OutputFormat, Output) + dml32_dscComputeDelay(OutputFormat, Output);
1741 }
1742
1743 DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
1744 dml_ceil((double)DSCDelayRequirement_val / HActive, 1);
1745
1746 DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
1747
1748 } else {
1749 DSCDelayRequirement_val = 0;
1750 }
1751
1752 #ifdef __DML_VBA_DEBUG__
1753 dml_print("DML::%s: DSCEnabled = %d\n", __func__, DSCEnabled);
1754 dml_print("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
1755 dml_print("DML::%s: HActive = %d\n", __func__, HActive);
1756 dml_print("DML::%s: OutputFormat = %d\n", __func__, OutputFormat);
1757 dml_print("DML::%s: DSCInputBitPerComponent = %d\n", __func__, DSCInputBitPerComponent);
1758 dml_print("DML::%s: NumberOfDSCSlices = %d\n", __func__, NumberOfDSCSlices);
1759 dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
1760 #endif
1761
1762 return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1);
1763 }
1764
dml32_CalculateSurfaceSizeInMall(unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCN,enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],bool DCCEnable[],bool ViewportStationary[],unsigned int ViewportXStartY[],unsigned int ViewportYStartY[],unsigned int ViewportXStartC[],unsigned int ViewportYStartC[],unsigned int ViewportWidthY[],unsigned int ViewportHeightY[],unsigned int BytesPerPixelY[],unsigned int ViewportWidthC[],unsigned int ViewportHeightC[],unsigned int BytesPerPixelC[],unsigned int SurfaceWidthY[],unsigned int SurfaceWidthC[],unsigned int SurfaceHeightY[],unsigned int SurfaceHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int ReadBlockWidthY[],unsigned int ReadBlockWidthC[],unsigned int ReadBlockHeightY[],unsigned int ReadBlockHeightC[],unsigned int DCCMetaPitchY[],unsigned int DCCMetaPitchC[],unsigned int SurfaceSizeInMALL[],bool * ExceededMALLSize)1765 void dml32_CalculateSurfaceSizeInMall(
1766 unsigned int NumberOfActiveSurfaces,
1767 unsigned int MALLAllocatedForDCN,
1768 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1769 enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
1770 bool DCCEnable[],
1771 bool ViewportStationary[],
1772 unsigned int ViewportXStartY[],
1773 unsigned int ViewportYStartY[],
1774 unsigned int ViewportXStartC[],
1775 unsigned int ViewportYStartC[],
1776 unsigned int ViewportWidthY[],
1777 unsigned int ViewportHeightY[],
1778 unsigned int BytesPerPixelY[],
1779 unsigned int ViewportWidthC[],
1780 unsigned int ViewportHeightC[],
1781 unsigned int BytesPerPixelC[],
1782 unsigned int SurfaceWidthY[],
1783 unsigned int SurfaceWidthC[],
1784 unsigned int SurfaceHeightY[],
1785 unsigned int SurfaceHeightC[],
1786 unsigned int Read256BytesBlockWidthY[],
1787 unsigned int Read256BytesBlockWidthC[],
1788 unsigned int Read256BytesBlockHeightY[],
1789 unsigned int Read256BytesBlockHeightC[],
1790 unsigned int ReadBlockWidthY[],
1791 unsigned int ReadBlockWidthC[],
1792 unsigned int ReadBlockHeightY[],
1793 unsigned int ReadBlockHeightC[],
1794 unsigned int DCCMetaPitchY[],
1795 unsigned int DCCMetaPitchC[],
1796
1797 /* Output */
1798 unsigned int SurfaceSizeInMALL[],
1799 bool *ExceededMALLSize)
1800 {
1801 unsigned int k;
1802 unsigned int TotalSurfaceSizeInMALLForSS = 0;
1803 unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
1804 unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;
1805
1806 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1807 if (ViewportStationary[k]) {
1808 SurfaceSizeInMALL[k] = dml_min(dml_ceil(SurfaceWidthY[k], ReadBlockWidthY[k]),
1809 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + ReadBlockWidthY[k] - 1,
1810 ReadBlockWidthY[k]) - dml_floor(ViewportXStartY[k],
1811 ReadBlockWidthY[k])) * dml_min(dml_ceil(SurfaceHeightY[k],
1812 ReadBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1813 ViewportHeightY[k] + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
1814 dml_floor(ViewportYStartY[k], ReadBlockHeightY[k])) * BytesPerPixelY[k];
1815
1816 if (ReadBlockWidthC[k] > 0) {
1817 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1818 dml_min(dml_ceil(SurfaceWidthC[k], ReadBlockWidthC[k]),
1819 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] +
1820 ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
1821 dml_floor(ViewportXStartC[k], ReadBlockWidthC[k])) *
1822 dml_min(dml_ceil(SurfaceHeightC[k], ReadBlockHeightC[k]),
1823 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1824 ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
1825 dml_floor(ViewportYStartC[k], ReadBlockHeightC[k])) *
1826 BytesPerPixelC[k];
1827 }
1828 if (DCCEnable[k] == true) {
1829 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1830 (dml_min(dml_ceil(DCCMetaPitchY[k], 8 * Read256BytesBlockWidthY[k]),
1831 dml_floor(ViewportXStartY[k] + ViewportWidthY[k] + 8 *
1832 Read256BytesBlockWidthY[k] - 1, 8 * Read256BytesBlockWidthY[k])
1833 - dml_floor(ViewportXStartY[k], 8 * Read256BytesBlockWidthY[k]))
1834 * dml_min(dml_ceil(SurfaceHeightY[k], 8 *
1835 Read256BytesBlockHeightY[k]), dml_floor(ViewportYStartY[k] +
1836 ViewportHeightY[k] + 8 * Read256BytesBlockHeightY[k] - 1, 8 *
1837 Read256BytesBlockHeightY[k]) - dml_floor(ViewportYStartY[k], 8 *
1838 Read256BytesBlockHeightY[k])) * BytesPerPixelY[k] / 256) + (64 * 1024);
1839 if (Read256BytesBlockWidthC[k] > 0) {
1840 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1841 dml_min(dml_ceil(DCCMetaPitchC[k], 8 *
1842 Read256BytesBlockWidthC[k]),
1843 dml_floor(ViewportXStartC[k] + ViewportWidthC[k] + 8
1844 * Read256BytesBlockWidthC[k] - 1, 8 *
1845 Read256BytesBlockWidthC[k]) -
1846 dml_floor(ViewportXStartC[k], 8 *
1847 Read256BytesBlockWidthC[k])) *
1848 dml_min(dml_ceil(SurfaceHeightC[k], 8 *
1849 Read256BytesBlockHeightC[k]),
1850 dml_floor(ViewportYStartC[k] + ViewportHeightC[k] +
1851 8 * Read256BytesBlockHeightC[k] - 1, 8 *
1852 Read256BytesBlockHeightC[k]) -
1853 dml_floor(ViewportYStartC[k], 8 *
1854 Read256BytesBlockHeightC[k])) *
1855 BytesPerPixelC[k] / 256;
1856 }
1857 }
1858 } else {
1859 SurfaceSizeInMALL[k] = dml_ceil(dml_min(SurfaceWidthY[k], ViewportWidthY[k] +
1860 ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
1861 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] +
1862 ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) *
1863 BytesPerPixelY[k];
1864 if (ReadBlockWidthC[k] > 0) {
1865 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1866 dml_ceil(dml_min(SurfaceWidthC[k], ViewportWidthC[k] +
1867 ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
1868 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] +
1869 ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) *
1870 BytesPerPixelC[k];
1871 }
1872 if (DCCEnable[k] == true) {
1873 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1874 (dml_ceil(dml_min(DCCMetaPitchY[k], ViewportWidthY[k] + 8 *
1875 Read256BytesBlockWidthY[k] - 1), 8 *
1876 Read256BytesBlockWidthY[k]) *
1877 dml_ceil(dml_min(SurfaceHeightY[k], ViewportHeightY[k] + 8 *
1878 Read256BytesBlockHeightY[k] - 1), 8 *
1879 Read256BytesBlockHeightY[k]) * BytesPerPixelY[k] / 256) + (64 * 1024);
1880
1881 if (Read256BytesBlockWidthC[k] > 0) {
1882 SurfaceSizeInMALL[k] = SurfaceSizeInMALL[k] +
1883 dml_ceil(dml_min(DCCMetaPitchC[k], ViewportWidthC[k] + 8 *
1884 Read256BytesBlockWidthC[k] - 1), 8 *
1885 Read256BytesBlockWidthC[k]) *
1886 dml_ceil(dml_min(SurfaceHeightC[k], ViewportHeightC[k] + 8 *
1887 Read256BytesBlockHeightC[k] - 1), 8 *
1888 Read256BytesBlockHeightC[k]) *
1889 BytesPerPixelC[k] / 256;
1890 }
1891 }
1892 }
1893 }
1894
1895 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1896 /* SS and Subvp counted separate as they are never used at the same time */
1897 if (UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe)
1898 TotalSurfaceSizeInMALLForSubVP = TotalSurfaceSizeInMALLForSubVP + SurfaceSizeInMALL[k];
1899 else if (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable)
1900 TotalSurfaceSizeInMALLForSS = TotalSurfaceSizeInMALLForSS + SurfaceSizeInMALL[k];
1901 }
1902 *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
1903 (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
1904 } // CalculateSurfaceSizeInMall
1905
dml32_CalculateVMRowAndSwath(unsigned int NumberOfActiveSurfaces,DmlPipe myPipe[],unsigned int SurfaceSizeInMALL[],unsigned int PTEBufferSizeInRequestsLuma,unsigned int PTEBufferSizeInRequestsChroma,unsigned int DCCMetaBufferSizeBytes,enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],unsigned int MALLAllocatedForDCN,double SwathWidthY[],double SwathWidthC[],bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMaxPageTableLevels,unsigned int GPUVMMinPageSizeKBytes[],unsigned int HostVMMinPageSize,bool PTEBufferSizeNotExceeded[],bool DCCMetaBufferSizeNotExceeded[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],unsigned int dpte_row_height_luma[],unsigned int dpte_row_height_chroma[],unsigned int dpte_row_height_linear_luma[],unsigned int dpte_row_height_linear_chroma[],unsigned int meta_req_width[],unsigned int meta_req_width_chroma[],unsigned int meta_req_height[],unsigned int meta_req_height_chroma[],unsigned int meta_row_width[],unsigned int meta_row_width_chroma[],unsigned int meta_row_height[],unsigned int meta_row_height_chroma[],unsigned int vm_group_bytes[],unsigned int dpte_group_bytes[],unsigned int PixelPTEReqWidthY[],unsigned int PixelPTEReqHeightY[],unsigned int PTERequestSizeY[],unsigned int PixelPTEReqWidthC[],unsigned int PixelPTEReqHeightC[],unsigned int PTERequestSizeC[],unsigned int dpde0_bytes_per_frame_ub_l[],unsigned int meta_pte_bytes_per_frame_ub_l[],unsigned int dpde0_bytes_per_frame_ub_c[],unsigned int meta_pte_bytes_per_frame_ub_c[],double PrefetchSourceLinesY[],double PrefetchSourceLinesC[],double VInitPreFillY[],double VInitPreFillC[],unsigned int MaxNumSwathY[],unsigned int MaxNumSwathC[],double meta_row_bw[],double dpte_row_bw[],double PixelPTEBytesPerRow[],double PDEAndMetaPTEBytesFrame[],double MetaRowByte[],bool use_one_row_for_frame[],bool use_one_row_for_frame_flip[],bool 
UsesMALLForStaticScreen[],bool PTE_BUFFER_MODE[],unsigned int BIGK_FRAGMENT_SIZE[])1906 void dml32_CalculateVMRowAndSwath(
1907 unsigned int NumberOfActiveSurfaces,
1908 DmlPipe myPipe[],
1909 unsigned int SurfaceSizeInMALL[],
1910 unsigned int PTEBufferSizeInRequestsLuma,
1911 unsigned int PTEBufferSizeInRequestsChroma,
1912 unsigned int DCCMetaBufferSizeBytes,
1913 enum dm_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
1914 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
1915 unsigned int MALLAllocatedForDCN,
1916 double SwathWidthY[],
1917 double SwathWidthC[],
1918 bool GPUVMEnable,
1919 bool HostVMEnable,
1920 unsigned int HostVMMaxNonCachedPageTableLevels,
1921 unsigned int GPUVMMaxPageTableLevels,
1922 unsigned int GPUVMMinPageSizeKBytes[],
1923 unsigned int HostVMMinPageSize,
1924
1925 /* Output */
1926 bool PTEBufferSizeNotExceeded[],
1927 bool DCCMetaBufferSizeNotExceeded[],
1928 unsigned int dpte_row_width_luma_ub[],
1929 unsigned int dpte_row_width_chroma_ub[],
1930 unsigned int dpte_row_height_luma[],
1931 unsigned int dpte_row_height_chroma[],
1932 unsigned int dpte_row_height_linear_luma[], // VBA_DELTA
1933 unsigned int dpte_row_height_linear_chroma[], // VBA_DELTA
1934 unsigned int meta_req_width[],
1935 unsigned int meta_req_width_chroma[],
1936 unsigned int meta_req_height[],
1937 unsigned int meta_req_height_chroma[],
1938 unsigned int meta_row_width[],
1939 unsigned int meta_row_width_chroma[],
1940 unsigned int meta_row_height[],
1941 unsigned int meta_row_height_chroma[],
1942 unsigned int vm_group_bytes[],
1943 unsigned int dpte_group_bytes[],
1944 unsigned int PixelPTEReqWidthY[],
1945 unsigned int PixelPTEReqHeightY[],
1946 unsigned int PTERequestSizeY[],
1947 unsigned int PixelPTEReqWidthC[],
1948 unsigned int PixelPTEReqHeightC[],
1949 unsigned int PTERequestSizeC[],
1950 unsigned int dpde0_bytes_per_frame_ub_l[],
1951 unsigned int meta_pte_bytes_per_frame_ub_l[],
1952 unsigned int dpde0_bytes_per_frame_ub_c[],
1953 unsigned int meta_pte_bytes_per_frame_ub_c[],
1954 double PrefetchSourceLinesY[],
1955 double PrefetchSourceLinesC[],
1956 double VInitPreFillY[],
1957 double VInitPreFillC[],
1958 unsigned int MaxNumSwathY[],
1959 unsigned int MaxNumSwathC[],
1960 double meta_row_bw[],
1961 double dpte_row_bw[],
1962 double PixelPTEBytesPerRow[],
1963 double PDEAndMetaPTEBytesFrame[],
1964 double MetaRowByte[],
1965 bool use_one_row_for_frame[],
1966 bool use_one_row_for_frame_flip[],
1967 bool UsesMALLForStaticScreen[],
1968 bool PTE_BUFFER_MODE[],
1969 unsigned int BIGK_FRAGMENT_SIZE[])
1970 {
1971 unsigned int k;
1972 unsigned int PTEBufferSizeInRequestsForLuma[DC__NUM_DPP__MAX];
1973 unsigned int PTEBufferSizeInRequestsForChroma[DC__NUM_DPP__MAX];
1974 unsigned int PDEAndMetaPTEBytesFrameY;
1975 unsigned int PDEAndMetaPTEBytesFrameC;
1976 unsigned int MetaRowByteY[DC__NUM_DPP__MAX] = {0};
1977 unsigned int MetaRowByteC[DC__NUM_DPP__MAX] = {0};
1978 unsigned int PixelPTEBytesPerRowY[DC__NUM_DPP__MAX];
1979 unsigned int PixelPTEBytesPerRowC[DC__NUM_DPP__MAX];
1980 unsigned int PixelPTEBytesPerRowY_one_row_per_frame[DC__NUM_DPP__MAX];
1981 unsigned int PixelPTEBytesPerRowC_one_row_per_frame[DC__NUM_DPP__MAX];
1982 unsigned int dpte_row_width_luma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1983 unsigned int dpte_row_height_luma_one_row_per_frame[DC__NUM_DPP__MAX];
1984 unsigned int dpte_row_width_chroma_ub_one_row_per_frame[DC__NUM_DPP__MAX];
1985 unsigned int dpte_row_height_chroma_one_row_per_frame[DC__NUM_DPP__MAX];
1986 bool one_row_per_frame_fits_in_buffer[DC__NUM_DPP__MAX];
1987
1988 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
1989 if (HostVMEnable == true) {
1990 vm_group_bytes[k] = 512;
1991 dpte_group_bytes[k] = 512;
1992 } else if (GPUVMEnable == true) {
1993 vm_group_bytes[k] = 2048;
1994 if (GPUVMMinPageSizeKBytes[k] >= 64 && IsVertical(myPipe[k].SourceRotation))
1995 dpte_group_bytes[k] = 512;
1996 else
1997 dpte_group_bytes[k] = 2048;
1998 } else {
1999 vm_group_bytes[k] = 0;
2000 dpte_group_bytes[k] = 0;
2001 }
2002
2003 if (myPipe[k].SourcePixelFormat == dm_420_8 || myPipe[k].SourcePixelFormat == dm_420_10 ||
2004 myPipe[k].SourcePixelFormat == dm_420_12 ||
2005 myPipe[k].SourcePixelFormat == dm_rgbe_alpha) {
2006 if ((myPipe[k].SourcePixelFormat == dm_420_10 || myPipe[k].SourcePixelFormat == dm_420_12) &&
2007 !IsVertical(myPipe[k].SourceRotation)) {
2008 PTEBufferSizeInRequestsForLuma[k] =
2009 (PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma) / 2;
2010 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsForLuma[k];
2011 } else {
2012 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma;
2013 PTEBufferSizeInRequestsForChroma[k] = PTEBufferSizeInRequestsChroma;
2014 }
2015
2016 PDEAndMetaPTEBytesFrameC = dml32_CalculateVMAndRowBytes(
2017 myPipe[k].ViewportStationary,
2018 myPipe[k].DCCEnable,
2019 myPipe[k].DPPPerSurface,
2020 myPipe[k].BlockHeight256BytesC,
2021 myPipe[k].BlockWidth256BytesC,
2022 myPipe[k].SourcePixelFormat,
2023 myPipe[k].SurfaceTiling,
2024 myPipe[k].BytePerPixelC,
2025 myPipe[k].SourceRotation,
2026 SwathWidthC[k],
2027 myPipe[k].ViewportHeightChroma,
2028 myPipe[k].ViewportXStartC,
2029 myPipe[k].ViewportYStartC,
2030 GPUVMEnable,
2031 HostVMEnable,
2032 HostVMMaxNonCachedPageTableLevels,
2033 GPUVMMaxPageTableLevels,
2034 GPUVMMinPageSizeKBytes[k],
2035 HostVMMinPageSize,
2036 PTEBufferSizeInRequestsForChroma[k],
2037 myPipe[k].PitchC,
2038 myPipe[k].DCCMetaPitchC,
2039 myPipe[k].BlockWidthC,
2040 myPipe[k].BlockHeightC,
2041
2042 /* Output */
2043 &MetaRowByteC[k],
2044 &PixelPTEBytesPerRowC[k],
2045 &dpte_row_width_chroma_ub[k],
2046 &dpte_row_height_chroma[k],
2047 &dpte_row_height_linear_chroma[k],
2048 &PixelPTEBytesPerRowC_one_row_per_frame[k],
2049 &dpte_row_width_chroma_ub_one_row_per_frame[k],
2050 &dpte_row_height_chroma_one_row_per_frame[k],
2051 &meta_req_width_chroma[k],
2052 &meta_req_height_chroma[k],
2053 &meta_row_width_chroma[k],
2054 &meta_row_height_chroma[k],
2055 &PixelPTEReqWidthC[k],
2056 &PixelPTEReqHeightC[k],
2057 &PTERequestSizeC[k],
2058 &dpde0_bytes_per_frame_ub_c[k],
2059 &meta_pte_bytes_per_frame_ub_c[k]);
2060
2061 PrefetchSourceLinesC[k] = dml32_CalculatePrefetchSourceLines(
2062 myPipe[k].VRatioChroma,
2063 myPipe[k].VTapsChroma,
2064 myPipe[k].InterlaceEnable,
2065 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2066 myPipe[k].SwathHeightC,
2067 myPipe[k].SourceRotation,
2068 myPipe[k].ViewportStationary,
2069 SwathWidthC[k],
2070 myPipe[k].ViewportHeightChroma,
2071 myPipe[k].ViewportXStartC,
2072 myPipe[k].ViewportYStartC,
2073
2074 /* Output */
2075 &VInitPreFillC[k],
2076 &MaxNumSwathC[k]);
2077 } else {
2078 PTEBufferSizeInRequestsForLuma[k] = PTEBufferSizeInRequestsLuma + PTEBufferSizeInRequestsChroma;
2079 PTEBufferSizeInRequestsForChroma[k] = 0;
2080 PixelPTEBytesPerRowC[k] = 0;
2081 PDEAndMetaPTEBytesFrameC = 0;
2082 MetaRowByteC[k] = 0;
2083 MaxNumSwathC[k] = 0;
2084 PrefetchSourceLinesC[k] = 0;
2085 dpte_row_height_chroma_one_row_per_frame[k] = 0;
2086 dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2087 PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2088 }
2089
2090 PDEAndMetaPTEBytesFrameY = dml32_CalculateVMAndRowBytes(
2091 myPipe[k].ViewportStationary,
2092 myPipe[k].DCCEnable,
2093 myPipe[k].DPPPerSurface,
2094 myPipe[k].BlockHeight256BytesY,
2095 myPipe[k].BlockWidth256BytesY,
2096 myPipe[k].SourcePixelFormat,
2097 myPipe[k].SurfaceTiling,
2098 myPipe[k].BytePerPixelY,
2099 myPipe[k].SourceRotation,
2100 SwathWidthY[k],
2101 myPipe[k].ViewportHeight,
2102 myPipe[k].ViewportXStart,
2103 myPipe[k].ViewportYStart,
2104 GPUVMEnable,
2105 HostVMEnable,
2106 HostVMMaxNonCachedPageTableLevels,
2107 GPUVMMaxPageTableLevels,
2108 GPUVMMinPageSizeKBytes[k],
2109 HostVMMinPageSize,
2110 PTEBufferSizeInRequestsForLuma[k],
2111 myPipe[k].PitchY,
2112 myPipe[k].DCCMetaPitchY,
2113 myPipe[k].BlockWidthY,
2114 myPipe[k].BlockHeightY,
2115
2116 /* Output */
2117 &MetaRowByteY[k],
2118 &PixelPTEBytesPerRowY[k],
2119 &dpte_row_width_luma_ub[k],
2120 &dpte_row_height_luma[k],
2121 &dpte_row_height_linear_luma[k],
2122 &PixelPTEBytesPerRowY_one_row_per_frame[k],
2123 &dpte_row_width_luma_ub_one_row_per_frame[k],
2124 &dpte_row_height_luma_one_row_per_frame[k],
2125 &meta_req_width[k],
2126 &meta_req_height[k],
2127 &meta_row_width[k],
2128 &meta_row_height[k],
2129 &PixelPTEReqWidthY[k],
2130 &PixelPTEReqHeightY[k],
2131 &PTERequestSizeY[k],
2132 &dpde0_bytes_per_frame_ub_l[k],
2133 &meta_pte_bytes_per_frame_ub_l[k]);
2134
2135 PrefetchSourceLinesY[k] = dml32_CalculatePrefetchSourceLines(
2136 myPipe[k].VRatio,
2137 myPipe[k].VTaps,
2138 myPipe[k].InterlaceEnable,
2139 myPipe[k].ProgressiveToInterlaceUnitInOPP,
2140 myPipe[k].SwathHeightY,
2141 myPipe[k].SourceRotation,
2142 myPipe[k].ViewportStationary,
2143 SwathWidthY[k],
2144 myPipe[k].ViewportHeight,
2145 myPipe[k].ViewportXStart,
2146 myPipe[k].ViewportYStart,
2147
2148 /* Output */
2149 &VInitPreFillY[k],
2150 &MaxNumSwathY[k]);
2151
2152 PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2153 MetaRowByte[k] = MetaRowByteY[k] + MetaRowByteC[k];
2154
2155 if (PixelPTEBytesPerRowY[k] <= 64 * PTEBufferSizeInRequestsForLuma[k] &&
2156 PixelPTEBytesPerRowC[k] <= 64 * PTEBufferSizeInRequestsForChroma[k]) {
2157 PTEBufferSizeNotExceeded[k] = true;
2158 } else {
2159 PTEBufferSizeNotExceeded[k] = false;
2160 }
2161
2162 one_row_per_frame_fits_in_buffer[k] = (PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 *
2163 PTEBufferSizeInRequestsForLuma[k] &&
2164 PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * PTEBufferSizeInRequestsForChroma[k]);
2165 }
2166
2167 dml32_CalculateMALLUseForStaticScreen(
2168 NumberOfActiveSurfaces,
2169 MALLAllocatedForDCN,
2170 UseMALLForStaticScreen, // mode
2171 SurfaceSizeInMALL,
2172 one_row_per_frame_fits_in_buffer,
2173 /* Output */
2174 UsesMALLForStaticScreen); // boolen
2175
2176 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2177 PTE_BUFFER_MODE[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2178 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2179 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2180 (GPUVMMinPageSizeKBytes[k] > 64);
2181 BIGK_FRAGMENT_SIZE[k] = dml_log2(GPUVMMinPageSizeKBytes[k] * 1024) - 12;
2182 }
2183
2184 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2185 #ifdef __DML_VBA_DEBUG__
2186 dml_print("DML::%s: k=%d, SurfaceSizeInMALL = %d\n", __func__, k, SurfaceSizeInMALL[k]);
2187 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
2188 #endif
2189 use_one_row_for_frame[k] = myPipe[k].FORCE_ONE_ROW_FOR_FRAME || UsesMALLForStaticScreen[k] ||
2190 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport) ||
2191 (UseMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ||
2192 (GPUVMMinPageSizeKBytes[k] > 64 && IsVertical(myPipe[k].SourceRotation));
2193
2194 use_one_row_for_frame_flip[k] = use_one_row_for_frame[k] &&
2195 !(UseMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame);
2196
2197 if (use_one_row_for_frame[k]) {
2198 dpte_row_height_luma[k] = dpte_row_height_luma_one_row_per_frame[k];
2199 dpte_row_width_luma_ub[k] = dpte_row_width_luma_ub_one_row_per_frame[k];
2200 PixelPTEBytesPerRowY[k] = PixelPTEBytesPerRowY_one_row_per_frame[k];
2201 dpte_row_height_chroma[k] = dpte_row_height_chroma_one_row_per_frame[k];
2202 dpte_row_width_chroma_ub[k] = dpte_row_width_chroma_ub_one_row_per_frame[k];
2203 PixelPTEBytesPerRowC[k] = PixelPTEBytesPerRowC_one_row_per_frame[k];
2204 PTEBufferSizeNotExceeded[k] = one_row_per_frame_fits_in_buffer[k];
2205 }
2206
2207 if (MetaRowByte[k] <= DCCMetaBufferSizeBytes)
2208 DCCMetaBufferSizeNotExceeded[k] = true;
2209 else
2210 DCCMetaBufferSizeNotExceeded[k] = false;
2211
2212 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY[k] + PixelPTEBytesPerRowC[k];
2213 if (use_one_row_for_frame[k])
2214 PixelPTEBytesPerRow[k] = PixelPTEBytesPerRow[k] / 2;
2215
2216 dml32_CalculateRowBandwidth(
2217 GPUVMEnable,
2218 myPipe[k].SourcePixelFormat,
2219 myPipe[k].VRatio,
2220 myPipe[k].VRatioChroma,
2221 myPipe[k].DCCEnable,
2222 myPipe[k].HTotal / myPipe[k].PixelClock,
2223 MetaRowByteY[k], MetaRowByteC[k],
2224 meta_row_height[k],
2225 meta_row_height_chroma[k],
2226 PixelPTEBytesPerRowY[k],
2227 PixelPTEBytesPerRowC[k],
2228 dpte_row_height_luma[k],
2229 dpte_row_height_chroma[k],
2230
2231 /* Output */
2232 &meta_row_bw[k],
2233 &dpte_row_bw[k]);
2234 #ifdef __DML_VBA_DEBUG__
2235 dml_print("DML::%s: k=%d, use_one_row_for_frame = %d\n", __func__, k, use_one_row_for_frame[k]);
2236 dml_print("DML::%s: k=%d, use_one_row_for_frame_flip = %d\n",
2237 __func__, k, use_one_row_for_frame_flip[k]);
2238 dml_print("DML::%s: k=%d, UseMALLForPStateChange = %d\n",
2239 __func__, k, UseMALLForPStateChange[k]);
2240 dml_print("DML::%s: k=%d, dpte_row_height_luma = %d\n", __func__, k, dpte_row_height_luma[k]);
2241 dml_print("DML::%s: k=%d, dpte_row_width_luma_ub = %d\n",
2242 __func__, k, dpte_row_width_luma_ub[k]);
2243 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowY = %d\n", __func__, k, PixelPTEBytesPerRowY[k]);
2244 dml_print("DML::%s: k=%d, dpte_row_height_chroma = %d\n",
2245 __func__, k, dpte_row_height_chroma[k]);
2246 dml_print("DML::%s: k=%d, dpte_row_width_chroma_ub = %d\n",
2247 __func__, k, dpte_row_width_chroma_ub[k]);
2248 dml_print("DML::%s: k=%d, PixelPTEBytesPerRowC = %d\n", __func__, k, PixelPTEBytesPerRowC[k]);
2249 dml_print("DML::%s: k=%d, PixelPTEBytesPerRow = %d\n", __func__, k, PixelPTEBytesPerRow[k]);
2250 dml_print("DML::%s: k=%d, PTEBufferSizeNotExceeded = %d\n",
2251 __func__, k, PTEBufferSizeNotExceeded[k]);
2252 dml_print("DML::%s: k=%d, PTE_BUFFER_MODE = %d\n", __func__, k, PTE_BUFFER_MODE[k]);
2253 dml_print("DML::%s: k=%d, BIGK_FRAGMENT_SIZE = %d\n", __func__, k, BIGK_FRAGMENT_SIZE[k]);
2254 #endif
2255 }
2256 } // CalculateVMRowAndSwath
2257
dml32_CalculateVMAndRowBytes(bool ViewportStationary,bool DCCEnable,unsigned int NumberOfDPPs,unsigned int BlockHeight256Bytes,unsigned int BlockWidth256Bytes,enum source_format_class SourcePixelFormat,unsigned int SurfaceTiling,unsigned int BytePerPixel,enum dm_rotation_angle SourceRotation,double SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,unsigned int GPUVMMaxPageTableLevels,unsigned int GPUVMMinPageSizeKBytes,unsigned int HostVMMinPageSize,unsigned int PTEBufferSizeInRequests,unsigned int Pitch,unsigned int DCCMetaPitch,unsigned int MacroTileWidth,unsigned int MacroTileHeight,unsigned int * MetaRowByte,unsigned int * PixelPTEBytesPerRow,unsigned int * dpte_row_width_ub,unsigned int * dpte_row_height,unsigned int * dpte_row_height_linear,unsigned int * PixelPTEBytesPerRow_one_row_per_frame,unsigned int * dpte_row_width_ub_one_row_per_frame,unsigned int * dpte_row_height_one_row_per_frame,unsigned int * MetaRequestWidth,unsigned int * MetaRequestHeight,unsigned int * meta_row_width,unsigned int * meta_row_height,unsigned int * PixelPTEReqWidth,unsigned int * PixelPTEReqHeight,unsigned int * PTERequestSize,unsigned int * DPDE0BytesFrame,unsigned int * MetaPTEBytesFrame)2258 unsigned int dml32_CalculateVMAndRowBytes(
2259 bool ViewportStationary,
2260 bool DCCEnable,
2261 unsigned int NumberOfDPPs,
2262 unsigned int BlockHeight256Bytes,
2263 unsigned int BlockWidth256Bytes,
2264 enum source_format_class SourcePixelFormat,
2265 unsigned int SurfaceTiling,
2266 unsigned int BytePerPixel,
2267 enum dm_rotation_angle SourceRotation,
2268 double SwathWidth,
2269 unsigned int ViewportHeight,
2270 unsigned int ViewportXStart,
2271 unsigned int ViewportYStart,
2272 bool GPUVMEnable,
2273 bool HostVMEnable,
2274 unsigned int HostVMMaxNonCachedPageTableLevels,
2275 unsigned int GPUVMMaxPageTableLevels,
2276 unsigned int GPUVMMinPageSizeKBytes,
2277 unsigned int HostVMMinPageSize,
2278 unsigned int PTEBufferSizeInRequests,
2279 unsigned int Pitch,
2280 unsigned int DCCMetaPitch,
2281 unsigned int MacroTileWidth,
2282 unsigned int MacroTileHeight,
2283
2284 /* Output */
2285 unsigned int *MetaRowByte,
2286 unsigned int *PixelPTEBytesPerRow,
2287 unsigned int *dpte_row_width_ub,
2288 unsigned int *dpte_row_height,
2289 unsigned int *dpte_row_height_linear,
2290 unsigned int *PixelPTEBytesPerRow_one_row_per_frame,
2291 unsigned int *dpte_row_width_ub_one_row_per_frame,
2292 unsigned int *dpte_row_height_one_row_per_frame,
2293 unsigned int *MetaRequestWidth,
2294 unsigned int *MetaRequestHeight,
2295 unsigned int *meta_row_width,
2296 unsigned int *meta_row_height,
2297 unsigned int *PixelPTEReqWidth,
2298 unsigned int *PixelPTEReqHeight,
2299 unsigned int *PTERequestSize,
2300 unsigned int *DPDE0BytesFrame,
2301 unsigned int *MetaPTEBytesFrame)
2302 {
2303 unsigned int MPDEBytesFrame;
2304 unsigned int DCCMetaSurfaceBytes;
2305 unsigned int ExtraDPDEBytesFrame;
2306 unsigned int PDEAndMetaPTEBytesFrame;
2307 unsigned int HostVMDynamicLevels = 0;
2308 unsigned int MacroTileSizeBytes;
2309 unsigned int vp_height_meta_ub;
2310 unsigned int vp_height_dpte_ub;
2311 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
2312
2313 if (GPUVMEnable == true && HostVMEnable == true) {
2314 if (HostVMMinPageSize < 2048)
2315 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
2316 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
2317 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
2318 else
2319 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
2320 }
2321
2322 *MetaRequestHeight = 8 * BlockHeight256Bytes;
2323 *MetaRequestWidth = 8 * BlockWidth256Bytes;
2324 if (SurfaceTiling == dm_sw_linear) {
2325 *meta_row_height = 32;
2326 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1, *MetaRequestWidth)
2327 - dml_floor(ViewportXStart, *MetaRequestWidth);
2328 } else if (!IsVertical(SourceRotation)) {
2329 *meta_row_height = *MetaRequestHeight;
2330 if (ViewportStationary && NumberOfDPPs == 1) {
2331 *meta_row_width = dml_floor(ViewportXStart + SwathWidth + *MetaRequestWidth - 1,
2332 *MetaRequestWidth) - dml_floor(ViewportXStart, *MetaRequestWidth);
2333 } else {
2334 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
2335 }
2336 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
2337 } else {
2338 *meta_row_height = *MetaRequestWidth;
2339 if (ViewportStationary && NumberOfDPPs == 1) {
2340 *meta_row_width = dml_floor(ViewportYStart + ViewportHeight + *MetaRequestHeight - 1,
2341 *MetaRequestHeight) - dml_floor(ViewportYStart, *MetaRequestHeight);
2342 } else {
2343 *meta_row_width = dml_ceil(SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
2344 }
2345 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
2346 }
2347
2348 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2349 vp_height_meta_ub = dml_floor(ViewportYStart + ViewportHeight + 64 * BlockHeight256Bytes - 1,
2350 64 * BlockHeight256Bytes) - dml_floor(ViewportYStart, 64 * BlockHeight256Bytes);
2351 } else if (!IsVertical(SourceRotation)) {
2352 vp_height_meta_ub = dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2353 } else {
2354 vp_height_meta_ub = dml_ceil(SwathWidth - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes;
2355 }
2356
2357 DCCMetaSurfaceBytes = DCCMetaPitch * vp_height_meta_ub * BytePerPixel / 256.0;
2358
2359 if (GPUVMEnable == true) {
2360 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) /
2361 (8 * 4.0 * 1024), 1) + 1) * 64;
2362 MPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 1);
2363 } else {
2364 *MetaPTEBytesFrame = 0;
2365 MPDEBytesFrame = 0;
2366 }
2367
2368 if (DCCEnable != true) {
2369 *MetaPTEBytesFrame = 0;
2370 MPDEBytesFrame = 0;
2371 *MetaRowByte = 0;
2372 }
2373
2374 MacroTileSizeBytes = MacroTileWidth * BytePerPixel * MacroTileHeight;
2375
2376 if (GPUVMEnable == true && GPUVMMaxPageTableLevels > 1) {
2377 if (ViewportStationary && (NumberOfDPPs == 1 || !IsVertical(SourceRotation))) {
2378 vp_height_dpte_ub = dml_floor(ViewportYStart + ViewportHeight +
2379 MacroTileHeight - 1, MacroTileHeight) -
2380 dml_floor(ViewportYStart, MacroTileHeight);
2381 } else if (!IsVertical(SourceRotation)) {
2382 vp_height_dpte_ub = dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight;
2383 } else {
2384 vp_height_dpte_ub = dml_ceil(SwathWidth - 1, MacroTileHeight) + MacroTileHeight;
2385 }
2386 *DPDE0BytesFrame = 64 * (dml_ceil((Pitch * vp_height_dpte_ub * BytePerPixel - MacroTileSizeBytes) /
2387 (8 * 2097152), 1) + 1);
2388 ExtraDPDEBytesFrame = 128 * (GPUVMMaxPageTableLevels - 2);
2389 } else {
2390 *DPDE0BytesFrame = 0;
2391 ExtraDPDEBytesFrame = 0;
2392 vp_height_dpte_ub = 0;
2393 }
2394
2395 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
2396
2397 #ifdef __DML_VBA_DEBUG__
2398 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
2399 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
2400 dml_print("DML::%s: SwModeLinear = %d\n", __func__, SurfaceTiling == dm_sw_linear);
2401 dml_print("DML::%s: BytePerPixel = %d\n", __func__, BytePerPixel);
2402 dml_print("DML::%s: GPUVMMaxPageTableLevels = %d\n", __func__, GPUVMMaxPageTableLevels);
2403 dml_print("DML::%s: BlockHeight256Bytes = %d\n", __func__, BlockHeight256Bytes);
2404 dml_print("DML::%s: BlockWidth256Bytes = %d\n", __func__, BlockWidth256Bytes);
2405 dml_print("DML::%s: MacroTileHeight = %d\n", __func__, MacroTileHeight);
2406 dml_print("DML::%s: MacroTileWidth = %d\n", __func__, MacroTileWidth);
2407 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
2408 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
2409 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
2410 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
2411 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
2412 dml_print("DML::%s: ViewportHeight = %d\n", __func__, ViewportHeight);
2413 dml_print("DML::%s: SwathWidth = %d\n", __func__, SwathWidth);
2414 dml_print("DML::%s: vp_height_dpte_ub = %d\n", __func__, vp_height_dpte_ub);
2415 #endif
2416
2417 if (HostVMEnable == true)
2418 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
2419
2420 if (SurfaceTiling == dm_sw_linear) {
2421 *PixelPTEReqHeight = 1;
2422 *PixelPTEReqWidth = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2423 PixelPTEReqWidth_linear = GPUVMMinPageSizeKBytes * 1024 * 8 / BytePerPixel;
2424 *PTERequestSize = 64;
2425 } else if (GPUVMMinPageSizeKBytes == 4) {
2426 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
2427 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
2428 *PTERequestSize = 128;
2429 } else {
2430 *PixelPTEReqHeight = MacroTileHeight;
2431 *PixelPTEReqWidth = 8 * 1024 * GPUVMMinPageSizeKBytes / (MacroTileHeight * BytePerPixel);
2432 *PTERequestSize = 64;
2433 }
2434 #ifdef __DML_VBA_DEBUG__
2435 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2436 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d (after HostVM factor)\n", __func__, PDEAndMetaPTEBytesFrame);
2437 dml_print("DML::%s: PixelPTEReqHeight = %d\n", __func__, *PixelPTEReqHeight);
2438 dml_print("DML::%s: PixelPTEReqWidth = %d\n", __func__, *PixelPTEReqWidth);
2439 dml_print("DML::%s: PixelPTEReqWidth_linear = %d\n", __func__, PixelPTEReqWidth_linear);
2440 dml_print("DML::%s: PTERequestSize = %d\n", __func__, *PTERequestSize);
2441 dml_print("DML::%s: Pitch = %d\n", __func__, Pitch);
2442 #endif
2443
2444 *dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
2445 *dpte_row_width_ub_one_row_per_frame = (dml_ceil(((double)Pitch * (double)*dpte_row_height_one_row_per_frame /
2446 (double) *PixelPTEReqHeight - 1) / (double) *PixelPTEReqWidth, 1) + 1) *
2447 (double) *PixelPTEReqWidth;
2448 *PixelPTEBytesPerRow_one_row_per_frame = *dpte_row_width_ub_one_row_per_frame / *PixelPTEReqWidth *
2449 *PTERequestSize;
2450
2451 if (SurfaceTiling == dm_sw_linear) {
2452 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2453 *PixelPTEReqWidth / Pitch), 1));
2454 #ifdef __DML_VBA_DEBUG__
2455 dml_print("DML::%s: dpte_row_height = %d (1)\n", __func__,
2456 PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch);
2457 dml_print("DML::%s: dpte_row_height = %f (2)\n", __func__,
2458 dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch));
2459 dml_print("DML::%s: dpte_row_height = %f (3)\n", __func__,
2460 dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
2461 dml_print("DML::%s: dpte_row_height = %d (4)\n", __func__,
2462 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2463 *PixelPTEReqWidth / Pitch), 1));
2464 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2465 #endif
2466 *dpte_row_width_ub = dml_ceil(((double) Pitch * (double) *dpte_row_height - 1),
2467 (double) *PixelPTEReqWidth) + *PixelPTEReqWidth;
2468 *PixelPTEBytesPerRow = *dpte_row_width_ub / (double)*PixelPTEReqWidth * (double)*PTERequestSize;
2469
2470 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
2471 *dpte_row_height_linear = 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests *
2472 PixelPTEReqWidth_linear / Pitch), 1);
2473 if (*dpte_row_height_linear > 128)
2474 *dpte_row_height_linear = 128;
2475
2476 } else if (!IsVertical(SourceRotation)) {
2477 *dpte_row_height = *PixelPTEReqHeight;
2478
2479 if (GPUVMMinPageSizeKBytes > 64) {
2480 *dpte_row_width_ub = (dml_ceil((Pitch * *dpte_row_height / *PixelPTEReqHeight - 1) /
2481 *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
2482 } else if (ViewportStationary && (NumberOfDPPs == 1)) {
2483 *dpte_row_width_ub = dml_floor(ViewportXStart + SwathWidth +
2484 *PixelPTEReqWidth - 1, *PixelPTEReqWidth) -
2485 dml_floor(ViewportXStart, *PixelPTEReqWidth);
2486 } else {
2487 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) *
2488 *PixelPTEReqWidth;
2489 }
2490
2491 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
2492 } else {
2493 *dpte_row_height = dml_min(*PixelPTEReqWidth, MacroTileWidth);
2494
2495 if (ViewportStationary && (NumberOfDPPs == 1)) {
2496 *dpte_row_width_ub = dml_floor(ViewportYStart + ViewportHeight + *PixelPTEReqHeight - 1,
2497 *PixelPTEReqHeight) - dml_floor(ViewportYStart, *PixelPTEReqHeight);
2498 } else {
2499 *dpte_row_width_ub = (dml_ceil((SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1)
2500 * *PixelPTEReqHeight;
2501 }
2502
2503 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
2504 }
2505
2506 if (GPUVMEnable != true)
2507 *PixelPTEBytesPerRow = 0;
2508 if (HostVMEnable == true)
2509 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
2510
2511 #ifdef __DML_VBA_DEBUG__
2512 dml_print("DML::%s: GPUVMMinPageSizeKBytes = %d\n", __func__, GPUVMMinPageSizeKBytes);
2513 dml_print("DML::%s: dpte_row_height = %d\n", __func__, *dpte_row_height);
2514 dml_print("DML::%s: dpte_row_height_linear = %d\n", __func__, *dpte_row_height_linear);
2515 dml_print("DML::%s: dpte_row_width_ub = %d\n", __func__, *dpte_row_width_ub);
2516 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, *PixelPTEBytesPerRow);
2517 dml_print("DML::%s: PTEBufferSizeInRequests = %d\n", __func__, PTEBufferSizeInRequests);
2518 dml_print("DML::%s: dpte_row_height_one_row_per_frame = %d\n", __func__, *dpte_row_height_one_row_per_frame);
2519 dml_print("DML::%s: dpte_row_width_ub_one_row_per_frame = %d\n",
2520 __func__, *dpte_row_width_ub_one_row_per_frame);
2521 dml_print("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %d\n",
2522 __func__, *PixelPTEBytesPerRow_one_row_per_frame);
2523 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n",
2524 *MetaPTEBytesFrame);
2525 #endif
2526
2527 return PDEAndMetaPTEBytesFrame;
2528 } // CalculateVMAndRowBytes
2529
dml32_CalculatePrefetchSourceLines(double VRatio,unsigned int VTaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,enum dm_rotation_angle SourceRotation,bool ViewportStationary,double SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,double * VInitPreFill,unsigned int * MaxNumSwath)2530 double dml32_CalculatePrefetchSourceLines(
2531 double VRatio,
2532 unsigned int VTaps,
2533 bool Interlace,
2534 bool ProgressiveToInterlaceUnitInOPP,
2535 unsigned int SwathHeight,
2536 enum dm_rotation_angle SourceRotation,
2537 bool ViewportStationary,
2538 double SwathWidth,
2539 unsigned int ViewportHeight,
2540 unsigned int ViewportXStart,
2541 unsigned int ViewportYStart,
2542
2543 /* Output */
2544 double *VInitPreFill,
2545 unsigned int *MaxNumSwath)
2546 {
2547
2548 unsigned int vp_start_rot;
2549 unsigned int sw0_tmp;
2550 unsigned int MaxPartialSwath;
2551 double numLines;
2552
2553 #ifdef __DML_VBA_DEBUG__
2554 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
2555 dml_print("DML::%s: VTaps = %d\n", __func__, VTaps);
2556 dml_print("DML::%s: ViewportXStart = %d\n", __func__, ViewportXStart);
2557 dml_print("DML::%s: ViewportYStart = %d\n", __func__, ViewportYStart);
2558 dml_print("DML::%s: ViewportStationary = %d\n", __func__, ViewportStationary);
2559 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
2560 #endif
2561 if (ProgressiveToInterlaceUnitInOPP)
2562 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1) / 2.0, 1);
2563 else
2564 *VInitPreFill = dml_floor((VRatio + (double) VTaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
2565
2566 if (ViewportStationary) {
2567 if (SourceRotation == dm_rotation_180 || SourceRotation == dm_rotation_180m) {
2568 vp_start_rot = SwathHeight -
2569 (((unsigned int) (ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
2570 } else if (SourceRotation == dm_rotation_270 || SourceRotation == dm_rotation_90m) {
2571 vp_start_rot = ViewportXStart;
2572 } else if (SourceRotation == dm_rotation_90 || SourceRotation == dm_rotation_270m) {
2573 vp_start_rot = SwathHeight -
2574 (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
2575 } else {
2576 vp_start_rot = ViewportYStart;
2577 }
2578 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
2579 if (sw0_tmp < *VInitPreFill)
2580 *MaxNumSwath = dml_ceil((*VInitPreFill - sw0_tmp) / SwathHeight, 1) + 1;
2581 else
2582 *MaxNumSwath = 1;
2583 MaxPartialSwath = dml_max(1, (unsigned int) (vp_start_rot + *VInitPreFill - 1) % SwathHeight);
2584 } else {
2585 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1;
2586 if (*VInitPreFill > 1)
2587 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill - 2) % SwathHeight);
2588 else
2589 MaxPartialSwath = dml_max(1, (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight);
2590 }
2591 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
2592
2593 #ifdef __DML_VBA_DEBUG__
2594 dml_print("DML::%s: vp_start_rot = %d\n", __func__, vp_start_rot);
2595 dml_print("DML::%s: VInitPreFill = %d\n", __func__, *VInitPreFill);
2596 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
2597 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
2598 dml_print("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
2599 #endif
2600 return numLines;
2601
2602 } // CalculatePrefetchSourceLines
2603
dml32_CalculateMALLUseForStaticScreen(unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCNFinal,enum dm_use_mall_for_static_screen_mode * UseMALLForStaticScreen,unsigned int SurfaceSizeInMALL[],bool one_row_per_frame_fits_in_buffer[],bool UsesMALLForStaticScreen[])2604 void dml32_CalculateMALLUseForStaticScreen(
2605 unsigned int NumberOfActiveSurfaces,
2606 unsigned int MALLAllocatedForDCNFinal,
2607 enum dm_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
2608 unsigned int SurfaceSizeInMALL[],
2609 bool one_row_per_frame_fits_in_buffer[],
2610
2611 /* output */
2612 bool UsesMALLForStaticScreen[])
2613 {
2614 unsigned int k;
2615 unsigned int SurfaceToAddToMALL;
2616 bool CanAddAnotherSurfaceToMALL;
2617 unsigned int TotalSurfaceSizeInMALL;
2618
2619 TotalSurfaceSizeInMALL = 0;
2620 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2621 UsesMALLForStaticScreen[k] = (UseMALLForStaticScreen[k] == dm_use_mall_static_screen_enable);
2622 if (UsesMALLForStaticScreen[k])
2623 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
2624 #ifdef __DML_VBA_DEBUG__
2625 dml_print("DML::%s: k=%d, UsesMALLForStaticScreen = %d\n", __func__, k, UsesMALLForStaticScreen[k]);
2626 dml_print("DML::%s: k=%d, TotalSurfaceSizeInMALL = %d\n", __func__, k, TotalSurfaceSizeInMALL);
2627 #endif
2628 }
2629
2630 SurfaceToAddToMALL = 0;
2631 CanAddAnotherSurfaceToMALL = true;
2632 while (CanAddAnotherSurfaceToMALL) {
2633 CanAddAnotherSurfaceToMALL = false;
2634 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2635 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCNFinal * 1024 * 1024 &&
2636 !UsesMALLForStaticScreen[k] &&
2637 UseMALLForStaticScreen[k] != dm_use_mall_static_screen_disable &&
2638 one_row_per_frame_fits_in_buffer[k] &&
2639 (!CanAddAnotherSurfaceToMALL ||
2640 SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2641 CanAddAnotherSurfaceToMALL = true;
2642 SurfaceToAddToMALL = k;
2643 #ifdef __DML_VBA_DEBUG__
2644 dml_print("DML::%s: k=%d, UseMALLForStaticScreen = %d (dis, en, optimize)\n",
2645 __func__, k, UseMALLForStaticScreen[k]);
2646 #endif
2647 }
2648 }
2649 if (CanAddAnotherSurfaceToMALL) {
2650 UsesMALLForStaticScreen[SurfaceToAddToMALL] = true;
2651 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2652
2653 #ifdef __DML_VBA_DEBUG__
2654 dml_print("DML::%s: SurfaceToAddToMALL = %d\n", __func__, SurfaceToAddToMALL);
2655 dml_print("DML::%s: TotalSurfaceSizeInMALL = %d\n", __func__, TotalSurfaceSizeInMALL);
2656 #endif
2657
2658 }
2659 }
2660 }
2661
dml32_CalculateRowBandwidth(bool GPUVMEnable,enum source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int MetaRowByteLuma,unsigned int MetaRowByteChroma,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,double * meta_row_bw,double * dpte_row_bw)2662 void dml32_CalculateRowBandwidth(
2663 bool GPUVMEnable,
2664 enum source_format_class SourcePixelFormat,
2665 double VRatio,
2666 double VRatioChroma,
2667 bool DCCEnable,
2668 double LineTime,
2669 unsigned int MetaRowByteLuma,
2670 unsigned int MetaRowByteChroma,
2671 unsigned int meta_row_height_luma,
2672 unsigned int meta_row_height_chroma,
2673 unsigned int PixelPTEBytesPerRowLuma,
2674 unsigned int PixelPTEBytesPerRowChroma,
2675 unsigned int dpte_row_height_luma,
2676 unsigned int dpte_row_height_chroma,
2677 /* Output */
2678 double *meta_row_bw,
2679 double *dpte_row_bw)
2680 {
2681 if (DCCEnable != true) {
2682 *meta_row_bw = 0;
2683 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2684 SourcePixelFormat == dm_rgbe_alpha) {
2685 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma *
2686 MetaRowByteChroma / (meta_row_height_chroma * LineTime);
2687 } else {
2688 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
2689 }
2690
2691 if (GPUVMEnable != true) {
2692 *dpte_row_bw = 0;
2693 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 ||
2694 SourcePixelFormat == dm_rgbe_alpha) {
2695 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) +
2696 VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
2697 } else {
2698 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
2699 }
2700 }
2701
/*
 * dml32_CalculateUrgentLatency - worst-case urgent latency.
 *
 * Takes the maximum of the three supplied latencies. When
 * DoUrgentLatencyAdjustment is set, adds
 * UrgentLatencyAdjustmentFabricClockComponent *
 * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 *
 * Return: the resulting latency.
 */
double dml32_CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	const double worst = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData,
			UrgentLatencyVMDataOnly);

	if (!DoUrgentLatencyAdjustment)
		return worst;

	/* Fabric clock correction relative to the reference clock. */
	return worst + UrgentLatencyAdjustmentFabricClockComponent *
			(UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
}
2720
dml32_CalculateUrgentBurstFactor(enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,unsigned int swath_width_luma_ub,unsigned int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double LineTime,double UrgentLatency,double CursorBufferSize,unsigned int CursorWidth,unsigned int CursorBPP,double VRatio,double VRatioC,double BytePerPixelInDETY,double BytePerPixelInDETC,unsigned int DETBufferSizeY,unsigned int DETBufferSizeC,double * UrgentBurstFactorCursor,double * UrgentBurstFactorLuma,double * UrgentBurstFactorChroma,bool * NotEnoughUrgentLatencyHiding)2721 void dml32_CalculateUrgentBurstFactor(
2722 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
2723 unsigned int swath_width_luma_ub,
2724 unsigned int swath_width_chroma_ub,
2725 unsigned int SwathHeightY,
2726 unsigned int SwathHeightC,
2727 double LineTime,
2728 double UrgentLatency,
2729 double CursorBufferSize,
2730 unsigned int CursorWidth,
2731 unsigned int CursorBPP,
2732 double VRatio,
2733 double VRatioC,
2734 double BytePerPixelInDETY,
2735 double BytePerPixelInDETC,
2736 unsigned int DETBufferSizeY,
2737 unsigned int DETBufferSizeC,
2738 /* Output */
2739 double *UrgentBurstFactorCursor,
2740 double *UrgentBurstFactorLuma,
2741 double *UrgentBurstFactorChroma,
2742 bool *NotEnoughUrgentLatencyHiding)
2743 {
2744 double LinesInDETLuma;
2745 double LinesInDETChroma;
2746 unsigned int LinesInCursorBuffer;
2747 double CursorBufferSizeInTime;
2748 double DETBufferSizeInTimeLuma;
2749 double DETBufferSizeInTimeChroma;
2750
2751 *NotEnoughUrgentLatencyHiding = 0;
2752
2753 if (CursorWidth > 0) {
2754 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 /
2755 (CursorWidth * CursorBPP / 8.0)), 1.0);
2756 if (VRatio > 0) {
2757 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
2758 if (CursorBufferSizeInTime - UrgentLatency <= 0) {
2759 *NotEnoughUrgentLatencyHiding = 1;
2760 *UrgentBurstFactorCursor = 0;
2761 } else {
2762 *UrgentBurstFactorCursor = CursorBufferSizeInTime /
2763 (CursorBufferSizeInTime - UrgentLatency);
2764 }
2765 } else {
2766 *UrgentBurstFactorCursor = 1;
2767 }
2768 }
2769
2770 LinesInDETLuma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ? 1024*1024 :
2771 DETBufferSizeY) / BytePerPixelInDETY / swath_width_luma_ub;
2772
2773 if (VRatio > 0) {
2774 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
2775 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) {
2776 *NotEnoughUrgentLatencyHiding = 1;
2777 *UrgentBurstFactorLuma = 0;
2778 } else {
2779 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency);
2780 }
2781 } else {
2782 *UrgentBurstFactorLuma = 1;
2783 }
2784
2785 if (BytePerPixelInDETC > 0) {
2786 LinesInDETChroma = (UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe ?
2787 1024 * 1024 : DETBufferSizeC) / BytePerPixelInDETC
2788 / swath_width_chroma_ub;
2789
2790 if (VRatio > 0) {
2791 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio;
2792 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) {
2793 *NotEnoughUrgentLatencyHiding = 1;
2794 *UrgentBurstFactorChroma = 0;
2795 } else {
2796 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma
2797 / (DETBufferSizeInTimeChroma - UrgentLatency);
2798 }
2799 } else {
2800 *UrgentBurstFactorChroma = 1;
2801 }
2802 }
2803 } // CalculateUrgentBurstFactor
2804
dml32_CalculateDCFCLKDeepSleep(unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],double VRatio[],double VRatioChroma[],double SwathWidthY[],double SwathWidthC[],unsigned int DPPPerSurface[],double HRatio[],double HRatioChroma[],double PixelClock[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int ReturnBusWidth,double * DCFClkDeepSleep)2805 void dml32_CalculateDCFCLKDeepSleep(
2806 unsigned int NumberOfActiveSurfaces,
2807 unsigned int BytePerPixelY[],
2808 unsigned int BytePerPixelC[],
2809 double VRatio[],
2810 double VRatioChroma[],
2811 double SwathWidthY[],
2812 double SwathWidthC[],
2813 unsigned int DPPPerSurface[],
2814 double HRatio[],
2815 double HRatioChroma[],
2816 double PixelClock[],
2817 double PSCL_THROUGHPUT[],
2818 double PSCL_THROUGHPUT_CHROMA[],
2819 double Dppclk[],
2820 double ReadBandwidthLuma[],
2821 double ReadBandwidthChroma[],
2822 unsigned int ReturnBusWidth,
2823
2824 /* Output */
2825 double *DCFClkDeepSleep)
2826 {
2827 unsigned int k;
2828 double DisplayPipeLineDeliveryTimeLuma;
2829 double DisplayPipeLineDeliveryTimeChroma;
2830 double DCFClkDeepSleepPerSurface[DC__NUM_DPP__MAX];
2831 double ReadBandwidth = 0.0;
2832
2833 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
2834
2835 if (VRatio[k] <= 1) {
2836 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / HRatio[k]
2837 / PixelClock[k];
2838 } else {
2839 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
2840 }
2841 if (BytePerPixelC[k] == 0) {
2842 DisplayPipeLineDeliveryTimeChroma = 0;
2843 } else {
2844 if (VRatioChroma[k] <= 1) {
2845 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] *
2846 DPPPerSurface[k] / HRatioChroma[k] / PixelClock[k];
2847 } else {
2848 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k]
2849 / Dppclk[k];
2850 }
2851 }
2852
2853 if (BytePerPixelC[k] > 0) {
2854 DCFClkDeepSleepPerSurface[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] *
2855 BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
2856 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] /
2857 32.0 / DisplayPipeLineDeliveryTimeChroma);
2858 } else {
2859 DCFClkDeepSleepPerSurface[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] /
2860 64.0 / DisplayPipeLineDeliveryTimeLuma;
2861 }
2862 DCFClkDeepSleepPerSurface[k] = dml_max(DCFClkDeepSleepPerSurface[k], PixelClock[k] / 16);
2863
2864 #ifdef __DML_VBA_DEBUG__
2865 dml_print("DML::%s: k=%d, PixelClock = %f\n", __func__, k, PixelClock[k]);
2866 dml_print("DML::%s: k=%d, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
2867 #endif
2868 }
2869
2870 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2871 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
2872
2873 *DCFClkDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / (double) ReturnBusWidth);
2874
2875 #ifdef __DML_VBA_DEBUG__
2876 dml_print("DML::%s: __DML_MIN_DCFCLK_FACTOR__ = %f\n", __func__, __DML_MIN_DCFCLK_FACTOR__);
2877 dml_print("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
2878 dml_print("DML::%s: ReturnBusWidth = %d\n", __func__, ReturnBusWidth);
2879 dml_print("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
2880 #endif
2881
2882 for (k = 0; k < NumberOfActiveSurfaces; ++k)
2883 *DCFClkDeepSleep = dml_max(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
2884 #ifdef __DML_VBA_DEBUG__
2885 dml_print("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
2886 #endif
2887 } // CalculateDCFCLKDeepSleep
2888
/*
 * dml32_CalculateWriteBackDelay - compute the writeback delay from the scaler
 * configuration and destination geometry.
 *
 * Returns 0 when the computed count of output lines (see below) is negative,
 * otherwise lines * line length + (HTotal - WritebackDestinationWidth) + 80.
 *
 * WritebackPixelFormat is accepted but not used by the calculation.
 */
double dml32_CalculateWriteBackDelay(
		enum source_format_class WritebackPixelFormat,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackVTaps,
		unsigned int WritebackDestinationWidth,
		unsigned int WritebackDestinationHeight,
		unsigned int WritebackSourceHeight,
		unsigned int HTotal)
{
	const double VInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
	/* A line is at least the destination width, or ceil(width / 6) per tap. */
	const double LineLength = dml_max((double) WritebackDestinationWidth,
			dml_ceil((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
	const double LastOutputLines = WritebackDestinationHeight - 1 -
			dml_ceil(((double)WritebackSourceHeight -
					(double) VInit) / (double)WritebackVRatio, 1.0);

	if (LastOutputLines < 0)
		return 0;

	return LastOutputLines * LineLength +
			(HTotal - WritebackDestinationWidth) + 80;
}
2918
dml32_UseMinimumDCFCLK(enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool DRRDisplay[],bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,unsigned int MaxInterDCNTileRepeaters,unsigned int MaxPrefetchMode,double DRAMClockChangeLatencyFinal,double FCLKChangeLatency,double SREnterPlusExitTime,unsigned int ReturnBusWidth,unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,unsigned int PixelChunkSizeInKByte,unsigned int MetaChunkSize,bool GPUVMEnable,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int NumberOfActiveSurfaces,double HostVMMinPageSize,unsigned int HostVMMaxNonCachedPageTableLevels,bool DynamicMetadataVMEnabled,bool ImmediateFlipRequirement,bool ProgressiveToInterlaceUnitInOPP,double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,unsigned int VTotal[],unsigned int VActive[],unsigned int DynamicMetadataTransmittedBytes[],unsigned int DynamicMetadataLinesBeforeActiveRequired[],bool Interlace[],double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],double RequiredDISPCLK[][2],double UrgLatency[],unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],double ProjectedDCFClkDeepSleep[][2],double MaximumVStartup[][2][DC__NUM_DPP__MAX],unsigned int TotalNumberOfActiveDPP[][2],unsigned int TotalNumberOfDCCActiveDPP[][2],unsigned int dpte_group_bytes[],double PrefetchLinesY[][2][DC__NUM_DPP__MAX],double PrefetchLinesC[][2][DC__NUM_DPP__MAX],unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],unsigned int HTotal[],double PixelClock[],double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],double MetaRowBytes[][2][DC__NUM_DPP__MAX],bool DynamicMetadataEnable[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double DCFCLKPerState[],double 
DCFCLKState[][2])2919 void dml32_UseMinimumDCFCLK(
2920 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
2921 bool DRRDisplay[],
2922 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
2923 unsigned int MaxInterDCNTileRepeaters,
2924 unsigned int MaxPrefetchMode,
2925 double DRAMClockChangeLatencyFinal,
2926 double FCLKChangeLatency,
2927 double SREnterPlusExitTime,
2928 unsigned int ReturnBusWidth,
2929 unsigned int RoundTripPingLatencyCycles,
2930 unsigned int ReorderingBytes,
2931 unsigned int PixelChunkSizeInKByte,
2932 unsigned int MetaChunkSize,
2933 bool GPUVMEnable,
2934 unsigned int GPUVMMaxPageTableLevels,
2935 bool HostVMEnable,
2936 unsigned int NumberOfActiveSurfaces,
2937 double HostVMMinPageSize,
2938 unsigned int HostVMMaxNonCachedPageTableLevels,
2939 bool DynamicMetadataVMEnabled,
2940 bool ImmediateFlipRequirement,
2941 bool ProgressiveToInterlaceUnitInOPP,
2942 double MaxAveragePercentOfIdealSDPPortBWDisplayCanUseInNormalSystemOperation,
2943 double PercentOfIdealSDPPortBWReceivedAfterUrgLatency,
2944 unsigned int VTotal[],
2945 unsigned int VActive[],
2946 unsigned int DynamicMetadataTransmittedBytes[],
2947 unsigned int DynamicMetadataLinesBeforeActiveRequired[],
2948 bool Interlace[],
2949 double RequiredDPPCLKPerSurface[][2][DC__NUM_DPP__MAX],
2950 double RequiredDISPCLK[][2],
2951 double UrgLatency[],
2952 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
2953 double ProjectedDCFClkDeepSleep[][2],
2954 double MaximumVStartup[][2][DC__NUM_DPP__MAX],
2955 unsigned int TotalNumberOfActiveDPP[][2],
2956 unsigned int TotalNumberOfDCCActiveDPP[][2],
2957 unsigned int dpte_group_bytes[],
2958 double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
2959 double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
2960 unsigned int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
2961 unsigned int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
2962 unsigned int BytePerPixelY[],
2963 unsigned int BytePerPixelC[],
2964 unsigned int HTotal[],
2965 double PixelClock[],
2966 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
2967 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
2968 double MetaRowBytes[][2][DC__NUM_DPP__MAX],
2969 bool DynamicMetadataEnable[],
2970 double ReadBandwidthLuma[],
2971 double ReadBandwidthChroma[],
2972 double DCFCLKPerState[],
2973 /* Output */
2974 double DCFCLKState[][2])
2975 {
2976 unsigned int i, j, k;
2977 unsigned int dummy1;
2978 double dummy2, dummy3;
2979 double NormalEfficiency;
2980 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
2981
2982 NormalEfficiency = PercentOfIdealSDPPortBWReceivedAfterUrgLatency / 100.0;
2983 for (i = 0; i < DC__VOLTAGE_STATES; ++i) {
2984 for (j = 0; j <= 1; ++j) {
2985 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
2986 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
2987 double DCFCLKRequiredForPeakBandwidthPerSurface[DC__NUM_DPP__MAX];
2988 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
2989 double MinimumTWait = 0.0;
2990 double DPTEBandwidth;
2991 double DCFCLKRequiredForAverageBandwidth;
2992 unsigned int ExtraLatencyBytes;
2993 double ExtraLatencyCycles;
2994 double DCFCLKRequiredForPeakBandwidth;
2995 unsigned int NoOfDPPState[DC__NUM_DPP__MAX];
2996 double MinimumTvmPlus2Tr0;
2997
2998 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
2999 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3000 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
3001 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k]
3002 / (15.75 * HTotal[k] / PixelClock[k]);
3003 }
3004
3005 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k)
3006 NoOfDPPState[k] = NoOfDPP[i][j][k];
3007
3008 DPTEBandwidth = TotalMaxPrefetchFlipDPTERowBandwidth[i][j];
3009 DCFCLKRequiredForAverageBandwidth = dml_max(ProjectedDCFClkDeepSleep[i][j], DPTEBandwidth / NormalEfficiency / ReturnBusWidth);
3010
3011 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(ReorderingBytes,
3012 TotalNumberOfActiveDPP[i][j], PixelChunkSizeInKByte,
3013 TotalNumberOfDCCActiveDPP[i][j], MetaChunkSize, GPUVMEnable, HostVMEnable,
3014 NumberOfActiveSurfaces, NoOfDPPState, dpte_group_bytes, 1, HostVMMinPageSize,
3015 HostVMMaxNonCachedPageTableLevels);
3016 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__
3017 + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
3018 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3019 double DCFCLKCyclesRequiredInPrefetch;
3020 double PrefetchTime;
3021
3022 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k]
3023 * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
3024 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k]
3025 * BytePerPixelC[k]) / NormalEfficiency
3026 / ReturnBusWidth;
3027 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
3028 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency
3029 / NormalEfficiency / ReturnBusWidth
3030 * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
3031 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency
3032 / ReturnBusWidth
3033 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth
3034 + PixelDCFCLKCyclesRequiredInPrefetch[k];
3035 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k])
3036 * HTotal[k] / PixelClock[k];
3037 DynamicMetadataVMExtraLatency[k] = (GPUVMEnable == true &&
3038 DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
3039 UrgLatency[i] * GPUVMMaxPageTableLevels *
3040 (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
3041
3042 MinimumTWait = dml32_CalculateTWait(MaxPrefetchMode,
3043 UseMALLForPStateChange[k],
3044 SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3045 DRRDisplay[k],
3046 DRAMClockChangeLatencyFinal,
3047 FCLKChangeLatency,
3048 UrgLatency[i],
3049 SREnterPlusExitTime);
3050
3051 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] -
3052 MinimumTWait - UrgLatency[i] *
3053 ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels :
3054 GPUVMMaxPageTableLevels - 2) * (HostVMEnable == true ?
3055 HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) -
3056 DynamicMetadataVMExtraLatency[k];
3057
3058 if (PrefetchTime > 0) {
3059 double ExpectedVRatioPrefetch;
3060
3061 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] / (PrefetchTime *
3062 PixelDCFCLKCyclesRequiredInPrefetch[k] /
3063 DCFCLKCyclesRequiredInPrefetch);
3064 DCFCLKRequiredForPeakBandwidthPerSurface[k] = NoOfDPPState[k] *
3065 PixelDCFCLKCyclesRequiredInPrefetch[k] /
3066 PrefetchPixelLinesTime[k] *
3067 dml_max(1.0, ExpectedVRatioPrefetch) *
3068 dml_max(1.0, ExpectedVRatioPrefetch / 4);
3069 if (HostVMEnable == true || ImmediateFlipRequirement == true) {
3070 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3071 DCFCLKRequiredForPeakBandwidthPerSurface[k] +
3072 NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency /
3073 NormalEfficiency / ReturnBusWidth;
3074 }
3075 } else {
3076 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3077 }
3078 if (DynamicMetadataEnable[k] == true) {
3079 double TSetupPipe;
3080 double TdmbfPipe;
3081 double TdmsksPipe;
3082 double TdmecPipe;
3083 double AllowedTimeForUrgentExtraLatency;
3084
3085 dml32_CalculateVUpdateAndDynamicMetadataParameters(
3086 MaxInterDCNTileRepeaters,
3087 RequiredDPPCLKPerSurface[i][j][k],
3088 RequiredDISPCLK[i][j],
3089 ProjectedDCFClkDeepSleep[i][j],
3090 PixelClock[k],
3091 HTotal[k],
3092 VTotal[k] - VActive[k],
3093 DynamicMetadataTransmittedBytes[k],
3094 DynamicMetadataLinesBeforeActiveRequired[k],
3095 Interlace[k],
3096 ProgressiveToInterlaceUnitInOPP,
3097
3098 /* output */
3099 &TSetupPipe,
3100 &TdmbfPipe,
3101 &TdmecPipe,
3102 &TdmsksPipe,
3103 &dummy1,
3104 &dummy2,
3105 &dummy3);
3106 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] /
3107 PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe -
3108 TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
3109 if (AllowedTimeForUrgentExtraLatency > 0)
3110 DCFCLKRequiredForPeakBandwidthPerSurface[k] =
3111 dml_max(DCFCLKRequiredForPeakBandwidthPerSurface[k],
3112 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
3113 else
3114 DCFCLKRequiredForPeakBandwidthPerSurface[k] = DCFCLKPerState[i];
3115 }
3116 }
3117 DCFCLKRequiredForPeakBandwidth = 0;
3118 for (k = 0; k <= NumberOfActiveSurfaces - 1; ++k) {
3119 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth +
3120 DCFCLKRequiredForPeakBandwidthPerSurface[k];
3121 }
3122 MinimumTvmPlus2Tr0 = UrgLatency[i] * (GPUVMEnable == true ?
3123 (HostVMEnable == true ? (GPUVMMaxPageTableLevels + 2) *
3124 (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 0);
3125 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
3126 double MaximumTvmPlus2Tr0PlusTsw;
3127
3128 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] /
3129 PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
3130 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
3131 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
3132 } else {
3133 DCFCLKRequiredForPeakBandwidth = dml_max3(DCFCLKRequiredForPeakBandwidth,
3134 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw -
3135 MinimumTvmPlus2Tr0 -
3136 PrefetchPixelLinesTime[k] / 4),
3137 (2 * ExtraLatencyCycles +
3138 PixelDCFCLKCyclesRequiredInPrefetch[k]) /
3139 (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
3140 }
3141 }
3142 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 *
3143 dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
3144 }
3145 }
3146 }
3147
/*
 * dml32_CalculateExtraLatencyBytes - number of bytes that contribute to the
 * extra return latency.
 *
 * The result is ReorderingBytes plus the pixel and meta chunk sizes of all
 * active DPPs (both chunk sizes are multiplied by 1024), plus, when
 * GPUVMEnable is set, NumberOfDPP[k] * dpte_group_bytes[k] *
 * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor for every surface.
 *
 * HostVMDynamicLevels is 0 unless both GPUVMEnable and HostVMEnable are set;
 * otherwise it is HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2
 * (never below 0) depending on HostVMMinPageSize:
 *   < 2048     -> no reduction
 *   < 1048576  -> reduced by 1
 *   otherwise  -> reduced by 2
 *
 * The clamp is done in integer arithmetic instead of going through the
 * floating-point dml_max() helper, and the final double is narrowed with an
 * explicit cast (fractional bytes are truncated).
 */
unsigned int dml32_CalculateExtraLatencyBytes(unsigned int ReorderingBytes,
		unsigned int TotalNumberOfActiveDPP,
		unsigned int PixelChunkSizeInKByte,
		unsigned int TotalNumberOfDCCActiveDPP,
		unsigned int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int NumberOfActiveSurfaces,
		unsigned int NumberOfDPP[],
		unsigned int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int k;
	double ret;
	unsigned int HostVMDynamicLevels = 0;

	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048) {
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		} else {
			/* Signed so that a reduction below zero can be clamped to 0. */
			int Levels = (int) HostVMMaxNonCachedPageTableLevels -
					(HostVMMinPageSize < 1048576 ? 1 : 2);

			HostVMDynamicLevels = Levels > 0 ? (unsigned int) Levels : 0;
		}
	}

	ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
			TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (GPUVMEnable == true) {
		for (k = 0; k < NumberOfActiveSurfaces; ++k) {
			ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] *
					(1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
		}
	}
	return (unsigned int) ret;
}
3188
/*
 * dml32_CalculateVUpdateAndDynamicMetadataParameters - derive the VUpdate /
 * VReady pixel offsets and the dynamic metadata timing terms.
 *
 * Outputs:
 *   VUpdateWidthPix  - ceil((14 / DCFClkDeepSleep + 12 / Dppclk + repeater
 *                      delay) * PixelClock)
 *   VReadyOffsetPix  - ceil(max(150 / Dppclk, repeater delay +
 *                      20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *   VUpdateOffsetPix - ceil(HTotal / 4)
 *   TSetup           - the three pixel counts above, summed, over PixelClock
 *   Tdmbf            - DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   Tdmec            - HTotal / PixelClock (one line time)
 *   Tdmsks           - half of VBlank lines when
 *                      DynamicMetadataLinesBeforeActiveRequired is 0,
 *                      otherwise that many lines; halved again when
 *                      InterlaceEnable is 1 and ProgressiveToInterlaceUnitInOPP
 *                      is false.
 *
 * The repeater delay is MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 *
 * The debug prints of VUpdateWidthPix and VReadyOffsetPix use %f because both
 * are doubles; they were previously printed with %d.
 */
void dml32_CalculateVUpdateAndDynamicMetadataParameters(
		unsigned int MaxInterDCNTileRepeaters,
		double Dppclk,
		double Dispclk,
		double DCFClkDeepSleep,
		double PixelClock,
		unsigned int HTotal,
		unsigned int VBlank,
		unsigned int DynamicMetadataTransmittedBytes,
		unsigned int DynamicMetadataLinesBeforeActiveRequired,
		unsigned int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,

		/* output */
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	*VUpdateWidthPix  =
			dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix  = dml_ceil(dml_max(150.0 / Dppclk,
			TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1.0);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	if (DynamicMetadataLinesBeforeActiveRequired == 0)
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	else
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;

	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false)
		*Tdmsks = *Tdmsks / 2;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);

	dml_print("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %d\n",
			__func__, DynamicMetadataLinesBeforeActiveRequired);
	dml_print("DML::%s: VBlank = %d\n", __func__, VBlank);
	dml_print("DML::%s: HTotal = %d\n", __func__, HTotal);
	dml_print("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml_print("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
3243
dml32_CalculateTWait(unsigned int PrefetchMode,enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,bool DRRDisplay,double DRAMClockChangeLatency,double FCLKChangeLatency,double UrgentLatency,double SREnterPlusExitTime)3244 double dml32_CalculateTWait(
3245 unsigned int PrefetchMode,
3246 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange,
3247 bool SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
3248 bool DRRDisplay,
3249 double DRAMClockChangeLatency,
3250 double FCLKChangeLatency,
3251 double UrgentLatency,
3252 double SREnterPlusExitTime)
3253 {
3254 double TWait = 0.0;
3255
3256 if (PrefetchMode == 0 &&
3257 !(UseMALLForPStateChange == dm_use_mall_pstate_change_full_frame) &&
3258 !(UseMALLForPStateChange == dm_use_mall_pstate_change_sub_viewport) &&
3259 !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe) &&
3260 !(SynchronizeDRRDisplaysForUCLKPStateChangeFinal && DRRDisplay)) {
3261 TWait = dml_max3(DRAMClockChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3262 } else if (PrefetchMode <= 1 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3263 TWait = dml_max3(FCLKChangeLatency + UrgentLatency, SREnterPlusExitTime, UrgentLatency);
3264 } else if (PrefetchMode <= 2 && !(UseMALLForPStateChange == dm_use_mall_pstate_change_phantom_pipe)) {
3265 TWait = dml_max(SREnterPlusExitTime, UrgentLatency);
3266 } else {
3267 TWait = UrgentLatency;
3268 }
3269
3270 #ifdef __DML_VBA_DEBUG__
3271 dml_print("DML::%s: PrefetchMode = %d\n", __func__, PrefetchMode);
3272 dml_print("DML::%s: TWait = %f\n", __func__, TWait);
3273 #endif
3274 return TWait;
3275 } // CalculateTWait
3276
3277 // Function: get_return_bw_mbps
3278 // Megabyte per second
dml32_get_return_bw_mbps(const soc_bounding_box_st * soc,const int VoltageLevel,const bool HostVMEnable,const double DCFCLK,const double FabricClock,const double DRAMSpeed)3279 double dml32_get_return_bw_mbps(const soc_bounding_box_st *soc,
3280 const int VoltageLevel,
3281 const bool HostVMEnable,
3282 const double DCFCLK,
3283 const double FabricClock,
3284 const double DRAMSpeed)
3285 {
3286 double ReturnBW = 0.;
3287 double IdealSDPPortBandwidth = soc->return_bus_width_bytes /*mode_lib->vba.ReturnBusWidth*/ * DCFCLK;
3288 double IdealFabricBandwidth = FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes;
3289 double IdealDRAMBandwidth = DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes;
3290 double PixelDataOnlyReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3291 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3292 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3293 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3294 double PixelMixedWithVMDataReturnBW = dml_min3(IdealSDPPortBandwidth * soc->pct_ideal_sdp_bw_after_urgent / 100,
3295 IdealFabricBandwidth * soc->pct_ideal_fabric_bw_after_urgent / 100,
3296 IdealDRAMBandwidth * (VoltageLevel < 2 ? soc->pct_ideal_dram_bw_after_urgent_strobe :
3297 soc->pct_ideal_dram_sdp_bw_after_urgent_pixel_only) / 100);
3298
3299 if (HostVMEnable != true)
3300 ReturnBW = PixelDataOnlyReturnBW;
3301 else
3302 ReturnBW = PixelMixedWithVMDataReturnBW;
3303
3304 #ifdef __DML_VBA_DEBUG__
3305 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3306 dml_print("DML::%s: HostVMEnable = %d\n", __func__, HostVMEnable);
3307 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
3308 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3309 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
3310 dml_print("DML::%s: IdealSDPPortBandwidth = %f\n", __func__, IdealSDPPortBandwidth);
3311 dml_print("DML::%s: IdealFabricBandwidth = %f\n", __func__, IdealFabricBandwidth);
3312 dml_print("DML::%s: IdealDRAMBandwidth = %f\n", __func__, IdealDRAMBandwidth);
3313 dml_print("DML::%s: PixelDataOnlyReturnBW = %f\n", __func__, PixelDataOnlyReturnBW);
3314 dml_print("DML::%s: PixelMixedWithVMDataReturnBW = %f\n", __func__, PixelMixedWithVMDataReturnBW);
3315 dml_print("DML::%s: ReturnBW = %f MBps\n", __func__, ReturnBW);
3316 #endif
3317 return ReturnBW;
3318 }
3319
3320 // Function: get_return_bw_mbps_vm_only
3321 // Megabyte per second
dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st * soc,const int VoltageLevel,const double DCFCLK,const double FabricClock,const double DRAMSpeed)3322 double dml32_get_return_bw_mbps_vm_only(const soc_bounding_box_st *soc,
3323 const int VoltageLevel,
3324 const double DCFCLK,
3325 const double FabricClock,
3326 const double DRAMSpeed)
3327 {
3328 double VMDataOnlyReturnBW = dml_min3(
3329 soc->return_bus_width_bytes * DCFCLK * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3330 FabricClock * soc->fabric_datapath_to_dcn_data_return_bytes
3331 * soc->pct_ideal_sdp_bw_after_urgent / 100.0,
3332 DRAMSpeed * soc->num_chans * soc->dram_channel_width_bytes
3333 * (VoltageLevel < 2 ?
3334 soc->pct_ideal_dram_bw_after_urgent_strobe :
3335 soc->pct_ideal_dram_sdp_bw_after_urgent_vm_only) / 100.0);
3336 #ifdef __DML_VBA_DEBUG__
3337 dml_print("DML::%s: VoltageLevel = %d\n", __func__, VoltageLevel);
3338 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
3339 dml_print("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3340 dml_print("DML::%s: DRAMSpeed = %f\n", __func__, DRAMSpeed);
3341 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
3342 #endif
3343 return VMDataOnlyReturnBW;
3344 }
3345
dml32_CalculateExtraLatency(unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,double DCFCLK,unsigned int TotalNumberOfActiveDPP,unsigned int PixelChunkSizeInKByte,unsigned int TotalNumberOfDCCActiveDPP,unsigned int MetaChunkSize,double ReturnBW,bool GPUVMEnable,bool HostVMEnable,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],double HostVMInefficiencyFactor,double HostVMMinPageSize,unsigned int HostVMMaxNonCachedPageTableLevels)3346 double dml32_CalculateExtraLatency(
3347 unsigned int RoundTripPingLatencyCycles,
3348 unsigned int ReorderingBytes,
3349 double DCFCLK,
3350 unsigned int TotalNumberOfActiveDPP,
3351 unsigned int PixelChunkSizeInKByte,
3352 unsigned int TotalNumberOfDCCActiveDPP,
3353 unsigned int MetaChunkSize,
3354 double ReturnBW,
3355 bool GPUVMEnable,
3356 bool HostVMEnable,
3357 unsigned int NumberOfActiveSurfaces,
3358 unsigned int NumberOfDPP[],
3359 unsigned int dpte_group_bytes[],
3360 double HostVMInefficiencyFactor,
3361 double HostVMMinPageSize,
3362 unsigned int HostVMMaxNonCachedPageTableLevels)
3363 {
3364 double ExtraLatencyBytes;
3365 double ExtraLatency;
3366
3367 ExtraLatencyBytes = dml32_CalculateExtraLatencyBytes(
3368 ReorderingBytes,
3369 TotalNumberOfActiveDPP,
3370 PixelChunkSizeInKByte,
3371 TotalNumberOfDCCActiveDPP,
3372 MetaChunkSize,
3373 GPUVMEnable,
3374 HostVMEnable,
3375 NumberOfActiveSurfaces,
3376 NumberOfDPP,
3377 dpte_group_bytes,
3378 HostVMInefficiencyFactor,
3379 HostVMMinPageSize,
3380 HostVMMaxNonCachedPageTableLevels);
3381
3382 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
3383
3384 #ifdef __DML_VBA_DEBUG__
3385 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
3386 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
3387 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
3388 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
3389 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
3390 #endif
3391
3392 return ExtraLatency;
3393 } // CalculateExtraLatency
3394
/*
 * dml32_CalculatePrefetchSchedule - compute the prefetch schedule of one pipe.
 *
 * Steps performed, in order:
 *  1. Obtain the VUpdate / dynamic-metadata timing terms (Tdmbf, Tdmec, Tdmsks,
 *     *TSetup, *VUpdate*Pix, *VReadyOffsetPix) from
 *     dml32_CalculateVUpdateAndDynamicMetadataParameters().
 *  2. Derive *Tdmdl / *Tdmdl_vm and flag *NotEnoughTimeForDynamicMetadata when
 *     VStartup * LineTime is shorter than the dynamic-metadata time budget.
 *  3. Compute the pipeline delay after the scaler, split into whole lines
 *     (*DSTYAfterScaler) and the remaining pixels (*DSTXAfterScaler).
 *  4. Compute two candidate prefetch durations in lines:
 *       - dst_y_prefetch_oto, built from a bandwidth (prefetch_bw_oto) derived
 *         from the pixel rate / swath bytes ("oto" presumably means
 *         one-to-one -- TODO confirm), and
 *       - dst_y_prefetch_equ, the time left between VStartup and active after
 *         setup, wait/calc and post-scaler delays, with a matching bandwidth
 *         (prefetch_bw_equ) picked from four candidate equations.
 *  5. Select one candidate and derive lines for VM / row requests, the
 *     prefetch vertical ratios and the required pixel-data bandwidths.
 *
 * Inputs:
 *   v                         - mode-library state; only read here.
 *   k                         - index into the per-surface arrays of @v.
 *   HostVMInefficiencyFactor  - multiplier applied to PTE byte counts.
 *   myPipe                    - per-pipe clocks, timing and format description.
 *   DSCDelay, DPP_RECOUT_WIDTH- added into the post-scaler pixel delay.
 *   VStartup, MaxVStartup     - current and maximum VStartup, in lines.
 *   UrgentLatency, UrgentExtraLatency, TCalc, TWait, TPreReq
 *                             - time terms (debug strings label times in "us").
 *   PDEAndMetaPTEBytesFrame, MetaRowByte, PixelPTEBytesPerRow
 *                             - byte counts of page-table / meta fetches.
 *   PrefetchSourceLines{Y,C}, swath_width_{luma,chroma}_ub,
 *   SwathHeight{Y,C}, VInitPreFill{Y,C}, MaxNumSwath{Y,C}
 *                             - luma / chroma swath geometry for the prefetch.
 *   SwathWidthY, SwathWidthC  - not referenced by this function.
 *   ExtendPrefetchIfPossible  - see the comment at the equ/oto selection.
 *
 * Outputs: every pointer after the "Output" marker is written by this
 * function on the normal path.
 *
 * NOTE(review): the early return taken when Dppclk or Dispclk is 0 happens
 * before most outputs are written; only *NotEnoughTimeForDynamicMetadata,
 * *Tdmdl, *Tdmdl_vm and the values filled in by
 * dml32_CalculateVUpdateAndDynamicMetadataParameters() are valid then.
 *
 * Return: true if an error was detected (no usable schedule; the prefetch
 * outputs are zeroed), false otherwise.
 */
bool dml32_CalculatePrefetchSchedule(
		struct vba_vars_st *v,
		unsigned int k,
		double HostVMInefficiencyFactor,
		DmlPipe *myPipe,
		unsigned int DSCDelay,
		unsigned int DPP_RECOUT_WIDTH,
		unsigned int VStartup,
		unsigned int MaxVStartup,
		double UrgentLatency,
		double UrgentExtraLatency,
		double TCalc,
		unsigned int PDEAndMetaPTEBytesFrame,
		unsigned int MetaRowByte,
		unsigned int PixelPTEBytesPerRow,
		double PrefetchSourceLinesY,
		unsigned int SwathWidthY,
		unsigned int VInitPreFillY,
		unsigned int MaxNumSwathY,
		double PrefetchSourceLinesC,
		unsigned int SwathWidthC,
		unsigned int VInitPreFillC,
		unsigned int MaxNumSwathC,
		unsigned int swath_width_luma_ub,
		unsigned int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double TWait,
		double TPreReq,
		bool ExtendPrefetchIfPossible,
		/* Output */
		double *DSTXAfterScaler,
		double *DSTYAfterScaler,
		double *DestinationLinesForPrefetch,
		double *PrefetchBandwidth,
		double *DestinationLinesToRequestVMInVBlank,
		double *DestinationLinesToRequestRowInVBlank,
		double *VRatioPrefetchY,
		double *VRatioPrefetchC,
		double *RequiredPrefetchPixDataBWLuma,
		double *RequiredPrefetchPixDataBWChroma,
		bool *NotEnoughTimeForDynamicMetadata,
		double *Tno_bw,
		double *prefetch_vmrow_bw,
		double *Tdmdl_vm,
		double *Tdmdl,
		double *TSetup,
		unsigned int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double DPPCLKDelaySubtotalPlusCNVCFormater = v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater;
	bool MyError = false;
	unsigned int DPPCycles, DISPCLKCycles;
	double DSTTotalPixelsAfterScaler;
	double LineTime;
	double dst_y_prefetch_equ;
	double prefetch_bw_oto;
	double Tvm_oto;
	double Tr0_oto;
	double Tvm_oto_lines;
	double Tr0_oto_lines;
	double dst_y_prefetch_oto;
	double TimeForFetchingMetaPTE = 0;
	double TimeForFetchingRowInVBlank = 0;
	double LinesToRequestPrefetchPixelData = 0;
	double LinesForPrefetchBandwidth = 0;
	unsigned int HostVMDynamicLevelsTrips;
	double trip_to_mem;
	double Tvm_trips;
	double Tr0_trips;
	double Tvm_trips_rounded;
	double Tr0_trips_rounded;
	double Lsw_oto;
	double Tpre_rounded;
	double prefetch_bw_equ;
	double Tvm_equ;
	double Tr0_equ;
	double Tdmbf;
	double Tdmec;
	double Tdmsks;
	double prefetch_sw_bytes;
	double bytes_pp;
	double dep_bytes;
	unsigned int max_vratio_pre = v->MaxVRatioPre;
	double min_Lsw;
	double Tsw_est1 = 0;
	double Tsw_est3 = 0;

	/* Host VM page-table trips only count when both GPUVM and HostVM are on. */
	if (v->GPUVMEnable == true && v->HostVMEnable == true)
		HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
	else
		HostVMDynamicLevelsTrips = 0;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: v->GPUVMEnable = %d\n", __func__, v->GPUVMEnable);
	dml_print("DML::%s: v->GPUVMMaxPageTableLevels = %d\n", __func__, v->GPUVMMaxPageTableLevels);
	dml_print("DML::%s: DCCEnable = %d\n", __func__, myPipe->DCCEnable);
	dml_print("DML::%s: v->HostVMEnable=%d HostVMInefficiencyFactor=%f\n",
			__func__, v->HostVMEnable, HostVMInefficiencyFactor);
#endif
	dml32_CalculateVUpdateAndDynamicMetadataParameters(
			v->MaxInterDCNTileRepeaters,
			myPipe->Dppclk,
			myPipe->Dispclk,
			myPipe->DCFClkDeepSleep,
			myPipe->PixelClock,
			myPipe->HTotal,
			myPipe->VBlank,
			v->DynamicMetadataTransmittedBytes[k],
			v->DynamicMetadataLinesBeforeActiveRequired[k],
			myPipe->InterlaceEnable,
			myPipe->ProgressiveToInterlaceUnitInOPP,
			TSetup,

			/* output */
			&Tdmbf,
			&Tdmec,
			&Tdmsks,
			VUpdateOffsetPix,
			VUpdateWidthPix,
			VReadyOffsetPix);

	/* NOTE(review): PixelClock is not checked for 0 before this division. */
	LineTime = myPipe->HTotal / myPipe->PixelClock;
	trip_to_mem = UrgentLatency;
	Tvm_trips = UrgentExtraLatency + trip_to_mem * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);

	if (v->DynamicMetadataVMEnabled == true)
		*Tdmdl = TWait + Tvm_trips + trip_to_mem;
	else
		*Tdmdl = TWait + UrgentExtraLatency;

#ifdef __DML_VBA_ALLOW_DELTA__
	if (v->DynamicMetadataEnable[k] == false)
		*Tdmdl = 0.0;
#endif

	/* Dynamic metadata must fit in the time between VStartup and active. */
	if (v->DynamicMetadataEnable[k] == true) {
		if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
			*NotEnoughTimeForDynamicMetadata = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
			dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n",
					__func__, Tdmbf);
			dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
			dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n",
					__func__, Tdmsks);
			dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n",
					__func__, *Tdmdl);
#endif
		} else {
			*NotEnoughTimeForDynamicMetadata = false;
		}
	} else {
		*NotEnoughTimeForDynamicMetadata = false;
	}

	*Tdmdl_vm = (v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true &&
			v->GPUVMEnable == true ? TWait + Tvm_trips : 0);

	/* DPP clock cycles of delay; the double sum is truncated to unsigned int. */
	if (myPipe->ScalerEnabled)
		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCL;
	else
		DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + v->DPPCLKDelaySCLLBOnly;

	DPPCycles = DPPCycles + myPipe->NumberOfCursors * v->DPPCLKDelayCNVCCursor;

	DISPCLKCycles = v->DISPCLKDelaySubtotal;

	/*
	 * Both clocks are used as divisors below. Bail out with an error before
	 * dividing; most outputs have not been written at this point.
	 */
	if (myPipe->Dppclk == 0.0 || myPipe->Dispclk == 0.0)
		return true;

	/* Convert the DPP / DISPCLK cycle delays into pixel-clock pixels. */
	*DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->Dppclk + DISPCLKCycles *
			myPipe->PixelClock / myPipe->Dispclk + DSCDelay;

	*DSTXAfterScaler = *DSTXAfterScaler + (myPipe->ODMMode != dm_odm_combine_mode_disabled ? 18 : 0)
			+ (myPipe->DPPPerSurface - 1) * DPP_RECOUT_WIDTH
			+ ((myPipe->ODMMode == dm_odm_split_mode_1to2 || myPipe->ODMMode == dm_odm_mode_mso_1to2) ?
					myPipe->HActive / 2 : 0)
			+ ((myPipe->ODMMode == dm_odm_mode_mso_1to4) ? myPipe->HActive * 3 / 4 : 0);

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
	dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
	dml_print("DML::%s: Dppclk: %f\n", __func__, myPipe->Dppclk);
	dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
	dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->Dispclk);
	dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
	dml_print("DML::%s: ODMMode: %d\n", __func__, myPipe->ODMMode);
	dml_print("DML::%s: DPP_RECOUT_WIDTH: %d\n", __func__, DPP_RECOUT_WIDTH);
	/* *DSTXAfterScaler is a double, so it needs %f. */
	dml_print("DML::%s: DSTXAfterScaler: %f\n", __func__, *DSTXAfterScaler);
#endif

	if (v->OutputFormat[k] == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
		*DSTYAfterScaler = 1;
	else
		*DSTYAfterScaler = 0;

	/* Re-split the total pixel delay into whole lines plus leftover pixels. */
	DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
	*DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
	*DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DSTXAfterScaler: %f (final)\n", __func__, *DSTXAfterScaler);
	dml_print("DML::%s: DSTYAfterScaler: %f (final)\n", __func__, *DSTYAfterScaler);
#endif

	Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);

	/*
	 * Round the VM / row trip times up to quarter lines and determine
	 * *Tno_bw for the current GPUVM / DCC configuration.
	 */
	if (v->GPUVMEnable == true) {
		Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1.0) / 4.0 * LineTime;
		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
		if (v->GPUVMMaxPageTableLevels >= 3) {
			*Tno_bw = UrgentExtraLatency + trip_to_mem *
					(double) ((v->GPUVMMaxPageTableLevels - 2) * (HostVMDynamicLevelsTrips + 1) - 1);
		} else if (v->GPUVMMaxPageTableLevels == 1 && myPipe->DCCEnable != true) {
			Tr0_trips_rounded = dml_ceil(4.0 * UrgentExtraLatency / LineTime, 1.0) /
					4.0 * LineTime; // VBA_ERROR
			*Tno_bw = UrgentExtraLatency;
		} else {
			*Tno_bw = 0;
		}
	} else if (myPipe->DCCEnable == true) {
		Tvm_trips_rounded = LineTime / 4.0;
		Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1.0) / 4.0 * LineTime;
		*Tno_bw = 0;
	} else {
		Tvm_trips_rounded = LineTime / 4.0;
		Tr0_trips_rounded = LineTime / 2.0;
		*Tno_bw = 0;
	}
	/* Neither rounded trip time may be shorter than a quarter line. */
	Tvm_trips_rounded = dml_max(Tvm_trips_rounded, LineTime / 4.0);
	Tr0_trips_rounded = dml_max(Tr0_trips_rounded, LineTime / 4.0);

	/*
	 * NOTE(review): if BytePerPixelC is an integer type, "/ 4" truncates
	 * before the sum is converted to double -- confirm this is intended.
	 */
	if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10
			|| myPipe->SourcePixelFormat == dm_420_12) {
		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
	} else {
		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
	}

	/* "oto" candidate: bandwidth and number of swath lines. */
	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY
			+ PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
	prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerSurface,
			prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));

	min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre;
	min_Lsw = dml_max(min_Lsw, 1.0);
	Lsw_oto = dml_ceil(4.0 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1.0) / 4.0;

	if (v->GPUVMEnable == true) {
		Tvm_oto = dml_max3(
				Tvm_trips,
				*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto,
				LineTime / 4.0);
	} else
		Tvm_oto = LineTime / 4.0;

	if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
		Tr0_oto = dml_max4(
				Tr0_trips,
				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto,
				(LineTime - Tvm_oto)/2.0,
				LineTime / 4.0);
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__,
				(MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto);
		dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, Tr0_trips);
		/* Print the operand actually passed to dml_max4() above. */
		dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, (LineTime - Tvm_oto) / 2.0);
		dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, LineTime / 4);
#endif
	} else
		Tr0_oto = (LineTime - Tvm_oto) / 2.0;

	Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
	Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
	dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;

	/*
	 * "equ" candidate: lines left after setup, wait/calc (or dynamic
	 * metadata) time and the post-scaler delay are taken out of VStartup.
	 */
	dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
			(*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);

	dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__);
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
	dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
	dml_print("DML::%s: *Tno_bw = %f\n", __func__, *Tno_bw);
	dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, UrgentExtraLatency);
	dml_print("DML::%s: trip_to_mem = %f\n", __func__, trip_to_mem);
	dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
	dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
	dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
	dml_print("DML::%s: BytePerPixelC = %d\n", __func__, myPipe->BytePerPixelC);
	dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
	dml_print("DML::%s: swath_width_chroma_ub = %d\n", __func__, swath_width_chroma_ub);
	dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, prefetch_sw_bytes);
	dml_print("DML::%s: bytes_pp = %f\n", __func__, bytes_pp);
	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
	dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
	dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
	dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
	dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
	dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
	dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
	dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
	dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
	dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, Tvm_oto_lines);
	dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, Tr0_oto_lines);
	dml_print("DML::%s: Lsw_oto = %f\n", __func__, Lsw_oto);
	dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, dst_y_prefetch_oto);
	dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, dst_y_prefetch_equ);
#endif

	/* Round to the nearest quarter line (floor after adding 1/8 line). */
	dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
	Tpre_rounded = dst_y_prefetch_equ * LineTime;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, dst_y_prefetch_equ);
	dml_print("DML::%s: LineTime: %f\n", __func__, LineTime);
	dml_print("DML::%s: VStartup: %d\n", __func__, VStartup);
	dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n",
			__func__, VStartup * LineTime);
	dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *TSetup);
	dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, TCalc);
	dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
	dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
	dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd\n", __func__, *Tdmdl_vm);
	dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl);
	/* *DSTYAfterScaler is a double, so it needs %f. */
	dml_print("DML::%s: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n",
			__func__, *DSTYAfterScaler);
#endif
	dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor,
			MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);

	/* Swath bytes are raised to twice the dependent bytes when smaller. */
	if (prefetch_sw_bytes < dep_bytes)
		prefetch_sw_bytes = 2 * dep_bytes;

	/* Start every prefetch output from a known value. */
	*PrefetchBandwidth = 0;
	*DestinationLinesToRequestVMInVBlank = 0;
	*DestinationLinesToRequestRowInVBlank = 0;
	*VRatioPrefetchY = 0;
	*VRatioPrefetchC = 0;
	*RequiredPrefetchPixDataBWLuma = 0;
	*RequiredPrefetchPixDataBWChroma = 0;
	if (dst_y_prefetch_equ > 1 &&
			(Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) {
		double PrefetchBandwidth1;
		double PrefetchBandwidth2;
		double PrefetchBandwidth3;
		double PrefetchBandwidth4;

		/*
		 * Four candidate bandwidths. Each divides a different subset of the
		 * bytes to fetch by the time left once the latency-bound stages
		 * (Tno_bw, rounded VM trips, rounded row trips) are taken out of
		 * Tpre_rounded. A non-positive time budget yields 0.
		 */
		if (Tpre_rounded - *Tno_bw > 0) {
			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
					+ prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
			Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
		} else
			PrefetchBandwidth1 = 0;

		/* At max VStartup, reserve min_Lsw + 0.75 lines for swath data. */
		if (VStartup == MaxVStartup && (Tsw_est1 / LineTime < min_Lsw)
				&& Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
			PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte
					+ 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
		}

		if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
			PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) /
					(Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
		else
			PrefetchBandwidth2 = 0;

		if (Tpre_rounded - Tvm_trips_rounded > 0) {
			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
					+ prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
			Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
		} else
			PrefetchBandwidth3 = 0;


		if (VStartup == MaxVStartup &&
				(Tsw_est3 / LineTime < min_Lsw) && Tpre_rounded - min_Lsw * LineTime - 0.75 *
				LineTime - Tvm_trips_rounded > 0) {
			PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
					/ (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
		}

		if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) {
			PrefetchBandwidth4 = prefetch_sw_bytes /
					(Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
		} else {
			PrefetchBandwidth4 = 0;
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
		dml_print("DML::%s: Tno_bw: %f\n", __func__, *Tno_bw);
		dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
		dml_print("DML::%s: Tsw_est1: %f\n", __func__, Tsw_est1);
		dml_print("DML::%s: Tsw_est3: %f\n", __func__, Tsw_est3);
		dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, PrefetchBandwidth1);
		dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, PrefetchBandwidth2);
		dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
		dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, PrefetchBandwidth4);
#endif
		{
			bool Case1OK;
			bool Case2OK;
			bool Case3OK;

			/*
			 * A case is valid when its bandwidth is consistent with the
			 * assumption it was derived under: whether the VM stage and
			 * the row stage are at least as long as their rounded trip
			 * times (>=) or shorter (<).
			 */
			if (PrefetchBandwidth1 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1
						>= Tvm_trips_rounded
						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
								/ PrefetchBandwidth1 >= Tr0_trips_rounded) {
					Case1OK = true;
				} else {
					Case1OK = false;
				}
			} else {
				Case1OK = false;
			}

			if (PrefetchBandwidth2 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2
						>= Tvm_trips_rounded
						&& (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor)
						/ PrefetchBandwidth2 < Tr0_trips_rounded) {
					Case2OK = true;
				} else {
					Case2OK = false;
				}
			} else {
				Case2OK = false;
			}

			if (PrefetchBandwidth3 > 0) {
				if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 <
						Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow *
								HostVMInefficiencyFactor) / PrefetchBandwidth3 >=
								Tr0_trips_rounded) {
					Case3OK = true;
				} else {
					Case3OK = false;
				}
			} else {
				Case3OK = false;
			}

			/* The first valid case wins; case 4 is the fallback. */
			if (Case1OK)
				prefetch_bw_equ = PrefetchBandwidth1;
			else if (Case2OK)
				prefetch_bw_equ = PrefetchBandwidth2;
			else if (Case3OK)
				prefetch_bw_equ = PrefetchBandwidth3;
			else
				prefetch_bw_equ = PrefetchBandwidth4;

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
			dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
			dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
			dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
#endif

			if (prefetch_bw_equ > 0) {
				if (v->GPUVMEnable == true) {
					Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame *
							HostVMInefficiencyFactor / prefetch_bw_equ,
							Tvm_trips, LineTime / 4);
				} else {
					Tvm_equ = LineTime / 4;
				}

				if ((v->GPUVMEnable == true || myPipe->DCCEnable == true)) {
					Tr0_equ = dml_max4((MetaRowByte + PixelPTEBytesPerRow *
							HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips,
							(LineTime - Tvm_equ) / 2, LineTime / 4);
				} else {
					Tr0_equ = (LineTime - Tvm_equ) / 2;
				}
			} else {
				Tvm_equ = 0;
				Tr0_equ = 0;
#ifdef __DML_VBA_DEBUG__
				dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
#endif
			}
		}

		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
			/*
			 * oto is shorter than equ: use the oto timings and bandwidth,
			 * but report the equ line count when oto alone would be shorter
			 * than the required prefetch time TPreReq.
			 */
			if (dst_y_prefetch_oto * LineTime < TPreReq) {
				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
			} else {
				*DestinationLinesForPrefetch = dst_y_prefetch_oto;
			}
			TimeForFetchingMetaPTE = Tvm_oto;
			TimeForFetchingRowInVBlank = Tr0_oto;
			*PrefetchBandwidth = prefetch_bw_oto;
			/* Clamp to oto for bandwidth calculation */
			LinesForPrefetchBandwidth = dst_y_prefetch_oto;
		} else {
			/* For mode programming we want to extend the prefetch as much as possible
			 * (up to oto, or as long as we can for equ) if we're not already applying
			 * the 60us prefetch requirement. This is to avoid intermittent underflow
			 * issues during prefetch.
			 *
			 * The prefetch extension is applied under the following scenarios:
			 * 1. We're in prefetch mode > 0 (i.e. we don't support MCLK switch in blank)
			 * 2. We're using subvp or drr methods of p-state switch, in which case
			 *    we don't care if prefetch takes up more of the blanking time
			 *
			 * Mode programming typically chooses the smallest prefetch time possible
			 * (i.e. highest bandwidth during prefetch) presumably to create margin between
			 * p-states / c-states that happen in vblank and prefetch. Therefore we only
			 * apply this prefetch extension when p-state in vblank is not required (UCLK
			 * p-states take up the most vblank time).
			 *
			 * NOTE(review): the extension is requested by reporting an error while
			 * VStartup is still below MaxVStartup; presumably the caller then retries
			 * with a larger VStartup -- confirm against the caller.
			 */
			if (ExtendPrefetchIfPossible && TPreReq == 0 && VStartup < MaxVStartup) {
				MyError = true;
			} else {
				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
				TimeForFetchingMetaPTE = Tvm_equ;
				TimeForFetchingRowInVBlank = Tr0_equ;
				*PrefetchBandwidth = prefetch_bw_equ;
				/* Clamp to equ for bandwidth calculation */
				LinesForPrefetchBandwidth = dst_y_prefetch_equ;
			}
		}

		/* Convert the chosen stage times to quarter-line granularity. */
		*DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;

		*DestinationLinesToRequestRowInVBlank =
				dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;

		/* Lines left for pixel data after one VM stage and two row stages. */
		LinesToRequestPrefetchPixelData = LinesForPrefetchBandwidth -
				*DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
		dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
				__func__, *DestinationLinesToRequestVMInVBlank);
		dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
		dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
		dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
				__func__, *DestinationLinesToRequestRowInVBlank);
		dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
		dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
#endif

		if (LinesToRequestPrefetchPixelData >= 1 && prefetch_bw_equ > 0) {
			/* Prefetch vertical ratios are never allowed below 1.0. */
			*VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
			*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
			dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
			dml_print("DML::%s: VInitPreFillY = %d\n", __func__, VInitPreFillY);
#endif
			if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
				if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
					*VRatioPrefetchY =
							dml_max((double) PrefetchSourceLinesY /
									LinesToRequestPrefetchPixelData,
									(double) MaxNumSwathY * SwathHeightY /
									(LinesToRequestPrefetchPixelData -
									(VInitPreFillY - 3.0) / 2.0));
					*VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
				} else {
					MyError = true;
					*VRatioPrefetchY = 0;
				}
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
				dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
				dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
#endif
			}

			*VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
			*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
			dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
			dml_print("DML::%s: VInitPreFillC = %d\n", __func__, VInitPreFillC);
#endif
			/*
			 * NOTE(review): unlike the luma branch there is no
			 * "VInitPreFillC > 3" condition here -- confirm intended.
			 */
			if ((SwathHeightC > 4)) {
				if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
					*VRatioPrefetchC =
							dml_max(*VRatioPrefetchC,
									(double) MaxNumSwathC * SwathHeightC /
									(LinesToRequestPrefetchPixelData -
									(VInitPreFillC - 3.0) / 2.0));
					*VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
				} else {
					MyError = true;
					*VRatioPrefetchC = 0;
				}
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
				dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
				dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
#endif
			}

			*RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY
					/ LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub
					/ LineTime;

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
			dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
			dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n",
					__func__, *RequiredPrefetchPixDataBWLuma);
#endif
			*RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC /
					LinesToRequestPrefetchPixelData
					* myPipe->BytePerPixelC
					* swath_width_chroma_ub / LineTime;
		} else {
			/* Less than one line left for pixel data, or no equ bandwidth. */
			MyError = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n",
					__func__, LinesToRequestPrefetchPixelData);
#endif
			*VRatioPrefetchY = 0;
			*VRatioPrefetchC = 0;
			*RequiredPrefetchPixDataBWLuma = 0;
			*RequiredPrefetchPixDataBWChroma = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
			(double)LinesToRequestPrefetchPixelData * LineTime +
			2.0*TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
		dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
		dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
			(*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime);
		dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
		dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime -
			TimeForFetchingMetaPTE - 2*TimeForFetchingRowInVBlank - (*DSTYAfterScaler +
			((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
		dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n",
			PixelPTEBytesPerRow);
#endif
	} else {
		/* Not enough lines between VStartup and active for any prefetch. */
		MyError = true;
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n",
				__func__, dst_y_prefetch_equ);
#endif
	}

	{
		double prefetch_vm_bw;
		double prefetch_row_bw;

		/*
		 * Bandwidth of the VM and row stages. Having bytes to fetch but
		 * zero lines to fetch them in is an error.
		 */
		if (PDEAndMetaPTEBytesFrame == 0) {
			prefetch_vm_bw = 0;
		} else if (*DestinationLinesToRequestVMInVBlank > 0) {
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
			dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
			dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n",
					__func__, *DestinationLinesToRequestVMInVBlank);
			dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
#endif
			prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor /
					(*DestinationLinesToRequestVMInVBlank * LineTime);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
#endif
		} else {
			prefetch_vm_bw = 0;
			MyError = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n",
					__func__, *DestinationLinesToRequestVMInVBlank);
#endif
		}

		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
			prefetch_row_bw = 0;
		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) /
					(*DestinationLinesToRequestRowInVBlank * LineTime);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
			dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n",
					__func__, *DestinationLinesToRequestRowInVBlank);
			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
#endif
		} else {
			prefetch_row_bw = 0;
			MyError = true;
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n",
					__func__, *DestinationLinesToRequestRowInVBlank);
#endif
		}

		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
	}

	/* On any error, hand back zeroed prefetch outputs. */
	if (MyError) {
		*PrefetchBandwidth = 0;
		TimeForFetchingMetaPTE = 0;
		TimeForFetchingRowInVBlank = 0;
		*DestinationLinesToRequestVMInVBlank = 0;
		*DestinationLinesToRequestRowInVBlank = 0;
		*DestinationLinesForPrefetch = 0;
		LinesToRequestPrefetchPixelData = 0;
		*VRatioPrefetchY = 0;
		*VRatioPrefetchC = 0;
		*RequiredPrefetchPixDataBWLuma = 0;
		*RequiredPrefetchPixDataBWChroma = 0;
	}

	return MyError;
} // CalculatePrefetchSchedule
4114
dml32_CalculateFlipSchedule(double HostVMInefficiencyFactor,double UrgentExtraLatency,double UrgentLatency,unsigned int GPUVMMaxPageTableLevels,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,bool GPUVMEnable,double HostVMMinPageSize,double PDEAndMetaPTEBytesPerFrame,double MetaRowBytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw,bool DCCEnable,unsigned int dpte_row_height,unsigned int meta_row_height,unsigned int dpte_row_height_chroma,unsigned int meta_row_height_chroma,bool use_one_row_for_frame_flip,double * DestinationLinesToRequestVMInImmediateFlip,double * DestinationLinesToRequestRowInImmediateFlip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)4115 void dml32_CalculateFlipSchedule(
4116 double HostVMInefficiencyFactor,
4117 double UrgentExtraLatency,
4118 double UrgentLatency,
4119 unsigned int GPUVMMaxPageTableLevels,
4120 bool HostVMEnable,
4121 unsigned int HostVMMaxNonCachedPageTableLevels,
4122 bool GPUVMEnable,
4123 double HostVMMinPageSize,
4124 double PDEAndMetaPTEBytesPerFrame,
4125 double MetaRowBytes,
4126 double DPTEBytesPerRow,
4127 double BandwidthAvailableForImmediateFlip,
4128 unsigned int TotImmediateFlipBytes,
4129 enum source_format_class SourcePixelFormat,
4130 double LineTime,
4131 double VRatio,
4132 double VRatioChroma,
4133 double Tno_bw,
4134 bool DCCEnable,
4135 unsigned int dpte_row_height,
4136 unsigned int meta_row_height,
4137 unsigned int dpte_row_height_chroma,
4138 unsigned int meta_row_height_chroma,
4139 bool use_one_row_for_frame_flip,
4140
4141 /* Output */
4142 double *DestinationLinesToRequestVMInImmediateFlip,
4143 double *DestinationLinesToRequestRowInImmediateFlip,
4144 double *final_flip_bw,
4145 bool *ImmediateFlipSupportedForPipe)
4146 {
4147 double min_row_time = 0.0;
4148 unsigned int HostVMDynamicLevelsTrips;
4149 double TimeForFetchingMetaPTEImmediateFlip;
4150 double TimeForFetchingRowInVBlankImmediateFlip;
4151 double ImmediateFlipBW = 1.0;
4152
4153 if (GPUVMEnable == true && HostVMEnable == true)
4154 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
4155 else
4156 HostVMDynamicLevelsTrips = 0;
4157
4158 #ifdef __DML_VBA_DEBUG__
4159 dml_print("DML::%s: TotImmediateFlipBytes = %d\n", __func__, TotImmediateFlipBytes);
4160 dml_print("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
4161 #endif
4162
4163 if (TotImmediateFlipBytes > 0) {
4164 if (use_one_row_for_frame_flip) {
4165 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + 2 * DPTEBytesPerRow) *
4166 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4167 } else {
4168 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) *
4169 BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes;
4170 }
4171 if (GPUVMEnable == true) {
4172 TimeForFetchingMetaPTEImmediateFlip = dml_max3(Tno_bw + PDEAndMetaPTEBytesPerFrame *
4173 HostVMInefficiencyFactor / ImmediateFlipBW,
4174 UrgentExtraLatency + UrgentLatency *
4175 (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
4176 LineTime / 4.0);
4177 } else {
4178 TimeForFetchingMetaPTEImmediateFlip = 0;
4179 }
4180 if ((GPUVMEnable == true || DCCEnable == true)) {
4181 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
4182 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
4183 UrgentLatency * (HostVMDynamicLevelsTrips + 1), LineTime / 4.0);
4184 } else {
4185 TimeForFetchingRowInVBlankImmediateFlip = 0;
4186 }
4187
4188 *DestinationLinesToRequestVMInImmediateFlip =
4189 dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1.0) / 4.0;
4190 *DestinationLinesToRequestRowInImmediateFlip =
4191 dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1.0) / 4.0;
4192
4193 if (GPUVMEnable == true) {
4194 *final_flip_bw = dml_max(PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor /
4195 (*DestinationLinesToRequestVMInImmediateFlip * LineTime),
4196 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4197 (*DestinationLinesToRequestRowInImmediateFlip * LineTime));
4198 } else if ((GPUVMEnable == true || DCCEnable == true)) {
4199 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) /
4200 (*DestinationLinesToRequestRowInImmediateFlip * LineTime);
4201 } else {
4202 *final_flip_bw = 0;
4203 }
4204 } else {
4205 TimeForFetchingMetaPTEImmediateFlip = 0;
4206 TimeForFetchingRowInVBlankImmediateFlip = 0;
4207 *DestinationLinesToRequestVMInImmediateFlip = 0;
4208 *DestinationLinesToRequestRowInImmediateFlip = 0;
4209 *final_flip_bw = 0;
4210 }
4211
4212 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) {
4213 if (GPUVMEnable == true && DCCEnable != true) {
4214 min_row_time = dml_min(dpte_row_height *
4215 LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma);
4216 } else if (GPUVMEnable != true && DCCEnable == true) {
4217 min_row_time = dml_min(meta_row_height *
4218 LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma);
4219 } else {
4220 min_row_time = dml_min4(dpte_row_height * LineTime / VRatio, meta_row_height *
4221 LineTime / VRatio, dpte_row_height_chroma * LineTime /
4222 VRatioChroma, meta_row_height_chroma * LineTime / VRatioChroma);
4223 }
4224 } else {
4225 if (GPUVMEnable == true && DCCEnable != true) {
4226 min_row_time = dpte_row_height * LineTime / VRatio;
4227 } else if (GPUVMEnable != true && DCCEnable == true) {
4228 min_row_time = meta_row_height * LineTime / VRatio;
4229 } else {
4230 min_row_time =
4231 dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio);
4232 }
4233 }
4234
4235 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16
4236 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip
4237 > min_row_time) {
4238 *ImmediateFlipSupportedForPipe = false;
4239 } else {
4240 *ImmediateFlipSupportedForPipe = true;
4241 }
4242
4243 #ifdef __DML_VBA_DEBUG__
4244 dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
4245 dml_print("DML::%s: DCCEnable = %d\n", __func__, DCCEnable);
4246 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n",
4247 __func__, *DestinationLinesToRequestVMInImmediateFlip);
4248 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n",
4249 __func__, *DestinationLinesToRequestRowInImmediateFlip);
4250 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
4251 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n",
4252 __func__, TimeForFetchingRowInVBlankImmediateFlip);
4253 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
4254 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe);
4255 #endif
4256 } // CalculateFlipSchedule
4257
/*
 * dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
 *
 * Populates v->Watermark from the latencies in mmSOCParameters, then works
 * out, per active surface, how much clock-change latency the line buffer and
 * DET buffer can hide, and from those margins reports:
 *
 *   DRAMClockChangeSupport                   - DRAM clock change support class,
 *                                              qualified by the MALL method in use
 *   MaxActiveDRAMClockChangeLatencySupported - per surface; 0 for phantom pipes
 *   SubViewportLinesNeededInMALL             - per surface line count
 *   FCLKChangeSupport                        - vactive / vblank / unsupported
 *   MinActiveFCLKChangeLatencySupported      - smallest FCLK margin plus the
 *                                              FCLK change latency
 *   USRRetrainingSupport                     - false if any non-phantom surface
 *                                              has a negative retraining margin
 *   ActiveDRAMClockChangeLatencyMargin       - per surface DRAM margin
 *
 * DCFCLK and ReturnBW are accepted but not read by this function.
 */
void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
		struct vba_vars_st *v,
		unsigned int PrefetchMode,
		double DCFCLK,
		double ReturnBW,
		SOCParametersList mmSOCParameters,
		double SOCCLK,
		double DCFClkDeepSleep,
		unsigned int DETBufferSizeY[],
		unsigned int DETBufferSizeC[],
		unsigned int SwathHeightY[],
		unsigned int SwathHeightC[],
		double SwathWidthY[],
		double SwathWidthC[],
		unsigned int DPPPerSurface[],
		double BytePerPixelDETY[],
		double BytePerPixelDETC[],
		double DSTXAfterScaler[],
		double DSTYAfterScaler[],
		bool UnboundedRequestEnabled,
		unsigned int CompressedBufferSizeInkByte,

		/* Output */
		enum clock_change_support *DRAMClockChangeSupport,
		double MaxActiveDRAMClockChangeLatencySupported[],
		unsigned int SubViewportLinesNeededInMALL[],
		enum dm_fclock_change_support *FCLKChangeSupport,
		double *MinActiveFCLKChangeLatencySupported,
		bool *USRRetrainingSupport,
		double ActiveDRAMClockChangeLatencyMargin[])
{
	unsigned int i, j, k;
	unsigned int SurfaceWithMinActiveFCLKChangeMargin = 0;
	unsigned int DRAMClockChangeSupportNumber = 0;
	unsigned int LastSurfaceWithoutMargin = 0;
	unsigned int DRAMClockChangeMethod = 0;
	bool FoundFirstSurfaceWithMinActiveFCLKChangeMargin = false;
	double MinActiveFCLKChangeMargin = 0.;
	double SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = 0.;
	double ActiveClockChangeLatencyHidingY;
	double ActiveClockChangeLatencyHidingC;
	double ActiveClockChangeLatencyHiding;
	double EffectiveDETBufferSizeY;
	double ActiveFCLKChangeLatencyMargin[DC__NUM_DPP__MAX];
	double USRRetrainingLatencyMargin[DC__NUM_DPP__MAX];
	double TotalPixelBW = 0.0;
	bool SynchronizedSurfaces[DC__NUM_DPP__MAX][DC__NUM_DPP__MAX];
	double EffectiveLBLatencyHidingY;
	double EffectiveLBLatencyHidingC;
	double LinesInDETY[DC__NUM_DPP__MAX];
	double LinesInDETC[DC__NUM_DPP__MAX];
	unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
	unsigned int LinesInDETCRoundedDownToSwath[DC__NUM_DPP__MAX];
	double FullDETBufferingTimeY;
	double FullDETBufferingTimeC;
	double WritebackDRAMClockChangeLatencyMargin;
	double WritebackFCLKChangeLatencyMargin;
	double WritebackLatencyHiding;
	bool SameTimingForFCLKChange;

	unsigned int TotalActiveWriteback = 0;
	unsigned int LBLatencyHidingSourceLinesY[DC__NUM_DPP__MAX];
	unsigned int LBLatencyHidingSourceLinesC[DC__NUM_DPP__MAX];

	/* Read-path watermarks: each is a SOC latency plus a fixed overhead term. */
	v->Watermark.UrgentWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency;
	v->Watermark.USRRetrainingWatermark = mmSOCParameters.UrgentLatency + mmSOCParameters.ExtraLatency
			+ mmSOCParameters.USRRetrainingLatency + mmSOCParameters.SMNLatency;
	v->Watermark.DRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency + v->Watermark.UrgentWatermark;
	v->Watermark.FCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency + v->Watermark.UrgentWatermark;
	v->Watermark.StutterExitWatermark = mmSOCParameters.SRExitTime + mmSOCParameters.ExtraLatency
			+ 10 / DCFClkDeepSleep;
	v->Watermark.StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitTime + mmSOCParameters.ExtraLatency
			+ 10 / DCFClkDeepSleep;
	v->Watermark.Z8StutterExitWatermark = mmSOCParameters.SRExitZ8Time + mmSOCParameters.ExtraLatency
			+ 10 / DCFClkDeepSleep;
	v->Watermark.Z8StutterEnterPlusExitWatermark = mmSOCParameters.SREnterPlusExitZ8Time
			+ mmSOCParameters.ExtraLatency + 10 / DCFClkDeepSleep;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: UrgentLatency = %f\n", __func__, mmSOCParameters.UrgentLatency);
	dml_print("DML::%s: ExtraLatency = %f\n", __func__, mmSOCParameters.ExtraLatency);
	dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, mmSOCParameters.DRAMClockChangeLatency);
	dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->Watermark.UrgentWatermark);
	dml_print("DML::%s: USRRetrainingWatermark = %f\n", __func__, v->Watermark.USRRetrainingWatermark);
	dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->Watermark.DRAMClockChangeWatermark);
	dml_print("DML::%s: FCLKChangeWatermark = %f\n", __func__, v->Watermark.FCLKChangeWatermark);
	dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, v->Watermark.StutterExitWatermark);
	dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, v->Watermark.StutterEnterPlusExitWatermark);
	dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, v->Watermark.Z8StutterExitWatermark);
	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n",
			__func__, v->Watermark.Z8StutterEnterPlusExitWatermark);
#endif


	TotalActiveWriteback = 0;
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (v->WritebackEnable[k] == true)
			TotalActiveWriteback = TotalActiveWriteback + 1;
	}

	/* With more than one active writeback a chunk-transfer term is added. */
	if (TotalActiveWriteback <= 1) {
		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency;
	} else {
		v->Watermark.WritebackUrgentWatermark = mmSOCParameters.WritebackLatency
				+ v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
	}
	/*
	 * The retraining latency belongs on the urgent watermark computed just
	 * above.  Adding it to WritebackDRAMClockChangeWatermark here would be
	 * a dead store: that watermark is unconditionally assigned below and
	 * receives its own retraining adjustment afterwards.
	 */
	if (v->USRRetrainingRequiredFinal)
		v->Watermark.WritebackUrgentWatermark = v->Watermark.WritebackUrgentWatermark
				+ mmSOCParameters.USRRetrainingLatency;

	if (TotalActiveWriteback <= 1) {
		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
				+ mmSOCParameters.WritebackLatency;
		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
				+ mmSOCParameters.WritebackLatency;
	} else {
		v->Watermark.WritebackDRAMClockChangeWatermark = mmSOCParameters.DRAMClockChangeLatency
				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
		v->Watermark.WritebackFCLKChangeWatermark = mmSOCParameters.FCLKChangeLatency
				+ mmSOCParameters.WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
	}

	if (v->USRRetrainingRequiredFinal)
		v->Watermark.WritebackDRAMClockChangeWatermark = v->Watermark.WritebackDRAMClockChangeWatermark
				+ mmSOCParameters.USRRetrainingLatency;

	if (v->USRRetrainingRequiredFinal)
		v->Watermark.WritebackFCLKChangeWatermark = v->Watermark.WritebackFCLKChangeWatermark
				+ mmSOCParameters.USRRetrainingLatency;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: WritebackDRAMClockChangeWatermark = %f\n",
			__func__, v->Watermark.WritebackDRAMClockChangeWatermark);
	dml_print("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, v->Watermark.WritebackFCLKChangeWatermark);
	dml_print("DML::%s: WritebackUrgentWatermark = %f\n", __func__, v->Watermark.WritebackUrgentWatermark);
	dml_print("DML::%s: v->USRRetrainingRequiredFinal = %d\n", __func__, v->USRRetrainingRequiredFinal);
	dml_print("DML::%s: USRRetrainingLatency = %f\n", __func__, mmSOCParameters.USRRetrainingLatency);
#endif

	/* Sum of luma + chroma pixel bandwidth over all active surfaces. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		TotalPixelBW = TotalPixelBW + DPPPerSurface[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] +
				SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) / (v->HTotal[k] / v->PixelClock[k]);
	}

	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {

		LBLatencyHidingSourceLinesY[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
		LBLatencyHidingSourceLinesC[k] = dml_min((double) v->MaxLineBufferLines, dml_floor(v->LineBufferSizeFinal / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);


#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, v->MaxLineBufferLines = %d\n", __func__, k, v->MaxLineBufferLines);
		dml_print("DML::%s: k=%d, v->LineBufferSizeFinal = %d\n", __func__, k, v->LineBufferSizeFinal);
		dml_print("DML::%s: k=%d, v->LBBitPerPixel = %d\n", __func__, k, v->LBBitPerPixel[k]);
		dml_print("DML::%s: k=%d, v->HRatio = %f\n", __func__, k, v->HRatio[k]);
		dml_print("DML::%s: k=%d, v->vtaps              = %d\n", __func__, k, v->vtaps[k]);
#endif

		EffectiveLBLatencyHidingY = LBLatencyHidingSourceLinesY[k] / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
		EffectiveLBLatencyHidingC = LBLatencyHidingSourceLinesC[k] / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
		EffectiveDETBufferSizeY = DETBufferSizeY[k];

		/* Credit this surface with its luma-bandwidth share of the compressed buffer. */
		if (UnboundedRequestEnabled) {
			EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
					+ CompressedBufferSizeInkByte * 1024
							* (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k])
							/ (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
		}

		LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
		LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
		FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];

		ActiveClockChangeLatencyHidingY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
				- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k];

		if (v->NumberOfActiveSurfaces > 1) {
			ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
					- (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
							/ v->PixelClock[k] / v->VRatio[k];
		}

		if (BytePerPixelDETC[k] > 0) {
			LinesInDETC[k] = DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
			LinesInDETCRoundedDownToSwath[k] = dml_floor(LinesInDETC[k], SwathHeightC[k]);
			FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k])
					/ v->VRatioChroma[k];
			ActiveClockChangeLatencyHidingC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
					- (DSTXAfterScaler[k] / v->HTotal[k] + DSTYAfterScaler[k]) * v->HTotal[k]
							/ v->PixelClock[k];
			if (v->NumberOfActiveSurfaces > 1) {
				/*
				 * Floating-point constants on purpose, matching the luma
				 * path: with integer operands 1 / NumberOfActiveSurfaces
				 * truncates to 0 for every count above 1, which would
				 * subtract a full swath instead of the (1 - 1/N) share.
				 */
				ActiveClockChangeLatencyHidingC = ActiveClockChangeLatencyHidingC
						- (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightC[k] * v->HTotal[k]
								/ v->PixelClock[k] / v->VRatioChroma[k];
			}
			ActiveClockChangeLatencyHiding = dml_min(ActiveClockChangeLatencyHidingY,
					ActiveClockChangeLatencyHidingC);
		} else {
			ActiveClockChangeLatencyHiding = ActiveClockChangeLatencyHidingY;
		}

		ActiveDRAMClockChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
				- v->Watermark.DRAMClockChangeWatermark;
		ActiveFCLKChangeLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.UrgentWatermark
				- v->Watermark.FCLKChangeWatermark;
		USRRetrainingLatencyMargin[k] = ActiveClockChangeLatencyHiding - v->Watermark.USRRetrainingWatermark;

		if (v->WritebackEnable[k]) {
			WritebackLatencyHiding = v->WritebackInterfaceBufferSize * 1024
					/ (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
							/ (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
			if (v->WritebackPixelFormat[k] == dm_444_64)
				WritebackLatencyHiding = WritebackLatencyHiding / 2;

			WritebackDRAMClockChangeLatencyMargin = WritebackLatencyHiding
					- v->Watermark.WritebackDRAMClockChangeWatermark;

			WritebackFCLKChangeLatencyMargin = WritebackLatencyHiding
					- v->Watermark.WritebackFCLKChangeWatermark;

			/*
			 * Clamp each margin by the writeback margin of the same
			 * clock domain (DRAM with DRAM, FCLK with FCLK).
			 */
			ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMargin[k],
					WritebackDRAMClockChangeLatencyMargin);
			ActiveFCLKChangeLatencyMargin[k] = dml_min(ActiveFCLKChangeLatencyMargin[k],
					WritebackFCLKChangeLatencyMargin);
		}
		MaxActiveDRAMClockChangeLatencySupported[k] =
				(v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_phantom_pipe) ?
						0 :
						(ActiveDRAMClockChangeLatencyMargin[k]
								+ mmSOCParameters.DRAMClockChangeLatency);
	}

	/* Pairwise table: surfaces i and j count as synchronized when any clause holds. */
	for (i = 0; i < v->NumberOfActiveSurfaces; ++i) {
		for (j = 0; j < v->NumberOfActiveSurfaces; ++j) {
			if (i == j ||
					(v->BlendingAndTiming[i] == i && v->BlendingAndTiming[j] == i) ||
					(v->BlendingAndTiming[j] == j && v->BlendingAndTiming[i] == j) ||
					(v->BlendingAndTiming[i] == v->BlendingAndTiming[j] && v->BlendingAndTiming[i] != i) ||
					(v->SynchronizeTimingsFinal && v->PixelClock[i] == v->PixelClock[j] &&
					v->HTotal[i] == v->HTotal[j] && v->VTotal[i] == v->VTotal[j] &&
					v->VActive[i] == v->VActive[j]) || (v->SynchronizeDRRDisplaysForUCLKPStateChangeFinal &&
					(v->DRRDisplay[i] || v->DRRDisplay[j]))) {
				SynchronizedSurfaces[i][j] = true;
			} else {
				SynchronizedSurfaces[i][j] = false;
			}
		}
	}

	/* Smallest FCLK margin among non-phantom surfaces, and which surface owns it. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
				(!FoundFirstSurfaceWithMinActiveFCLKChangeMargin ||
				ActiveFCLKChangeLatencyMargin[k] < MinActiveFCLKChangeMargin)) {
			FoundFirstSurfaceWithMinActiveFCLKChangeMargin = true;
			MinActiveFCLKChangeMargin = ActiveFCLKChangeLatencyMargin[k];
			SurfaceWithMinActiveFCLKChangeMargin = k;
		}
	}

	*MinActiveFCLKChangeLatencySupported = MinActiveFCLKChangeMargin + mmSOCParameters.FCLKChangeLatency;

	/* Smallest FCLK margin among surfaces not synchronized with the minimum-margin one. */
	SameTimingForFCLKChange = true;
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (!SynchronizedSurfaces[k][SurfaceWithMinActiveFCLKChangeMargin]) {
			if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
					(SameTimingForFCLKChange ||
					ActiveFCLKChangeLatencyMargin[k] <
					SecondMinActiveFCLKChangeMarginOneDisplayInVBLank)) {
				SecondMinActiveFCLKChangeMarginOneDisplayInVBLank = ActiveFCLKChangeLatencyMargin[k];
			}
			SameTimingForFCLKChange = false;
		}
	}

	if (MinActiveFCLKChangeMargin > 0) {
		*FCLKChangeSupport = dm_fclock_change_vactive;
	} else if ((SameTimingForFCLKChange || SecondMinActiveFCLKChangeMarginOneDisplayInVBLank > 0) &&
			(PrefetchMode <= 1)) {
		*FCLKChangeSupport = dm_fclock_change_vblank;
	} else {
		*FCLKChangeSupport = dm_fclock_change_unsupported;
	}

	*USRRetrainingSupport = true;
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if ((v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) &&
				(USRRetrainingLatencyMargin[k] < 0)) {
			*USRRetrainingSupport = false;
		}
	}

	/*
	 * DRAMClockChangeSupportNumber: 0 = every non-MALL surface has margin,
	 * 1 = surfaces without margin are all synchronized with the first such
	 * surface (and PrefetchMode is 0), 2 = anything else.
	 */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_full_frame &&
				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_sub_viewport &&
				v->UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe &&
				ActiveDRAMClockChangeLatencyMargin[k] < 0) {
			if (PrefetchMode > 0) {
				DRAMClockChangeSupportNumber = 2;
			} else if (DRAMClockChangeSupportNumber == 0) {
				DRAMClockChangeSupportNumber = 1;
				LastSurfaceWithoutMargin = k;
			} else if (DRAMClockChangeSupportNumber == 1 &&
					!SynchronizedSurfaces[LastSurfaceWithoutMargin][k]) {
				DRAMClockChangeSupportNumber = 2;
			}
		}
	}

	/* Method: 0 = no MALL, 1 = full frame, 2 = sub-viewport; the last matching surface wins. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_full_frame)
			DRAMClockChangeMethod = 1;
		else if (v->UsesMALLForPStateChange[k] == dm_use_mall_pstate_change_sub_viewport)
			DRAMClockChangeMethod = 2;
	}

	if (DRAMClockChangeMethod == 0) {
		if (DRAMClockChangeSupportNumber == 0)
			*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
		else if (DRAMClockChangeSupportNumber == 1)
			*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
		else
			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
	} else if (DRAMClockChangeMethod == 1) {
		if (DRAMClockChangeSupportNumber == 0)
			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_full_frame;
		else if (DRAMClockChangeSupportNumber == 1)
			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_full_frame;
		else
			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
	} else {
		if (DRAMClockChangeSupportNumber == 0)
			*DRAMClockChangeSupport = dm_dram_clock_change_vactive_w_mall_sub_vp;
		else if (DRAMClockChangeSupportNumber == 1)
			*DRAMClockChangeSupport = dm_dram_clock_change_vblank_w_mall_sub_vp;
		else
			*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
	}

	/* Lines per surface: p-state lines + lines already buffered ahead + one meta row. */
	for (k = 0; k < v->NumberOfActiveSurfaces; ++k) {
		unsigned int dst_y_pstate;
		unsigned int src_y_pstate_l;
		unsigned int src_y_pstate_c;
		unsigned int src_y_ahead_l, src_y_ahead_c, sub_vp_lines_l, sub_vp_lines_c;

		dst_y_pstate = dml_ceil((mmSOCParameters.DRAMClockChangeLatency + mmSOCParameters.UrgentLatency) / (v->HTotal[k] / v->PixelClock[k]), 1);
		src_y_pstate_l = dml_ceil(dst_y_pstate * v->VRatio[k], SwathHeightY[k]);
		src_y_ahead_l = dml_floor(DETBufferSizeY[k] / BytePerPixelDETY[k] / SwathWidthY[k], SwathHeightY[k]) + LBLatencyHidingSourceLinesY[k];
		sub_vp_lines_l = src_y_pstate_l + src_y_ahead_l + v->meta_row_height[k];

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d, DETBufferSizeY               = %d\n", __func__, k, DETBufferSizeY[k]);
		dml_print("DML::%s: k=%d, BytePerPixelDETY             = %f\n", __func__, k, BytePerPixelDETY[k]);
		/* SwathWidthY is a double; %d here would be a printf type mismatch. */
		dml_print("DML::%s: k=%d, SwathWidthY                  = %f\n", __func__, k, SwathWidthY[k]);
		dml_print("DML::%s: k=%d, SwathHeightY                 = %d\n", __func__, k, SwathHeightY[k]);
		dml_print("DML::%s: k=%d, LBLatencyHidingSourceLinesY  = %d\n", __func__, k, LBLatencyHidingSourceLinesY[k]);
		dml_print("DML::%s: k=%d, dst_y_pstate      = %d\n", __func__, k, dst_y_pstate);
		dml_print("DML::%s: k=%d, src_y_pstate_l    = %d\n", __func__, k, src_y_pstate_l);
		dml_print("DML::%s: k=%d, src_y_ahead_l     = %d\n", __func__, k, src_y_ahead_l);
		dml_print("DML::%s: k=%d, v->meta_row_height   = %d\n", __func__, k, v->meta_row_height[k]);
		dml_print("DML::%s: k=%d, sub_vp_lines_l    = %d\n", __func__, k, sub_vp_lines_l);
#endif
		SubViewportLinesNeededInMALL[k] = sub_vp_lines_l;

		if (BytePerPixelDETC[k] > 0) {
			src_y_pstate_c = dml_ceil(dst_y_pstate * v->VRatioChroma[k], SwathHeightC[k]);
			src_y_ahead_c = dml_floor(DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k], SwathHeightC[k]) + LBLatencyHidingSourceLinesC[k];
			sub_vp_lines_c = src_y_pstate_c + src_y_ahead_c + v->meta_row_height_chroma[k];
			SubViewportLinesNeededInMALL[k] = dml_max(sub_vp_lines_l, sub_vp_lines_c);

#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%d, src_y_pstate_c            = %d\n", __func__, k, src_y_pstate_c);
			dml_print("DML::%s: k=%d, src_y_ahead_c             = %d\n", __func__, k, src_y_ahead_c);
			dml_print("DML::%s: k=%d, v->meta_row_height_chroma    = %d\n", __func__, k, v->meta_row_height_chroma[k]);
			dml_print("DML::%s: k=%d, sub_vp_lines_c            = %d\n", __func__, k, sub_vp_lines_c);
#endif
		}
	}
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DRAMClockChangeSupport = %d\n", __func__, *DRAMClockChangeSupport);
	dml_print("DML::%s: FCLKChangeSupport = %d\n", __func__, *FCLKChangeSupport);
	dml_print("DML::%s: MinActiveFCLKChangeLatencySupported = %f\n",
			__func__, *MinActiveFCLKChangeLatencySupported);
	dml_print("DML::%s: USRRetrainingSupport = %d\n", __func__, *USRRetrainingSupport);
#endif
} // CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport
4643
/*
 * dml32_CalculateWriteBackDISPCLK - display clock required by writeback.
 *
 * Evaluates three candidate clocks (one driven by the horizontal taps, one
 * by the vertical taps over a full line, one that also accounts for the
 * writeback line buffer size), takes the largest and returns it after
 * rounding through dml32_RoundToDFSGranularity() against
 * DISPCLKDPPCLKVCOSpeed.
 *
 * WritebackPixelFormat and WritebackVRatio are accepted but not read here.
 */
double dml32_CalculateWriteBackDISPCLK(
		enum source_format_class WritebackPixelFormat,
		double PixelClock,
		double WritebackHRatio,
		double WritebackVRatio,
		unsigned int WritebackHTaps,
		unsigned int WritebackVTaps,
		unsigned int WritebackSourceWidth,
		unsigned int WritebackDestinationWidth,
		unsigned int HTotal,
		unsigned int WritebackLineBufferSize,
		double DISPCLKDPPCLKVCOSpeed)
{
	const double clk_htaps =
			PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
	const double clk_vtaps =
			PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
	const double clk_line_buffer =
			PixelClock * WritebackVTaps * (WritebackDestinationWidth *
			WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
	const double clk_required = dml_max3(clk_htaps, clk_vtaps, clk_line_buffer);

	return dml32_RoundToDFSGranularity(clk_required, 1, DISPCLKDPPCLKVCOSpeed);
}
4665
dml32_CalculateMinAndMaxPrefetchMode(enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,unsigned int * MinPrefetchMode,unsigned int * MaxPrefetchMode)4666 void dml32_CalculateMinAndMaxPrefetchMode(
4667 enum dm_prefetch_modes AllowForPStateChangeOrStutterInVBlankFinal,
4668 unsigned int *MinPrefetchMode,
4669 unsigned int *MaxPrefetchMode)
4670 {
4671 if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_none) {
4672 *MinPrefetchMode = 3;
4673 *MaxPrefetchMode = 3;
4674 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_stutter) {
4675 *MinPrefetchMode = 2;
4676 *MaxPrefetchMode = 2;
4677 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_fclk_and_stutter) {
4678 *MinPrefetchMode = 1;
4679 *MaxPrefetchMode = 1;
4680 } else if (AllowForPStateChangeOrStutterInVBlankFinal == dm_prefetch_support_uclk_fclk_and_stutter) {
4681 *MinPrefetchMode = 0;
4682 *MaxPrefetchMode = 0;
4683 } else {
4684 *MinPrefetchMode = 0;
4685 *MaxPrefetchMode = 3;
4686 }
4687 } // CalculateMinAndMaxPrefetchMode
4688
/*
 * Line delivery time for one plane of one surface.
 *
 * When v_ratio <= 1 the swath width times the DPP count is divided by the
 * horizontal ratio and the pixel clock; otherwise the swath width is divided
 * by the PSCL throughput and Dppclk.
 */
static double dml32_line_delivery_time(
		double v_ratio,
		unsigned int swath_width_ub,
		unsigned int dpp_per_surface,
		double h_ratio,
		double pixel_clock,
		double pscl_throughput,
		double dppclk)
{
	if (v_ratio <= 1)
		return swath_width_ub * dpp_per_surface / h_ratio / pixel_clock;

	return swath_width_ub / pscl_throughput / dppclk;
}

/*
 * Delivery time of a single cursor request.
 *
 * Same formula selection on v_ratio as the line delivery time, applied to the
 * cursor width and then divided by the number of requests per cursor width.
 */
static double dml32_cursor_request_delivery_time(
		double v_ratio,
		unsigned int cursor_width,
		unsigned int requests_per_width,
		double h_ratio,
		double pixel_clock,
		double pscl_throughput,
		double dppclk)
{
	if (v_ratio <= 1)
		return (double) cursor_width / h_ratio / pixel_clock / requests_per_width;

	return (double) cursor_width / pscl_throughput / dppclk / requests_per_width;
}

/*
 * dml32_CalculatePixelDeliveryTimes - line, request and cursor delivery times
 * for every active surface.
 *
 * For each k < NumberOfActiveSurfaces the function fills in:
 *  - DisplayPipeLineDeliveryTime{Luma,Chroma}[k] and their Prefetch variants.
 *    The Prefetch variants select the formula with VRatioPrefetchY/C instead
 *    of VRatio/VRatioChroma; the formulas themselves are the same.
 *  - DisplayPipeRequestDeliveryTime{Luma,Chroma}[k] and their Prefetch
 *    variants: the matching line delivery time divided by the number of
 *    requests per swath. That count is the swath width divided by the
 *    256-byte block width, or by the block height when IsVertical() reports
 *    a vertical source rotation. The division is done on unsigned integers,
 *    so any remainder is dropped.
 *  - CursorRequestDeliveryTime[k] and its Prefetch variant. Only cursor index
 *    0 is looked at, and the luma ratios/throughput are used. Both are 0 when
 *    NumberOfCursors[k] is 0.
 *
 * All chroma outputs are 0 when BytePerPixelC[k] is 0.
 */
void dml32_CalculatePixelDeliveryTimes(
		unsigned int NumberOfActiveSurfaces,
		double VRatio[],
		double VRatioChroma[],
		double VRatioPrefetchY[],
		double VRatioPrefetchC[],
		unsigned int swath_width_luma_ub[],
		unsigned int swath_width_chroma_ub[],
		unsigned int DPPPerSurface[],
		double HRatio[],
		double HRatioChroma[],
		double PixelClock[],
		double PSCL_THROUGHPUT[],
		double PSCL_THROUGHPUT_CHROMA[],
		double Dppclk[],
		unsigned int BytePerPixelC[],
		enum dm_rotation_angle SourceRotation[],
		unsigned int NumberOfCursors[],
		unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
		unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
		unsigned int BlockWidth256BytesY[],
		unsigned int BlockHeight256BytesY[],
		unsigned int BlockWidth256BytesC[],
		unsigned int BlockHeight256BytesC[],

		/* Output */
		double DisplayPipeLineDeliveryTimeLuma[],
		double DisplayPipeLineDeliveryTimeChroma[],
		double DisplayPipeLineDeliveryTimeLumaPrefetch[],
		double DisplayPipeLineDeliveryTimeChromaPrefetch[],
		double DisplayPipeRequestDeliveryTimeLuma[],
		double DisplayPipeRequestDeliveryTimeChroma[],
		double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
		double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
		double CursorRequestDeliveryTime[],
		double CursorRequestDeliveryTimePrefetch[])
{
	unsigned int k;

	/* Pass 1: line delivery times, nominal and prefetch. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
		dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
		dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
		dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
		dml_print("DML::%s: k=%d : swath_width_luma_ub = %d\n", __func__, k, swath_width_luma_ub[k]);
		dml_print("DML::%s: k=%d : swath_width_chroma_ub = %d\n", __func__, k, swath_width_chroma_ub[k]);
		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
		dml_print("DML::%s: k=%d : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
		dml_print("DML::%s: k=%d : DPPPerSurface = %d\n", __func__, k, DPPPerSurface[k]);
		dml_print("DML::%s: k=%d : PixelClock = %f\n", __func__, k, PixelClock[k]);
		dml_print("DML::%s: k=%d : Dppclk = %f\n", __func__, k, Dppclk[k]);
#endif

		DisplayPipeLineDeliveryTimeLuma[k] = dml32_line_delivery_time(
				VRatio[k], swath_width_luma_ub[k], DPPPerSurface[k],
				HRatio[k], PixelClock[k], PSCL_THROUGHPUT[k], Dppclk[k]);

		if (BytePerPixelC[k] == 0)
			DisplayPipeLineDeliveryTimeChroma[k] = 0;
		else
			DisplayPipeLineDeliveryTimeChroma[k] = dml32_line_delivery_time(
					VRatioChroma[k], swath_width_chroma_ub[k], DPPPerSurface[k],
					HRatioChroma[k], PixelClock[k], PSCL_THROUGHPUT_CHROMA[k],
					Dppclk[k]);

		DisplayPipeLineDeliveryTimeLumaPrefetch[k] = dml32_line_delivery_time(
				VRatioPrefetchY[k], swath_width_luma_ub[k], DPPPerSurface[k],
				HRatio[k], PixelClock[k], PSCL_THROUGHPUT[k], Dppclk[k]);

		if (BytePerPixelC[k] == 0)
			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
		else
			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = dml32_line_delivery_time(
					VRatioPrefetchC[k], swath_width_chroma_ub[k], DPPPerSurface[k],
					HRatioChroma[k], PixelClock[k], PSCL_THROUGHPUT_CHROMA[k],
					Dppclk[k]);

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n",
				__func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
#endif
	}

	/* Pass 2: split each line delivery time across the requests of a swath. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		double requests_per_swath;

		/* Unsigned integer division: the remainder is intentionally not kept. */
		requests_per_swath = swath_width_luma_ub[k] /
				(IsVertical(SourceRotation[k]) ? BlockHeight256BytesY[k] :
						BlockWidth256BytesY[k]);
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Luma)\n", __func__, k, requests_per_swath);
#endif

		DisplayPipeRequestDeliveryTimeLuma[k] =
				DisplayPipeLineDeliveryTimeLuma[k] / requests_per_swath;
		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] =
				DisplayPipeLineDeliveryTimeLumaPrefetch[k] / requests_per_swath;

		if (BytePerPixelC[k] != 0) {
			requests_per_swath = swath_width_chroma_ub[k] /
					(IsVertical(SourceRotation[k]) ? BlockHeight256BytesC[k] :
							BlockWidth256BytesC[k]);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%d : req_per_swath_ub = %f (Chroma)\n", __func__, k, requests_per_swath);
#endif
			DisplayPipeRequestDeliveryTimeChroma[k] =
					DisplayPipeLineDeliveryTimeChroma[k] / requests_per_swath;
			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] =
					DisplayPipeLineDeliveryTimeChromaPrefetch[k] / requests_per_swath;
		} else {
			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n",
				__func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
#endif
	}

	/* Pass 3: cursor request delivery times (cursor 0 of each surface only). */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		unsigned int cursor_requests;

		/* Cursor row size in bits, divided by 256 and by 8, passed through dml_ceil(). */
		cursor_requests = dml_ceil((double) CursorWidth[k][0] * (double) CursorBPP[k][0] /
				256.0 / 8.0, 1.0);

		if (NumberOfCursors[k] == 0) {
			CursorRequestDeliveryTime[k] = 0;
			CursorRequestDeliveryTimePrefetch[k] = 0;
		} else {
			CursorRequestDeliveryTime[k] = dml32_cursor_request_delivery_time(
					VRatio[k], CursorWidth[k][0], cursor_requests,
					HRatio[k], PixelClock[k], PSCL_THROUGHPUT[k], Dppclk[k]);
			CursorRequestDeliveryTimePrefetch[k] = dml32_cursor_request_delivery_time(
					VRatioPrefetchY[k], CursorWidth[k][0], cursor_requests,
					HRatio[k], PixelClock[k], PSCL_THROUGHPUT[k], Dppclk[k]);
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : NumberOfCursors = %d\n",
				__func__, k, NumberOfCursors[k]);
		dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n",
				__func__, k, CursorRequestDeliveryTime[k]);
		dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n",
				__func__, k, CursorRequestDeliveryTimePrefetch[k]);
#endif
	}
} // CalculatePixelDeliveryTimes
4869
/*
 * Number of dpte groups per dpte row: row width divided by group width
 * (in double precision) and passed through dml_ceil(..., 1.0). When
 * one_row_for_frame is set the quotient is additionally divided by 2 before
 * the dml_ceil() call.
 */
static unsigned int dml32_dpte_groups_per_row_ub(
		unsigned int dpte_row_width_ub,
		unsigned int dpte_group_width,
		bool one_row_for_frame)
{
	if (one_row_for_frame)
		return dml_ceil((double) dpte_row_width_ub / (double) dpte_group_width / 2.0, 1.0);

	return dml_ceil((double) dpte_row_width_ub / (double) dpte_group_width, 1.0);
}

/*
 * dml32_CalculateMetaAndPTETimes - per-surface meta chunk and PTE group
 * timing.
 *
 * Three independent passes over the NumberOfActiveSurfaces surfaces:
 *  1. DST_Y_PER_{PTE,META}_ROW_NOM_{L,C}[k]: row height divided by the
 *     vertical ratio of the plane.
 *  2. TimePer[Chroma]MetaChunk{Nominal,VBlank,Flip}[k]: a number of
 *     destination lines times HTotal / PixelClock, divided by the upper-bound
 *     count of meta chunks per meta row. All six are 0 when DCCEnable[k] is
 *     false.
 *  3. time_per_pte_group_{nom,vblank,flip}_{luma,chroma}[k]: the same kind of
 *     line time divided by the upper-bound count of dpte groups per row. All
 *     six are 0 when GPUVMEnable is false.
 *
 * Every chroma output is 0 when BytePerPixelC[k] is 0.
 */
void dml32_CalculateMetaAndPTETimes(
		bool use_one_row_for_frame[],
		unsigned int NumberOfActiveSurfaces,
		bool GPUVMEnable,
		unsigned int MetaChunkSize,
		unsigned int MinMetaChunkSizeBytes,
		unsigned int HTotal[],
		double VRatio[],
		double VRatioChroma[],
		double DestinationLinesToRequestRowInVBlank[],
		double DestinationLinesToRequestRowInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		unsigned int BytePerPixelY[],
		unsigned int BytePerPixelC[],
		enum dm_rotation_angle SourceRotation[],
		unsigned int dpte_row_height[],
		unsigned int dpte_row_height_chroma[],
		unsigned int meta_row_width[],
		unsigned int meta_row_width_chroma[],
		unsigned int meta_row_height[],
		unsigned int meta_row_height_chroma[],
		unsigned int meta_req_width[],
		unsigned int meta_req_width_chroma[],
		unsigned int meta_req_height[],
		unsigned int meta_req_height_chroma[],
		unsigned int dpte_group_bytes[],
		unsigned int PTERequestSizeY[],
		unsigned int PTERequestSizeC[],
		unsigned int PixelPTEReqWidthY[],
		unsigned int PixelPTEReqHeightY[],
		unsigned int PixelPTEReqWidthC[],
		unsigned int PixelPTEReqHeightC[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],

		/* Output */
		double DST_Y_PER_PTE_ROW_NOM_L[],
		double DST_Y_PER_PTE_ROW_NOM_C[],
		double DST_Y_PER_META_ROW_NOM_L[],
		double DST_Y_PER_META_ROW_NOM_C[],
		double TimePerMetaChunkNominal[],
		double TimePerChromaMetaChunkNominal[],
		double TimePerMetaChunkVBlank[],
		double TimePerChromaMetaChunkVBlank[],
		double TimePerMetaChunkFlip[],
		double TimePerChromaMetaChunkFlip[],
		double time_per_pte_group_nom_luma[],
		double time_per_pte_group_vblank_luma[],
		double time_per_pte_group_flip_luma[],
		double time_per_pte_group_nom_chroma[],
		double time_per_pte_group_vblank_chroma[],
		double time_per_pte_group_flip_chroma[])
{
	unsigned int k;

	/* Pass 1: destination lines per PTE row and per meta row. */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		const bool has_chroma = BytePerPixelC[k] != 0;

		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
		DST_Y_PER_PTE_ROW_NOM_C[k] = has_chroma ?
				dpte_row_height_chroma[k] / VRatioChroma[k] : 0;
		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
		DST_Y_PER_META_ROW_NOM_C[k] = has_chroma ?
				meta_row_height_chroma[k] / VRatioChroma[k] : 0;
	}

	/* Pass 2: time per meta chunk (only meaningful with DCC enabled). */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		unsigned int chunk_width;
		unsigned int min_chunk_width;
		unsigned int whole_chunks;
		unsigned int leftover;
		unsigned int threshold;
		unsigned int chunks_per_row_ub;
		double vblank_row_time;
		double flip_row_time;
		bool vertical;

		if (!DCCEnable[k]) {
			TimePerMetaChunkNominal[k] = 0;
			TimePerMetaChunkVBlank[k] = 0;
			TimePerMetaChunkFlip[k] = 0;
			TimePerChromaMetaChunkNominal[k] = 0;
			TimePerChromaMetaChunkVBlank[k] = 0;
			TimePerChromaMetaChunkFlip[k] = 0;
			continue;
		}

		vertical = IsVertical(SourceRotation[k]);
		/* Shared by the luma and chroma VBlank / Flip results below. */
		vblank_row_time = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k];
		flip_row_time = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k];

		/* Luma: all of this is unsigned integer arithmetic. */
		chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
		min_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
		whole_chunks = meta_row_width[k] / chunk_width;
		leftover = meta_row_width[k] % chunk_width;
		threshold = 2 * min_chunk_width -
				(vertical ? meta_req_height[k] : meta_req_width[k]);
		/* One extra chunk, or two when the leftover exceeds the threshold. */
		chunks_per_row_ub = whole_chunks + (leftover <= threshold ? 1 : 2);

		TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] *
				HTotal[k] / PixelClock[k] / chunks_per_row_ub;
		TimePerMetaChunkVBlank[k] = vblank_row_time / chunks_per_row_ub;
		TimePerMetaChunkFlip[k] = flip_row_time / chunks_per_row_ub;

		if (BytePerPixelC[k] == 0) {
			TimePerChromaMetaChunkNominal[k] = 0;
			TimePerChromaMetaChunkVBlank[k] = 0;
			TimePerChromaMetaChunkFlip[k] = 0;
			continue;
		}

		/* Chroma: same scheme with the chroma geometry. */
		chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] /
				meta_row_height_chroma[k];
		min_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] /
				meta_row_height_chroma[k];
		/* Divided in double precision, then truncated by the assignment. */
		whole_chunks = (double) meta_row_width_chroma[k] / chunk_width;
		leftover = meta_row_width_chroma[k] % chunk_width;
		threshold = 2 * min_chunk_width -
				(vertical ? meta_req_height_chroma[k] : meta_req_width_chroma[k]);
		chunks_per_row_ub = whole_chunks + (leftover <= threshold ? 1 : 2);

		TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] *
				HTotal[k] / PixelClock[k] / chunks_per_row_ub;
		TimePerChromaMetaChunkVBlank[k] = vblank_row_time / chunks_per_row_ub;
		TimePerChromaMetaChunkFlip[k] = flip_row_time / chunks_per_row_ub;
	}

	/* Pass 3: time per PTE group (only meaningful with GPUVM enabled). */
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {
		if (!GPUVMEnable) {
			time_per_pte_group_nom_luma[k] = 0;
			time_per_pte_group_vblank_luma[k] = 0;
			time_per_pte_group_flip_luma[k] = 0;
			time_per_pte_group_nom_chroma[k] = 0;
			time_per_pte_group_vblank_chroma[k] = 0;
			time_per_pte_group_flip_chroma[k] = 0;
		} else {
			const bool vertical = IsVertical(SourceRotation[k]);
			const double vblank_row_time = DestinationLinesToRequestRowInVBlank[k] *
					HTotal[k] / PixelClock[k];
			const double flip_row_time = DestinationLinesToRequestRowInImmediateFlip[k] *
					HTotal[k] / PixelClock[k];
			unsigned int group_width;
			unsigned int groups_per_row_ub;

			/* Requests per group times the request extent along the scan direction. */
			group_width = (double) dpte_group_bytes[k] / (double) PTERequestSizeY[k] *
					(vertical ? PixelPTEReqHeightY[k] : PixelPTEReqWidthY[k]);
			groups_per_row_ub = dml32_dpte_groups_per_row_ub(dpte_row_width_luma_ub[k],
					group_width, use_one_row_for_frame[k]);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: k=%0d, use_one_row_for_frame = %d\n",
					__func__, k, use_one_row_for_frame[k]);
			dml_print("DML::%s: k=%0d, dpte_group_bytes = %d\n",
					__func__, k, dpte_group_bytes[k]);
			dml_print("DML::%s: k=%0d, PTERequestSizeY = %d\n",
					__func__, k, PTERequestSizeY[k]);
			dml_print("DML::%s: k=%0d, PixelPTEReqWidthY = %d\n",
					__func__, k, PixelPTEReqWidthY[k]);
			dml_print("DML::%s: k=%0d, PixelPTEReqHeightY = %d\n",
					__func__, k, PixelPTEReqHeightY[k]);
			dml_print("DML::%s: k=%0d, dpte_row_width_luma_ub = %d\n",
					__func__, k, dpte_row_width_luma_ub[k]);
			dml_print("DML::%s: k=%0d, dpte_group_width_luma = %d\n",
					__func__, k, group_width);
			dml_print("DML::%s: k=%0d, dpte_groups_per_row_luma_ub = %d\n",
					__func__, k, groups_per_row_ub);
#endif

			time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] *
					HTotal[k] / PixelClock[k] / groups_per_row_ub;
			time_per_pte_group_vblank_luma[k] = vblank_row_time / groups_per_row_ub;
			time_per_pte_group_flip_luma[k] = flip_row_time / groups_per_row_ub;

			if (BytePerPixelC[k] == 0) {
				time_per_pte_group_nom_chroma[k] = 0;
				time_per_pte_group_vblank_chroma[k] = 0;
				time_per_pte_group_flip_chroma[k] = 0;
			} else {
				group_width = (double) dpte_group_bytes[k] / (double) PTERequestSizeC[k] *
						(vertical ? PixelPTEReqHeightC[k] : PixelPTEReqWidthC[k]);
				groups_per_row_ub = dml32_dpte_groups_per_row_ub(
						dpte_row_width_chroma_ub[k], group_width,
						use_one_row_for_frame[k]);
#ifdef __DML_VBA_DEBUG__
				dml_print("DML::%s: k=%0d, dpte_row_width_chroma_ub = %d\n",
						__func__, k, dpte_row_width_chroma_ub[k]);
				dml_print("DML::%s: k=%0d, dpte_group_width_chroma = %d\n",
						__func__, k, group_width);
				dml_print("DML::%s: k=%0d, dpte_groups_per_row_chroma_ub = %d\n",
						__func__, k, groups_per_row_ub);
#endif
				time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] *
						HTotal[k] / PixelClock[k] / groups_per_row_ub;
				time_per_pte_group_vblank_chroma[k] = vblank_row_time / groups_per_row_ub;
				time_per_pte_group_flip_chroma[k] = flip_row_time / groups_per_row_ub;
			}
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInVBlank = %f\n",
				__func__, k, DestinationLinesToRequestRowInVBlank[k]);
		dml_print("DML::%s: k=%0d, DestinationLinesToRequestRowInImmediateFlip = %f\n",
				__func__, k, DestinationLinesToRequestRowInImmediateFlip[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_L = %f\n",
				__func__, k, DST_Y_PER_PTE_ROW_NOM_L[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_PTE_ROW_NOM_C = %f\n",
				__func__, k, DST_Y_PER_PTE_ROW_NOM_C[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_L = %f\n",
				__func__, k, DST_Y_PER_META_ROW_NOM_L[k]);
		dml_print("DML::%s: k=%0d, DST_Y_PER_META_ROW_NOM_C = %f\n",
				__func__, k, DST_Y_PER_META_ROW_NOM_C[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkNominal = %f\n",
				__func__, k, TimePerMetaChunkNominal[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkVBlank = %f\n",
				__func__, k, TimePerMetaChunkVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerMetaChunkFlip = %f\n",
				__func__, k, TimePerMetaChunkFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkNominal = %f\n",
				__func__, k, TimePerChromaMetaChunkNominal[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkVBlank = %f\n",
				__func__, k, TimePerChromaMetaChunkVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerChromaMetaChunkFlip = %f\n",
				__func__, k, TimePerChromaMetaChunkFlip[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_luma = %f\n",
				__func__, k, time_per_pte_group_nom_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_luma = %f\n",
				__func__, k, time_per_pte_group_vblank_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_luma = %f\n",
				__func__, k, time_per_pte_group_flip_luma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_nom_chroma = %f\n",
				__func__, k, time_per_pte_group_nom_chroma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_vblank_chroma = %f\n",
				__func__, k, time_per_pte_group_vblank_chroma[k]);
		dml_print("DML::%s: k=%0d, time_per_pte_group_flip_chroma = %f\n",
				__func__, k, time_per_pte_group_flip_chroma[k]);
#endif
	}
} // CalculateMetaAndPTETimes
5143
/*
 * Group count contributed by one byte total: bytes divided by the vm group
 * size in double precision, passed through dml_ceil(..., 1.0).
 */
static double dml32_vm_groups_for_bytes(
		unsigned int bytes_per_frame_ub,
		unsigned int group_bytes)
{
	return dml_ceil((double) bytes_per_frame_ub / (double) group_bytes, 1.0);
}

/*
 * dml32_CalculateVMGroupAndRequestTimes - time per VM group and per VM
 * request, for the VBlank and immediate-flip cases.
 *
 * For each k < NumberOfActiveSurfaces, the four outputs are 0 unless
 * GPUVMEnable is set and either DCCEnable[k] is set or
 * GPUVMMaxPageTableLevels > 1. Otherwise each output is
 *     destination_lines * HTotal[k] / PixelClock[k] / count
 * where count is the number of groups (for the Group outputs) or of 64-byte
 * requests (for the Request outputs), and is halved afterwards when
 * GPUVMMaxPageTableLevels > 2.
 *
 * Which byte totals feed the counts:
 *  - DCC off:                        dpde0 bytes only
 *  - DCC on, exactly one page level: meta PTE bytes only
 *  - DCC on, any other level count:  both, and the group count additionally
 *                                    gets +1 (luma only) or +2 (with chroma)
 * The chroma totals are included only when BytePerPixelC[k] > 0.
 */
void dml32_CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActiveSurfaces,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		unsigned int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		unsigned int dpte_row_width_luma_ub[],
		unsigned int dpte_row_width_chroma_ub[],
		unsigned int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		unsigned int meta_pte_bytes_per_frame_ub_l[],
		unsigned int meta_pte_bytes_per_frame_ub_c[],

		/* Output */
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	unsigned int k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, NumberOfActiveSurfaces);
	dml_print("DML::%s: GPUVMEnable = %d\n", __func__, GPUVMEnable);
#endif
	for (k = 0; k < NumberOfActiveSurfaces; ++k) {

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, DCCEnable = %d\n", __func__, k, DCCEnable[k]);
		dml_print("DML::%s: k=%0d, vm_group_bytes = %d\n", __func__, k, vm_group_bytes[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_l = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, dpde0_bytes_per_frame_ub_c = %d\n",
				__func__, k, dpde0_bytes_per_frame_ub_c[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_l = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_l[k]);
		dml_print("DML::%s: k=%0d, meta_pte_bytes_per_frame_ub_c = %d\n",
				__func__, k, meta_pte_bytes_per_frame_ub_c[k]);
#endif

		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
		} else {
			const bool has_chroma = BytePerPixelC[k] > 0;
			const bool count_meta_pte = DCCEnable[k];
			const bool count_dpde0 = !DCCEnable[k] || GPUVMMaxPageTableLevels != 1;
			const double vblank_time = DestinationLinesToRequestVMInVBlank[k] *
					HTotal[k] / PixelClock[k];
			const double flip_time = DestinationLinesToRequestVMInImmediateFlip[k] *
					HTotal[k] / PixelClock[k];
			double group_total = 0;
			unsigned int num_group_per_lower_vm_stage;
			unsigned int num_req_per_lower_vm_stage = 0;

			/* Fixed extra groups apply only when both byte totals are counted. */
			if (count_dpde0 && count_meta_pte)
				group_total = has_chroma ? 2 : 1;

			if (count_dpde0) {
				group_total += dml32_vm_groups_for_bytes(
						dpde0_bytes_per_frame_ub_l[k], vm_group_bytes[k]);
				num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_total += dml32_vm_groups_for_bytes(
							dpde0_bytes_per_frame_ub_c[k], vm_group_bytes[k]);
					num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_c[k] / 64;
				}
			}

			if (count_meta_pte) {
				group_total += dml32_vm_groups_for_bytes(
						meta_pte_bytes_per_frame_ub_l[k], vm_group_bytes[k]);
				num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64;
				if (has_chroma) {
					group_total += dml32_vm_groups_for_bytes(
							meta_pte_bytes_per_frame_ub_c[k], vm_group_bytes[k]);
					num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64;
				}
			}

			/* The count is kept as an unsigned integer, as before. */
			num_group_per_lower_vm_stage = group_total;

			TimePerVMGroupVBlank[k] = vblank_time / num_group_per_lower_vm_stage;
			TimePerVMGroupFlip[k] = flip_time / num_group_per_lower_vm_stage;
			TimePerVMRequestVBlank[k] = vblank_time / num_req_per_lower_vm_stage;
			TimePerVMRequestFlip[k] = flip_time / num_req_per_lower_vm_stage;

			if (GPUVMMaxPageTableLevels > 2) {
				TimePerVMGroupVBlank[k] /= 2;
				TimePerVMGroupFlip[k] /= 2;
				TimePerVMRequestVBlank[k] /= 2;
				TimePerVMRequestFlip[k] /= 2;
			}
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%0d, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
		dml_print("DML::%s: k=%0d, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
#endif
	}
} // CalculateVMGroupAndRequestTimes
5297
dml32_CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,unsigned int nomDETInKByte,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dm_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum dm_rotation_angle SourceRotation,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)5298 void dml32_CalculateDCCConfiguration(
5299 bool DCCEnabled,
5300 bool DCCProgrammingAssumesScanDirectionUnknown,
5301 enum source_format_class SourcePixelFormat,
5302 unsigned int SurfaceWidthLuma,
5303 unsigned int SurfaceWidthChroma,
5304 unsigned int SurfaceHeightLuma,
5305 unsigned int SurfaceHeightChroma,
5306 unsigned int nomDETInKByte,
5307 unsigned int RequestHeight256ByteLuma,
5308 unsigned int RequestHeight256ByteChroma,
5309 enum dm_swizzle_mode TilingFormat,
5310 unsigned int BytePerPixelY,
5311 unsigned int BytePerPixelC,
5312 double BytePerPixelDETY,
5313 double BytePerPixelDETC,
5314 enum dm_rotation_angle SourceRotation,
5315 /* Output */
5316 unsigned int *MaxUncompressedBlockLuma,
5317 unsigned int *MaxUncompressedBlockChroma,
5318 unsigned int *MaxCompressedBlockLuma,
5319 unsigned int *MaxCompressedBlockChroma,
5320 unsigned int *IndependentBlockLuma,
5321 unsigned int *IndependentBlockChroma)
5322 {
5323 typedef enum {
5324 REQ_256Bytes,
5325 REQ_128BytesNonContiguous,
5326 REQ_128BytesContiguous,
5327 REQ_NA
5328 } RequestType;
5329
5330 RequestType RequestLuma;
5331 RequestType RequestChroma;
5332
5333 unsigned int segment_order_horz_contiguous_luma;
5334 unsigned int segment_order_horz_contiguous_chroma;
5335 unsigned int segment_order_vert_contiguous_luma;
5336 unsigned int segment_order_vert_contiguous_chroma;
5337 unsigned int req128_horz_wc_l;
5338 unsigned int req128_horz_wc_c;
5339 unsigned int req128_vert_wc_l;
5340 unsigned int req128_vert_wc_c;
5341 unsigned int MAS_vp_horz_limit;
5342 unsigned int MAS_vp_vert_limit;
5343 unsigned int max_vp_horz_width;
5344 unsigned int max_vp_vert_height;
5345 unsigned int eff_surf_width_l;
5346 unsigned int eff_surf_width_c;
5347 unsigned int eff_surf_height_l;
5348 unsigned int eff_surf_height_c;
5349 unsigned int full_swath_bytes_horz_wc_l;
5350 unsigned int full_swath_bytes_horz_wc_c;
5351 unsigned int full_swath_bytes_vert_wc_l;
5352 unsigned int full_swath_bytes_vert_wc_c;
5353 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
5354
5355 unsigned int yuv420;
5356 unsigned int horz_div_l;
5357 unsigned int horz_div_c;
5358 unsigned int vert_div_l;
5359 unsigned int vert_div_c;
5360
5361 unsigned int swath_buf_size;
5362 double detile_buf_vp_horz_limit;
5363 double detile_buf_vp_vert_limit;
5364
5365 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 ||
5366 SourcePixelFormat == dm_420_12) ? 1 : 0);
5367 horz_div_l = 1;
5368 horz_div_c = 1;
5369 vert_div_l = 1;
5370 vert_div_c = 1;
5371
5372 if (BytePerPixelY == 1)
5373 vert_div_l = 0;
5374 if (BytePerPixelC == 1)
5375 vert_div_c = 0;
5376
5377 if (BytePerPixelC == 0) {
5378 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
5379 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5380 BytePerPixelY / (1 + horz_div_l));
5381 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5382 (1 + vert_div_l));
5383 } else {
5384 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
5385 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma *
5386 BytePerPixelY / (1 + horz_div_l) + (double) RequestHeight256ByteChroma *
5387 BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
5388 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma /
5389 (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma /
5390 (1 + vert_div_c) / (1 + yuv420));
5391 }
5392
5393 if (SourcePixelFormat == dm_420_10) {
5394 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
5395 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
5396 }
5397
5398 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
5399 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
5400
5401 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 6144;
5402 MAS_vp_vert_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
5403 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
5404 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
5405 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
5406 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
5407 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
5408 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
5409
5410 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
5411 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
5412 if (BytePerPixelC > 0) {
5413 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
5414 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
5415 } else {
5416 full_swath_bytes_horz_wc_c = 0;
5417 full_swath_bytes_vert_wc_c = 0;
5418 }
5419
5420 if (SourcePixelFormat == dm_420_10) {
5421 full_swath_bytes_horz_wc_l = dml_ceil((double) full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0);
5422 full_swath_bytes_horz_wc_c = dml_ceil((double) full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0);
5423 full_swath_bytes_vert_wc_l = dml_ceil((double) full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0);
5424 full_swath_bytes_vert_wc_c = dml_ceil((double) full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0);
5425 }
5426
5427 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5428 req128_horz_wc_l = 0;
5429 req128_horz_wc_c = 0;
5430 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l +
5431 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5432 req128_horz_wc_l = 0;
5433 req128_horz_wc_c = 1;
5434 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 *
5435 full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
5436 req128_horz_wc_l = 1;
5437 req128_horz_wc_c = 0;
5438 } else {
5439 req128_horz_wc_l = 1;
5440 req128_horz_wc_c = 1;
5441 }
5442
5443 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5444 req128_vert_wc_l = 0;
5445 req128_vert_wc_c = 0;
5446 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 *
5447 full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5448 req128_vert_wc_l = 0;
5449 req128_vert_wc_c = 1;
5450 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c &&
5451 full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
5452 req128_vert_wc_l = 1;
5453 req128_vert_wc_c = 0;
5454 } else {
5455 req128_vert_wc_l = 1;
5456 req128_vert_wc_c = 1;
5457 }
5458
5459 if (BytePerPixelY == 2) {
5460 segment_order_horz_contiguous_luma = 0;
5461 segment_order_vert_contiguous_luma = 1;
5462 } else {
5463 segment_order_horz_contiguous_luma = 1;
5464 segment_order_vert_contiguous_luma = 0;
5465 }
5466
5467 if (BytePerPixelC == 2) {
5468 segment_order_horz_contiguous_chroma = 0;
5469 segment_order_vert_contiguous_chroma = 1;
5470 } else {
5471 segment_order_horz_contiguous_chroma = 1;
5472 segment_order_vert_contiguous_chroma = 0;
5473 }
5474 #ifdef __DML_VBA_DEBUG__
5475 dml_print("DML::%s: DCCEnabled = %d\n", __func__, DCCEnabled);
5476 dml_print("DML::%s: nomDETInKByte = %d\n", __func__, nomDETInKByte);
5477 dml_print("DML::%s: DETBufferSizeForDCC = %d\n", __func__, DETBufferSizeForDCC);
5478 dml_print("DML::%s: req128_horz_wc_l = %d\n", __func__, req128_horz_wc_l);
5479 dml_print("DML::%s: req128_horz_wc_c = %d\n", __func__, req128_horz_wc_c);
5480 dml_print("DML::%s: full_swath_bytes_horz_wc_l = %d\n", __func__, full_swath_bytes_horz_wc_l);
5481 dml_print("DML::%s: full_swath_bytes_vert_wc_c = %d\n", __func__, full_swath_bytes_vert_wc_c);
5482 dml_print("DML::%s: segment_order_horz_contiguous_luma = %d\n", __func__, segment_order_horz_contiguous_luma);
5483 dml_print("DML::%s: segment_order_horz_contiguous_chroma = %d\n",
5484 __func__, segment_order_horz_contiguous_chroma);
5485 #endif
5486
5487 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
5488 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0)
5489 RequestLuma = REQ_256Bytes;
5490 else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) ||
5491 (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0))
5492 RequestLuma = REQ_128BytesNonContiguous;
5493 else
5494 RequestLuma = REQ_128BytesContiguous;
5495
5496 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0)
5497 RequestChroma = REQ_256Bytes;
5498 else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) ||
5499 (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0))
5500 RequestChroma = REQ_128BytesNonContiguous;
5501 else
5502 RequestChroma = REQ_128BytesContiguous;
5503
5504 } else if (!IsVertical(SourceRotation)) {
5505 if (req128_horz_wc_l == 0)
5506 RequestLuma = REQ_256Bytes;
5507 else if (segment_order_horz_contiguous_luma == 0)
5508 RequestLuma = REQ_128BytesNonContiguous;
5509 else
5510 RequestLuma = REQ_128BytesContiguous;
5511
5512 if (req128_horz_wc_c == 0)
5513 RequestChroma = REQ_256Bytes;
5514 else if (segment_order_horz_contiguous_chroma == 0)
5515 RequestChroma = REQ_128BytesNonContiguous;
5516 else
5517 RequestChroma = REQ_128BytesContiguous;
5518
5519 } else {
5520 if (req128_vert_wc_l == 0)
5521 RequestLuma = REQ_256Bytes;
5522 else if (segment_order_vert_contiguous_luma == 0)
5523 RequestLuma = REQ_128BytesNonContiguous;
5524 else
5525 RequestLuma = REQ_128BytesContiguous;
5526
5527 if (req128_vert_wc_c == 0)
5528 RequestChroma = REQ_256Bytes;
5529 else if (segment_order_vert_contiguous_chroma == 0)
5530 RequestChroma = REQ_128BytesNonContiguous;
5531 else
5532 RequestChroma = REQ_128BytesContiguous;
5533 }
5534
5535 if (RequestLuma == REQ_256Bytes) {
5536 *MaxUncompressedBlockLuma = 256;
5537 *MaxCompressedBlockLuma = 256;
5538 *IndependentBlockLuma = 0;
5539 } else if (RequestLuma == REQ_128BytesContiguous) {
5540 *MaxUncompressedBlockLuma = 256;
5541 *MaxCompressedBlockLuma = 128;
5542 *IndependentBlockLuma = 128;
5543 } else {
5544 *MaxUncompressedBlockLuma = 256;
5545 *MaxCompressedBlockLuma = 64;
5546 *IndependentBlockLuma = 64;
5547 }
5548
5549 if (RequestChroma == REQ_256Bytes) {
5550 *MaxUncompressedBlockChroma = 256;
5551 *MaxCompressedBlockChroma = 256;
5552 *IndependentBlockChroma = 0;
5553 } else if (RequestChroma == REQ_128BytesContiguous) {
5554 *MaxUncompressedBlockChroma = 256;
5555 *MaxCompressedBlockChroma = 128;
5556 *IndependentBlockChroma = 128;
5557 } else {
5558 *MaxUncompressedBlockChroma = 256;
5559 *MaxCompressedBlockChroma = 64;
5560 *IndependentBlockChroma = 64;
5561 }
5562
5563 if (DCCEnabled != true || BytePerPixelC == 0) {
5564 *MaxUncompressedBlockChroma = 0;
5565 *MaxCompressedBlockChroma = 0;
5566 *IndependentBlockChroma = 0;
5567 }
5568
5569 if (DCCEnabled != true) {
5570 *MaxUncompressedBlockLuma = 0;
5571 *MaxCompressedBlockLuma = 0;
5572 *IndependentBlockLuma = 0;
5573 }
5574
5575 #ifdef __DML_VBA_DEBUG__
5576 dml_print("DML::%s: MaxUncompressedBlockLuma = %d\n", __func__, *MaxUncompressedBlockLuma);
5577 dml_print("DML::%s: MaxCompressedBlockLuma = %d\n", __func__, *MaxCompressedBlockLuma);
5578 dml_print("DML::%s: IndependentBlockLuma = %d\n", __func__, *IndependentBlockLuma);
5579 dml_print("DML::%s: MaxUncompressedBlockChroma = %d\n", __func__, *MaxUncompressedBlockChroma);
5580 dml_print("DML::%s: MaxCompressedBlockChroma = %d\n", __func__, *MaxCompressedBlockChroma);
5581 dml_print("DML::%s: IndependentBlockChroma = %d\n", __func__, *IndependentBlockChroma);
5582 #endif
5583
5584 } // CalculateDCCConfiguration
5585
dml32_CalculateStutterEfficiency(unsigned int CompressedBufferSizeInkByte,enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],bool UnboundedRequestEnabled,unsigned int MetaFIFOSizeInKEntries,unsigned int ZeroSizeBufferEntries,unsigned int PixelChunkSizeInKByte,unsigned int NumberOfActiveSurfaces,unsigned int ROBBufferSizeInKByte,double TotalDataReadBandwidth,double DCFCLK,double ReturnBW,unsigned int CompbufReservedSpace64B,unsigned int CompbufReservedSpaceZs,double SRExitTime,double SRExitZ8Time,bool SynchronizeTimingsFinal,unsigned int BlendingAndTiming[],double StutterEnterPlusExitWatermark,double Z8StutterEnterPlusExitWatermark,bool ProgressiveToInterlaceUnitInOPP,bool Interlace[],double MinTTUVBlank[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeY[],unsigned int BytePerPixelY[],double BytePerPixelDETY[],double SwathWidthY[],unsigned int SwathHeightY[],unsigned int SwathHeightC[],double NetDCCRateLuma[],double NetDCCRateChroma[],double DCCFractionOfZeroSizeRequestsLuma[],double DCCFractionOfZeroSizeRequestsChroma[],unsigned int HTotal[],unsigned int VTotal[],double PixelClock[],double VRatio[],enum dm_rotation_angle SourceRotation[],unsigned int BlockHeight256BytesY[],unsigned int BlockWidth256BytesY[],unsigned int BlockHeight256BytesC[],unsigned int BlockWidth256BytesC[],unsigned int DCCYMaxUncompressedBlock[],unsigned int DCCCMaxUncompressedBlock[],unsigned int VActive[],bool DCCEnable[],bool WritebackEnable[],double ReadBandwidthSurfaceLuma[],double ReadBandwidthSurfaceChroma[],double meta_row_bw[],double dpte_row_bw[],double * StutterEfficiencyNotIncludingVBlank,double * StutterEfficiency,unsigned int * NumberOfStutterBurstsPerFrame,double * Z8StutterEfficiencyNotIncludingVBlank,double * Z8StutterEfficiency,unsigned int * Z8NumberOfStutterBurstsPerFrame,double * StutterPeriod,bool * DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)5586 void dml32_CalculateStutterEfficiency(
5587 unsigned int CompressedBufferSizeInkByte,
5588 enum dm_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
5589 bool UnboundedRequestEnabled,
5590 unsigned int MetaFIFOSizeInKEntries,
5591 unsigned int ZeroSizeBufferEntries,
5592 unsigned int PixelChunkSizeInKByte,
5593 unsigned int NumberOfActiveSurfaces,
5594 unsigned int ROBBufferSizeInKByte,
5595 double TotalDataReadBandwidth,
5596 double DCFCLK,
5597 double ReturnBW,
5598 unsigned int CompbufReservedSpace64B,
5599 unsigned int CompbufReservedSpaceZs,
5600 double SRExitTime,
5601 double SRExitZ8Time,
5602 bool SynchronizeTimingsFinal,
5603 unsigned int BlendingAndTiming[],
5604 double StutterEnterPlusExitWatermark,
5605 double Z8StutterEnterPlusExitWatermark,
5606 bool ProgressiveToInterlaceUnitInOPP,
5607 bool Interlace[],
5608 double MinTTUVBlank[],
5609 unsigned int DPPPerSurface[],
5610 unsigned int DETBufferSizeY[],
5611 unsigned int BytePerPixelY[],
5612 double BytePerPixelDETY[],
5613 double SwathWidthY[],
5614 unsigned int SwathHeightY[],
5615 unsigned int SwathHeightC[],
5616 double NetDCCRateLuma[],
5617 double NetDCCRateChroma[],
5618 double DCCFractionOfZeroSizeRequestsLuma[],
5619 double DCCFractionOfZeroSizeRequestsChroma[],
5620 unsigned int HTotal[],
5621 unsigned int VTotal[],
5622 double PixelClock[],
5623 double VRatio[],
5624 enum dm_rotation_angle SourceRotation[],
5625 unsigned int BlockHeight256BytesY[],
5626 unsigned int BlockWidth256BytesY[],
5627 unsigned int BlockHeight256BytesC[],
5628 unsigned int BlockWidth256BytesC[],
5629 unsigned int DCCYMaxUncompressedBlock[],
5630 unsigned int DCCCMaxUncompressedBlock[],
5631 unsigned int VActive[],
5632 bool DCCEnable[],
5633 bool WritebackEnable[],
5634 double ReadBandwidthSurfaceLuma[],
5635 double ReadBandwidthSurfaceChroma[],
5636 double meta_row_bw[],
5637 double dpte_row_bw[],
5638
5639 /* Output */
5640 double *StutterEfficiencyNotIncludingVBlank,
5641 double *StutterEfficiency,
5642 unsigned int *NumberOfStutterBurstsPerFrame,
5643 double *Z8StutterEfficiencyNotIncludingVBlank,
5644 double *Z8StutterEfficiency,
5645 unsigned int *Z8NumberOfStutterBurstsPerFrame,
5646 double *StutterPeriod,
5647 bool *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE)
5648 {
5649
5650 bool FoundCriticalSurface = false;
5651 unsigned int SwathSizeCriticalSurface = 0;
5652 unsigned int LastChunkOfSwathSize;
5653 unsigned int MissingPartOfLastSwathOfDETSize;
5654 double LastZ8StutterPeriod = 0.0;
5655 double LastStutterPeriod = 0.0;
5656 unsigned int TotalNumberOfActiveOTG = 0;
5657 double doublePixelClock = 0;
5658 unsigned int doubleHTotal = 0;
5659 unsigned int doubleVTotal = 0;
5660 bool SameTiming = true;
5661 double DETBufferingTimeY;
5662 double SwathWidthYCriticalSurface = 0.0;
5663 double SwathHeightYCriticalSurface = 0.0;
5664 double VActiveTimeCriticalSurface = 0.0;
5665 double FrameTimeCriticalSurface = 0.0;
5666 unsigned int BytePerPixelYCriticalSurface = 0;
5667 double LinesToFinishSwathTransferStutterCriticalSurface = 0.0;
5668 unsigned int DETBufferSizeYCriticalSurface = 0;
5669 double MinTTUVBlankCriticalSurface = 0.0;
5670 unsigned int BlockWidth256BytesYCriticalSurface = 0;
5671 bool doublePlaneCriticalSurface = 0;
5672 bool doublePipeCriticalSurface = 0;
5673 double TotalCompressedReadBandwidth;
5674 double TotalRowReadBandwidth;
5675 double AverageDCCCompressionRate;
5676 double EffectiveCompressedBufferSize;
5677 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
5678 double StutterBurstTime;
5679 unsigned int TotalActiveWriteback;
5680 double LinesInDETY;
5681 double LinesInDETYRoundedDownToSwath;
5682 double MaximumEffectiveCompressionLuma;
5683 double MaximumEffectiveCompressionChroma;
5684 double TotalZeroSizeRequestReadBandwidth;
5685 double TotalZeroSizeCompressedReadBandwidth;
5686 double AverageDCCZeroSizeFraction;
5687 double AverageZeroSizeCompressionRate;
5688 unsigned int k;
5689
5690 TotalZeroSizeRequestReadBandwidth = 0;
5691 TotalZeroSizeCompressedReadBandwidth = 0;
5692 TotalRowReadBandwidth = 0;
5693 TotalCompressedReadBandwidth = 0;
5694
5695 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5696 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5697 if (DCCEnable[k] == true) {
5698 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesY[k] > SwathHeightY[k])
5699 || (!IsVertical(SourceRotation[k])
5700 && BlockHeight256BytesY[k] > SwathHeightY[k])
5701 || DCCYMaxUncompressedBlock[k] < 256) {
5702 MaximumEffectiveCompressionLuma = 2;
5703 } else {
5704 MaximumEffectiveCompressionLuma = 4;
5705 }
5706 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5707 + ReadBandwidthSurfaceLuma[k]
5708 / dml_min(NetDCCRateLuma[k],
5709 MaximumEffectiveCompressionLuma);
5710 #ifdef __DML_VBA_DEBUG__
5711 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5712 __func__, k, ReadBandwidthSurfaceLuma[k]);
5713 dml_print("DML::%s: k=%0d, NetDCCRateLuma = %f\n",
5714 __func__, k, NetDCCRateLuma[k]);
5715 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionLuma = %f\n",
5716 __func__, k, MaximumEffectiveCompressionLuma);
5717 #endif
5718 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5719 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
5720 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5721 + ReadBandwidthSurfaceLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]
5722 / MaximumEffectiveCompressionLuma;
5723
5724 if (ReadBandwidthSurfaceChroma[k] > 0) {
5725 if ((IsVertical(SourceRotation[k]) && BlockWidth256BytesC[k] > SwathHeightC[k])
5726 || (!IsVertical(SourceRotation[k])
5727 && BlockHeight256BytesC[k] > SwathHeightC[k])
5728 || DCCCMaxUncompressedBlock[k] < 256) {
5729 MaximumEffectiveCompressionChroma = 2;
5730 } else {
5731 MaximumEffectiveCompressionChroma = 4;
5732 }
5733 TotalCompressedReadBandwidth =
5734 TotalCompressedReadBandwidth
5735 + ReadBandwidthSurfaceChroma[k]
5736 / dml_min(NetDCCRateChroma[k],
5737 MaximumEffectiveCompressionChroma);
5738 #ifdef __DML_VBA_DEBUG__
5739 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceChroma = %f\n",
5740 __func__, k, ReadBandwidthSurfaceChroma[k]);
5741 dml_print("DML::%s: k=%0d, NetDCCRateChroma = %f\n",
5742 __func__, k, NetDCCRateChroma[k]);
5743 dml_print("DML::%s: k=%0d, MaximumEffectiveCompressionChroma = %f\n",
5744 __func__, k, MaximumEffectiveCompressionChroma);
5745 #endif
5746 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth
5747 + ReadBandwidthSurfaceChroma[k]
5748 * DCCFractionOfZeroSizeRequestsChroma[k];
5749 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
5750 + ReadBandwidthSurfaceChroma[k]
5751 * DCCFractionOfZeroSizeRequestsChroma[k]
5752 / MaximumEffectiveCompressionChroma;
5753 }
5754 } else {
5755 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
5756 + ReadBandwidthSurfaceLuma[k] + ReadBandwidthSurfaceChroma[k];
5757 }
5758 TotalRowReadBandwidth = TotalRowReadBandwidth
5759 + DPPPerSurface[k] * (meta_row_bw[k] + dpte_row_bw[k]);
5760 }
5761 }
5762
5763 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
5764 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
5765
5766 #ifdef __DML_VBA_DEBUG__
5767 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, UnboundedRequestEnabled);
5768 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
5769 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
5770 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n",
5771 __func__, TotalZeroSizeCompressedReadBandwidth);
5772 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
5773 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
5774 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5775 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
5776 dml_print("DML::%s: CompbufReservedSpace64B = %d\n", __func__, CompbufReservedSpace64B);
5777 dml_print("DML::%s: CompbufReservedSpaceZs = %d\n", __func__, CompbufReservedSpaceZs);
5778 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
5779 #endif
5780 if (AverageDCCZeroSizeFraction == 1) {
5781 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5782 / TotalZeroSizeCompressedReadBandwidth;
5783 EffectiveCompressedBufferSize = (double) MetaFIFOSizeInKEntries * 1024 * 64
5784 * AverageZeroSizeCompressionRate
5785 + ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5786 * AverageZeroSizeCompressionRate;
5787 } else if (AverageDCCZeroSizeFraction > 0) {
5788 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth
5789 / TotalZeroSizeCompressedReadBandwidth;
5790 EffectiveCompressedBufferSize = dml_min(
5791 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5792 (double) MetaFIFOSizeInKEntries * 1024 * 64
5793 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate
5794 + 1 / AverageDCCCompressionRate))
5795 + dml_min(((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5796 * AverageDCCCompressionRate,
5797 ((double) ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64
5798 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5799
5800 #ifdef __DML_VBA_DEBUG__
5801 dml_print("DML::%s: min 1 = %f\n", __func__,
5802 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5803 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 /
5804 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 /
5805 AverageDCCCompressionRate));
5806 dml_print("DML::%s: min 3 = %f\n", __func__, (ROBBufferSizeInKByte * 1024 -
5807 CompbufReservedSpace64B * 64) * AverageDCCCompressionRate);
5808 dml_print("DML::%s: min 4 = %f\n", __func__, (ZeroSizeBufferEntries - CompbufReservedSpaceZs) * 64 /
5809 (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
5810 #endif
5811 } else {
5812 EffectiveCompressedBufferSize = dml_min(
5813 (double) CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
5814 (double) MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate)
5815 + ((double) ROBBufferSizeInKByte * 1024 - CompbufReservedSpace64B * 64)
5816 * AverageDCCCompressionRate;
5817
5818 #ifdef __DML_VBA_DEBUG__
5819 dml_print("DML::%s: min 1 = %f\n", __func__,
5820 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
5821 dml_print("DML::%s: min 2 = %f\n", __func__,
5822 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
5823 #endif
5824 }
5825
5826 #ifdef __DML_VBA_DEBUG__
5827 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
5828 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
5829 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5830 #endif
5831
5832 *StutterPeriod = 0;
5833
5834 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5835 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5836 LinesInDETY = ((double) DETBufferSizeY[k]
5837 + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0)
5838 * ReadBandwidthSurfaceLuma[k] / TotalDataReadBandwidth)
5839 / BytePerPixelDETY[k] / SwathWidthY[k];
5840 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
5841 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * ((double) HTotal[k] / PixelClock[k])
5842 / VRatio[k];
5843 #ifdef __DML_VBA_DEBUG__
5844 dml_print("DML::%s: k=%0d, DETBufferSizeY = %d\n", __func__, k, DETBufferSizeY[k]);
5845 dml_print("DML::%s: k=%0d, BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
5846 dml_print("DML::%s: k=%0d, SwathWidthY = %d\n", __func__, k, SwathWidthY[k]);
5847 dml_print("DML::%s: k=%0d, ReadBandwidthSurfaceLuma = %f\n",
5848 __func__, k, ReadBandwidthSurfaceLuma[k]);
5849 dml_print("DML::%s: k=%0d, TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
5850 dml_print("DML::%s: k=%0d, LinesInDETY = %f\n", __func__, k, LinesInDETY);
5851 dml_print("DML::%s: k=%0d, LinesInDETYRoundedDownToSwath = %f\n",
5852 __func__, k, LinesInDETYRoundedDownToSwath);
5853 dml_print("DML::%s: k=%0d, HTotal = %d\n", __func__, k, HTotal[k]);
5854 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5855 dml_print("DML::%s: k=%0d, VRatio = %f\n", __func__, k, VRatio[k]);
5856 dml_print("DML::%s: k=%0d, DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
5857 dml_print("DML::%s: k=%0d, PixelClock = %f\n", __func__, k, PixelClock[k]);
5858 #endif
5859
5860 if (!FoundCriticalSurface || DETBufferingTimeY < *StutterPeriod) {
5861 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
5862
5863 FoundCriticalSurface = true;
5864 *StutterPeriod = DETBufferingTimeY;
5865 FrameTimeCriticalSurface = (
5866 isInterlaceTiming ?
5867 dml_floor((double) VTotal[k] / 2.0, 1.0) : VTotal[k])
5868 * (double) HTotal[k] / PixelClock[k];
5869 VActiveTimeCriticalSurface = (
5870 isInterlaceTiming ?
5871 dml_floor((double) VActive[k] / 2.0, 1.0) : VActive[k])
5872 * (double) HTotal[k] / PixelClock[k];
5873 BytePerPixelYCriticalSurface = BytePerPixelY[k];
5874 SwathWidthYCriticalSurface = SwathWidthY[k];
5875 SwathHeightYCriticalSurface = SwathHeightY[k];
5876 BlockWidth256BytesYCriticalSurface = BlockWidth256BytesY[k];
5877 LinesToFinishSwathTransferStutterCriticalSurface = SwathHeightY[k]
5878 - (LinesInDETY - LinesInDETYRoundedDownToSwath);
5879 DETBufferSizeYCriticalSurface = DETBufferSizeY[k];
5880 MinTTUVBlankCriticalSurface = MinTTUVBlank[k];
5881 doublePlaneCriticalSurface = (ReadBandwidthSurfaceChroma[k] == 0);
5882 doublePipeCriticalSurface = (DPPPerSurface[k] == 1);
5883
5884 #ifdef __DML_VBA_DEBUG__
5885 dml_print("DML::%s: k=%0d, FoundCriticalSurface = %d\n",
5886 __func__, k, FoundCriticalSurface);
5887 dml_print("DML::%s: k=%0d, StutterPeriod = %f\n",
5888 __func__, k, *StutterPeriod);
5889 dml_print("DML::%s: k=%0d, MinTTUVBlankCriticalSurface = %f\n",
5890 __func__, k, MinTTUVBlankCriticalSurface);
5891 dml_print("DML::%s: k=%0d, FrameTimeCriticalSurface = %f\n",
5892 __func__, k, FrameTimeCriticalSurface);
5893 dml_print("DML::%s: k=%0d, VActiveTimeCriticalSurface = %f\n",
5894 __func__, k, VActiveTimeCriticalSurface);
5895 dml_print("DML::%s: k=%0d, BytePerPixelYCriticalSurface = %d\n",
5896 __func__, k, BytePerPixelYCriticalSurface);
5897 dml_print("DML::%s: k=%0d, SwathWidthYCriticalSurface = %f\n",
5898 __func__, k, SwathWidthYCriticalSurface);
5899 dml_print("DML::%s: k=%0d, SwathHeightYCriticalSurface = %f\n",
5900 __func__, k, SwathHeightYCriticalSurface);
5901 dml_print("DML::%s: k=%0d, BlockWidth256BytesYCriticalSurface = %d\n",
5902 __func__, k, BlockWidth256BytesYCriticalSurface);
5903 dml_print("DML::%s: k=%0d, doublePlaneCriticalSurface = %d\n",
5904 __func__, k, doublePlaneCriticalSurface);
5905 dml_print("DML::%s: k=%0d, doublePipeCriticalSurface = %d\n",
5906 __func__, k, doublePipeCriticalSurface);
5907 dml_print("DML::%s: k=%0d, LinesToFinishSwathTransferStutterCriticalSurface = %f\n",
5908 __func__, k, LinesToFinishSwathTransferStutterCriticalSurface);
5909 #endif
5910 }
5911 }
5912 }
5913
5914 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth,
5915 EffectiveCompressedBufferSize);
5916 #ifdef __DML_VBA_DEBUG__
5917 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
5918 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
5919 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5920 __func__, *StutterPeriod * TotalDataReadBandwidth);
5921 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
5922 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__,
5923 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
5924 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
5925 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
5926 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
5927 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
5928 #endif
5929
5930 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate
5931 / ReturnBW
5932 + (*StutterPeriod * TotalDataReadBandwidth
5933 - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
5934 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
5935 #ifdef __DML_VBA_DEBUG__
5936 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer /
5937 AverageDCCCompressionRate / ReturnBW);
5938 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n",
5939 __func__, (*StutterPeriod * TotalDataReadBandwidth));
5940 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth -
5941 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
5942 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
5943 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
5944 #endif
5945 StutterBurstTime = dml_max(StutterBurstTime,
5946 LinesToFinishSwathTransferStutterCriticalSurface * BytePerPixelYCriticalSurface
5947 * SwathWidthYCriticalSurface / ReturnBW);
5948
5949 #ifdef __DML_VBA_DEBUG__
5950 dml_print("DML::%s: Time to finish residue swath=%f\n",
5951 __func__,
5952 LinesToFinishSwathTransferStutterCriticalSurface *
5953 BytePerPixelYCriticalSurface * SwathWidthYCriticalSurface / ReturnBW);
5954 #endif
5955
5956 TotalActiveWriteback = 0;
5957 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5958 if (WritebackEnable[k])
5959 TotalActiveWriteback = TotalActiveWriteback + 1;
5960 }
5961
5962 if (TotalActiveWriteback == 0) {
5963 #ifdef __DML_VBA_DEBUG__
5964 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
5965 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
5966 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
5967 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
5968 #endif
5969 *StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5970 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
5971 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0.,
5972 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
5973 *NumberOfStutterBurstsPerFrame = (
5974 *StutterEfficiencyNotIncludingVBlank > 0 ?
5975 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5976 *Z8NumberOfStutterBurstsPerFrame = (
5977 *Z8StutterEfficiencyNotIncludingVBlank > 0 ?
5978 dml_ceil(VActiveTimeCriticalSurface / *StutterPeriod, 1) : 0);
5979 } else {
5980 *StutterEfficiencyNotIncludingVBlank = 0.;
5981 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
5982 *NumberOfStutterBurstsPerFrame = 0;
5983 *Z8NumberOfStutterBurstsPerFrame = 0;
5984 }
5985 #ifdef __DML_VBA_DEBUG__
5986 dml_print("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, VActiveTimeCriticalSurface);
5987 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
5988 __func__, *StutterEfficiencyNotIncludingVBlank);
5989 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n",
5990 __func__, *Z8StutterEfficiencyNotIncludingVBlank);
5991 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
5992 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
5993 #endif
5994
5995 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
5996 if (UseMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
5997 if (BlendingAndTiming[k] == k) {
5998 if (TotalNumberOfActiveOTG == 0) {
5999 doublePixelClock = PixelClock[k];
6000 doubleHTotal = HTotal[k];
6001 doubleVTotal = VTotal[k];
6002 } else if (doublePixelClock != PixelClock[k] || doubleHTotal != HTotal[k]
6003 || doubleVTotal != VTotal[k]) {
6004 SameTiming = false;
6005 }
6006 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6007 }
6008 }
6009 }
6010
6011 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6012 LastStutterPeriod = VActiveTimeCriticalSurface - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6013
6014 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming
6015 && LastStutterPeriod + MinTTUVBlankCriticalSurface > StutterEnterPlusExitWatermark) {
6016 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime
6017 + StutterBurstTime * VActiveTimeCriticalSurface
6018 / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6019 } else {
6020 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6021 }
6022 } else {
6023 *StutterEfficiency = 0;
6024 }
6025
6026 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6027 LastZ8StutterPeriod = VActiveTimeCriticalSurface
6028 - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6029 if ((SynchronizeTimingsFinal || TotalNumberOfActiveOTG == 1) && SameTiming && LastZ8StutterPeriod +
6030 MinTTUVBlankCriticalSurface > Z8StutterEnterPlusExitWatermark) {
6031 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime
6032 * VActiveTimeCriticalSurface / *StutterPeriod) / FrameTimeCriticalSurface) * 100;
6033 } else {
6034 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6035 }
6036 } else {
6037 *Z8StutterEfficiency = 0.;
6038 }
6039
6040 #ifdef __DML_VBA_DEBUG__
6041 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6042 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6043 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6044 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6045 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6046 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6047 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n",
6048 __func__, *StutterEfficiencyNotIncludingVBlank);
6049 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6050 #endif
6051
6052 SwathSizeCriticalSurface = BytePerPixelYCriticalSurface * SwathHeightYCriticalSurface
6053 * dml_ceil(SwathWidthYCriticalSurface, BlockWidth256BytesYCriticalSurface);
6054 LastChunkOfSwathSize = SwathSizeCriticalSurface % (PixelChunkSizeInKByte * 1024);
6055 MissingPartOfLastSwathOfDETSize = dml_ceil(DETBufferSizeYCriticalSurface, SwathSizeCriticalSurface)
6056 - DETBufferSizeYCriticalSurface;
6057
6058 *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!UnboundedRequestEnabled && (NumberOfActiveSurfaces == 1)
6059 && doublePlaneCriticalSurface && doublePipeCriticalSurface && (LastChunkOfSwathSize > 0)
6060 && (LastChunkOfSwathSize <= 4096) && (MissingPartOfLastSwathOfDETSize > 0)
6061 && (MissingPartOfLastSwathOfDETSize <= LastChunkOfSwathSize));
6062
6063 #ifdef __DML_VBA_DEBUG__
6064 dml_print("DML::%s: SwathSizeCriticalSurface = %d\n", __func__, SwathSizeCriticalSurface);
6065 dml_print("DML::%s: LastChunkOfSwathSize = %d\n", __func__, LastChunkOfSwathSize);
6066 dml_print("DML::%s: MissingPartOfLastSwathOfDETSize = %d\n", __func__, MissingPartOfLastSwathOfDETSize);
6067 dml_print("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %d\n", __func__, *DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
6068 #endif
6069 } // CalculateStutterEfficiency
6070
/*
 * Derive the DET size limits from the return-buffer configuration.
 *
 * Outputs:
 *   MaxTotalDETInKByte             - dml_ceil(x, 64) of 4/5 of the sum of
 *                                    ConfigReturnBufferSizeInKByte and
 *                                    ROBBufferSizeInKByte
 *   nomDETInKByte                  - dml_floor(x, 64) of MaxTotalDETInKByte
 *                                    divided by MaxNumDPP, unless overridden
 *   MinCompressedBufferSizeInKByte - ConfigReturnBufferSizeInKByte minus
 *                                    MaxTotalDETInKByte
 *
 * When nomDETInKByteOverrideEnable is set, nomDETInKByte is replaced with
 * nomDETInKByteOverrideValue after the computed values have been logged.
 */
void dml32_CalculateMaxDETAndMinCompressedBufferSize(
		unsigned int ConfigReturnBufferSizeInKByte,
		unsigned int ROBBufferSizeInKByte,
		unsigned int MaxNumDPP,
		bool nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
		unsigned int nomDETInKByteOverrideValue, // VBA_DELTA

		/* Output */
		unsigned int *MaxTotalDETInKByte,
		unsigned int *nomDETInKByte,
		unsigned int *MinCompressedBufferSizeInKByte)
{
	const double combined_kbytes = (double) ConfigReturnBufferSizeInKByte
			+ (double) ROBBufferSizeInKByte;
	unsigned int max_total_det;

	/* The double result is truncated to unsigned before it is reused below. */
	max_total_det = dml_ceil(combined_kbytes * 4.0 / 5.0, 64);

	*MaxTotalDETInKByte = max_total_det;
	*nomDETInKByte = dml_floor((double) max_total_det / (double) MaxNumDPP, 64);
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - max_total_det;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %0d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: ROBBufferSizeInKByte = %0d\n", __func__, ROBBufferSizeInKByte);
	dml_print("DML::%s: MaxNumDPP = %0d\n", __func__, MaxNumDPP);
	dml_print("DML::%s: MaxTotalDETInKByte = %0d\n", __func__, *MaxTotalDETInKByte);
	dml_print("DML::%s: nomDETInKByte = %0d\n", __func__, *nomDETInKByte);
	dml_print("DML::%s: MinCompressedBufferSizeInKByte = %0d\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	/* Nothing more to do unless the caller forces a nominal DET size. */
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: nomDETInKByte = %0d (override)\n", __func__, *nomDETInKByte);
#endif
} // CalculateMaxDETAndMinCompressedBufferSize
6107
/*
 * Check whether the bandwidth needed during VActive fits within ReturnBW.
 *
 * For each of the first NumberOfActiveSurfaces entries the luma, chroma and
 * cursor read bandwidths are scaled by their urgent burst factors, and the
 * meta-row and dpte-row bandwidths are scaled by NumberOfDPP[]; everything is
 * summed into one total.
 *
 * Returns true only when that total does not exceed ReturnBW and no examined
 * surface has its NotUrgentLatencyHiding[] flag set.
 */
bool dml32_CalculateVActiveBandwithSupport(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		bool NotUrgentLatencyHiding[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double cursor_bw[],
		double meta_row_bandwidth[],
		double dpte_row_bandwidth[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[])
{
	unsigned int i;
	bool latency_hiding_short = false;
	bool supported;
	double required_bw = 0;

	for (i = 0; i < NumberOfActiveSurfaces; ++i) {
		if (NotUrgentLatencyHiding[i])
			latency_hiding_short = true;

		/* Kept as one left-to-right sum so the rounding is unchanged. */
		required_bw = required_bw
				+ ReadBandwidthLuma[i] * UrgentBurstFactorLuma[i]
				+ ReadBandwidthChroma[i] * UrgentBurstFactorChroma[i]
				+ cursor_bw[i] * UrgentBurstFactorCursor[i]
				+ NumberOfDPP[i] * meta_row_bandwidth[i]
				+ NumberOfDPP[i] * dpte_row_bandwidth[i];
	}

	supported = (required_bw <= ReturnBW) && !latency_hiding_short;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NotEnoughUrgentLatencyHiding        = %d\n", __func__, latency_hiding_short);
	dml_print("DML::%s: VActiveBandwith                     = %f\n", __func__, required_bw);
	dml_print("DML::%s: ReturnBW                            = %f\n", __func__, ReturnBW);
	dml_print("DML::%s: CalculateVActiveBandwithSupport_val = %d\n", __func__, supported);
#endif
	return supported;
}
6146
dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,double ReturnBW,bool NotUrgentLatencyHiding[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double cursor_bw[],double meta_row_bandwidth[],double dpte_row_bandwidth[],double cursor_bw_pre[],double prefetch_vmrow_bw[],unsigned int NumberOfDPP[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double PrefetchBW[],double VRatio[],double MaxVRatioPre,double * MaxPrefetchBandwidth,double * FractionOfUrgentBandwidth,bool * PrefetchBandwidthSupport)6147 void dml32_CalculatePrefetchBandwithSupport(unsigned int NumberOfActiveSurfaces,
6148 double ReturnBW,
6149 bool NotUrgentLatencyHiding[],
6150 double ReadBandwidthLuma[],
6151 double ReadBandwidthChroma[],
6152 double PrefetchBandwidthLuma[],
6153 double PrefetchBandwidthChroma[],
6154 double cursor_bw[],
6155 double meta_row_bandwidth[],
6156 double dpte_row_bandwidth[],
6157 double cursor_bw_pre[],
6158 double prefetch_vmrow_bw[],
6159 unsigned int NumberOfDPP[],
6160 double UrgentBurstFactorLuma[],
6161 double UrgentBurstFactorChroma[],
6162 double UrgentBurstFactorCursor[],
6163 double UrgentBurstFactorLumaPre[],
6164 double UrgentBurstFactorChromaPre[],
6165 double UrgentBurstFactorCursorPre[],
6166 double PrefetchBW[],
6167 double VRatio[],
6168 double MaxVRatioPre,
6169
6170 /* output */
6171 double *MaxPrefetchBandwidth,
6172 double *FractionOfUrgentBandwidth,
6173 bool *PrefetchBandwidthSupport)
6174 {
6175 unsigned int k;
6176 double ActiveBandwidthPerSurface;
6177 bool NotEnoughUrgentLatencyHiding = false;
6178 double TotalActiveBandwidth = 0;
6179 double TotalPrefetchBandwidth = 0;
6180
6181 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6182 if (NotUrgentLatencyHiding[k]) {
6183 NotEnoughUrgentLatencyHiding = true;
6184 }
6185 }
6186
6187 *MaxPrefetchBandwidth = 0;
6188 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6189 ActiveBandwidthPerSurface = ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k] + NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]);
6190
6191 TotalActiveBandwidth += ActiveBandwidthPerSurface;
6192
6193 TotalPrefetchBandwidth = TotalPrefetchBandwidth + PrefetchBW[k] * VRatio[k];
6194
6195 *MaxPrefetchBandwidth = *MaxPrefetchBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6196 ActiveBandwidthPerSurface,
6197 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6198 }
6199
6200 if (MaxVRatioPre == __DML_MAX_VRATIO_PRE__)
6201 *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && (TotalPrefetchBandwidth <= TotalActiveBandwidth * __DML_MAX_BW_RATIO_PRE__) && !NotEnoughUrgentLatencyHiding;
6202 else
6203 *PrefetchBandwidthSupport = (*MaxPrefetchBandwidth <= ReturnBW) && !NotEnoughUrgentLatencyHiding;
6204
6205 *FractionOfUrgentBandwidth = *MaxPrefetchBandwidth / ReturnBW;
6206 }
6207
/*
 * Compute the bandwidth left over from ReturnBW for immediate flip.
 *
 * Starting from ReturnBW, each active surface subtracts the larger (dml_max)
 * of:
 *   - its burst-factor-scaled luma, chroma and cursor read bandwidth
 *   - NumberOfDPP[] * its burst-factor-scaled luma/chroma prefetch bandwidth,
 *     plus its burst-factor-scaled prefetch cursor bandwidth
 *
 * Returns the remaining bandwidth; the result is not clamped, so it can be
 * negative when the demand exceeds ReturnBW.
 */
double dml32_CalculateBandwidthAvailableForImmediateFlip(unsigned int NumberOfActiveSurfaces,
		double ReturnBW,
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double PrefetchBandwidthLuma[],
		double PrefetchBandwidthChroma[],
		double cursor_bw[],
		double cursor_bw_pre[],
		unsigned int NumberOfDPP[],
		double UrgentBurstFactorLuma[],
		double UrgentBurstFactorChroma[],
		double UrgentBurstFactorCursor[],
		double UrgentBurstFactorLumaPre[],
		double UrgentBurstFactorChromaPre[],
		double UrgentBurstFactorCursorPre[])
{
	unsigned int i;
	double remaining_bw = ReturnBW;

	for (i = 0; i < NumberOfActiveSurfaces; ++i) {
		double active_bw;
		double urgent_prefetch_bw;

		active_bw = ReadBandwidthLuma[i] * UrgentBurstFactorLuma[i]
				+ ReadBandwidthChroma[i] * UrgentBurstFactorChroma[i]
				+ cursor_bw[i] * UrgentBurstFactorCursor[i];

		urgent_prefetch_bw = NumberOfDPP[i]
				* (PrefetchBandwidthLuma[i] * UrgentBurstFactorLumaPre[i]
					+ PrefetchBandwidthChroma[i] * UrgentBurstFactorChromaPre[i])
				+ cursor_bw_pre[i] * UrgentBurstFactorCursorPre[i];

		remaining_bw = remaining_bw - dml_max(active_bw, urgent_prefetch_bw);
	}

	return remaining_bw;
}
6234
dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,double ReturnBW,enum immediate_flip_requirement ImmediateFlipRequirement[],double final_flip_bw[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double cursor_bw[],double meta_row_bandwidth[],double dpte_row_bandwidth[],double cursor_bw_pre[],double prefetch_vmrow_bw[],unsigned int NumberOfDPP[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double * TotalBandwidth,double * FractionOfUrgentBandwidth,bool * ImmediateFlipBandwidthSupport)6235 void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurfaces,
6236 double ReturnBW,
6237 enum immediate_flip_requirement ImmediateFlipRequirement[],
6238 double final_flip_bw[],
6239 double ReadBandwidthLuma[],
6240 double ReadBandwidthChroma[],
6241 double PrefetchBandwidthLuma[],
6242 double PrefetchBandwidthChroma[],
6243 double cursor_bw[],
6244 double meta_row_bandwidth[],
6245 double dpte_row_bandwidth[],
6246 double cursor_bw_pre[],
6247 double prefetch_vmrow_bw[],
6248 unsigned int NumberOfDPP[],
6249 double UrgentBurstFactorLuma[],
6250 double UrgentBurstFactorChroma[],
6251 double UrgentBurstFactorCursor[],
6252 double UrgentBurstFactorLumaPre[],
6253 double UrgentBurstFactorChromaPre[],
6254 double UrgentBurstFactorCursorPre[],
6255
6256 /* output */
6257 double *TotalBandwidth,
6258 double *FractionOfUrgentBandwidth,
6259 bool *ImmediateFlipBandwidthSupport)
6260 {
6261 unsigned int k;
6262 *TotalBandwidth = 0;
6263 for (k = 0; k < NumberOfActiveSurfaces; ++k) {
6264 if (ImmediateFlipRequirement[k] != dm_immediate_flip_not_required) {
6265 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6266 NumberOfDPP[k] * final_flip_bw[k] + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6267 NumberOfDPP[k] * (final_flip_bw[k] + PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6268 } else {
6269 *TotalBandwidth = *TotalBandwidth + dml_max3(NumberOfDPP[k] * prefetch_vmrow_bw[k],
6270 NumberOfDPP[k] * (meta_row_bandwidth[k] + dpte_row_bandwidth[k]) + ReadBandwidthLuma[k] * UrgentBurstFactorLuma[k] + ReadBandwidthChroma[k] * UrgentBurstFactorChroma[k] + cursor_bw[k] * UrgentBurstFactorCursor[k],
6271 NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * UrgentBurstFactorLumaPre[k] + PrefetchBandwidthChroma[k] * UrgentBurstFactorChromaPre[k]) + cursor_bw_pre[k] * UrgentBurstFactorCursorPre[k]);
6272 }
6273 }
6274 *ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
6275 *FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
6276 }
6277
dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,double ReturnBW,double UrgentLatency,unsigned int SwathHeightY[],unsigned int SwathHeightC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],double BytePerPixelInDETY[],double BytePerPixelInDETC[],unsigned int DETBufferSizeY[],unsigned int DETBufferSizeC[],unsigned int NumOfDPP[],unsigned int HTotal[],double PixelClock[],double VRatioY[],double VRatioC[],enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],enum unbounded_requesting_policy UseUnboundedRequesting)6278 bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
6279 double ReturnBW,
6280 double UrgentLatency,
6281 unsigned int SwathHeightY[],
6282 unsigned int SwathHeightC[],
6283 unsigned int SwathWidthY[],
6284 unsigned int SwathWidthC[],
6285 double BytePerPixelInDETY[],
6286 double BytePerPixelInDETC[],
6287 unsigned int DETBufferSizeY[],
6288 unsigned int DETBufferSizeC[],
6289 unsigned int NumOfDPP[],
6290 unsigned int HTotal[],
6291 double PixelClock[],
6292 double VRatioY[],
6293 double VRatioC[],
6294 enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[],
6295 enum unbounded_requesting_policy UseUnboundedRequesting)
6296 {
6297 int k;
6298 double SwathSizeAllSurfaces = 0;
6299 double SwathSizeAllSurfacesInFetchTimeUs;
6300 double DETSwathLatencyHidingUs;
6301 double DETSwathLatencyHidingYUs;
6302 double DETSwathLatencyHidingCUs;
6303 double SwathSizePerSurfaceY[DC__NUM_DPP__MAX];
6304 double SwathSizePerSurfaceC[DC__NUM_DPP__MAX];
6305 bool NotEnoughDETSwathFillLatencyHiding = false;
6306
6307 if (UseUnboundedRequesting == dm_unbounded_requesting)
6308 return false;
6309
6310 /* calculate sum of single swath size for all pipes in bytes */
6311 for (k = 0; k < NumberOfActiveSurfaces; k++) {
6312 SwathSizePerSurfaceY[k] = SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k];
6313
6314 if (SwathHeightC[k] != 0)
6315 SwathSizePerSurfaceC[k] = SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k];
6316 else
6317 SwathSizePerSurfaceC[k] = 0;
6318
6319 SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k];
6320 }
6321
6322 SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency;
6323
6324 /* ensure all DET - 1 swath can hide a fetch for all surfaces */
6325 for (k = 0; k < NumberOfActiveSurfaces; k++) {
6326 double LineTime = HTotal[k] / PixelClock[k];
6327
6328 /* only care if surface is not phantom */
6329 if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
6330 DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime;
6331
6332 if (SwathHeightC[k] != 0) {
6333 DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime;
6334
6335 DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs);
6336 } else {
6337 DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs;
6338 }
6339
6340 /* DET must be able to hide time to fetch 1 swath for each surface */
6341 if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) {
6342 NotEnoughDETSwathFillLatencyHiding = true;
6343 break;
6344 }
6345 }
6346 }
6347
6348 return NotEnoughDETSwathFillLatencyHiding;
6349 }
6350