1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: AMD
23 *
24 */
25
26 #include "dc.h"
27 #include "../display_mode_lib.h"
28 #include "../dcn30/display_mode_vba_30.h"
29 #include "display_mode_vba_31.h"
30 #include "../dml_inline_defs.h"
31
32 /*
33 * NOTE:
34 * This file is gcc-parsable HW gospel, coming straight from HW engineers.
35 *
36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
37 * ways. Unless there is something clearly wrong with it the code should
38 * remain as-is as it provides us with a guarantee from HW that it is correct.
39 */
40
/* Sentinel bits-per-pixel values (see their users elsewhere in this file). */
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff

/* DCN 3.1 / 3.15 size limits used by this file. */
#define DCN31_MAX_DSC_IMAGE_WIDTH 5184
#define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
#define DCN3_15_MIN_COMPBUF_SIZE_KB 128
#define DCN3_15_MAX_DET_SIZE 384

// For DML-C changes that haven't been propagated to VBA yet
//#define __DML_VBA_ALLOW_DELTA__

// Move these to ip parameters/constant

// At which vstartup the DML start to try if the mode can be supported
#define __DML_VBA_MIN_VSTARTUP__ 9

// Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
#define __DML_ARB_TO_RET_DELAY__ (7 + 95)

// fudge factor for min dcfclk calculation
#define __DML_MIN_DCFCLK_FACTOR__ 1.15
61
62 typedef struct {
63 double DPPCLK;
64 double DISPCLK;
65 double PixelClock;
66 double DCFCLKDeepSleep;
67 unsigned int DPPPerPlane;
68 bool ScalerEnabled;
69 double VRatio;
70 double VRatioChroma;
71 enum scan_direction_class SourceScan;
72 unsigned int BlockWidth256BytesY;
73 unsigned int BlockHeight256BytesY;
74 unsigned int BlockWidth256BytesC;
75 unsigned int BlockHeight256BytesC;
76 unsigned int InterlaceEnable;
77 unsigned int NumberOfCursors;
78 unsigned int VBlank;
79 unsigned int HTotal;
80 unsigned int DCCEnable;
81 bool ODMCombineIsEnabled;
82 enum source_format_class SourcePixelFormat;
83 int BytePerPixelY;
84 int BytePerPixelC;
85 bool ProgressiveToInterlaceUnitInOPP;
86 } Pipe;
87
/*
 * NOTE(review): these two macros are also defined, with identical
 * replacement text, near the top of this file. An identical redefinition is
 * permitted by C, so this is harmless, but one of the two copies is redundant.
 */
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
90
91 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
92 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
93 static unsigned int dscceComputeDelay(
94 unsigned int bpc,
95 double BPP,
96 unsigned int sliceWidth,
97 unsigned int numSlices,
98 enum output_format_class pixelFormat,
99 enum output_encoder_class Output);
100 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
101 static bool CalculatePrefetchSchedule(
102 struct display_mode_lib *mode_lib,
103 double HostVMInefficiencyFactor,
104 Pipe *myPipe,
105 unsigned int DSCDelay,
106 double DPPCLKDelaySubtotalPlusCNVCFormater,
107 double DPPCLKDelaySCL,
108 double DPPCLKDelaySCLLBOnly,
109 double DPPCLKDelayCNVCCursor,
110 double DISPCLKDelaySubtotal,
111 unsigned int DPP_RECOUT_WIDTH,
112 enum output_format_class OutputFormat,
113 unsigned int MaxInterDCNTileRepeaters,
114 unsigned int VStartup,
115 unsigned int MaxVStartup,
116 unsigned int GPUVMPageTableLevels,
117 bool GPUVMEnable,
118 bool HostVMEnable,
119 unsigned int HostVMMaxNonCachedPageTableLevels,
120 double HostVMMinPageSize,
121 bool DynamicMetadataEnable,
122 bool DynamicMetadataVMEnabled,
123 int DynamicMetadataLinesBeforeActiveRequired,
124 unsigned int DynamicMetadataTransmittedBytes,
125 double UrgentLatency,
126 double UrgentExtraLatency,
127 double TCalc,
128 unsigned int PDEAndMetaPTEBytesFrame,
129 unsigned int MetaRowByte,
130 unsigned int PixelPTEBytesPerRow,
131 double PrefetchSourceLinesY,
132 unsigned int SwathWidthY,
133 double VInitPreFillY,
134 unsigned int MaxNumSwathY,
135 double PrefetchSourceLinesC,
136 unsigned int SwathWidthC,
137 double VInitPreFillC,
138 unsigned int MaxNumSwathC,
139 int swath_width_luma_ub,
140 int swath_width_chroma_ub,
141 unsigned int SwathHeightY,
142 unsigned int SwathHeightC,
143 double TWait,
144 double *DSTXAfterScaler,
145 double *DSTYAfterScaler,
146 double *DestinationLinesForPrefetch,
147 double *PrefetchBandwidth,
148 double *DestinationLinesToRequestVMInVBlank,
149 double *DestinationLinesToRequestRowInVBlank,
150 double *VRatioPrefetchY,
151 double *VRatioPrefetchC,
152 double *RequiredPrefetchPixDataBWLuma,
153 double *RequiredPrefetchPixDataBWChroma,
154 bool *NotEnoughTimeForDynamicMetadata,
155 double *Tno_bw,
156 double *prefetch_vmrow_bw,
157 double *Tdmdl_vm,
158 double *Tdmdl,
159 double *TSetup,
160 int *VUpdateOffsetPix,
161 double *VUpdateWidthPix,
162 double *VReadyOffsetPix);
163 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed);
164 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed);
165 static void CalculateDCCConfiguration(
166 bool DCCEnabled,
167 bool DCCProgrammingAssumesScanDirectionUnknown,
168 enum source_format_class SourcePixelFormat,
169 unsigned int SurfaceWidthLuma,
170 unsigned int SurfaceWidthChroma,
171 unsigned int SurfaceHeightLuma,
172 unsigned int SurfaceHeightChroma,
173 double DETBufferSize,
174 unsigned int RequestHeight256ByteLuma,
175 unsigned int RequestHeight256ByteChroma,
176 enum dm_swizzle_mode TilingFormat,
177 unsigned int BytePerPixelY,
178 unsigned int BytePerPixelC,
179 double BytePerPixelDETY,
180 double BytePerPixelDETC,
181 enum scan_direction_class ScanOrientation,
182 unsigned int *MaxUncompressedBlockLuma,
183 unsigned int *MaxUncompressedBlockChroma,
184 unsigned int *MaxCompressedBlockLuma,
185 unsigned int *MaxCompressedBlockChroma,
186 unsigned int *IndependentBlockLuma,
187 unsigned int *IndependentBlockChroma);
188 static double CalculatePrefetchSourceLines(
189 struct display_mode_lib *mode_lib,
190 double VRatio,
191 double vtaps,
192 bool Interlace,
193 bool ProgressiveToInterlaceUnitInOPP,
194 unsigned int SwathHeight,
195 unsigned int ViewportYStart,
196 double *VInitPreFill,
197 unsigned int *MaxNumSwath);
198 static unsigned int CalculateVMAndRowBytes(
199 struct display_mode_lib *mode_lib,
200 bool DCCEnable,
201 unsigned int BlockHeight256Bytes,
202 unsigned int BlockWidth256Bytes,
203 enum source_format_class SourcePixelFormat,
204 unsigned int SurfaceTiling,
205 unsigned int BytePerPixel,
206 enum scan_direction_class ScanDirection,
207 unsigned int SwathWidth,
208 unsigned int ViewportHeight,
209 bool GPUVMEnable,
210 bool HostVMEnable,
211 unsigned int HostVMMaxNonCachedPageTableLevels,
212 unsigned int GPUVMMinPageSize,
213 unsigned int HostVMMinPageSize,
214 unsigned int PTEBufferSizeInRequests,
215 unsigned int Pitch,
216 unsigned int DCCMetaPitch,
217 unsigned int *MacroTileWidth,
218 unsigned int *MetaRowByte,
219 unsigned int *PixelPTEBytesPerRow,
220 bool *PTEBufferSizeNotExceeded,
221 int *dpte_row_width_ub,
222 unsigned int *dpte_row_height,
223 unsigned int *MetaRequestWidth,
224 unsigned int *MetaRequestHeight,
225 unsigned int *meta_row_width,
226 unsigned int *meta_row_height,
227 int *vm_group_bytes,
228 unsigned int *dpte_group_bytes,
229 unsigned int *PixelPTEReqWidth,
230 unsigned int *PixelPTEReqHeight,
231 unsigned int *PTERequestSize,
232 int *DPDE0BytesFrame,
233 int *MetaPTEBytesFrame);
234 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime);
235 static void CalculateRowBandwidth(
236 bool GPUVMEnable,
237 enum source_format_class SourcePixelFormat,
238 double VRatio,
239 double VRatioChroma,
240 bool DCCEnable,
241 double LineTime,
242 unsigned int MetaRowByteLuma,
243 unsigned int MetaRowByteChroma,
244 unsigned int meta_row_height_luma,
245 unsigned int meta_row_height_chroma,
246 unsigned int PixelPTEBytesPerRowLuma,
247 unsigned int PixelPTEBytesPerRowChroma,
248 unsigned int dpte_row_height_luma,
249 unsigned int dpte_row_height_chroma,
250 double *meta_row_bw,
251 double *dpte_row_bw);
252
253 static void CalculateFlipSchedule(
254 struct display_mode_lib *mode_lib,
255 unsigned int k,
256 double HostVMInefficiencyFactor,
257 double UrgentExtraLatency,
258 double UrgentLatency,
259 double PDEAndMetaPTEBytesPerFrame,
260 double MetaRowBytes,
261 double DPTEBytesPerRow);
262 static double CalculateWriteBackDelay(
263 enum source_format_class WritebackPixelFormat,
264 double WritebackHRatio,
265 double WritebackVRatio,
266 unsigned int WritebackVTaps,
267 int WritebackDestinationWidth,
268 int WritebackDestinationHeight,
269 int WritebackSourceHeight,
270 unsigned int HTotal);
271
272 static void CalculateVupdateAndDynamicMetadataParameters(
273 int MaxInterDCNTileRepeaters,
274 double DPPCLK,
275 double DISPCLK,
276 double DCFClkDeepSleep,
277 double PixelClock,
278 int HTotal,
279 int VBlank,
280 int DynamicMetadataTransmittedBytes,
281 int DynamicMetadataLinesBeforeActiveRequired,
282 int InterlaceEnable,
283 bool ProgressiveToInterlaceUnitInOPP,
284 double *TSetup,
285 double *Tdmbf,
286 double *Tdmec,
287 double *Tdmsks,
288 int *VUpdateOffsetPix,
289 double *VUpdateWidthPix,
290 double *VReadyOffsetPix);
291
292 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
293 struct display_mode_lib *mode_lib,
294 unsigned int PrefetchMode,
295 double DCFCLK,
296 double ReturnBW,
297 double UrgentLatency,
298 double ExtraLatency,
299 double SOCCLK,
300 double DCFCLKDeepSleep,
301 unsigned int DETBufferSizeY[],
302 unsigned int DETBufferSizeC[],
303 unsigned int SwathHeightY[],
304 unsigned int SwathHeightC[],
305 double SwathWidthY[],
306 double SwathWidthC[],
307 unsigned int DPPPerPlane[],
308 double BytePerPixelDETY[],
309 double BytePerPixelDETC[],
310 bool UnboundedRequestEnabled,
311 int unsigned CompressedBufferSizeInkByte,
312 enum clock_change_support *DRAMClockChangeSupport,
313 double *StutterExitWatermark,
314 double *StutterEnterPlusExitWatermark,
315 double *Z8StutterExitWatermark,
316 double *Z8StutterEnterPlusExitWatermark);
317
318 static void CalculateDCFCLKDeepSleep(
319 struct display_mode_lib *mode_lib,
320 unsigned int NumberOfActivePlanes,
321 int BytePerPixelY[],
322 int BytePerPixelC[],
323 double VRatio[],
324 double VRatioChroma[],
325 double SwathWidthY[],
326 double SwathWidthC[],
327 unsigned int DPPPerPlane[],
328 double HRatio[],
329 double HRatioChroma[],
330 double PixelClock[],
331 double PSCL_THROUGHPUT[],
332 double PSCL_THROUGHPUT_CHROMA[],
333 double DPPCLK[],
334 double ReadBandwidthLuma[],
335 double ReadBandwidthChroma[],
336 int ReturnBusWidth,
337 double *DCFCLKDeepSleep);
338
339 static void CalculateUrgentBurstFactor(
340 int swath_width_luma_ub,
341 int swath_width_chroma_ub,
342 unsigned int SwathHeightY,
343 unsigned int SwathHeightC,
344 double LineTime,
345 double UrgentLatency,
346 double CursorBufferSize,
347 unsigned int CursorWidth,
348 unsigned int CursorBPP,
349 double VRatio,
350 double VRatioC,
351 double BytePerPixelInDETY,
352 double BytePerPixelInDETC,
353 double DETBufferSizeY,
354 double DETBufferSizeC,
355 double *UrgentBurstFactorCursor,
356 double *UrgentBurstFactorLuma,
357 double *UrgentBurstFactorChroma,
358 bool *NotEnoughUrgentLatencyHiding);
359
360 static void UseMinimumDCFCLK(
361 struct display_mode_lib *mode_lib,
362 int MaxPrefetchMode,
363 int ReorderingBytes);
364
365 static void CalculatePixelDeliveryTimes(
366 unsigned int NumberOfActivePlanes,
367 double VRatio[],
368 double VRatioChroma[],
369 double VRatioPrefetchY[],
370 double VRatioPrefetchC[],
371 unsigned int swath_width_luma_ub[],
372 unsigned int swath_width_chroma_ub[],
373 unsigned int DPPPerPlane[],
374 double HRatio[],
375 double HRatioChroma[],
376 double PixelClock[],
377 double PSCL_THROUGHPUT[],
378 double PSCL_THROUGHPUT_CHROMA[],
379 double DPPCLK[],
380 int BytePerPixelC[],
381 enum scan_direction_class SourceScan[],
382 unsigned int NumberOfCursors[],
383 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
384 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
385 unsigned int BlockWidth256BytesY[],
386 unsigned int BlockHeight256BytesY[],
387 unsigned int BlockWidth256BytesC[],
388 unsigned int BlockHeight256BytesC[],
389 double DisplayPipeLineDeliveryTimeLuma[],
390 double DisplayPipeLineDeliveryTimeChroma[],
391 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
392 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
393 double DisplayPipeRequestDeliveryTimeLuma[],
394 double DisplayPipeRequestDeliveryTimeChroma[],
395 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
396 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
397 double CursorRequestDeliveryTime[],
398 double CursorRequestDeliveryTimePrefetch[]);
399
400 static void CalculateMetaAndPTETimes(
401 int NumberOfActivePlanes,
402 bool GPUVMEnable,
403 int MetaChunkSize,
404 int MinMetaChunkSizeBytes,
405 int HTotal[],
406 double VRatio[],
407 double VRatioChroma[],
408 double DestinationLinesToRequestRowInVBlank[],
409 double DestinationLinesToRequestRowInImmediateFlip[],
410 bool DCCEnable[],
411 double PixelClock[],
412 int BytePerPixelY[],
413 int BytePerPixelC[],
414 enum scan_direction_class SourceScan[],
415 int dpte_row_height[],
416 int dpte_row_height_chroma[],
417 int meta_row_width[],
418 int meta_row_width_chroma[],
419 int meta_row_height[],
420 int meta_row_height_chroma[],
421 int meta_req_width[],
422 int meta_req_width_chroma[],
423 int meta_req_height[],
424 int meta_req_height_chroma[],
425 int dpte_group_bytes[],
426 int PTERequestSizeY[],
427 int PTERequestSizeC[],
428 int PixelPTEReqWidthY[],
429 int PixelPTEReqHeightY[],
430 int PixelPTEReqWidthC[],
431 int PixelPTEReqHeightC[],
432 int dpte_row_width_luma_ub[],
433 int dpte_row_width_chroma_ub[],
434 double DST_Y_PER_PTE_ROW_NOM_L[],
435 double DST_Y_PER_PTE_ROW_NOM_C[],
436 double DST_Y_PER_META_ROW_NOM_L[],
437 double DST_Y_PER_META_ROW_NOM_C[],
438 double TimePerMetaChunkNominal[],
439 double TimePerChromaMetaChunkNominal[],
440 double TimePerMetaChunkVBlank[],
441 double TimePerChromaMetaChunkVBlank[],
442 double TimePerMetaChunkFlip[],
443 double TimePerChromaMetaChunkFlip[],
444 double time_per_pte_group_nom_luma[],
445 double time_per_pte_group_vblank_luma[],
446 double time_per_pte_group_flip_luma[],
447 double time_per_pte_group_nom_chroma[],
448 double time_per_pte_group_vblank_chroma[],
449 double time_per_pte_group_flip_chroma[]);
450
451 static void CalculateVMGroupAndRequestTimes(
452 unsigned int NumberOfActivePlanes,
453 bool GPUVMEnable,
454 unsigned int GPUVMMaxPageTableLevels,
455 unsigned int HTotal[],
456 int BytePerPixelC[],
457 double DestinationLinesToRequestVMInVBlank[],
458 double DestinationLinesToRequestVMInImmediateFlip[],
459 bool DCCEnable[],
460 double PixelClock[],
461 int dpte_row_width_luma_ub[],
462 int dpte_row_width_chroma_ub[],
463 int vm_group_bytes[],
464 unsigned int dpde0_bytes_per_frame_ub_l[],
465 unsigned int dpde0_bytes_per_frame_ub_c[],
466 int meta_pte_bytes_per_frame_ub_l[],
467 int meta_pte_bytes_per_frame_ub_c[],
468 double TimePerVMGroupVBlank[],
469 double TimePerVMGroupFlip[],
470 double TimePerVMRequestVBlank[],
471 double TimePerVMRequestFlip[]);
472
473 static void CalculateStutterEfficiency(
474 struct display_mode_lib *mode_lib,
475 int CompressedBufferSizeInkByte,
476 bool UnboundedRequestEnabled,
477 int ConfigReturnBufferSizeInKByte,
478 int MetaFIFOSizeInKEntries,
479 int ZeroSizeBufferEntries,
480 int NumberOfActivePlanes,
481 int ROBBufferSizeInKByte,
482 double TotalDataReadBandwidth,
483 double DCFCLK,
484 double ReturnBW,
485 double COMPBUF_RESERVED_SPACE_64B,
486 double COMPBUF_RESERVED_SPACE_ZS,
487 double SRExitTime,
488 double SRExitZ8Time,
489 bool SynchronizedVBlank,
490 double Z8StutterEnterPlusExitWatermark,
491 double StutterEnterPlusExitWatermark,
492 bool ProgressiveToInterlaceUnitInOPP,
493 bool Interlace[],
494 double MinTTUVBlank[],
495 int DPPPerPlane[],
496 unsigned int DETBufferSizeY[],
497 int BytePerPixelY[],
498 double BytePerPixelDETY[],
499 double SwathWidthY[],
500 int SwathHeightY[],
501 int SwathHeightC[],
502 double NetDCCRateLuma[],
503 double NetDCCRateChroma[],
504 double DCCFractionOfZeroSizeRequestsLuma[],
505 double DCCFractionOfZeroSizeRequestsChroma[],
506 int HTotal[],
507 int VTotal[],
508 double PixelClock[],
509 double VRatio[],
510 enum scan_direction_class SourceScan[],
511 int BlockHeight256BytesY[],
512 int BlockWidth256BytesY[],
513 int BlockHeight256BytesC[],
514 int BlockWidth256BytesC[],
515 int DCCYMaxUncompressedBlock[],
516 int DCCCMaxUncompressedBlock[],
517 int VActive[],
518 bool DCCEnable[],
519 bool WritebackEnable[],
520 double ReadBandwidthPlaneLuma[],
521 double ReadBandwidthPlaneChroma[],
522 double meta_row_bw[],
523 double dpte_row_bw[],
524 double *StutterEfficiencyNotIncludingVBlank,
525 double *StutterEfficiency,
526 int *NumberOfStutterBurstsPerFrame,
527 double *Z8StutterEfficiencyNotIncludingVBlank,
528 double *Z8StutterEfficiency,
529 int *Z8NumberOfStutterBurstsPerFrame,
530 double *StutterPeriod);
531
532 static void CalculateSwathAndDETConfiguration(
533 bool ForceSingleDPP,
534 int NumberOfActivePlanes,
535 bool DETSharedByAllDPP,
536 unsigned int DETBufferSizeInKByte[],
537 double MaximumSwathWidthLuma[],
538 double MaximumSwathWidthChroma[],
539 enum scan_direction_class SourceScan[],
540 enum source_format_class SourcePixelFormat[],
541 enum dm_swizzle_mode SurfaceTiling[],
542 int ViewportWidth[],
543 int ViewportHeight[],
544 int SurfaceWidthY[],
545 int SurfaceWidthC[],
546 int SurfaceHeightY[],
547 int SurfaceHeightC[],
548 int Read256BytesBlockHeightY[],
549 int Read256BytesBlockHeightC[],
550 int Read256BytesBlockWidthY[],
551 int Read256BytesBlockWidthC[],
552 enum odm_combine_mode ODMCombineEnabled[],
553 int BlendingAndTiming[],
554 int BytePerPixY[],
555 int BytePerPixC[],
556 double BytePerPixDETY[],
557 double BytePerPixDETC[],
558 int HActive[],
559 double HRatio[],
560 double HRatioChroma[],
561 int DPPPerPlane[],
562 int swath_width_luma_ub[],
563 int swath_width_chroma_ub[],
564 double SwathWidth[],
565 double SwathWidthChroma[],
566 int SwathHeightY[],
567 int SwathHeightC[],
568 unsigned int DETBufferSizeY[],
569 unsigned int DETBufferSizeC[],
570 bool ViewportSizeSupportPerPlane[],
571 bool *ViewportSizeSupport);
572 static void CalculateSwathWidth(
573 bool ForceSingleDPP,
574 int NumberOfActivePlanes,
575 enum source_format_class SourcePixelFormat[],
576 enum scan_direction_class SourceScan[],
577 int ViewportWidth[],
578 int ViewportHeight[],
579 int SurfaceWidthY[],
580 int SurfaceWidthC[],
581 int SurfaceHeightY[],
582 int SurfaceHeightC[],
583 enum odm_combine_mode ODMCombineEnabled[],
584 int BytePerPixY[],
585 int BytePerPixC[],
586 int Read256BytesBlockHeightY[],
587 int Read256BytesBlockHeightC[],
588 int Read256BytesBlockWidthY[],
589 int Read256BytesBlockWidthC[],
590 int BlendingAndTiming[],
591 int HActive[],
592 double HRatio[],
593 int DPPPerPlane[],
594 double SwathWidthSingleDPPY[],
595 double SwathWidthSingleDPPC[],
596 double SwathWidthY[],
597 double SwathWidthC[],
598 int MaximumSwathHeightY[],
599 int MaximumSwathHeightC[],
600 int swath_width_luma_ub[],
601 int swath_width_chroma_ub[]);
602
603 static double CalculateExtraLatency(
604 int RoundTripPingLatencyCycles,
605 int ReorderingBytes,
606 double DCFCLK,
607 int TotalNumberOfActiveDPP,
608 int PixelChunkSizeInKByte,
609 int TotalNumberOfDCCActiveDPP,
610 int MetaChunkSize,
611 double ReturnBW,
612 bool GPUVMEnable,
613 bool HostVMEnable,
614 int NumberOfActivePlanes,
615 int NumberOfDPP[],
616 int dpte_group_bytes[],
617 double HostVMInefficiencyFactor,
618 double HostVMMinPageSize,
619 int HostVMMaxNonCachedPageTableLevels);
620
621 static double CalculateExtraLatencyBytes(
622 int ReorderingBytes,
623 int TotalNumberOfActiveDPP,
624 int PixelChunkSizeInKByte,
625 int TotalNumberOfDCCActiveDPP,
626 int MetaChunkSize,
627 bool GPUVMEnable,
628 bool HostVMEnable,
629 int NumberOfActivePlanes,
630 int NumberOfDPP[],
631 int dpte_group_bytes[],
632 double HostVMInefficiencyFactor,
633 double HostVMMinPageSize,
634 int HostVMMaxNonCachedPageTableLevels);
635
636 static double CalculateUrgentLatency(
637 double UrgentLatencyPixelDataOnly,
638 double UrgentLatencyPixelMixedWithVMData,
639 double UrgentLatencyVMDataOnly,
640 bool DoUrgentLatencyAdjustment,
641 double UrgentLatencyAdjustmentFabricClockComponent,
642 double UrgentLatencyAdjustmentFabricClockReference,
643 double FabricClockSingle);
644
645 static void CalculateUnboundedRequestAndCompressedBufferSize(
646 unsigned int DETBufferSizeInKByte,
647 int ConfigReturnBufferSizeInKByte,
648 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
649 int TotalActiveDPP,
650 bool NoChromaPlanes,
651 int MaxNumDPP,
652 int CompressedBufferSegmentSizeInkByteFinal,
653 enum output_encoder_class *Output,
654 bool *UnboundedRequestEnabled,
655 int *CompressedBufferSizeInkByte);
656
657 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output);
658
/*
 * dml31_recalculate - run the DCN 3.1 display mode calculation on @mode_lib.
 *
 * Runs, in this order:
 *   1. ModeSupportAndSystemConfiguration()
 *   2. PixelClockAdjustmentForProgressiveToInterlaceUnit()
 *   3. DisplayPipeConfiguration()
 *   4. DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation()
 *
 * Every step receives the same @mode_lib pointer; nothing is returned.
 * When __DML_VBA_DEBUG__ is defined a trace line is printed before step 4.
 */
void dml31_recalculate(struct display_mode_lib *mode_lib)
{
	ModeSupportAndSystemConfiguration(mode_lib);
	PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib);
	DisplayPipeConfiguration(mode_lib);
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__);
#endif
	DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib);
}
669
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum output_format_class pixelFormat,enum output_encoder_class Output)670 static unsigned int dscceComputeDelay(
671 unsigned int bpc,
672 double BPP,
673 unsigned int sliceWidth,
674 unsigned int numSlices,
675 enum output_format_class pixelFormat,
676 enum output_encoder_class Output)
677 {
678 // valid bpc = source bits per component in the set of {8, 10, 12}
679 // valid bpp = increments of 1/16 of a bit
680 // min = 6/7/8 in N420/N422/444, respectively
681 // max = such that compression is 1:1
682 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
683 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
684 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
685
686 // fixed value
687 unsigned int rcModelSize = 8192;
688
689 // N422/N420 operate at 2 pixels per clock
690 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels;
691
692 if (pixelFormat == dm_420)
693 pixelsPerClock = 2;
694 else if (pixelFormat == dm_444)
695 pixelsPerClock = 1;
696 else if (pixelFormat == dm_n422)
697 pixelsPerClock = 2;
698 // #all other modes operate at 1 pixel per clock
699 else
700 pixelsPerClock = 1;
701
702 //initial transmit delay as per PPS
703 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock);
704
705 //compute ssm delay
706 if (bpc == 8)
707 D = 81;
708 else if (bpc == 10)
709 D = 89;
710 else
711 D = 113;
712
713 //divide by pixel per cycle to compute slice width as seen by DSC
714 w = sliceWidth / pixelsPerClock;
715
716 //422 mode has an additional cycle of delay
717 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422)
718 s = 0;
719 else
720 s = 1;
721
722 //main calculation for the dscce
723 ix = initalXmitDelay + 45;
724 wx = (w + 2) / 3;
725 P = 3 * wx - w;
726 l0 = ix / w;
727 a = ix + P * l0;
728 ax = (a + 2) / 3 + D + 6 + 1;
729 L = (ax + wx - 1) / wx;
730 if ((ix % w) == 0 && P != 0)
731 lstall = 1;
732 else
733 lstall = 0;
734 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22;
735
736 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels
737 pixels = Delay * 3 * pixelsPerClock;
738 return pixels;
739 }
740
dscComputeDelay(enum output_format_class pixelFormat,enum output_encoder_class Output)741 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output)
742 {
743 unsigned int Delay = 0;
744
745 if (pixelFormat == dm_420) {
746 // sfr
747 Delay = Delay + 2;
748 // dsccif
749 Delay = Delay + 0;
750 // dscc - input deserializer
751 Delay = Delay + 3;
752 // dscc gets pixels every other cycle
753 Delay = Delay + 2;
754 // dscc - input cdc fifo
755 Delay = Delay + 12;
756 // dscc gets pixels every other cycle
757 Delay = Delay + 13;
758 // dscc - cdc uncertainty
759 Delay = Delay + 2;
760 // dscc - output cdc fifo
761 Delay = Delay + 7;
762 // dscc gets pixels every other cycle
763 Delay = Delay + 3;
764 // dscc - cdc uncertainty
765 Delay = Delay + 2;
766 // dscc - output serializer
767 Delay = Delay + 1;
768 // sft
769 Delay = Delay + 1;
770 } else if (pixelFormat == dm_n422) {
771 // sfr
772 Delay = Delay + 2;
773 // dsccif
774 Delay = Delay + 1;
775 // dscc - input deserializer
776 Delay = Delay + 5;
777 // dscc - input cdc fifo
778 Delay = Delay + 25;
779 // dscc - cdc uncertainty
780 Delay = Delay + 2;
781 // dscc - output cdc fifo
782 Delay = Delay + 10;
783 // dscc - cdc uncertainty
784 Delay = Delay + 2;
785 // dscc - output serializer
786 Delay = Delay + 1;
787 // sft
788 Delay = Delay + 1;
789 } else {
790 // sfr
791 Delay = Delay + 2;
792 // dsccif
793 Delay = Delay + 0;
794 // dscc - input deserializer
795 Delay = Delay + 3;
796 // dscc - input cdc fifo
797 Delay = Delay + 12;
798 // dscc - cdc uncertainty
799 Delay = Delay + 2;
800 // dscc - output cdc fifo
801 Delay = Delay + 7;
802 // dscc - output serializer
803 Delay = Delay + 1;
804 // dscc - cdc uncertainty
805 Delay = Delay + 2;
806 // sft
807 Delay = Delay + 1;
808 }
809
810 return Delay;
811 }
812
CalculatePrefetchSchedule(struct display_mode_lib * mode_lib,double HostVMInefficiencyFactor,Pipe * myPipe,unsigned int DSCDelay,double DPPCLKDelaySubtotalPlusCNVCFormater,double DPPCLKDelaySCL,double DPPCLKDelaySCLLBOnly,double DPPCLKDelayCNVCCursor,double DISPCLKDelaySubtotal,unsigned int DPP_RECOUT_WIDTH,enum output_format_class OutputFormat,unsigned int MaxInterDCNTileRepeaters,unsigned int VStartup,unsigned int MaxVStartup,unsigned int GPUVMPageTableLevels,bool GPUVMEnable,bool HostVMEnable,unsigned int HostVMMaxNonCachedPageTableLevels,double HostVMMinPageSize,bool DynamicMetadataEnable,bool DynamicMetadataVMEnabled,int DynamicMetadataLinesBeforeActiveRequired,unsigned int DynamicMetadataTransmittedBytes,double UrgentLatency,double UrgentExtraLatency,double TCalc,unsigned int PDEAndMetaPTEBytesFrame,unsigned int MetaRowByte,unsigned int PixelPTEBytesPerRow,double PrefetchSourceLinesY,unsigned int SwathWidthY,double VInitPreFillY,unsigned int MaxNumSwathY,double PrefetchSourceLinesC,unsigned int SwathWidthC,double VInitPreFillC,unsigned int MaxNumSwathC,int swath_width_luma_ub,int swath_width_chroma_ub,unsigned int SwathHeightY,unsigned int SwathHeightC,double TWait,double * DSTXAfterScaler,double * DSTYAfterScaler,double * DestinationLinesForPrefetch,double * PrefetchBandwidth,double * DestinationLinesToRequestVMInVBlank,double * DestinationLinesToRequestRowInVBlank,double * VRatioPrefetchY,double * VRatioPrefetchC,double * RequiredPrefetchPixDataBWLuma,double * RequiredPrefetchPixDataBWChroma,bool * NotEnoughTimeForDynamicMetadata,double * Tno_bw,double * prefetch_vmrow_bw,double * Tdmdl_vm,double * Tdmdl,double * TSetup,int * VUpdateOffsetPix,double * VUpdateWidthPix,double * VReadyOffsetPix)813 static bool CalculatePrefetchSchedule(
814 struct display_mode_lib *mode_lib,
815 double HostVMInefficiencyFactor,
816 Pipe *myPipe,
817 unsigned int DSCDelay,
818 double DPPCLKDelaySubtotalPlusCNVCFormater,
819 double DPPCLKDelaySCL,
820 double DPPCLKDelaySCLLBOnly,
821 double DPPCLKDelayCNVCCursor,
822 double DISPCLKDelaySubtotal,
823 unsigned int DPP_RECOUT_WIDTH,
824 enum output_format_class OutputFormat,
825 unsigned int MaxInterDCNTileRepeaters,
826 unsigned int VStartup,
827 unsigned int MaxVStartup,
828 unsigned int GPUVMPageTableLevels,
829 bool GPUVMEnable,
830 bool HostVMEnable,
831 unsigned int HostVMMaxNonCachedPageTableLevels,
832 double HostVMMinPageSize,
833 bool DynamicMetadataEnable,
834 bool DynamicMetadataVMEnabled,
835 int DynamicMetadataLinesBeforeActiveRequired,
836 unsigned int DynamicMetadataTransmittedBytes,
837 double UrgentLatency,
838 double UrgentExtraLatency,
839 double TCalc,
840 unsigned int PDEAndMetaPTEBytesFrame,
841 unsigned int MetaRowByte,
842 unsigned int PixelPTEBytesPerRow,
843 double PrefetchSourceLinesY,
844 unsigned int SwathWidthY,
845 double VInitPreFillY,
846 unsigned int MaxNumSwathY,
847 double PrefetchSourceLinesC,
848 unsigned int SwathWidthC,
849 double VInitPreFillC,
850 unsigned int MaxNumSwathC,
851 int swath_width_luma_ub,
852 int swath_width_chroma_ub,
853 unsigned int SwathHeightY,
854 unsigned int SwathHeightC,
855 double TWait,
856 double *DSTXAfterScaler,
857 double *DSTYAfterScaler,
858 double *DestinationLinesForPrefetch,
859 double *PrefetchBandwidth,
860 double *DestinationLinesToRequestVMInVBlank,
861 double *DestinationLinesToRequestRowInVBlank,
862 double *VRatioPrefetchY,
863 double *VRatioPrefetchC,
864 double *RequiredPrefetchPixDataBWLuma,
865 double *RequiredPrefetchPixDataBWChroma,
866 bool *NotEnoughTimeForDynamicMetadata,
867 double *Tno_bw,
868 double *prefetch_vmrow_bw,
869 double *Tdmdl_vm,
870 double *Tdmdl,
871 double *TSetup,
872 int *VUpdateOffsetPix,
873 double *VUpdateWidthPix,
874 double *VReadyOffsetPix)
875 {
876 bool MyError = false;
877 unsigned int DPPCycles, DISPCLKCycles;
878 double DSTTotalPixelsAfterScaler;
879 double LineTime;
880 double dst_y_prefetch_equ;
881 #ifdef __DML_VBA_DEBUG__
882 double Tsw_oto;
883 #endif
884 double prefetch_bw_oto;
885 double prefetch_bw_pr;
886 double Tvm_oto;
887 double Tr0_oto;
888 double Tvm_oto_lines;
889 double Tr0_oto_lines;
890 double dst_y_prefetch_oto;
891 double TimeForFetchingMetaPTE = 0;
892 double TimeForFetchingRowInVBlank = 0;
893 double LinesToRequestPrefetchPixelData = 0;
894 unsigned int HostVMDynamicLevelsTrips;
895 double trip_to_mem;
896 double Tvm_trips;
897 double Tr0_trips;
898 double Tvm_trips_rounded;
899 double Tr0_trips_rounded;
900 double Lsw_oto;
901 double Tpre_rounded;
902 double prefetch_bw_equ;
903 double Tvm_equ;
904 double Tr0_equ;
905 double Tdmbf;
906 double Tdmec;
907 double Tdmsks;
908 double prefetch_sw_bytes;
909 double bytes_pp;
910 double dep_bytes;
911 int max_vratio_pre = 4;
912 double min_Lsw;
913 double Tsw_est1 = 0;
914 double Tsw_est3 = 0;
915 double max_Tsw = 0;
916
917 if (GPUVMEnable == true && HostVMEnable == true) {
918 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels;
919 } else {
920 HostVMDynamicLevelsTrips = 0;
921 }
922 #ifdef __DML_VBA_DEBUG__
923 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor);
924 #endif
925 CalculateVupdateAndDynamicMetadataParameters(
926 MaxInterDCNTileRepeaters,
927 myPipe->DPPCLK,
928 myPipe->DISPCLK,
929 myPipe->DCFCLKDeepSleep,
930 myPipe->PixelClock,
931 myPipe->HTotal,
932 myPipe->VBlank,
933 DynamicMetadataTransmittedBytes,
934 DynamicMetadataLinesBeforeActiveRequired,
935 myPipe->InterlaceEnable,
936 myPipe->ProgressiveToInterlaceUnitInOPP,
937 TSetup,
938 &Tdmbf,
939 &Tdmec,
940 &Tdmsks,
941 VUpdateOffsetPix,
942 VUpdateWidthPix,
943 VReadyOffsetPix);
944
945 LineTime = myPipe->HTotal / myPipe->PixelClock;
946 trip_to_mem = UrgentLatency;
947 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1);
948
949 #ifdef __DML_VBA_ALLOW_DELTA__
950 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) {
951 #else
952 if (DynamicMetadataVMEnabled == true) {
953 #endif
954 *Tdmdl = TWait + Tvm_trips + trip_to_mem;
955 } else {
956 *Tdmdl = TWait + UrgentExtraLatency;
957 }
958
959 #ifdef __DML_VBA_ALLOW_DELTA__
960 if (DynamicMetadataEnable == false) {
961 *Tdmdl = 0.0;
962 }
963 #endif
964
965 if (DynamicMetadataEnable == true) {
966 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) {
967 *NotEnoughTimeForDynamicMetadata = true;
968 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
969 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf);
970 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec);
971 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks);
972 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl);
973 } else {
974 *NotEnoughTimeForDynamicMetadata = false;
975 }
976 } else {
977 *NotEnoughTimeForDynamicMetadata = false;
978 }
979
980 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0);
981
982 if (myPipe->ScalerEnabled)
983 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL;
984 else
985 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly;
986
987 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor;
988
989 DISPCLKCycles = DISPCLKDelaySubtotal;
990
991 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0)
992 return true;
993
994 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay;
995
996 #ifdef __DML_VBA_DEBUG__
997 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles);
998 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock);
999 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK);
1000 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles);
1001 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK);
1002 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay);
1003 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler);
1004 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled);
1005 #endif
1006
1007 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH;
1008
1009 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP))
1010 *DSTYAfterScaler = 1;
1011 else
1012 *DSTYAfterScaler = 0;
1013
1014 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler;
1015 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1);
1016 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal));
1017
1018 #ifdef __DML_VBA_DEBUG__
1019 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler);
1020 #endif
1021
1022 MyError = false;
1023
1024 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1);
1025 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime;
1026 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime;
1027
1028 #ifdef __DML_VBA_ALLOW_DELTA__
1029 if (!myPipe->DCCEnable) {
1030 Tr0_trips = 0.0;
1031 Tr0_trips_rounded = 0.0;
1032 }
1033 #endif
1034
1035 if (!GPUVMEnable) {
1036 Tvm_trips = 0.0;
1037 Tvm_trips_rounded = 0.0;
1038 }
1039
1040 if (GPUVMEnable) {
1041 if (GPUVMPageTableLevels >= 3) {
1042 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1);
1043 } else {
1044 *Tno_bw = 0;
1045 }
1046 } else if (!myPipe->DCCEnable) {
1047 *Tno_bw = LineTime;
1048 } else {
1049 *Tno_bw = LineTime / 4;
1050 }
1051
1052 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12)
1053 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4;
1054 else
1055 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
1056 /*rev 99*/
1057 prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane;
1058 prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr;
1059 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
1060 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
1061 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
1062
1063 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
1064 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
1065 #ifdef __DML_VBA_DEBUG__
1066 Tsw_oto = Lsw_oto * LineTime;
1067 #endif
1068
1069
1070 #ifdef __DML_VBA_DEBUG__
1071 dml_print("DML: HTotal: %d\n", myPipe->HTotal);
1072 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto);
1073 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY);
1074 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub);
1075 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY);
1076 dml_print("DML: Tsw_oto: %f\n", Tsw_oto);
1077 #endif
1078
1079 if (GPUVMEnable == true)
1080 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0);
1081 else
1082 Tvm_oto = LineTime / 4.0;
1083
1084 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1085 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term)
1086 LineTime - Tvm_oto,
1087 LineTime / 4);
1088 } else {
1089 Tr0_oto = (LineTime - Tvm_oto) / 2.0;
1090 }
1091
1092 #ifdef __DML_VBA_DEBUG__
1093 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips);
1094 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips);
1095 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte);
1096 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1097 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1098 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1099 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto);
1100 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto);
1101 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto);
1102 #endif
1103
1104 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0;
1105 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0;
1106 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto;
1107 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal);
1108 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0;
1109 Tpre_rounded = dst_y_prefetch_equ * LineTime;
1110
1111 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor);
1112
1113 if (prefetch_sw_bytes < dep_bytes)
1114 prefetch_sw_bytes = 2 * dep_bytes;
1115
1116 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto);
1117 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines);
1118 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines);
1119 dml_print("DML: Lsw_oto: %f\n", Lsw_oto);
1120 dml_print("DML: LineTime: %f\n", LineTime);
1121 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ);
1122
1123 dml_print("DML: LineTime: %f\n", LineTime);
1124 dml_print("DML: VStartup: %d\n", VStartup);
1125 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime);
1126 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup);
1127 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc);
1128 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait);
1129 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf);
1130 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec);
1131 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks);
1132 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm);
1133 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl);
1134 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler);
1135 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler);
1136
1137 *PrefetchBandwidth = 0;
1138 *DestinationLinesToRequestVMInVBlank = 0;
1139 *DestinationLinesToRequestRowInVBlank = 0;
1140 *VRatioPrefetchY = 0;
1141 *VRatioPrefetchC = 0;
1142 *RequiredPrefetchPixDataBWLuma = 0;
1143 if (dst_y_prefetch_equ > 1) {
1144 double PrefetchBandwidth1;
1145 double PrefetchBandwidth2;
1146 double PrefetchBandwidth3;
1147 double PrefetchBandwidth4;
1148
1149 if (Tpre_rounded - *Tno_bw > 0) {
1150 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1151 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw);
1152 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1;
1153 } else {
1154 PrefetchBandwidth1 = 0;
1155 }
1156
1157 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) {
1158 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1159 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw);
1160 }
1161
1162 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0)
1163 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded);
1164 else
1165 PrefetchBandwidth2 = 0;
1166
1167 if (Tpre_rounded - Tvm_trips_rounded > 0) {
1168 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor
1169 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded);
1170 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3;
1171 } else {
1172 PrefetchBandwidth3 = 0;
1173 }
1174
1175 #ifdef __DML_VBA_DEBUG__
1176 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded);
1177 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded);
1178 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3);
1179 #endif
1180 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) {
1181 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor)
1182 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded);
1183 }
1184
1185 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0)
1186 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded);
1187 else
1188 PrefetchBandwidth4 = 0;
1189
1190 {
1191 bool Case1OK;
1192 bool Case2OK;
1193 bool Case3OK;
1194
1195 if (PrefetchBandwidth1 > 0) {
1196 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded
1197 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) {
1198 Case1OK = true;
1199 } else {
1200 Case1OK = false;
1201 }
1202 } else {
1203 Case1OK = false;
1204 }
1205
1206 if (PrefetchBandwidth2 > 0) {
1207 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded
1208 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) {
1209 Case2OK = true;
1210 } else {
1211 Case2OK = false;
1212 }
1213 } else {
1214 Case2OK = false;
1215 }
1216
1217 if (PrefetchBandwidth3 > 0) {
1218 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded
1219 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) {
1220 Case3OK = true;
1221 } else {
1222 Case3OK = false;
1223 }
1224 } else {
1225 Case3OK = false;
1226 }
1227
1228 if (Case1OK) {
1229 prefetch_bw_equ = PrefetchBandwidth1;
1230 } else if (Case2OK) {
1231 prefetch_bw_equ = PrefetchBandwidth2;
1232 } else if (Case3OK) {
1233 prefetch_bw_equ = PrefetchBandwidth3;
1234 } else {
1235 prefetch_bw_equ = PrefetchBandwidth4;
1236 }
1237
1238 #ifdef __DML_VBA_DEBUG__
1239 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK);
1240 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK);
1241 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK);
1242 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ);
1243 #endif
1244
1245 if (prefetch_bw_equ > 0) {
1246 if (GPUVMEnable == true) {
1247 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4);
1248 } else {
1249 Tvm_equ = LineTime / 4;
1250 }
1251
1252 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) {
1253 Tr0_equ = dml_max4(
1254 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ,
1255 Tr0_trips,
1256 (LineTime - Tvm_equ) / 2,
1257 LineTime / 4);
1258 } else {
1259 Tr0_equ = (LineTime - Tvm_equ) / 2;
1260 }
1261 } else {
1262 Tvm_equ = 0;
1263 Tr0_equ = 0;
1264 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__);
1265 }
1266 }
1267
1268 if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
1269 *DestinationLinesForPrefetch = dst_y_prefetch_oto;
1270 TimeForFetchingMetaPTE = Tvm_oto;
1271 TimeForFetchingRowInVBlank = Tr0_oto;
1272 *PrefetchBandwidth = prefetch_bw_oto;
1273 } else {
1274 *DestinationLinesForPrefetch = dst_y_prefetch_equ;
1275 TimeForFetchingMetaPTE = Tvm_equ;
1276 TimeForFetchingRowInVBlank = Tr0_equ;
1277 *PrefetchBandwidth = prefetch_bw_equ;
1278 }
1279
1280 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0;
1281
1282 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0;
1283
1284 #ifdef __DML_VBA_ALLOW_DELTA__
1285 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch
1286 // See note above dated 5/30/2018
1287 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ?
1288 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this??
1289 #else
1290 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank;
1291 #endif
1292
1293 #ifdef __DML_VBA_DEBUG__
1294 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch);
1295 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1296 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank);
1297 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1298 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1299 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1300 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData);
1301 #endif
1302
1303 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) {
1304
1305 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData;
1306 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1307 #ifdef __DML_VBA_DEBUG__
1308 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1309 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY);
1310 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY);
1311 #endif
1312 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) {
1313 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) {
1314 *VRatioPrefetchY = dml_max(
1315 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData,
1316 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0));
1317 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0);
1318 } else {
1319 MyError = true;
1320 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1321 *VRatioPrefetchY = 0;
1322 }
1323 #ifdef __DML_VBA_DEBUG__
1324 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY);
1325 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY);
1326 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY);
1327 #endif
1328 }
1329
1330 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData;
1331 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1332
1333 #ifdef __DML_VBA_DEBUG__
1334 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1335 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC);
1336 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC);
1337 #endif
1338 if ((SwathHeightC > 4) || VInitPreFillC > 3) {
1339 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) {
1340 *VRatioPrefetchC = dml_max(
1341 *VRatioPrefetchC,
1342 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0));
1343 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0);
1344 } else {
1345 MyError = true;
1346 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1347 *VRatioPrefetchC = 0;
1348 }
1349 #ifdef __DML_VBA_DEBUG__
1350 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC);
1351 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC);
1352 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC);
1353 #endif
1354 }
1355
1356 #ifdef __DML_VBA_DEBUG__
1357 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY);
1358 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
1359 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1360 #endif
1361
1362 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime;
1363
1364 #ifdef __DML_VBA_DEBUG__
1365 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma);
1366 #endif
1367
1368 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub
1369 / LineTime;
1370 } else {
1371 MyError = true;
1372 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1373 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData);
1374 *VRatioPrefetchY = 0;
1375 *VRatioPrefetchC = 0;
1376 *RequiredPrefetchPixDataBWLuma = 0;
1377 *RequiredPrefetchPixDataBWChroma = 0;
1378 }
1379
1380 dml_print(
1381 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n",
1382 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE);
1383 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE);
1384 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank);
1385 dml_print(
1386 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n",
1387 (double) LinesToRequestPrefetchPixelData * LineTime);
1388 dml_print("DML: To: %fus - time for propagation from scaler to optc\n",
1389 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) /
1390 (double) myPipe->HTotal)) * LineTime);
1391 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
1392 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n",
1393 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank
1394 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup);
1395 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow);
1396
1397 } else {
1398 MyError = true;
1399 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1400 }
1401
1402 {
1403 double prefetch_vm_bw;
1404 double prefetch_row_bw;
1405
1406 if (PDEAndMetaPTEBytesFrame == 0) {
1407 prefetch_vm_bw = 0;
1408 } else if (*DestinationLinesToRequestVMInVBlank > 0) {
1409 #ifdef __DML_VBA_DEBUG__
1410 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1411 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
1412 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank);
1413 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime);
1414 #endif
1415 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
1416 #ifdef __DML_VBA_DEBUG__
1417 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
1418 #endif
1419 } else {
1420 prefetch_vm_bw = 0;
1421 MyError = true;
1422 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1423 }
1424
1425 if (MetaRowByte + PixelPTEBytesPerRow == 0) {
1426 prefetch_row_bw = 0;
1427 } else if (*DestinationLinesToRequestRowInVBlank > 0) {
1428 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
1429
1430 #ifdef __DML_VBA_DEBUG__
1431 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
1432 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
1433 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
1434 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
1435 #endif
1436 } else {
1437 prefetch_row_bw = 0;
1438 MyError = true;
1439 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
1440 }
1441
1442 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
1443 }
1444
1445 if (MyError) {
1446 *PrefetchBandwidth = 0;
1447 TimeForFetchingMetaPTE = 0;
1448 TimeForFetchingRowInVBlank = 0;
1449 *DestinationLinesToRequestVMInVBlank = 0;
1450 *DestinationLinesToRequestRowInVBlank = 0;
1451 *DestinationLinesForPrefetch = 0;
1452 LinesToRequestPrefetchPixelData = 0;
1453 *VRatioPrefetchY = 0;
1454 *VRatioPrefetchC = 0;
1455 *RequiredPrefetchPixDataBWLuma = 0;
1456 *RequiredPrefetchPixDataBWChroma = 0;
1457 }
1458
1459 return MyError;
1460 }
1461
/*
 * Returns (4 * VCOSpeed) divided by a divider that is obtained by passing the
 * ratio (4 * VCOSpeed / Clock) through dml_floor() with a granularity of 1.
 *
 * Clock:    clock value to be adjusted.
 * VCOSpeed: VCO speed the divider is derived from; assumed to be in the same
 *           unit as Clock - TODO confirm against callers.
 *
 * NOTE(review): assuming dml_floor(x, 1) rounds x down to a whole number, the
 * divider never exceeds the exact ratio, so for positive inputs the result is
 * >= Clock, and a Clock above 4 * VCOSpeed would yield a divider of 0.
 * Nothing here guards against Clock == 0 or a zero divider.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	const double vco_x4 = VCOSpeed * 4;
	const double divider = dml_floor(vco_x4 / Clock, 1);

	return vco_x4 / divider;
}
1466
/*
 * Returns (4 * VCOSpeed) divided by a divider that is obtained by passing the
 * ratio (4 * VCOSpeed / Clock) through dml_ceil() with a granularity of 1.
 *
 * Clock:    clock value to be adjusted.
 * VCOSpeed: VCO speed the divider is derived from; assumed to be in the same
 *           unit as Clock - TODO confirm against callers.
 *
 * NOTE(review): assuming dml_ceil(x, 1) rounds x up to a whole number, the
 * divider is never smaller than the exact ratio, so for positive inputs the
 * result is <= Clock. Nothing here guards against Clock == 0.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	const double vco_x4 = VCOSpeed * 4.0;
	const double divider = dml_ceil(vco_x4 / Clock, 1);

	return vco_x4 / divider;
}
1471
1472 static void CalculateDCCConfiguration(
1473 bool DCCEnabled,
1474 bool DCCProgrammingAssumesScanDirectionUnknown,
1475 enum source_format_class SourcePixelFormat,
1476 unsigned int SurfaceWidthLuma,
1477 unsigned int SurfaceWidthChroma,
1478 unsigned int SurfaceHeightLuma,
1479 unsigned int SurfaceHeightChroma,
1480 double DETBufferSize,
1481 unsigned int RequestHeight256ByteLuma,
1482 unsigned int RequestHeight256ByteChroma,
1483 enum dm_swizzle_mode TilingFormat,
1484 unsigned int BytePerPixelY,
1485 unsigned int BytePerPixelC,
1486 double BytePerPixelDETY,
1487 double BytePerPixelDETC,
1488 enum scan_direction_class ScanOrientation,
1489 unsigned int *MaxUncompressedBlockLuma,
1490 unsigned int *MaxUncompressedBlockChroma,
1491 unsigned int *MaxCompressedBlockLuma,
1492 unsigned int *MaxCompressedBlockChroma,
1493 unsigned int *IndependentBlockLuma,
1494 unsigned int *IndependentBlockChroma)
1495 {
1496 int yuv420;
1497 int horz_div_l;
1498 int horz_div_c;
1499 int vert_div_l;
1500 int vert_div_c;
1501
1502 int swath_buf_size;
1503 double detile_buf_vp_horz_limit;
1504 double detile_buf_vp_vert_limit;
1505
1506 int MAS_vp_horz_limit;
1507 int MAS_vp_vert_limit;
1508 int max_vp_horz_width;
1509 int max_vp_vert_height;
1510 int eff_surf_width_l;
1511 int eff_surf_width_c;
1512 int eff_surf_height_l;
1513 int eff_surf_height_c;
1514
1515 int full_swath_bytes_horz_wc_l;
1516 int full_swath_bytes_horz_wc_c;
1517 int full_swath_bytes_vert_wc_l;
1518 int full_swath_bytes_vert_wc_c;
1519 int req128_horz_wc_l;
1520 int req128_horz_wc_c;
1521 int req128_vert_wc_l;
1522 int req128_vert_wc_c;
1523 int segment_order_horz_contiguous_luma;
1524 int segment_order_horz_contiguous_chroma;
1525 int segment_order_vert_contiguous_luma;
1526 int segment_order_vert_contiguous_chroma;
1527
1528 typedef enum {
1529 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
1530 } RequestType;
1531 RequestType RequestLuma;
1532 RequestType RequestChroma;
1533
1534 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
1535 horz_div_l = 1;
1536 horz_div_c = 1;
1537 vert_div_l = 1;
1538 vert_div_c = 1;
1539
1540 if (BytePerPixelY == 1)
1541 vert_div_l = 0;
1542 if (BytePerPixelC == 1)
1543 vert_div_c = 0;
1544 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1545 horz_div_l = 0;
1546 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
1547 horz_div_c = 0;
1548
1549 if (BytePerPixelC == 0) {
1550 swath_buf_size = DETBufferSize / 2 - 2 * 256;
1551 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
1552 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
1553 } else {
1554 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
1555 detile_buf_vp_horz_limit = (double) swath_buf_size
1556 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
1557 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
1558 detile_buf_vp_vert_limit = (double) swath_buf_size
1559 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
1560 }
1561
1562 if (SourcePixelFormat == dm_420_10) {
1563 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
1564 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
1565 }
1566
1567 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
1568 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);
1569
1570 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
1571 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
1572 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
1573 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
1574 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
1575 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
1576 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
1577 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
1578
1579 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
1580 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
1581 if (BytePerPixelC > 0) {
1582 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
1583 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
1584 } else {
1585 full_swath_bytes_horz_wc_c = 0;
1586 full_swath_bytes_vert_wc_c = 0;
1587 }
1588
1589 if (SourcePixelFormat == dm_420_10) {
1590 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
1591 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
1592 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
1593 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
1594 }
1595
1596 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1597 req128_horz_wc_l = 0;
1598 req128_horz_wc_c = 0;
1599 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
1600 req128_horz_wc_l = 0;
1601 req128_horz_wc_c = 1;
1602 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
1603 req128_horz_wc_l = 1;
1604 req128_horz_wc_c = 0;
1605 } else {
1606 req128_horz_wc_l = 1;
1607 req128_horz_wc_c = 1;
1608 }
1609
1610 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1611 req128_vert_wc_l = 0;
1612 req128_vert_wc_c = 0;
1613 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
1614 req128_vert_wc_l = 0;
1615 req128_vert_wc_c = 1;
1616 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
1617 req128_vert_wc_l = 1;
1618 req128_vert_wc_c = 0;
1619 } else {
1620 req128_vert_wc_l = 1;
1621 req128_vert_wc_c = 1;
1622 }
1623
1624 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1625 segment_order_horz_contiguous_luma = 0;
1626 } else {
1627 segment_order_horz_contiguous_luma = 1;
1628 }
1629 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1630 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1631 segment_order_vert_contiguous_luma = 0;
1632 } else {
1633 segment_order_vert_contiguous_luma = 1;
1634 }
1635 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
1636 segment_order_horz_contiguous_chroma = 0;
1637 } else {
1638 segment_order_horz_contiguous_chroma = 1;
1639 }
1640 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
1641 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
1642 segment_order_vert_contiguous_chroma = 0;
1643 } else {
1644 segment_order_vert_contiguous_chroma = 1;
1645 }
1646
1647 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
1648 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
1649 RequestLuma = REQ_256Bytes;
1650 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
1651 RequestLuma = REQ_128BytesNonContiguous;
1652 } else {
1653 RequestLuma = REQ_128BytesContiguous;
1654 }
1655 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
1656 RequestChroma = REQ_256Bytes;
1657 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
1658 RequestChroma = REQ_128BytesNonContiguous;
1659 } else {
1660 RequestChroma = REQ_128BytesContiguous;
1661 }
1662 } else if (ScanOrientation != dm_vert) {
1663 if (req128_horz_wc_l == 0) {
1664 RequestLuma = REQ_256Bytes;
1665 } else if (segment_order_horz_contiguous_luma == 0) {
1666 RequestLuma = REQ_128BytesNonContiguous;
1667 } else {
1668 RequestLuma = REQ_128BytesContiguous;
1669 }
1670 if (req128_horz_wc_c == 0) {
1671 RequestChroma = REQ_256Bytes;
1672 } else if (segment_order_horz_contiguous_chroma == 0) {
1673 RequestChroma = REQ_128BytesNonContiguous;
1674 } else {
1675 RequestChroma = REQ_128BytesContiguous;
1676 }
1677 } else {
1678 if (req128_vert_wc_l == 0) {
1679 RequestLuma = REQ_256Bytes;
1680 } else if (segment_order_vert_contiguous_luma == 0) {
1681 RequestLuma = REQ_128BytesNonContiguous;
1682 } else {
1683 RequestLuma = REQ_128BytesContiguous;
1684 }
1685 if (req128_vert_wc_c == 0) {
1686 RequestChroma = REQ_256Bytes;
1687 } else if (segment_order_vert_contiguous_chroma == 0) {
1688 RequestChroma = REQ_128BytesNonContiguous;
1689 } else {
1690 RequestChroma = REQ_128BytesContiguous;
1691 }
1692 }
1693
1694 if (RequestLuma == REQ_256Bytes) {
1695 *MaxUncompressedBlockLuma = 256;
1696 *MaxCompressedBlockLuma = 256;
1697 *IndependentBlockLuma = 0;
1698 } else if (RequestLuma == REQ_128BytesContiguous) {
1699 *MaxUncompressedBlockLuma = 256;
1700 *MaxCompressedBlockLuma = 128;
1701 *IndependentBlockLuma = 128;
1702 } else {
1703 *MaxUncompressedBlockLuma = 256;
1704 *MaxCompressedBlockLuma = 64;
1705 *IndependentBlockLuma = 64;
1706 }
1707
1708 if (RequestChroma == REQ_256Bytes) {
1709 *MaxUncompressedBlockChroma = 256;
1710 *MaxCompressedBlockChroma = 256;
1711 *IndependentBlockChroma = 0;
1712 } else if (RequestChroma == REQ_128BytesContiguous) {
1713 *MaxUncompressedBlockChroma = 256;
1714 *MaxCompressedBlockChroma = 128;
1715 *IndependentBlockChroma = 128;
1716 } else {
1717 *MaxUncompressedBlockChroma = 256;
1718 *MaxCompressedBlockChroma = 64;
1719 *IndependentBlockChroma = 64;
1720 }
1721
1722 if (DCCEnabled != true || BytePerPixelC == 0) {
1723 *MaxUncompressedBlockChroma = 0;
1724 *MaxCompressedBlockChroma = 0;
1725 *IndependentBlockChroma = 0;
1726 }
1727
1728 if (DCCEnabled != true) {
1729 *MaxUncompressedBlockLuma = 0;
1730 *MaxCompressedBlockLuma = 0;
1731 *IndependentBlockLuma = 0;
1732 }
1733 }
1734
1735 static double CalculatePrefetchSourceLines(
1736 struct display_mode_lib *mode_lib,
1737 double VRatio,
1738 double vtaps,
1739 bool Interlace,
1740 bool ProgressiveToInterlaceUnitInOPP,
1741 unsigned int SwathHeight,
1742 unsigned int ViewportYStart,
1743 double *VInitPreFill,
1744 unsigned int *MaxNumSwath)
1745 {
1746 struct vba_vars_st *v = &mode_lib->vba;
1747 unsigned int MaxPartialSwath;
1748
1749 if (ProgressiveToInterlaceUnitInOPP)
1750 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
1751 else
1752 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);
1753
1754 if (!v->IgnoreViewportPositioning) {
1755
1756 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;
1757
1758 if (*VInitPreFill > 1.0)
1759 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
1760 else
1761 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
1762 MaxPartialSwath = dml_max(1U, MaxPartialSwath);
1763
1764 } else {
1765
1766 if (ViewportYStart != 0)
1767 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");
1768
1769 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);
1770
1771 if (*VInitPreFill > 1.0)
1772 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
1773 else
1774 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
1775 }
1776
1777 #ifdef __DML_VBA_DEBUG__
1778 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
1779 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
1780 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
1781 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
1782 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
1783 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
1784 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
1785 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
1786 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
1787 #endif
1788 return *MaxNumSwath * SwathHeight + MaxPartialSwath;
1789 }
1790
1791 static unsigned int CalculateVMAndRowBytes(
1792 struct display_mode_lib *mode_lib,
1793 bool DCCEnable,
1794 unsigned int BlockHeight256Bytes,
1795 unsigned int BlockWidth256Bytes,
1796 enum source_format_class SourcePixelFormat,
1797 unsigned int SurfaceTiling,
1798 unsigned int BytePerPixel,
1799 enum scan_direction_class ScanDirection,
1800 unsigned int SwathWidth,
1801 unsigned int ViewportHeight,
1802 bool GPUVMEnable,
1803 bool HostVMEnable,
1804 unsigned int HostVMMaxNonCachedPageTableLevels,
1805 unsigned int GPUVMMinPageSize,
1806 unsigned int HostVMMinPageSize,
1807 unsigned int PTEBufferSizeInRequests,
1808 unsigned int Pitch,
1809 unsigned int DCCMetaPitch,
1810 unsigned int *MacroTileWidth,
1811 unsigned int *MetaRowByte,
1812 unsigned int *PixelPTEBytesPerRow,
1813 bool *PTEBufferSizeNotExceeded,
1814 int *dpte_row_width_ub,
1815 unsigned int *dpte_row_height,
1816 unsigned int *MetaRequestWidth,
1817 unsigned int *MetaRequestHeight,
1818 unsigned int *meta_row_width,
1819 unsigned int *meta_row_height,
1820 int *vm_group_bytes,
1821 unsigned int *dpte_group_bytes,
1822 unsigned int *PixelPTEReqWidth,
1823 unsigned int *PixelPTEReqHeight,
1824 unsigned int *PTERequestSize,
1825 int *DPDE0BytesFrame,
1826 int *MetaPTEBytesFrame)
1827 {
1828 struct vba_vars_st *v = &mode_lib->vba;
1829 unsigned int MPDEBytesFrame;
1830 unsigned int DCCMetaSurfaceBytes;
1831 unsigned int MacroTileSizeBytes;
1832 unsigned int MacroTileHeight;
1833 unsigned int ExtraDPDEBytesFrame;
1834 unsigned int PDEAndMetaPTEBytesFrame;
1835 unsigned int PixelPTEReqHeightPTEs = 0;
1836 unsigned int HostVMDynamicLevels = 0;
1837 double FractionOfPTEReturnDrop;
1838
1839 if (GPUVMEnable == true && HostVMEnable == true) {
1840 if (HostVMMinPageSize < 2048) {
1841 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
1842 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
1843 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
1844 } else {
1845 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
1846 }
1847 }
1848
1849 *MetaRequestHeight = 8 * BlockHeight256Bytes;
1850 *MetaRequestWidth = 8 * BlockWidth256Bytes;
1851 if (ScanDirection != dm_vert) {
1852 *meta_row_height = *MetaRequestHeight;
1853 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
1854 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
1855 } else {
1856 *meta_row_height = *MetaRequestWidth;
1857 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
1858 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
1859 }
1860 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
1861 if (GPUVMEnable == true) {
1862 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
1863 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
1864 } else {
1865 *MetaPTEBytesFrame = 0;
1866 MPDEBytesFrame = 0;
1867 }
1868
1869 if (DCCEnable != true) {
1870 *MetaPTEBytesFrame = 0;
1871 MPDEBytesFrame = 0;
1872 *MetaRowByte = 0;
1873 }
1874
1875 if (SurfaceTiling == dm_sw_linear) {
1876 MacroTileSizeBytes = 256;
1877 MacroTileHeight = BlockHeight256Bytes;
1878 } else {
1879 MacroTileSizeBytes = 65536;
1880 MacroTileHeight = 16 * BlockHeight256Bytes;
1881 }
1882 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;
1883
1884 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
1885 if (ScanDirection != dm_vert) {
1886 *DPDE0BytesFrame = 64
1887 * (dml_ceil(
1888 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1889 / (8 * 2097152),
1890 1) + 1);
1891 } else {
1892 *DPDE0BytesFrame = 64
1893 * (dml_ceil(
1894 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
1895 / (8 * 2097152),
1896 1) + 1);
1897 }
1898 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
1899 } else {
1900 *DPDE0BytesFrame = 0;
1901 ExtraDPDEBytesFrame = 0;
1902 }
1903
1904 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;
1905
1906 #ifdef __DML_VBA_DEBUG__
1907 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
1908 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
1909 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
1910 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
1911 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1912 #endif
1913
1914 if (HostVMEnable == true) {
1915 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
1916 }
1917 #ifdef __DML_VBA_DEBUG__
1918 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
1919 #endif
1920
1921 if (SurfaceTiling == dm_sw_linear) {
1922 PixelPTEReqHeightPTEs = 1;
1923 *PixelPTEReqHeight = 1;
1924 *PixelPTEReqWidth = 32768.0 / BytePerPixel;
1925 *PTERequestSize = 64;
1926 FractionOfPTEReturnDrop = 0;
1927 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
1928 PixelPTEReqHeightPTEs = 16;
1929 *PixelPTEReqHeight = 16 * BlockHeight256Bytes;
1930 *PixelPTEReqWidth = 16 * BlockWidth256Bytes;
1931 *PTERequestSize = 128;
1932 FractionOfPTEReturnDrop = 0;
1933 } else {
1934 PixelPTEReqHeightPTEs = 1;
1935 *PixelPTEReqHeight = MacroTileHeight;
1936 *PixelPTEReqWidth = 8 * *MacroTileWidth;
1937 *PTERequestSize = 64;
1938 FractionOfPTEReturnDrop = 0;
1939 }
1940
1941 if (SurfaceTiling == dm_sw_linear) {
1942 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
1943 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1944 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1945 } else if (ScanDirection != dm_vert) {
1946 *dpte_row_height = *PixelPTEReqHeight;
1947 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
1948 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
1949 } else {
1950 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
1951 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
1952 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
1953 }
1954
1955 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
1956 *PTEBufferSizeNotExceeded = true;
1957 } else {
1958 *PTEBufferSizeNotExceeded = false;
1959 }
1960
1961 if (GPUVMEnable != true) {
1962 *PixelPTEBytesPerRow = 0;
1963 *PTEBufferSizeNotExceeded = true;
1964 }
1965
1966 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);
1967
1968 if (HostVMEnable == true) {
1969 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
1970 }
1971
1972 if (HostVMEnable == true) {
1973 *vm_group_bytes = 512;
1974 *dpte_group_bytes = 512;
1975 } else if (GPUVMEnable == true) {
1976 *vm_group_bytes = 2048;
1977 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
1978 *dpte_group_bytes = 512;
1979 } else {
1980 *dpte_group_bytes = 2048;
1981 }
1982 } else {
1983 *vm_group_bytes = 0;
1984 *dpte_group_bytes = 0;
1985 }
1986 return PDEAndMetaPTEBytesFrame;
1987 }
1988
1989 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
1990 {
1991 struct vba_vars_st *v = &mode_lib->vba;
1992 unsigned int j, k;
1993 double HostVMInefficiencyFactor = 1.0;
1994 bool NoChromaPlanes = true;
1995 int ReorderBytes;
1996 double VMDataOnlyReturnBW;
1997 double MaxTotalRDBandwidth = 0;
1998 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];
1999
2000 v->WritebackDISPCLK = 0.0;
2001 v->DISPCLKWithRamping = 0;
2002 v->DISPCLKWithoutRamping = 0;
2003 v->GlobalDPPCLK = 0.0;
2004 /* DAL custom code: need to update ReturnBW in case min dcfclk is overriden */
2005 {
2006 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
2007 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
2008 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
2009 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;
2010 if (v->HostVMEnable != true) {
2011 v->ReturnBW = dml_min(
2012 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2013 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
2014 } else {
2015 v->ReturnBW = dml_min(
2016 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2017 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
2018 }
2019 }
2020 /* End DAL custom code */
2021
2022 // DISPCLK and DPPCLK Calculation
2023 //
2024 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2025 if (v->WritebackEnable[k]) {
2026 v->WritebackDISPCLK = dml_max(
2027 v->WritebackDISPCLK,
2028 dml31_CalculateWriteBackDISPCLK(
2029 v->WritebackPixelFormat[k],
2030 v->PixelClock[k],
2031 v->WritebackHRatio[k],
2032 v->WritebackVRatio[k],
2033 v->WritebackHTaps[k],
2034 v->WritebackVTaps[k],
2035 v->WritebackSourceWidth[k],
2036 v->WritebackDestinationWidth[k],
2037 v->HTotal[k],
2038 v->WritebackLineBufferSize));
2039 }
2040 }
2041
2042 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2043 if (v->HRatio[k] > 1) {
2044 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(
2045 v->MaxDCHUBToPSCLThroughput,
2046 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1));
2047 } else {
2048 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2049 }
2050
2051 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k]
2052 * dml_max(
2053 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
2054 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0));
2055
2056 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) {
2057 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k];
2058 }
2059
2060 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12
2061 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) {
2062 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0;
2063 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma;
2064 } else {
2065 if (v->HRatioChroma[k] > 1) {
2066 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(
2067 v->MaxDCHUBToPSCLThroughput,
2068 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
2069 } else {
2070 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
2071 }
2072 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k]
2073 * dml_max3(
2074 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
2075 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k],
2076 1.0);
2077
2078 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) {
2079 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k];
2080 }
2081
2082 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma);
2083 }
2084 }
2085
2086 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2087 if (v->BlendingAndTiming[k] != k)
2088 continue;
2089 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) {
2090 v->DISPCLKWithRamping = dml_max(
2091 v->DISPCLKWithRamping,
2092 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2093 * (1 + v->DISPCLKRampingMargin / 100));
2094 v->DISPCLKWithoutRamping = dml_max(
2095 v->DISPCLKWithoutRamping,
2096 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2097 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2098 v->DISPCLKWithRamping = dml_max(
2099 v->DISPCLKWithRamping,
2100 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)
2101 * (1 + v->DISPCLKRampingMargin / 100));
2102 v->DISPCLKWithoutRamping = dml_max(
2103 v->DISPCLKWithoutRamping,
2104 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2105 } else {
2106 v->DISPCLKWithRamping = dml_max(
2107 v->DISPCLKWithRamping,
2108 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100));
2109 v->DISPCLKWithoutRamping = dml_max(
2110 v->DISPCLKWithoutRamping,
2111 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100));
2112 }
2113 }
2114
2115 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK);
2116 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK);
2117
2118 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0);
2119 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed);
2120 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed);
2121 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown(
2122 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz,
2123 v->DISPCLKDPPCLKVCOSpeed);
2124 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2125 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity;
2126 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) {
2127 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity;
2128 } else {
2129 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity;
2130 }
2131 v->DISPCLK = v->DISPCLK_calculated;
2132 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated);
2133
2134 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2135 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2136 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]);
2137 }
2138 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed);
2139 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2140 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1);
2141 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]);
2142 }
2143
2144 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2145 v->DPPCLK[k] = v->DPPCLK_calculated[k];
2146 }
2147
2148 // Urgent and B P-State/DRAM Clock Change Watermark
2149 DTRACE(" dcfclk_mhz = %f", v->DCFCLK);
2150 DTRACE(" return_bus_bw = %f", v->ReturnBW);
2151
2152 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2153 dml30_CalculateBytePerPixelAnd256BBlockSizes(
2154 v->SourcePixelFormat[k],
2155 v->SurfaceTiling[k],
2156 &v->BytePerPixelY[k],
2157 &v->BytePerPixelC[k],
2158 &v->BytePerPixelDETY[k],
2159 &v->BytePerPixelDETC[k],
2160 &v->BlockHeight256BytesY[k],
2161 &v->BlockHeight256BytesC[k],
2162 &v->BlockWidth256BytesY[k],
2163 &v->BlockWidth256BytesC[k]);
2164 }
2165
2166 CalculateSwathWidth(
2167 false,
2168 v->NumberOfActivePlanes,
2169 v->SourcePixelFormat,
2170 v->SourceScan,
2171 v->ViewportWidth,
2172 v->ViewportHeight,
2173 v->SurfaceWidthY,
2174 v->SurfaceWidthC,
2175 v->SurfaceHeightY,
2176 v->SurfaceHeightC,
2177 v->ODMCombineEnabled,
2178 v->BytePerPixelY,
2179 v->BytePerPixelC,
2180 v->BlockHeight256BytesY,
2181 v->BlockHeight256BytesC,
2182 v->BlockWidth256BytesY,
2183 v->BlockWidth256BytesC,
2184 v->BlendingAndTiming,
2185 v->HActive,
2186 v->HRatio,
2187 v->DPPPerPlane,
2188 v->SwathWidthSingleDPPY,
2189 v->SwathWidthSingleDPPC,
2190 v->SwathWidthY,
2191 v->SwathWidthC,
2192 v->dummyinteger3,
2193 v->dummyinteger4,
2194 v->swath_width_luma_ub,
2195 v->swath_width_chroma_ub);
2196
2197 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2198 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k])
2199 * v->VRatio[k];
2200 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k])
2201 * v->VRatioChroma[k];
2202 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]);
2203 }
2204
2205 // DCFCLK Deep Sleep
2206 CalculateDCFCLKDeepSleep(
2207 mode_lib,
2208 v->NumberOfActivePlanes,
2209 v->BytePerPixelY,
2210 v->BytePerPixelC,
2211 v->VRatio,
2212 v->VRatioChroma,
2213 v->SwathWidthY,
2214 v->SwathWidthC,
2215 v->DPPPerPlane,
2216 v->HRatio,
2217 v->HRatioChroma,
2218 v->PixelClock,
2219 v->PSCL_THROUGHPUT_LUMA,
2220 v->PSCL_THROUGHPUT_CHROMA,
2221 v->DPPCLK,
2222 v->ReadBandwidthPlaneLuma,
2223 v->ReadBandwidthPlaneChroma,
2224 v->ReturnBusWidth,
2225 &v->DCFCLKDeepSleep);
2226
2227 // DSCCLK
2228 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2229 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) {
2230 v->DSCCLK_calculated[k] = 0.0;
2231 } else {
2232 if (v->OutputFormat[k] == dm_420)
2233 v->DSCFormatFactor = 2;
2234 else if (v->OutputFormat[k] == dm_444)
2235 v->DSCFormatFactor = 1;
2236 else if (v->OutputFormat[k] == dm_n422)
2237 v->DSCFormatFactor = 2;
2238 else
2239 v->DSCFormatFactor = 1;
2240 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1)
2241 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor
2242 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2243 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1)
2244 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor
2245 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2246 else
2247 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor
2248 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100);
2249 }
2250 }
2251
2252 // DSC Delay
2253 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2254 double BPP = v->OutputBpp[k];
2255
2256 if (v->DSCEnabled[k] && BPP != 0) {
2257 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) {
2258 v->DSCDelay[k] = dscceComputeDelay(
2259 v->DSCInputBitPerComponent[k],
2260 BPP,
2261 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2262 v->NumberOfDSCSlices[k],
2263 v->OutputFormat[k],
2264 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
2265 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) {
2266 v->DSCDelay[k] = 2
2267 * (dscceComputeDelay(
2268 v->DSCInputBitPerComponent[k],
2269 BPP,
2270 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2271 v->NumberOfDSCSlices[k] / 2.0,
2272 v->OutputFormat[k],
2273 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2274 } else {
2275 v->DSCDelay[k] = 4
2276 * (dscceComputeDelay(
2277 v->DSCInputBitPerComponent[k],
2278 BPP,
2279 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1),
2280 v->NumberOfDSCSlices[k] / 4.0,
2281 v->OutputFormat[k],
2282 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
2283 }
2284 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
2285 } else {
2286 v->DSCDelay[k] = 0;
2287 }
2288 }
2289
2290 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2291 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes
2292 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j])
2293 v->DSCDelay[k] = v->DSCDelay[j];
2294
2295 // Prefetch
2296 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2297 unsigned int PDEAndMetaPTEBytesFrameY;
2298 unsigned int PixelPTEBytesPerRowY;
2299 unsigned int MetaRowByteY;
2300 unsigned int MetaRowByteC;
2301 unsigned int PDEAndMetaPTEBytesFrameC;
2302 unsigned int PixelPTEBytesPerRowC;
2303 bool PTEBufferSizeNotExceededY;
2304 bool PTEBufferSizeNotExceededC;
2305
2306 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2307 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
2308 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) {
2309 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2;
2310 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
2311 } else {
2312 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
2313 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
2314 }
2315
2316 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes(
2317 mode_lib,
2318 v->DCCEnable[k],
2319 v->BlockHeight256BytesC[k],
2320 v->BlockWidth256BytesC[k],
2321 v->SourcePixelFormat[k],
2322 v->SurfaceTiling[k],
2323 v->BytePerPixelC[k],
2324 v->SourceScan[k],
2325 v->SwathWidthC[k],
2326 v->ViewportHeightChroma[k],
2327 v->GPUVMEnable,
2328 v->HostVMEnable,
2329 v->HostVMMaxNonCachedPageTableLevels,
2330 v->GPUVMMinPageSize,
2331 v->HostVMMinPageSize,
2332 v->PTEBufferSizeInRequestsForChroma,
2333 v->PitchC[k],
2334 v->DCCMetaPitchC[k],
2335 &v->MacroTileWidthC[k],
2336 &MetaRowByteC,
2337 &PixelPTEBytesPerRowC,
2338 &PTEBufferSizeNotExceededC,
2339 &v->dpte_row_width_chroma_ub[k],
2340 &v->dpte_row_height_chroma[k],
2341 &v->meta_req_width_chroma[k],
2342 &v->meta_req_height_chroma[k],
2343 &v->meta_row_width_chroma[k],
2344 &v->meta_row_height_chroma[k],
2345 &v->dummyinteger1,
2346 &v->dummyinteger2,
2347 &v->PixelPTEReqWidthC[k],
2348 &v->PixelPTEReqHeightC[k],
2349 &v->PTERequestSizeC[k],
2350 &v->dpde0_bytes_per_frame_ub_c[k],
2351 &v->meta_pte_bytes_per_frame_ub_c[k]);
2352
2353 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2354 mode_lib,
2355 v->VRatioChroma[k],
2356 v->VTAPsChroma[k],
2357 v->Interlace[k],
2358 v->ProgressiveToInterlaceUnitInOPP,
2359 v->SwathHeightC[k],
2360 v->ViewportYStartC[k],
2361 &v->VInitPreFillC[k],
2362 &v->MaxNumSwathC[k]);
2363 } else {
2364 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
2365 v->PTEBufferSizeInRequestsForChroma = 0;
2366 PixelPTEBytesPerRowC = 0;
2367 PDEAndMetaPTEBytesFrameC = 0;
2368 MetaRowByteC = 0;
2369 v->MaxNumSwathC[k] = 0;
2370 v->PrefetchSourceLinesC[k] = 0;
2371 }
2372
2373 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes(
2374 mode_lib,
2375 v->DCCEnable[k],
2376 v->BlockHeight256BytesY[k],
2377 v->BlockWidth256BytesY[k],
2378 v->SourcePixelFormat[k],
2379 v->SurfaceTiling[k],
2380 v->BytePerPixelY[k],
2381 v->SourceScan[k],
2382 v->SwathWidthY[k],
2383 v->ViewportHeight[k],
2384 v->GPUVMEnable,
2385 v->HostVMEnable,
2386 v->HostVMMaxNonCachedPageTableLevels,
2387 v->GPUVMMinPageSize,
2388 v->HostVMMinPageSize,
2389 v->PTEBufferSizeInRequestsForLuma,
2390 v->PitchY[k],
2391 v->DCCMetaPitchY[k],
2392 &v->MacroTileWidthY[k],
2393 &MetaRowByteY,
2394 &PixelPTEBytesPerRowY,
2395 &PTEBufferSizeNotExceededY,
2396 &v->dpte_row_width_luma_ub[k],
2397 &v->dpte_row_height[k],
2398 &v->meta_req_width[k],
2399 &v->meta_req_height[k],
2400 &v->meta_row_width[k],
2401 &v->meta_row_height[k],
2402 &v->vm_group_bytes[k],
2403 &v->dpte_group_bytes[k],
2404 &v->PixelPTEReqWidthY[k],
2405 &v->PixelPTEReqHeightY[k],
2406 &v->PTERequestSizeY[k],
2407 &v->dpde0_bytes_per_frame_ub_l[k],
2408 &v->meta_pte_bytes_per_frame_ub_l[k]);
2409
2410 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
2411 mode_lib,
2412 v->VRatio[k],
2413 v->vtaps[k],
2414 v->Interlace[k],
2415 v->ProgressiveToInterlaceUnitInOPP,
2416 v->SwathHeightY[k],
2417 v->ViewportYStartY[k],
2418 &v->VInitPreFillY[k],
2419 &v->MaxNumSwathY[k]);
2420 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC;
2421 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC;
2422 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC;
2423
2424 CalculateRowBandwidth(
2425 v->GPUVMEnable,
2426 v->SourcePixelFormat[k],
2427 v->VRatio[k],
2428 v->VRatioChroma[k],
2429 v->DCCEnable[k],
2430 v->HTotal[k] / v->PixelClock[k],
2431 MetaRowByteY,
2432 MetaRowByteC,
2433 v->meta_row_height[k],
2434 v->meta_row_height_chroma[k],
2435 PixelPTEBytesPerRowY,
2436 PixelPTEBytesPerRowC,
2437 v->dpte_row_height[k],
2438 v->dpte_row_height_chroma[k],
2439 &v->meta_row_bw[k],
2440 &v->dpte_row_bw[k]);
2441 }
2442
2443 v->TotalDCCActiveDPP = 0;
2444 v->TotalActiveDPP = 0;
2445 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2446 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k];
2447 if (v->DCCEnable[k])
2448 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k];
2449 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12
2450 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
2451 NoChromaPlanes = false;
2452 }
2453
2454 ReorderBytes = v->NumberOfChannels
2455 * dml_max3(
2456 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
2457 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
2458 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
2459
2460 VMDataOnlyReturnBW = dml_min(
2461 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn)
2462 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
2463 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth
2464 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
2465
2466 #ifdef __DML_VBA_DEBUG__
2467 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth);
2468 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK);
2469 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock);
2470 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn);
2471 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency);
2472 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed);
2473 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels);
2474 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth);
2475 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly);
2476 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW);
2477 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW);
2478 #endif
2479
2480 if (v->GPUVMEnable && v->HostVMEnable)
2481 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW;
2482
2483 v->UrgentExtraLatency = CalculateExtraLatency(
2484 v->RoundTripPingLatencyCycles,
2485 ReorderBytes,
2486 v->DCFCLK,
2487 v->TotalActiveDPP,
2488 v->PixelChunkSizeInKByte,
2489 v->TotalDCCActiveDPP,
2490 v->MetaChunkSize,
2491 v->ReturnBW,
2492 v->GPUVMEnable,
2493 v->HostVMEnable,
2494 v->NumberOfActivePlanes,
2495 v->DPPPerPlane,
2496 v->dpte_group_bytes,
2497 HostVMInefficiencyFactor,
2498 v->HostVMMinPageSize,
2499 v->HostVMMaxNonCachedPageTableLevels);
2500
2501 v->TCalc = 24.0 / v->DCFCLKDeepSleep;
2502
2503 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2504 if (v->BlendingAndTiming[k] == k) {
2505 if (v->WritebackEnable[k] == true) {
2506 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency
2507 + CalculateWriteBackDelay(
2508 v->WritebackPixelFormat[k],
2509 v->WritebackHRatio[k],
2510 v->WritebackVRatio[k],
2511 v->WritebackVTaps[k],
2512 v->WritebackDestinationWidth[k],
2513 v->WritebackDestinationHeight[k],
2514 v->WritebackSourceHeight[k],
2515 v->HTotal[k]) / v->DISPCLK;
2516 } else
2517 v->WritebackDelay[v->VoltageLevel][k] = 0;
2518 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
2519 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) {
2520 v->WritebackDelay[v->VoltageLevel][k] = dml_max(
2521 v->WritebackDelay[v->VoltageLevel][k],
2522 v->WritebackLatency
2523 + CalculateWriteBackDelay(
2524 v->WritebackPixelFormat[j],
2525 v->WritebackHRatio[j],
2526 v->WritebackVRatio[j],
2527 v->WritebackVTaps[j],
2528 v->WritebackDestinationWidth[j],
2529 v->WritebackDestinationHeight[j],
2530 v->WritebackSourceHeight[j],
2531 v->HTotal[k]) / v->DISPCLK);
2532 }
2533 }
2534 }
2535 }
2536
2537 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2538 for (j = 0; j < v->NumberOfActivePlanes; ++j)
2539 if (v->BlendingAndTiming[k] == j)
2540 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j];
2541
2542 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2543 v->MaxVStartupLines[k] =
2544 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
2545 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
2546 v->VTotal[k] - v->VActive[k]
2547 - dml_max(
2548 1.0,
2549 dml_ceil(
2550 (double) v->WritebackDelay[v->VoltageLevel][k]
2551 / (v->HTotal[k] / v->PixelClock[k]),
2552 1));
2553 if (v->MaxVStartupLines[k] > 1023)
2554 v->MaxVStartupLines[k] = 1023;
2555
2556 #ifdef __DML_VBA_DEBUG__
2557 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
2558 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel);
2559 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]);
2560 #endif
2561 }
2562
2563 v->MaximumMaxVStartupLines = 0;
2564 for (k = 0; k < v->NumberOfActivePlanes; ++k)
2565 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]);
2566
2567 // VBA_DELTA
2568 // We don't really care to iterate between the various prefetch modes
2569 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode);
2570
2571 v->UrgentLatency = CalculateUrgentLatency(
2572 v->UrgentLatencyPixelDataOnly,
2573 v->UrgentLatencyPixelMixedWithVMData,
2574 v->UrgentLatencyVMDataOnly,
2575 v->DoUrgentLatencyAdjustment,
2576 v->UrgentLatencyAdjustmentFabricClockComponent,
2577 v->UrgentLatencyAdjustmentFabricClockReference,
2578 v->FabricClock);
2579
2580 v->FractionOfUrgentBandwidth = 0.0;
2581 v->FractionOfUrgentBandwidthImmediateFlip = 0.0;
2582
2583 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__;
2584
2585 do {
2586 double MaxTotalRDBandwidthNoUrgentBurst = 0.0;
2587 bool DestinationLineTimesForPrefetchLessThan2 = false;
2588 bool VRatioPrefetchMoreThan4 = false;
2589 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime);
2590 MaxTotalRDBandwidth = 0;
2591
2592 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines);
2593
2594 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2595 Pipe myPipe;
2596
2597 myPipe.DPPCLK = v->DPPCLK[k];
2598 myPipe.DISPCLK = v->DISPCLK;
2599 myPipe.PixelClock = v->PixelClock[k];
2600 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep;
2601 myPipe.DPPPerPlane = v->DPPPerPlane[k];
2602 myPipe.ScalerEnabled = v->ScalerEnabled[k];
2603 myPipe.VRatio = v->VRatio[k];
2604 myPipe.VRatioChroma = v->VRatioChroma[k];
2605 myPipe.SourceScan = v->SourceScan[k];
2606 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k];
2607 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k];
2608 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k];
2609 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k];
2610 myPipe.InterlaceEnable = v->Interlace[k];
2611 myPipe.NumberOfCursors = v->NumberOfCursors[k];
2612 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
2613 myPipe.HTotal = v->HTotal[k];
2614 myPipe.DCCEnable = v->DCCEnable[k];
2615 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1
2616 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1;
2617 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
2618 myPipe.BytePerPixelY = v->BytePerPixelY[k];
2619 myPipe.BytePerPixelC = v->BytePerPixelC[k];
2620 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
2621 v->ErrorResult[k] = CalculatePrefetchSchedule(
2622 mode_lib,
2623 HostVMInefficiencyFactor,
2624 &myPipe,
2625 v->DSCDelay[k],
2626 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
2627 v->DPPCLKDelaySCL,
2628 v->DPPCLKDelaySCLLBOnly,
2629 v->DPPCLKDelayCNVCCursor,
2630 v->DISPCLKDelaySubtotal,
2631 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]),
2632 v->OutputFormat[k],
2633 v->MaxInterDCNTileRepeaters,
2634 dml_min(v->VStartupLines, v->MaxVStartupLines[k]),
2635 v->MaxVStartupLines[k],
2636 v->GPUVMMaxPageTableLevels,
2637 v->GPUVMEnable,
2638 v->HostVMEnable,
2639 v->HostVMMaxNonCachedPageTableLevels,
2640 v->HostVMMinPageSize,
2641 v->DynamicMetadataEnable[k],
2642 v->DynamicMetadataVMEnabled,
2643 v->DynamicMetadataLinesBeforeActiveRequired[k],
2644 v->DynamicMetadataTransmittedBytes[k],
2645 v->UrgentLatency,
2646 v->UrgentExtraLatency,
2647 v->TCalc,
2648 v->PDEAndMetaPTEBytesFrame[k],
2649 v->MetaRowByte[k],
2650 v->PixelPTEBytesPerRow[k],
2651 v->PrefetchSourceLinesY[k],
2652 v->SwathWidthY[k],
2653 v->VInitPreFillY[k],
2654 v->MaxNumSwathY[k],
2655 v->PrefetchSourceLinesC[k],
2656 v->SwathWidthC[k],
2657 v->VInitPreFillC[k],
2658 v->MaxNumSwathC[k],
2659 v->swath_width_luma_ub[k],
2660 v->swath_width_chroma_ub[k],
2661 v->SwathHeightY[k],
2662 v->SwathHeightC[k],
2663 TWait,
2664 &v->DSTXAfterScaler[k],
2665 &v->DSTYAfterScaler[k],
2666 &v->DestinationLinesForPrefetch[k],
2667 &v->PrefetchBandwidth[k],
2668 &v->DestinationLinesToRequestVMInVBlank[k],
2669 &v->DestinationLinesToRequestRowInVBlank[k],
2670 &v->VRatioPrefetchY[k],
2671 &v->VRatioPrefetchC[k],
2672 &v->RequiredPrefetchPixDataBWLuma[k],
2673 &v->RequiredPrefetchPixDataBWChroma[k],
2674 &v->NotEnoughTimeForDynamicMetadata[k],
2675 &v->Tno_bw[k],
2676 &v->prefetch_vmrow_bw[k],
2677 &v->Tdmdl_vm[k],
2678 &v->Tdmdl[k],
2679 &v->TSetup[k],
2680 &v->VUpdateOffsetPix[k],
2681 &v->VUpdateWidthPix[k],
2682 &v->VReadyOffsetPix[k]);
2683
2684 #ifdef __DML_VBA_DEBUG__
2685 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]);
2686 #endif
2687 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]);
2688 }
2689
2690 v->NoEnoughUrgentLatencyHiding = false;
2691 v->NoEnoughUrgentLatencyHidingPre = false;
2692
2693 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2694 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2695 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
2696 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
2697 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k];
2698
2699 CalculateUrgentBurstFactor(
2700 v->swath_width_luma_ub[k],
2701 v->swath_width_chroma_ub[k],
2702 v->SwathHeightY[k],
2703 v->SwathHeightC[k],
2704 v->HTotal[k] / v->PixelClock[k],
2705 v->UrgentLatency,
2706 v->CursorBufferSize,
2707 v->CursorWidth[k][0],
2708 v->CursorBPP[k][0],
2709 v->VRatio[k],
2710 v->VRatioChroma[k],
2711 v->BytePerPixelDETY[k],
2712 v->BytePerPixelDETC[k],
2713 v->DETBufferSizeY[k],
2714 v->DETBufferSizeC[k],
2715 &v->UrgBurstFactorCursor[k],
2716 &v->UrgBurstFactorLuma[k],
2717 &v->UrgBurstFactorChroma[k],
2718 &v->NoUrgentLatencyHiding[k]);
2719
2720 CalculateUrgentBurstFactor(
2721 v->swath_width_luma_ub[k],
2722 v->swath_width_chroma_ub[k],
2723 v->SwathHeightY[k],
2724 v->SwathHeightC[k],
2725 v->HTotal[k] / v->PixelClock[k],
2726 v->UrgentLatency,
2727 v->CursorBufferSize,
2728 v->CursorWidth[k][0],
2729 v->CursorBPP[k][0],
2730 v->VRatioPrefetchY[k],
2731 v->VRatioPrefetchC[k],
2732 v->BytePerPixelDETY[k],
2733 v->BytePerPixelDETC[k],
2734 v->DETBufferSizeY[k],
2735 v->DETBufferSizeC[k],
2736 &v->UrgBurstFactorCursorPre[k],
2737 &v->UrgBurstFactorLumaPre[k],
2738 &v->UrgBurstFactorChromaPre[k],
2739 &v->NoUrgentLatencyHidingPre[k]);
2740
2741 MaxTotalRDBandwidth = MaxTotalRDBandwidth
2742 + dml_max3(
2743 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2744 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2745 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2746 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k]
2747 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2748 v->DPPPerPlane[k]
2749 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2750 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2751 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2752
2753 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst
2754 + dml_max3(
2755 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2756 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k]
2757 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]),
2758 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k])
2759 + v->cursor_bw_pre[k]);
2760
2761 #ifdef __DML_VBA_DEBUG__
2762 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]);
2763 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]);
2764 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]);
2765 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]);
2766 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]);
2767
2768 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]);
2769 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]);
2770
2771 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]);
2772 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]);
2773 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]);
2774 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]);
2775 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]);
2776 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]);
2777 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]);
2778 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]);
2779 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]);
2780 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst);
2781 #endif
2782
2783 if (v->DestinationLinesForPrefetch[k] < 2)
2784 DestinationLineTimesForPrefetchLessThan2 = true;
2785
2786 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4)
2787 VRatioPrefetchMoreThan4 = true;
2788
2789 if (v->NoUrgentLatencyHiding[k] == true)
2790 v->NoEnoughUrgentLatencyHiding = true;
2791
2792 if (v->NoUrgentLatencyHidingPre[k] == true)
2793 v->NoEnoughUrgentLatencyHidingPre = true;
2794 }
2795
2796 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW;
2797
2798 #ifdef __DML_VBA_DEBUG__
2799 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst);
2800 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW);
2801 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth);
2802 #endif
2803
2804 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0
2805 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2)
2806 v->PrefetchModeSupported = true;
2807 else {
2808 v->PrefetchModeSupported = false;
2809 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__);
2810 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW);
2811 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not");
2812 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not");
2813 }
2814
2815 // PREVIOUS_ERROR
2816 // This error result check was done after the PrefetchModeSupported. So we will
2817 // still try to calculate flip schedule even prefetch mode not supported
2818 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2819 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) {
2820 v->PrefetchModeSupported = false;
2821 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__);
2822 }
2823 }
2824
2825 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) {
2826 v->BandwidthAvailableForImmediateFlip = v->ReturnBW;
2827 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2828 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
2829 - dml_max(
2830 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k]
2831 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k]
2832 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2833 v->DPPPerPlane[k]
2834 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2835 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2836 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2837 }
2838
2839 v->TotImmediateFlipBytes = 0;
2840 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2841 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
2842 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]);
2843 }
2844 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2845 CalculateFlipSchedule(
2846 mode_lib,
2847 k,
2848 HostVMInefficiencyFactor,
2849 v->UrgentExtraLatency,
2850 v->UrgentLatency,
2851 v->PDEAndMetaPTEBytesFrame[k],
2852 v->MetaRowByte[k],
2853 v->PixelPTEBytesPerRow[k]);
2854 }
2855
2856 v->total_dcn_read_bw_with_flip = 0.0;
2857 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0;
2858 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2859 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
2860 + dml_max3(
2861 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2862 v->DPPPerPlane[k] * v->final_flip_bw[k]
2863 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k]
2864 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k]
2865 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k],
2866 v->DPPPerPlane[k]
2867 * (v->final_flip_bw[k]
2868 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k]
2869 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k])
2870 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]);
2871 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst
2872 + dml_max3(
2873 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k],
2874 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k]
2875 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k],
2876 v->DPPPerPlane[k]
2877 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k]
2878 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]);
2879 }
2880 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW;
2881
2882 v->ImmediateFlipSupported = true;
2883 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) {
2884 #ifdef __DML_VBA_DEBUG__
2885 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip);
2886 #endif
2887 v->ImmediateFlipSupported = false;
2888 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth;
2889 }
2890 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2891 if (v->ImmediateFlipSupportedForPipe[k] == false) {
2892 #ifdef __DML_VBA_DEBUG__
2893 dml_print("DML::%s: Pipe %0d not supporting iflip\n",
2894 __func__, k);
2895 #endif
2896 v->ImmediateFlipSupported = false;
2897 }
2898 }
2899 } else {
2900 v->ImmediateFlipSupported = false;
2901 }
2902
2903 v->PrefetchAndImmediateFlipSupported =
2904 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable
2905 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) ||
2906 v->ImmediateFlipSupported)) ? true : false;
2907 #ifdef __DML_VBA_DEBUG__
2908 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported);
2909 dml_print("DML::%s: ImmediateFlipRequirement[0] %d\n", __func__, v->ImmediateFlipRequirement[0] == dm_immediate_flip_required);
2910 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported);
2911 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport);
2912 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable);
2913 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported);
2914 #endif
2915 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines);
2916
2917 v->VStartupLines = v->VStartupLines + 1;
2918 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines);
2919 ASSERT(v->PrefetchAndImmediateFlipSupported);
2920
2921 // Unbounded Request Enabled
2922 CalculateUnboundedRequestAndCompressedBufferSize(
2923 v->DETBufferSizeInKByte[0],
2924 v->ConfigReturnBufferSizeInKByte,
2925 v->UseUnboundedRequesting,
2926 v->TotalActiveDPP,
2927 NoChromaPlanes,
2928 v->MaxNumDPP,
2929 v->CompressedBufferSegmentSizeInkByte,
2930 v->Output,
2931 &v->UnboundedRequestEnabled,
2932 &v->CompressedBufferSizeInkByte);
2933
2934 //Watermarks and NB P-State/DRAM Clock Change Support
2935 {
2936 enum clock_change_support DRAMClockChangeSupport; // dummy
2937 CalculateWatermarksAndDRAMSpeedChangeSupport(
2938 mode_lib,
2939 PrefetchMode,
2940 v->DCFCLK,
2941 v->ReturnBW,
2942 v->UrgentLatency,
2943 v->UrgentExtraLatency,
2944 v->SOCCLK,
2945 v->DCFCLKDeepSleep,
2946 v->DETBufferSizeY,
2947 v->DETBufferSizeC,
2948 v->SwathHeightY,
2949 v->SwathHeightC,
2950 v->SwathWidthY,
2951 v->SwathWidthC,
2952 v->DPPPerPlane,
2953 v->BytePerPixelDETY,
2954 v->BytePerPixelDETC,
2955 v->UnboundedRequestEnabled,
2956 v->CompressedBufferSizeInkByte,
2957 &DRAMClockChangeSupport,
2958 &v->StutterExitWatermark,
2959 &v->StutterEnterPlusExitWatermark,
2960 &v->Z8StutterExitWatermark,
2961 &v->Z8StutterEnterPlusExitWatermark);
2962
2963 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
2964 if (v->WritebackEnable[k] == true) {
2965 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max(
2966 0,
2967 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark);
2968 } else {
2969 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0;
2970 }
2971 }
2972 }
2973
2974 //Display Pipeline Delivery Time in Prefetch, Groups
2975 CalculatePixelDeliveryTimes(
2976 v->NumberOfActivePlanes,
2977 v->VRatio,
2978 v->VRatioChroma,
2979 v->VRatioPrefetchY,
2980 v->VRatioPrefetchC,
2981 v->swath_width_luma_ub,
2982 v->swath_width_chroma_ub,
2983 v->DPPPerPlane,
2984 v->HRatio,
2985 v->HRatioChroma,
2986 v->PixelClock,
2987 v->PSCL_THROUGHPUT_LUMA,
2988 v->PSCL_THROUGHPUT_CHROMA,
2989 v->DPPCLK,
2990 v->BytePerPixelC,
2991 v->SourceScan,
2992 v->NumberOfCursors,
2993 v->CursorWidth,
2994 v->CursorBPP,
2995 v->BlockWidth256BytesY,
2996 v->BlockHeight256BytesY,
2997 v->BlockWidth256BytesC,
2998 v->BlockHeight256BytesC,
2999 v->DisplayPipeLineDeliveryTimeLuma,
3000 v->DisplayPipeLineDeliveryTimeChroma,
3001 v->DisplayPipeLineDeliveryTimeLumaPrefetch,
3002 v->DisplayPipeLineDeliveryTimeChromaPrefetch,
3003 v->DisplayPipeRequestDeliveryTimeLuma,
3004 v->DisplayPipeRequestDeliveryTimeChroma,
3005 v->DisplayPipeRequestDeliveryTimeLumaPrefetch,
3006 v->DisplayPipeRequestDeliveryTimeChromaPrefetch,
3007 v->CursorRequestDeliveryTime,
3008 v->CursorRequestDeliveryTimePrefetch);
3009
3010 CalculateMetaAndPTETimes(
3011 v->NumberOfActivePlanes,
3012 v->GPUVMEnable,
3013 v->MetaChunkSize,
3014 v->MinMetaChunkSizeBytes,
3015 v->HTotal,
3016 v->VRatio,
3017 v->VRatioChroma,
3018 v->DestinationLinesToRequestRowInVBlank,
3019 v->DestinationLinesToRequestRowInImmediateFlip,
3020 v->DCCEnable,
3021 v->PixelClock,
3022 v->BytePerPixelY,
3023 v->BytePerPixelC,
3024 v->SourceScan,
3025 v->dpte_row_height,
3026 v->dpte_row_height_chroma,
3027 v->meta_row_width,
3028 v->meta_row_width_chroma,
3029 v->meta_row_height,
3030 v->meta_row_height_chroma,
3031 v->meta_req_width,
3032 v->meta_req_width_chroma,
3033 v->meta_req_height,
3034 v->meta_req_height_chroma,
3035 v->dpte_group_bytes,
3036 v->PTERequestSizeY,
3037 v->PTERequestSizeC,
3038 v->PixelPTEReqWidthY,
3039 v->PixelPTEReqHeightY,
3040 v->PixelPTEReqWidthC,
3041 v->PixelPTEReqHeightC,
3042 v->dpte_row_width_luma_ub,
3043 v->dpte_row_width_chroma_ub,
3044 v->DST_Y_PER_PTE_ROW_NOM_L,
3045 v->DST_Y_PER_PTE_ROW_NOM_C,
3046 v->DST_Y_PER_META_ROW_NOM_L,
3047 v->DST_Y_PER_META_ROW_NOM_C,
3048 v->TimePerMetaChunkNominal,
3049 v->TimePerChromaMetaChunkNominal,
3050 v->TimePerMetaChunkVBlank,
3051 v->TimePerChromaMetaChunkVBlank,
3052 v->TimePerMetaChunkFlip,
3053 v->TimePerChromaMetaChunkFlip,
3054 v->time_per_pte_group_nom_luma,
3055 v->time_per_pte_group_vblank_luma,
3056 v->time_per_pte_group_flip_luma,
3057 v->time_per_pte_group_nom_chroma,
3058 v->time_per_pte_group_vblank_chroma,
3059 v->time_per_pte_group_flip_chroma);
3060
3061 CalculateVMGroupAndRequestTimes(
3062 v->NumberOfActivePlanes,
3063 v->GPUVMEnable,
3064 v->GPUVMMaxPageTableLevels,
3065 v->HTotal,
3066 v->BytePerPixelC,
3067 v->DestinationLinesToRequestVMInVBlank,
3068 v->DestinationLinesToRequestVMInImmediateFlip,
3069 v->DCCEnable,
3070 v->PixelClock,
3071 v->dpte_row_width_luma_ub,
3072 v->dpte_row_width_chroma_ub,
3073 v->vm_group_bytes,
3074 v->dpde0_bytes_per_frame_ub_l,
3075 v->dpde0_bytes_per_frame_ub_c,
3076 v->meta_pte_bytes_per_frame_ub_l,
3077 v->meta_pte_bytes_per_frame_ub_c,
3078 v->TimePerVMGroupVBlank,
3079 v->TimePerVMGroupFlip,
3080 v->TimePerVMRequestVBlank,
3081 v->TimePerVMRequestFlip);
3082
3083 // Min TTUVBlank
3084 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3085 if (PrefetchMode == 0) {
3086 v->AllowDRAMClockChangeDuringVBlank[k] = true;
3087 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3088 v->MinTTUVBlank[k] = dml_max(
3089 v->DRAMClockChangeWatermark,
3090 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark));
3091 } else if (PrefetchMode == 1) {
3092 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3093 v->AllowDRAMSelfRefreshDuringVBlank[k] = true;
3094 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark);
3095 } else {
3096 v->AllowDRAMClockChangeDuringVBlank[k] = false;
3097 v->AllowDRAMSelfRefreshDuringVBlank[k] = false;
3098 v->MinTTUVBlank[k] = v->UrgentWatermark;
3099 }
3100 if (!v->DynamicMetadataEnable[k])
3101 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k];
3102 }
3103
3104 // DCC Configuration
3105 v->ActiveDPPs = 0;
3106 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3107 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown,
3108 v->SourcePixelFormat[k],
3109 v->SurfaceWidthY[k],
3110 v->SurfaceWidthC[k],
3111 v->SurfaceHeightY[k],
3112 v->SurfaceHeightC[k],
3113 v->DETBufferSizeInKByte[k] * 1024,
3114 v->BlockHeight256BytesY[k],
3115 v->BlockHeight256BytesC[k],
3116 v->SurfaceTiling[k],
3117 v->BytePerPixelY[k],
3118 v->BytePerPixelC[k],
3119 v->BytePerPixelDETY[k],
3120 v->BytePerPixelDETC[k],
3121 v->SourceScan[k],
3122 &v->DCCYMaxUncompressedBlock[k],
3123 &v->DCCCMaxUncompressedBlock[k],
3124 &v->DCCYMaxCompressedBlock[k],
3125 &v->DCCCMaxCompressedBlock[k],
3126 &v->DCCYIndependentBlock[k],
3127 &v->DCCCIndependentBlock[k]);
3128 }
3129
3130 // VStartup Adjustment
3131 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3132 bool isInterlaceTiming;
3133 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k];
3134 #ifdef __DML_VBA_DEBUG__
3135 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]);
3136 #endif
3137
3138 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin;
3139
3140 #ifdef __DML_VBA_DEBUG__
3141 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin);
3142 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]);
3143 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3144 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]);
3145 #endif
3146
3147 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin;
3148 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) {
3149 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin;
3150 }
3151
3152 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP);
3153
3154 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k])
3155 - v->VFrontPorch[k])
3156 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0))
3157 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0;
3158
3159 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]);
3160
3161 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k])
3162 <= (isInterlaceTiming ?
3163 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) :
3164 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) {
3165 v->VREADY_AT_OR_AFTER_VSYNC[k] = true;
3166 } else {
3167 v->VREADY_AT_OR_AFTER_VSYNC[k] = false;
3168 }
3169 #ifdef __DML_VBA_DEBUG__
3170 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]);
3171 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]);
3172 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]);
3173 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]);
3174 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]);
3175 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]);
3176 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]);
3177 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]);
3178 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]);
3179 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]);
3180 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]);
3181 #endif
3182 }
3183
3184 {
3185 //Maximum Bandwidth Used
3186 double TotalWRBandwidth = 0;
3187 double MaxPerPlaneVActiveWRBandwidth = 0;
3188 double WRBandwidth = 0;
3189 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3190 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) {
3191 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3192 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4;
3193 } else if (v->WritebackEnable[k] == true) {
3194 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3195 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8;
3196 }
3197 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth;
3198 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth);
3199 }
3200
3201 v->TotalDataReadBandwidth = 0;
3202 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3203 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k];
3204 }
3205 }
3206 // Stutter Efficiency
3207 CalculateStutterEfficiency(
3208 mode_lib,
3209 v->CompressedBufferSizeInkByte,
3210 v->UnboundedRequestEnabled,
3211 v->ConfigReturnBufferSizeInKByte,
3212 v->MetaFIFOSizeInKEntries,
3213 v->ZeroSizeBufferEntries,
3214 v->NumberOfActivePlanes,
3215 v->ROBBufferSizeInKByte,
3216 v->TotalDataReadBandwidth,
3217 v->DCFCLK,
3218 v->ReturnBW,
3219 v->COMPBUF_RESERVED_SPACE_64B,
3220 v->COMPBUF_RESERVED_SPACE_ZS,
3221 v->SRExitTime,
3222 v->SRExitZ8Time,
3223 v->SynchronizedVBlank,
3224 v->StutterEnterPlusExitWatermark,
3225 v->Z8StutterEnterPlusExitWatermark,
3226 v->ProgressiveToInterlaceUnitInOPP,
3227 v->Interlace,
3228 v->MinTTUVBlank,
3229 v->DPPPerPlane,
3230 v->DETBufferSizeY,
3231 v->BytePerPixelY,
3232 v->BytePerPixelDETY,
3233 v->SwathWidthY,
3234 v->SwathHeightY,
3235 v->SwathHeightC,
3236 v->DCCRateLuma,
3237 v->DCCRateChroma,
3238 v->DCCFractionOfZeroSizeRequestsLuma,
3239 v->DCCFractionOfZeroSizeRequestsChroma,
3240 v->HTotal,
3241 v->VTotal,
3242 v->PixelClock,
3243 v->VRatio,
3244 v->SourceScan,
3245 v->BlockHeight256BytesY,
3246 v->BlockWidth256BytesY,
3247 v->BlockHeight256BytesC,
3248 v->BlockWidth256BytesC,
3249 v->DCCYMaxUncompressedBlock,
3250 v->DCCCMaxUncompressedBlock,
3251 v->VActive,
3252 v->DCCEnable,
3253 v->WritebackEnable,
3254 v->ReadBandwidthPlaneLuma,
3255 v->ReadBandwidthPlaneChroma,
3256 v->meta_row_bw,
3257 v->dpte_row_bw,
3258 &v->StutterEfficiencyNotIncludingVBlank,
3259 &v->StutterEfficiency,
3260 &v->NumberOfStutterBurstsPerFrame,
3261 &v->Z8StutterEfficiencyNotIncludingVBlank,
3262 &v->Z8StutterEfficiency,
3263 &v->Z8NumberOfStutterBurstsPerFrame,
3264 &v->StutterPeriod);
3265 }
3266
3267 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib)
3268 {
3269 struct vba_vars_st *v = &mode_lib->vba;
3270 // Display Pipe Configuration
3271 double BytePerPixDETY[DC__NUM_DPP__MAX];
3272 double BytePerPixDETC[DC__NUM_DPP__MAX];
3273 int BytePerPixY[DC__NUM_DPP__MAX];
3274 int BytePerPixC[DC__NUM_DPP__MAX];
3275 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX];
3276 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX];
3277 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX];
3278 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX];
3279 double dummy1[DC__NUM_DPP__MAX];
3280 double dummy2[DC__NUM_DPP__MAX];
3281 double dummy3[DC__NUM_DPP__MAX];
3282 double dummy4[DC__NUM_DPP__MAX];
3283 int dummy5[DC__NUM_DPP__MAX];
3284 int dummy6[DC__NUM_DPP__MAX];
3285 bool dummy7[DC__NUM_DPP__MAX];
3286 bool dummysinglestring;
3287
3288 unsigned int k;
3289
3290 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
3291
3292 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3293 v->SourcePixelFormat[k],
3294 v->SurfaceTiling[k],
3295 &BytePerPixY[k],
3296 &BytePerPixC[k],
3297 &BytePerPixDETY[k],
3298 &BytePerPixDETC[k],
3299 &Read256BytesBlockHeightY[k],
3300 &Read256BytesBlockHeightC[k],
3301 &Read256BytesBlockWidthY[k],
3302 &Read256BytesBlockWidthC[k]);
3303 }
3304
3305 CalculateSwathAndDETConfiguration(
3306 false,
3307 v->NumberOfActivePlanes,
3308 mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
3309 v->DETBufferSizeInKByte,
3310 dummy1,
3311 dummy2,
3312 v->SourceScan,
3313 v->SourcePixelFormat,
3314 v->SurfaceTiling,
3315 v->ViewportWidth,
3316 v->ViewportHeight,
3317 v->SurfaceWidthY,
3318 v->SurfaceWidthC,
3319 v->SurfaceHeightY,
3320 v->SurfaceHeightC,
3321 Read256BytesBlockHeightY,
3322 Read256BytesBlockHeightC,
3323 Read256BytesBlockWidthY,
3324 Read256BytesBlockWidthC,
3325 v->ODMCombineEnabled,
3326 v->BlendingAndTiming,
3327 BytePerPixY,
3328 BytePerPixC,
3329 BytePerPixDETY,
3330 BytePerPixDETC,
3331 v->HActive,
3332 v->HRatio,
3333 v->HRatioChroma,
3334 v->DPPPerPlane,
3335 dummy5,
3336 dummy6,
3337 dummy3,
3338 dummy4,
3339 v->SwathHeightY,
3340 v->SwathHeightC,
3341 v->DETBufferSizeY,
3342 v->DETBufferSizeC,
3343 dummy7,
3344 &dummysinglestring);
3345 }
3346
/*
 * CalculateTWait - pick the wait time that applies to a prefetch mode.
 *
 * Mode 0 returns the largest of (DRAM clock change latency + urgent latency),
 * the self-refresh enter+exit time and the urgent latency. Mode 1 returns the
 * larger of the self-refresh enter+exit time and the urgent latency. Any
 * other mode returns the urgent latency alone.
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	switch (PrefetchMode) {
	case 0:
		return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency));
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
3357
3358 double dml31_CalculateWriteBackDISPCLK(
3359 enum source_format_class WritebackPixelFormat,
3360 double PixelClock,
3361 double WritebackHRatio,
3362 double WritebackVRatio,
3363 unsigned int WritebackHTaps,
3364 unsigned int WritebackVTaps,
3365 long WritebackSourceWidth,
3366 long WritebackDestinationWidth,
3367 unsigned int HTotal,
3368 unsigned int WritebackLineBufferSize)
3369 {
3370 double DISPCLK_H, DISPCLK_V, DISPCLK_HB;
3371
3372 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio;
3373 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal;
3374 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth;
3375 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB);
3376 }
3377
3378 static double CalculateWriteBackDelay(
3379 enum source_format_class WritebackPixelFormat,
3380 double WritebackHRatio,
3381 double WritebackVRatio,
3382 unsigned int WritebackVTaps,
3383 int WritebackDestinationWidth,
3384 int WritebackDestinationHeight,
3385 int WritebackSourceHeight,
3386 unsigned int HTotal)
3387 {
3388 double CalculateWriteBackDelay;
3389 double Line_length;
3390 double Output_lines_last_notclamped;
3391 double WritebackVInit;
3392
3393 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2;
3394 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps);
3395 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1);
3396 if (Output_lines_last_notclamped < 0) {
3397 CalculateWriteBackDelay = 0;
3398 } else {
3399 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80;
3400 }
3401 return CalculateWriteBackDelay;
3402 }
3403
/*
 * CalculateVupdateAndDynamicMetadataParameters - derive the VUPDATE/VREADY
 * pixel counts, the resulting setup time and the dynamic metadata timings.
 *
 * With R = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK):
 *
 *   *VUpdateWidthPix  = ceil((14 / DCFClkDeepSleep + 12 / DPPCLK + R) * PixelClock)
 *   *VReadyOffsetPix  = ceil(max(150 / DPPCLK,
 *                                R + 20 / DCFClkDeepSleep + 10 / DPPCLK) * PixelClock)
 *   *VUpdateOffsetPix = ceil(HTotal / 4)
 *   *TSetup           = (sum of the three values above) / PixelClock
 *   *Tdmbf            = DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   *Tdmec            = HTotal / PixelClock
 *   *Tdmsks           = VBlank * HTotal / PixelClock / 2 when
 *                       DynamicMetadataLinesBeforeActiveRequired is 0,
 *                       otherwise that line count * HTotal / PixelClock;
 *                       halved again when InterlaceEnable is 1 and
 *                       ProgressiveToInterlaceUnitInOPP is false.
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/* The width and ready offset are doubles; only the update offset is an int. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3447
3448 static void CalculateRowBandwidth(
3449 bool GPUVMEnable,
3450 enum source_format_class SourcePixelFormat,
3451 double VRatio,
3452 double VRatioChroma,
3453 bool DCCEnable,
3454 double LineTime,
3455 unsigned int MetaRowByteLuma,
3456 unsigned int MetaRowByteChroma,
3457 unsigned int meta_row_height_luma,
3458 unsigned int meta_row_height_chroma,
3459 unsigned int PixelPTEBytesPerRowLuma,
3460 unsigned int PixelPTEBytesPerRowChroma,
3461 unsigned int dpte_row_height_luma,
3462 unsigned int dpte_row_height_chroma,
3463 double *meta_row_bw,
3464 double *dpte_row_bw)
3465 {
3466 if (DCCEnable != true) {
3467 *meta_row_bw = 0;
3468 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3469 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
3470 } else {
3471 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
3472 }
3473
3474 if (GPUVMEnable != true) {
3475 *dpte_row_bw = 0;
3476 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
3477 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
3478 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
3479 } else {
3480 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
3481 }
3482 }
3483
3484 static void CalculateFlipSchedule(
3485 struct display_mode_lib *mode_lib,
3486 unsigned int k,
3487 double HostVMInefficiencyFactor,
3488 double UrgentExtraLatency,
3489 double UrgentLatency,
3490 double PDEAndMetaPTEBytesPerFrame,
3491 double MetaRowBytes,
3492 double DPTEBytesPerRow)
3493 {
3494 struct vba_vars_st *v = &mode_lib->vba;
3495 double min_row_time = 0.0;
3496 unsigned int HostVMDynamicLevelsTrips;
3497 double TimeForFetchingMetaPTEImmediateFlip;
3498 double TimeForFetchingRowInVBlankImmediateFlip;
3499 double ImmediateFlipBW = 1.0;
3500 double LineTime = v->HTotal[k] / v->PixelClock[k];
3501
3502 if (v->GPUVMEnable == true && v->HostVMEnable == true) {
3503 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
3504 } else {
3505 HostVMDynamicLevelsTrips = 0;
3506 }
3507
3508 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
3509 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
3510 }
3511
3512 if (v->GPUVMEnable == true) {
3513 TimeForFetchingMetaPTEImmediateFlip = dml_max3(
3514 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
3515 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
3516 LineTime / 4.0);
3517 } else {
3518 TimeForFetchingMetaPTEImmediateFlip = 0;
3519 }
3520
3521 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
3522 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3523 TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
3524 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
3525 UrgentLatency * (HostVMDynamicLevelsTrips + 1),
3526 LineTime / 4);
3527 } else {
3528 TimeForFetchingRowInVBlankImmediateFlip = 0;
3529 }
3530
3531 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;
3532
3533 if (v->GPUVMEnable == true) {
3534 v->final_flip_bw[k] = dml_max(
3535 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
3536 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
3537 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
3538 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
3539 } else {
3540 v->final_flip_bw[k] = 0;
3541 }
3542
3543 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3544 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3545 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3546 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3547 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3548 } else {
3549 min_row_time = dml_min4(
3550 v->dpte_row_height[k] * LineTime / v->VRatio[k],
3551 v->meta_row_height[k] * LineTime / v->VRatio[k],
3552 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
3553 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
3554 }
3555 } else {
3556 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
3557 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
3558 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
3559 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
3560 } else {
3561 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
3562 }
3563 }
3564
3565 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
3566 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
3567 v->ImmediateFlipSupportedForPipe[k] = false;
3568 } else {
3569 v->ImmediateFlipSupportedForPipe[k] = true;
3570 }
3571
3572 #ifdef __DML_VBA_DEBUG__
3573 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
3574 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
3575 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
3576 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
3577 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
3578 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
3579 #endif
3580
3581 }
3582
3583 static double TruncToValidBPP(
3584 double LinkBitRate,
3585 int Lanes,
3586 int HTotal,
3587 int HActive,
3588 double PixelClock,
3589 double DesiredBPP,
3590 bool DSCEnable,
3591 enum output_encoder_class Output,
3592 enum output_format_class Format,
3593 unsigned int DSCInputBitPerComponent,
3594 int DSCSlices,
3595 int AudioRate,
3596 int AudioLayout,
3597 enum odm_combine_mode ODMCombine)
3598 {
3599 double MaxLinkBPP;
3600 int MinDSCBPP;
3601 double MaxDSCBPP;
3602 int NonDSCBPP0;
3603 int NonDSCBPP1;
3604 int NonDSCBPP2;
3605
3606 if (Format == dm_420) {
3607 NonDSCBPP0 = 12;
3608 NonDSCBPP1 = 15;
3609 NonDSCBPP2 = 18;
3610 MinDSCBPP = 6;
3611 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
3612 } else if (Format == dm_444) {
3613 NonDSCBPP0 = 24;
3614 NonDSCBPP1 = 30;
3615 NonDSCBPP2 = 36;
3616 MinDSCBPP = 8;
3617 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
3618 } else {
3619
3620 NonDSCBPP0 = 16;
3621 NonDSCBPP1 = 20;
3622 NonDSCBPP2 = 24;
3623
3624 if (Format == dm_n422) {
3625 MinDSCBPP = 7;
3626 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
3627 } else {
3628 MinDSCBPP = 8;
3629 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
3630 }
3631 }
3632
3633 if (DSCEnable && Output == dm_dp) {
3634 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
3635 } else {
3636 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
3637 }
3638
3639 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
3640 MaxLinkBPP = 16;
3641 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
3642 MaxLinkBPP = 32;
3643 }
3644
3645 if (DesiredBPP == 0) {
3646 if (DSCEnable) {
3647 if (MaxLinkBPP < MinDSCBPP) {
3648 return BPP_INVALID;
3649 } else if (MaxLinkBPP >= MaxDSCBPP) {
3650 return MaxDSCBPP;
3651 } else {
3652 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
3653 }
3654 } else {
3655 if (MaxLinkBPP >= NonDSCBPP2) {
3656 return NonDSCBPP2;
3657 } else if (MaxLinkBPP >= NonDSCBPP1) {
3658 return NonDSCBPP1;
3659 } else if (MaxLinkBPP >= NonDSCBPP0) {
3660 return 16.0;
3661 } else {
3662 return BPP_INVALID;
3663 }
3664 }
3665 } else {
3666 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0))
3667 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
3668 return BPP_INVALID;
3669 } else {
3670 return DesiredBPP;
3671 }
3672 }
3673 }
3674
3675 static noinline void CalculatePrefetchSchedulePerPlane(
3676 struct display_mode_lib *mode_lib,
3677 double HostVMInefficiencyFactor,
3678 int i,
3679 unsigned j,
3680 unsigned k)
3681 {
3682 struct vba_vars_st *v = &mode_lib->vba;
3683 Pipe myPipe;
3684
3685 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
3686 myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
3687 myPipe.PixelClock = v->PixelClock[k];
3688 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
3689 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
3690 myPipe.ScalerEnabled = v->ScalerEnabled[k];
3691 myPipe.VRatio = mode_lib->vba.VRatio[k];
3692 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
3693
3694 myPipe.SourceScan = v->SourceScan[k];
3695 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
3696 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
3697 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
3698 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
3699 myPipe.InterlaceEnable = v->Interlace[k];
3700 myPipe.NumberOfCursors = v->NumberOfCursors[k];
3701 myPipe.VBlank = v->VTotal[k] - v->VActive[k];
3702 myPipe.HTotal = v->HTotal[k];
3703 myPipe.DCCEnable = v->DCCEnable[k];
3704 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
3705 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
3706 myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
3707 myPipe.BytePerPixelY = v->BytePerPixelY[k];
3708 myPipe.BytePerPixelC = v->BytePerPixelC[k];
3709 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
3710 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
3711 mode_lib,
3712 HostVMInefficiencyFactor,
3713 &myPipe,
3714 v->DSCDelayPerState[i][k],
3715 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
3716 v->DPPCLKDelaySCL,
3717 v->DPPCLKDelaySCLLBOnly,
3718 v->DPPCLKDelayCNVCCursor,
3719 v->DISPCLKDelaySubtotal,
3720 v->SwathWidthYThisState[k] / v->HRatio[k],
3721 v->OutputFormat[k],
3722 v->MaxInterDCNTileRepeaters,
3723 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
3724 v->MaximumVStartup[i][j][k],
3725 v->GPUVMMaxPageTableLevels,
3726 v->GPUVMEnable,
3727 v->HostVMEnable,
3728 v->HostVMMaxNonCachedPageTableLevels,
3729 v->HostVMMinPageSize,
3730 v->DynamicMetadataEnable[k],
3731 v->DynamicMetadataVMEnabled,
3732 v->DynamicMetadataLinesBeforeActiveRequired[k],
3733 v->DynamicMetadataTransmittedBytes[k],
3734 v->UrgLatency[i],
3735 v->ExtraLatency,
3736 v->TimeCalc,
3737 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
3738 v->MetaRowBytes[i][j][k],
3739 v->DPTEBytesPerRow[i][j][k],
3740 v->PrefetchLinesY[i][j][k],
3741 v->SwathWidthYThisState[k],
3742 v->PrefillY[k],
3743 v->MaxNumSwY[k],
3744 v->PrefetchLinesC[i][j][k],
3745 v->SwathWidthCThisState[k],
3746 v->PrefillC[k],
3747 v->MaxNumSwC[k],
3748 v->swath_width_luma_ub_this_state[k],
3749 v->swath_width_chroma_ub_this_state[k],
3750 v->SwathHeightYThisState[k],
3751 v->SwathHeightCThisState[k],
3752 v->TWait,
3753 &v->DSTXAfterScaler[k],
3754 &v->DSTYAfterScaler[k],
3755 &v->LineTimesForPrefetch[k],
3756 &v->PrefetchBW[k],
3757 &v->LinesForMetaPTE[k],
3758 &v->LinesForMetaAndDPTERow[k],
3759 &v->VRatioPreY[i][j][k],
3760 &v->VRatioPreC[i][j][k],
3761 &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
3762 &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
3763 &v->NoTimeForDynamicMetadata[i][j][k],
3764 &v->Tno_bw[k],
3765 &v->prefetch_vmrow_bw[k],
3766 &v->dummy7[k],
3767 &v->dummy8[k],
3768 &v->dummy13[k],
3769 &v->VUpdateOffsetPix[k],
3770 &v->VUpdateWidthPix[k],
3771 &v->VReadyOffsetPix[k]);
3772 }
3773
3774 static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int DETBufferSizeInKByte[])
3775 {
3776 int i, total_pipes = 0;
3777 for (i = 0; i < NumberOfActivePlanes; i++)
3778 total_pipes += NoOfDPPThisState[i];
3779 DETBufferSizeInKByte[0] = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB) / 64 / total_pipes) * 64;
3780 if (DETBufferSizeInKByte[0] > DCN3_15_MAX_DET_SIZE)
3781 DETBufferSizeInKByte[0] = DCN3_15_MAX_DET_SIZE;
3782 for (i = 1; i < NumberOfActivePlanes; i++)
3783 DETBufferSizeInKByte[i] = DETBufferSizeInKByte[0];
3784 }
3785
3786
3787 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
3788 {
3789 struct vba_vars_st *v = &mode_lib->vba;
3790
3791 int i, j;
3792 unsigned int k, m;
3793 int ReorderingBytes;
3794 int MinPrefetchMode = 0, MaxPrefetchMode = 2;
3795 bool NoChroma = true;
3796 bool EnoughWritebackUnits = true;
3797 bool P2IWith420 = false;
3798 bool DSCOnlyIfNecessaryWithBPP = false;
3799 bool DSC422NativeNotSupported = false;
3800 double MaxTotalVActiveRDBandwidth;
3801 bool ViewportExceedsSurface = false;
3802 bool FMTBufferExceeded = false;
3803
3804 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
3805
3806 CalculateMinAndMaxPrefetchMode(
3807 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank,
3808 &MinPrefetchMode, &MaxPrefetchMode);
3809
3810 /*Scale Ratio, taps Support Check*/
3811
3812 v->ScaleRatioAndTapsSupport = true;
3813 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3814 if (v->ScalerEnabled[k] == false
3815 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3816 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3817 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3818 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0
3819 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) {
3820 v->ScaleRatioAndTapsSupport = false;
3821 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0
3822 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio
3823 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k]
3824 || v->VRatio[k] > v->vtaps[k]
3825 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
3826 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16
3827 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe
3828 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1
3829 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1)
3830 || v->HRatioChroma[k] > v->MaxHSCLRatio
3831 || v->VRatioChroma[k] > v->MaxVSCLRatio
3832 || v->HRatioChroma[k] > v->HTAPsChroma[k]
3833 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) {
3834 v->ScaleRatioAndTapsSupport = false;
3835 }
3836 }
3837 /*Source Format, Pixel Format and Scan Support Check*/
3838
3839 v->SourceFormatPixelAndScanSupport = true;
3840 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3841 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true))
3842 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t
3843 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) {
3844 v->SourceFormatPixelAndScanSupport = false;
3845 }
3846 }
3847 /*Bandwidth Support Check*/
3848
3849 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3850 dml30_CalculateBytePerPixelAnd256BBlockSizes(
3851 v->SourcePixelFormat[k],
3852 v->SurfaceTiling[k],
3853 &v->BytePerPixelY[k],
3854 &v->BytePerPixelC[k],
3855 &v->BytePerPixelInDETY[k],
3856 &v->BytePerPixelInDETC[k],
3857 &v->Read256BlockHeightY[k],
3858 &v->Read256BlockHeightC[k],
3859 &v->Read256BlockWidthY[k],
3860 &v->Read256BlockWidthC[k]);
3861 }
3862 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3863 if (v->SourceScan[k] != dm_vert) {
3864 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k];
3865 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k];
3866 } else {
3867 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k];
3868 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k];
3869 }
3870 }
3871 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3872 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0)
3873 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
3874 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0)
3875 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0;
3876 }
3877 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3878 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) {
3879 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3880 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0;
3881 } else if (v->WritebackEnable[k] == true) {
3882 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k]
3883 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0;
3884 } else {
3885 v->WriteBandwidth[k] = 0.0;
3886 }
3887 }
3888
3889 /*Writeback Latency support check*/
3890
3891 v->WritebackLatencySupport = true;
3892 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3893 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) {
3894 v->WritebackLatencySupport = false;
3895 }
3896 }
3897
3898 /*Writeback Mode Support Check*/
3899
3900 v->TotalNumberOfActiveWriteback = 0;
3901 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3902 if (v->WritebackEnable[k] == true) {
3903 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1;
3904 }
3905 }
3906
3907 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) {
3908 EnoughWritebackUnits = false;
3909 }
3910
3911 /*Writeback Scale Ratio and Taps Support Check*/
3912
3913 v->WritebackScaleRatioAndTapsSupport = true;
3914 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3915 if (v->WritebackEnable[k] == true) {
3916 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio
3917 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio
3918 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio
3919 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps
3920 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps
3921 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k]
3922 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) {
3923 v->WritebackScaleRatioAndTapsSupport = false;
3924 }
3925 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) {
3926 v->WritebackScaleRatioAndTapsSupport = false;
3927 }
3928 }
3929 }
3930 /*Maximum DISPCLK/DPPCLK Support check*/
3931
3932 v->WritebackRequiredDISPCLK = 0.0;
3933 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3934 if (v->WritebackEnable[k] == true) {
3935 v->WritebackRequiredDISPCLK = dml_max(
3936 v->WritebackRequiredDISPCLK,
3937 dml31_CalculateWriteBackDISPCLK(
3938 v->WritebackPixelFormat[k],
3939 v->PixelClock[k],
3940 v->WritebackHRatio[k],
3941 v->WritebackVRatio[k],
3942 v->WritebackHTaps[k],
3943 v->WritebackVTaps[k],
3944 v->WritebackSourceWidth[k],
3945 v->WritebackDestinationWidth[k],
3946 v->HTotal[k],
3947 v->WritebackLineBufferSize));
3948 }
3949 }
3950 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3951 if (v->HRatio[k] > 1.0) {
3952 v->PSCL_FACTOR[k] = dml_min(
3953 v->MaxDCHUBToPSCLThroughput,
3954 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0));
3955 } else {
3956 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3957 }
3958 if (v->BytePerPixelC[k] == 0.0) {
3959 v->PSCL_FACTOR_CHROMA[k] = 0.0;
3960 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3961 * dml_max3(
3962 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3963 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3964 1.0);
3965 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3966 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3967 }
3968 } else {
3969 if (v->HRatioChroma[k] > 1.0) {
3970 v->PSCL_FACTOR_CHROMA[k] = dml_min(
3971 v->MaxDCHUBToPSCLThroughput,
3972 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0));
3973 } else {
3974 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput);
3975 }
3976 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k]
3977 * dml_max5(
3978 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]),
3979 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k],
3980 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]),
3981 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k],
3982 1.0);
3983 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0)
3984 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) {
3985 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k];
3986 }
3987 }
3988 }
3989 for (k = 0; k < v->NumberOfActivePlanes; k++) {
3990 int MaximumSwathWidthSupportLuma;
3991 int MaximumSwathWidthSupportChroma;
3992
3993 if (v->SurfaceTiling[k] == dm_sw_linear) {
3994 MaximumSwathWidthSupportLuma = 8192.0;
3995 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) {
3996 MaximumSwathWidthSupportLuma = 2880.0;
3997 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) {
3998 MaximumSwathWidthSupportLuma = 3840.0;
3999 } else {
4000 MaximumSwathWidthSupportLuma = 5760.0;
4001 }
4002
4003 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) {
4004 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0;
4005 } else {
4006 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma;
4007 }
4008 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k]
4009 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0));
4010 if (v->BytePerPixelC[k] == 0.0) {
4011 v->MaximumSwathWidthInLineBufferChroma = 0;
4012 } else {
4013 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k]
4014 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0));
4015 }
4016 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma);
4017 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma);
4018 }
4019
4020 CalculateSwathAndDETConfiguration(
4021 true,
4022 v->NumberOfActivePlanes,
4023 mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
4024 v->DETBufferSizeInKByte,
4025 v->MaximumSwathWidthLuma,
4026 v->MaximumSwathWidthChroma,
4027 v->SourceScan,
4028 v->SourcePixelFormat,
4029 v->SurfaceTiling,
4030 v->ViewportWidth,
4031 v->ViewportHeight,
4032 v->SurfaceWidthY,
4033 v->SurfaceWidthC,
4034 v->SurfaceHeightY,
4035 v->SurfaceHeightC,
4036 v->Read256BlockHeightY,
4037 v->Read256BlockHeightC,
4038 v->Read256BlockWidthY,
4039 v->Read256BlockWidthC,
4040 v->odm_combine_dummy,
4041 v->BlendingAndTiming,
4042 v->BytePerPixelY,
4043 v->BytePerPixelC,
4044 v->BytePerPixelInDETY,
4045 v->BytePerPixelInDETC,
4046 v->HActive,
4047 v->HRatio,
4048 v->HRatioChroma,
4049 v->NoOfDPPThisState,
4050 v->swath_width_luma_ub_this_state,
4051 v->swath_width_chroma_ub_this_state,
4052 v->SwathWidthYThisState,
4053 v->SwathWidthCThisState,
4054 v->SwathHeightYThisState,
4055 v->SwathHeightCThisState,
4056 v->DETBufferSizeYThisState,
4057 v->DETBufferSizeCThisState,
4058 v->SingleDPPViewportSizeSupportPerPlane,
4059 &v->ViewportSizeSupport[0][0]);
4060
4061 for (i = 0; i < v->soc.num_states; i++) {
4062 for (j = 0; j < 2; j++) {
4063 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed);
4064 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed);
4065 v->RequiredDISPCLK[i][j] = 0.0;
4066 v->DISPCLK_DPPCLK_Support[i][j] = true;
4067 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4068 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4069 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4070 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i]
4071 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4072 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4073 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k]
4074 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4075 }
4076 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4077 * (1 + v->DISPCLKRampingMargin / 100.0);
4078 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i]
4079 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4080 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4081 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2
4082 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4083 }
4084 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4085 * (1 + v->DISPCLKRampingMargin / 100.0);
4086 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i]
4087 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4088 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4089 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4
4090 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4091 }
4092
4093 if (v->ODMCombinePolicy == dm_odm_combine_policy_none
4094 || !(v->Output[k] == dm_dp ||
4095 v->Output[k] == dm_dp2p0 ||
4096 v->Output[k] == dm_edp)) {
4097 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4098 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4099
4100 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4101 FMTBufferExceeded = true;
4102 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) {
4103 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4104 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4105 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1
4106 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) {
4107 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4108 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4109 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) {
4110 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4111 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4112 } else {
4113 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4114 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine;
4115 }
4116 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH
4117 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4118 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) {
4119 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4120 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4121 } else {
4122 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4123 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4124 }
4125 }
4126 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH
4127 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) {
4128 if (v->Output[k] == dm_hdmi) {
4129 FMTBufferExceeded = true;
4130 } else if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) {
4131 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1;
4132 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1;
4133
4134 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH)
4135 FMTBufferExceeded = true;
4136 } else {
4137 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1;
4138 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1;
4139 }
4140 }
4141 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4142 v->MPCCombine[i][j][k] = false;
4143 v->NoOfDPP[i][j][k] = 4;
4144 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4;
4145 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4146 v->MPCCombine[i][j][k] = false;
4147 v->NoOfDPP[i][j][k] = 2;
4148 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2;
4149 } else if ((v->WhenToDoMPCCombine == dm_mpc_never
4150 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4151 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) {
4152 v->MPCCombine[i][j][k] = false;
4153 v->NoOfDPP[i][j][k] = 1;
4154 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4155 } else {
4156 v->MPCCombine[i][j][k] = true;
4157 v->NoOfDPP[i][j][k] = 2;
4158 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4159 }
4160 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4161 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4162 > v->MaxDppclkRoundedDownToDFSGranularity)
4163 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4164 v->DISPCLK_DPPCLK_Support[i][j] = false;
4165 }
4166 if (mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[k] > DCN3_15_MAX_DET_SIZE && v->NoOfDPP[i][j][k] < 2) {
4167 v->MPCCombine[i][j][k] = true;
4168 v->NoOfDPP[i][j][k] = 2;
4169 }
4170 }
4171 v->TotalNumberOfActiveDPP[i][j] = 0;
4172 v->TotalNumberOfSingleDPPPlanes[i][j] = 0;
4173 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4174 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4175 if (v->NoOfDPP[i][j][k] == 1)
4176 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1;
4177 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4178 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha)
4179 NoChroma = false;
4180 }
4181
4182 // UPTO
4183 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never
4184 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) {
4185 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) {
4186 double BWOfNonSplitPlaneOfMaximumBandwidth;
4187 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth;
4188 BWOfNonSplitPlaneOfMaximumBandwidth = 0;
4189 NumberOfNonSplitPlaneOfMaximumBandwidth = 0;
4190 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4191 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth
4192 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) {
4193 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
4194 NumberOfNonSplitPlaneOfMaximumBandwidth = k;
4195 }
4196 }
4197 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true;
4198 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2;
4199 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] =
4200 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth]
4201 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2;
4202 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1;
4203 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1;
4204 }
4205 }
4206 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) {
4207 v->RequiredDISPCLK[i][j] = 0.0;
4208 v->DISPCLK_DPPCLK_Support[i][j] = true;
4209 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4210 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled;
4211 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) {
4212 v->MPCCombine[i][j][k] = true;
4213 v->NoOfDPP[i][j][k] = 2;
4214 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4215 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0;
4216 } else {
4217 v->MPCCombine[i][j][k] = false;
4218 v->NoOfDPP[i][j][k] = 1;
4219 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k]
4220 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4221 }
4222 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1]
4223 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) {
4224 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4225 * (1.0 + v->DISPCLKRampingMargin / 100.0);
4226 } else {
4227 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0);
4228 }
4229 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK);
4230 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0)
4231 > v->MaxDppclkRoundedDownToDFSGranularity)
4232 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) {
4233 v->DISPCLK_DPPCLK_Support[i][j] = false;
4234 }
4235 }
4236 v->TotalNumberOfActiveDPP[i][j] = 0.0;
4237 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4238 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4239 }
4240 }
4241 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK);
4242 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) {
4243 v->DISPCLK_DPPCLK_Support[i][j] = false;
4244 }
4245 }
4246 }
4247
4248 /*Total Available Pipes Support Check*/
4249
4250 for (i = 0; i < v->soc.num_states; i++) {
4251 for (j = 0; j < 2; j++) {
4252 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) {
4253 v->TotalAvailablePipesSupport[i][j] = true;
4254 } else {
4255 v->TotalAvailablePipesSupport[i][j] = false;
4256 }
4257 }
4258 }
4259 /*Display IO and DSC Support Check*/
4260
4261 v->NonsupportedDSCInputBPC = false;
4262 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4263 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0)
4264 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) {
4265 v->NonsupportedDSCInputBPC = true;
4266 }
4267 }
4268
4269 /*Number Of DSC Slices*/
4270 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4271 if (v->BlendingAndTiming[k] == k) {
4272 if (v->PixelClockBackEnd[k] > 3200) {
4273 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0);
4274 } else if (v->PixelClockBackEnd[k] > 1360) {
4275 v->NumberOfDSCSlices[k] = 8;
4276 } else if (v->PixelClockBackEnd[k] > 680) {
4277 v->NumberOfDSCSlices[k] = 4;
4278 } else if (v->PixelClockBackEnd[k] > 340) {
4279 v->NumberOfDSCSlices[k] = 2;
4280 } else {
4281 v->NumberOfDSCSlices[k] = 1;
4282 }
4283 } else {
4284 v->NumberOfDSCSlices[k] = 0;
4285 }
4286 }
4287
4288 for (i = 0; i < v->soc.num_states; i++) {
4289 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4290 v->RequiresDSC[i][k] = false;
4291 v->RequiresFEC[i][k] = false;
4292 if (v->BlendingAndTiming[k] == k) {
4293 if (v->Output[k] == dm_hdmi) {
4294 v->RequiresDSC[i][k] = false;
4295 v->RequiresFEC[i][k] = false;
4296 v->OutputBppPerState[i][k] = TruncToValidBPP(
4297 dml_min(600.0, v->PHYCLKPerState[i]) * 10,
4298 3,
4299 v->HTotal[k],
4300 v->HActive[k],
4301 v->PixelClockBackEnd[k],
4302 v->ForcedOutputLinkBPP[k],
4303 false,
4304 v->Output[k],
4305 v->OutputFormat[k],
4306 v->DSCInputBitPerComponent[k],
4307 v->NumberOfDSCSlices[k],
4308 v->AudioSampleRate[k],
4309 v->AudioSampleLayout[k],
4310 v->ODMCombineEnablePerState[i][k]);
4311 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) {
4312 if (v->DSCEnable[k] == true) {
4313 v->RequiresDSC[i][k] = true;
4314 v->LinkDSCEnable = true;
4315 if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) {
4316 v->RequiresFEC[i][k] = true;
4317 } else {
4318 v->RequiresFEC[i][k] = false;
4319 }
4320 } else {
4321 v->RequiresDSC[i][k] = false;
4322 v->LinkDSCEnable = false;
4323 if (v->Output[k] == dm_dp2p0) {
4324 v->RequiresFEC[i][k] = true;
4325 } else {
4326 v->RequiresFEC[i][k] = false;
4327 }
4328 }
4329 if (v->Output[k] == dm_dp2p0) {
4330 v->Outbpp = BPP_INVALID;
4331 if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) &&
4332 v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) {
4333 v->Outbpp = TruncToValidBPP(
4334 (1.0 - v->Downspreading / 100.0) * 10000,
4335 v->OutputLinkDPLanes[k],
4336 v->HTotal[k],
4337 v->HActive[k],
4338 v->PixelClockBackEnd[k],
4339 v->ForcedOutputLinkBPP[k],
4340 v->LinkDSCEnable,
4341 v->Output[k],
4342 v->OutputFormat[k],
4343 v->DSCInputBitPerComponent[k],
4344 v->NumberOfDSCSlices[k],
4345 v->AudioSampleRate[k],
4346 v->AudioSampleLayout[k],
4347 v->ODMCombineEnablePerState[i][k]);
4348 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 &&
4349 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4350 v->RequiresDSC[i][k] = true;
4351 v->LinkDSCEnable = true;
4352 v->Outbpp = TruncToValidBPP(
4353 (1.0 - v->Downspreading / 100.0) * 10000,
4354 v->OutputLinkDPLanes[k],
4355 v->HTotal[k],
4356 v->HActive[k],
4357 v->PixelClockBackEnd[k],
4358 v->ForcedOutputLinkBPP[k],
4359 v->LinkDSCEnable,
4360 v->Output[k],
4361 v->OutputFormat[k],
4362 v->DSCInputBitPerComponent[k],
4363 v->NumberOfDSCSlices[k],
4364 v->AudioSampleRate[k],
4365 v->AudioSampleLayout[k],
4366 v->ODMCombineEnablePerState[i][k]);
4367 }
4368 v->OutputBppPerState[i][k] = v->Outbpp;
4369 // TODO: Need some other way to handle this nonsense
4370 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10"
4371 }
4372 if (v->Outbpp == BPP_INVALID &&
4373 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) &&
4374 v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) {
4375 v->Outbpp = TruncToValidBPP(
4376 (1.0 - v->Downspreading / 100.0) * 13500,
4377 v->OutputLinkDPLanes[k],
4378 v->HTotal[k],
4379 v->HActive[k],
4380 v->PixelClockBackEnd[k],
4381 v->ForcedOutputLinkBPP[k],
4382 v->LinkDSCEnable,
4383 v->Output[k],
4384 v->OutputFormat[k],
4385 v->DSCInputBitPerComponent[k],
4386 v->NumberOfDSCSlices[k],
4387 v->AudioSampleRate[k],
4388 v->AudioSampleLayout[k],
4389 v->ODMCombineEnablePerState[i][k]);
4390 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 &&
4391 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) {
4392 v->RequiresDSC[i][k] = true;
4393 v->LinkDSCEnable = true;
4394 v->Outbpp = TruncToValidBPP(
4395 (1.0 - v->Downspreading / 100.0) * 13500,
4396 v->OutputLinkDPLanes[k],
4397 v->HTotal[k],
4398 v->HActive[k],
4399 v->PixelClockBackEnd[k],
4400 v->ForcedOutputLinkBPP[k],
4401 v->LinkDSCEnable,
4402 v->Output[k],
4403 v->OutputFormat[k],
4404 v->DSCInputBitPerComponent[k],
4405 v->NumberOfDSCSlices[k],
4406 v->AudioSampleRate[k],
4407 v->AudioSampleLayout[k],
4408 v->ODMCombineEnablePerState[i][k]);
4409 }
4410 v->OutputBppPerState[i][k] = v->Outbpp;
4411 // TODO: Need some other way to handle this nonsense
4412 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5"
4413 }
4414 if (v->Outbpp == BPP_INVALID &&
4415 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) &&
4416 v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) {
4417 v->Outbpp = TruncToValidBPP(
4418 (1.0 - v->Downspreading / 100.0) * 20000,
4419 v->OutputLinkDPLanes[k],
4420 v->HTotal[k],
4421 v->HActive[k],
4422 v->PixelClockBackEnd[k],
4423 v->ForcedOutputLinkBPP[k],
4424 v->LinkDSCEnable,
4425 v->Output[k],
4426 v->OutputFormat[k],
4427 v->DSCInputBitPerComponent[k],
4428 v->NumberOfDSCSlices[k],
4429 v->AudioSampleRate[k],
4430 v->AudioSampleLayout[k],
4431 v->ODMCombineEnablePerState[i][k]);
4432 if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true &&
4433 v->ForcedOutputLinkBPP[k] == 0) {
4434 v->RequiresDSC[i][k] = true;
4435 v->LinkDSCEnable = true;
4436 v->Outbpp = TruncToValidBPP(
4437 (1.0 - v->Downspreading / 100.0) * 20000,
4438 v->OutputLinkDPLanes[k],
4439 v->HTotal[k],
4440 v->HActive[k],
4441 v->PixelClockBackEnd[k],
4442 v->ForcedOutputLinkBPP[k],
4443 v->LinkDSCEnable,
4444 v->Output[k],
4445 v->OutputFormat[k],
4446 v->DSCInputBitPerComponent[k],
4447 v->NumberOfDSCSlices[k],
4448 v->AudioSampleRate[k],
4449 v->AudioSampleLayout[k],
4450 v->ODMCombineEnablePerState[i][k]);
4451 }
4452 v->OutputBppPerState[i][k] = v->Outbpp;
4453 // TODO: Need some other way to handle this nonsense
4454 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20"
4455 }
4456 } else {
4457 v->Outbpp = BPP_INVALID;
4458 if (v->PHYCLKPerState[i] >= 270.0) {
4459 v->Outbpp = TruncToValidBPP(
4460 (1.0 - v->Downspreading / 100.0) * 2700,
4461 v->OutputLinkDPLanes[k],
4462 v->HTotal[k],
4463 v->HActive[k],
4464 v->PixelClockBackEnd[k],
4465 v->ForcedOutputLinkBPP[k],
4466 v->LinkDSCEnable,
4467 v->Output[k],
4468 v->OutputFormat[k],
4469 v->DSCInputBitPerComponent[k],
4470 v->NumberOfDSCSlices[k],
4471 v->AudioSampleRate[k],
4472 v->AudioSampleLayout[k],
4473 v->ODMCombineEnablePerState[i][k]);
4474 v->OutputBppPerState[i][k] = v->Outbpp;
4475 // TODO: Need some other way to handle this nonsense
4476 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR"
4477 }
4478 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) {
4479 v->Outbpp = TruncToValidBPP(
4480 (1.0 - v->Downspreading / 100.0) * 5400,
4481 v->OutputLinkDPLanes[k],
4482 v->HTotal[k],
4483 v->HActive[k],
4484 v->PixelClockBackEnd[k],
4485 v->ForcedOutputLinkBPP[k],
4486 v->LinkDSCEnable,
4487 v->Output[k],
4488 v->OutputFormat[k],
4489 v->DSCInputBitPerComponent[k],
4490 v->NumberOfDSCSlices[k],
4491 v->AudioSampleRate[k],
4492 v->AudioSampleLayout[k],
4493 v->ODMCombineEnablePerState[i][k]);
4494 v->OutputBppPerState[i][k] = v->Outbpp;
4495 // TODO: Need some other way to handle this nonsense
4496 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2"
4497 }
4498 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) {
4499 v->Outbpp = TruncToValidBPP(
4500 (1.0 - v->Downspreading / 100.0) * 8100,
4501 v->OutputLinkDPLanes[k],
4502 v->HTotal[k],
4503 v->HActive[k],
4504 v->PixelClockBackEnd[k],
4505 v->ForcedOutputLinkBPP[k],
4506 v->LinkDSCEnable,
4507 v->Output[k],
4508 v->OutputFormat[k],
4509 v->DSCInputBitPerComponent[k],
4510 v->NumberOfDSCSlices[k],
4511 v->AudioSampleRate[k],
4512 v->AudioSampleLayout[k],
4513 v->ODMCombineEnablePerState[i][k]);
4514 v->OutputBppPerState[i][k] = v->Outbpp;
4515 // TODO: Need some other way to handle this nonsense
4516 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3"
4517 }
4518 }
4519 }
4520 } else {
4521 v->OutputBppPerState[i][k] = 0;
4522 }
4523 }
4524 }
4525
4526 for (i = 0; i < v->soc.num_states; i++) {
4527 v->LinkCapacitySupport[i] = true;
4528 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4529 if (v->BlendingAndTiming[k] == k
4530 && (v->Output[k] == dm_dp ||
4531 v->Output[k] == dm_edp ||
4532 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) {
4533 v->LinkCapacitySupport[i] = false;
4534 }
4535 }
4536 }
4537
4538 // UPTO 2172
4539 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4540 if (v->BlendingAndTiming[k] == k
4541 && (v->Output[k] == dm_dp ||
4542 v->Output[k] == dm_edp ||
4543 v->Output[k] == dm_hdmi)) {
4544 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) {
4545 P2IWith420 = true;
4546 }
4547 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422
4548 && !v->DSC422NativeSupport) {
4549 DSC422NativeNotSupported = true;
4550 }
4551 }
4552 }
4553
4554 for (i = 0; i < v->soc.num_states; ++i) {
4555 v->ODMCombine4To1SupportCheckOK[i] = true;
4556 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4557 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
4558 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp
4559 || v->Output[k] == dm_hdmi)) {
4560 v->ODMCombine4To1SupportCheckOK[i] = false;
4561 }
4562 }
4563 }
4564
4565 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */
4566
4567 for (i = 0; i < v->soc.num_states; i++) {
4568 v->NotEnoughDSCUnits[i] = false;
4569 v->TotalDSCUnitsRequired = 0.0;
4570 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4571 if (v->RequiresDSC[i][k] == true) {
4572 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) {
4573 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0;
4574 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4575 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0;
4576 } else {
4577 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0;
4578 }
4579 }
4580 }
4581 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) {
4582 v->NotEnoughDSCUnits[i] = true;
4583 }
4584 }
4585 /*DSC Delay per state*/
4586
4587 for (i = 0; i < v->soc.num_states; i++) {
4588 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4589 if (v->OutputBppPerState[i][k] == BPP_INVALID) {
4590 v->BPP = 0.0;
4591 } else {
4592 v->BPP = v->OutputBppPerState[i][k];
4593 }
4594 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) {
4595 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) {
4596 v->DSCDelayPerState[i][k] = dscceComputeDelay(
4597 v->DSCInputBitPerComponent[k],
4598 v->BPP,
4599 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4600 v->NumberOfDSCSlices[k],
4601 v->OutputFormat[k],
4602 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]);
4603 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) {
4604 v->DSCDelayPerState[i][k] = 2.0
4605 * (dscceComputeDelay(
4606 v->DSCInputBitPerComponent[k],
4607 v->BPP,
4608 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4609 v->NumberOfDSCSlices[k] / 2,
4610 v->OutputFormat[k],
4611 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4612 } else {
4613 v->DSCDelayPerState[i][k] = 4.0
4614 * (dscceComputeDelay(
4615 v->DSCInputBitPerComponent[k],
4616 v->BPP,
4617 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0),
4618 v->NumberOfDSCSlices[k] / 4,
4619 v->OutputFormat[k],
4620 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]));
4621 }
4622 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k];
4623 } else {
4624 v->DSCDelayPerState[i][k] = 0.0;
4625 }
4626 }
4627 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4628 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4629 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) {
4630 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m];
4631 }
4632 }
4633 }
4634 }
4635
4636 //Calculate Swath, DET Configuration, DCFCLKDeepSleep
4637 //
4638 for (i = 0; i < v->soc.num_states; ++i) {
4639 for (j = 0; j <= 1; ++j) {
4640 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4641 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k];
4642 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
4643 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k];
4644 }
4645
4646 if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315 && !v->DETSizeOverride[0])
4647 PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, v->DETBufferSizeInKByte);
4648 CalculateSwathAndDETConfiguration(
4649 false,
4650 v->NumberOfActivePlanes,
4651 mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0],
4652 v->DETBufferSizeInKByte,
4653 v->MaximumSwathWidthLuma,
4654 v->MaximumSwathWidthChroma,
4655 v->SourceScan,
4656 v->SourcePixelFormat,
4657 v->SurfaceTiling,
4658 v->ViewportWidth,
4659 v->ViewportHeight,
4660 v->SurfaceWidthY,
4661 v->SurfaceWidthC,
4662 v->SurfaceHeightY,
4663 v->SurfaceHeightC,
4664 v->Read256BlockHeightY,
4665 v->Read256BlockHeightC,
4666 v->Read256BlockWidthY,
4667 v->Read256BlockWidthC,
4668 v->ODMCombineEnableThisState,
4669 v->BlendingAndTiming,
4670 v->BytePerPixelY,
4671 v->BytePerPixelC,
4672 v->BytePerPixelInDETY,
4673 v->BytePerPixelInDETC,
4674 v->HActive,
4675 v->HRatio,
4676 v->HRatioChroma,
4677 v->NoOfDPPThisState,
4678 v->swath_width_luma_ub_this_state,
4679 v->swath_width_chroma_ub_this_state,
4680 v->SwathWidthYThisState,
4681 v->SwathWidthCThisState,
4682 v->SwathHeightYThisState,
4683 v->SwathHeightCThisState,
4684 v->DETBufferSizeYThisState,
4685 v->DETBufferSizeCThisState,
4686 v->dummystring,
4687 &v->ViewportSizeSupport[i][j]);
4688
4689 CalculateDCFCLKDeepSleep(
4690 mode_lib,
4691 v->NumberOfActivePlanes,
4692 v->BytePerPixelY,
4693 v->BytePerPixelC,
4694 v->VRatio,
4695 v->VRatioChroma,
4696 v->SwathWidthYThisState,
4697 v->SwathWidthCThisState,
4698 v->NoOfDPPThisState,
4699 v->HRatio,
4700 v->HRatioChroma,
4701 v->PixelClock,
4702 v->PSCL_FACTOR,
4703 v->PSCL_FACTOR_CHROMA,
4704 v->RequiredDPPCLKThisState,
4705 v->ReadBandwidthLuma,
4706 v->ReadBandwidthChroma,
4707 v->ReturnBusWidth,
4708 &v->ProjectedDCFCLKDeepSleep[i][j]);
4709
4710 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4711 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k];
4712 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k];
4713 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k];
4714 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k];
4715 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k];
4716 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k];
4717 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k];
4718 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k];
4719 }
4720 }
4721 }
4722
4723 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4724 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
4725 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k];
4726 }
4727
4728 for (i = 0; i < v->soc.num_states; i++) {
4729 for (j = 0; j < 2; j++) {
4730 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX];
4731
4732 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4733 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
4734 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
4735 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
4736 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
4737 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
4738 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
4739 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
4740 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
4741 }
4742
4743 v->TotalNumberOfDCCActiveDPP[i][j] = 0;
4744 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4745 if (v->DCCEnable[k] == true) {
4746 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k];
4747 }
4748 }
4749
4750 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4751 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10
4752 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
4753
4754 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12)
4755 && v->SourceScan[k] != dm_vert) {
4756 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma)
4757 / 2;
4758 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma;
4759 } else {
4760 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma;
4761 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma;
4762 }
4763
4764 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes(
4765 mode_lib,
4766 v->DCCEnable[k],
4767 v->Read256BlockHeightC[k],
4768 v->Read256BlockWidthC[k],
4769 v->SourcePixelFormat[k],
4770 v->SurfaceTiling[k],
4771 v->BytePerPixelC[k],
4772 v->SourceScan[k],
4773 v->SwathWidthCThisState[k],
4774 v->ViewportHeightChroma[k],
4775 v->GPUVMEnable,
4776 v->HostVMEnable,
4777 v->HostVMMaxNonCachedPageTableLevels,
4778 v->GPUVMMinPageSize,
4779 v->HostVMMinPageSize,
4780 v->PTEBufferSizeInRequestsForChroma,
4781 v->PitchC[k],
4782 0.0,
4783 &v->MacroTileWidthC[k],
4784 &v->MetaRowBytesC,
4785 &v->DPTEBytesPerRowC,
4786 &v->PTEBufferSizeNotExceededC[i][j][k],
4787 &v->dummyinteger7,
4788 &v->dpte_row_height_chroma[k],
4789 &v->dummyinteger28,
4790 &v->dummyinteger26,
4791 &v->dummyinteger23,
4792 &v->meta_row_height_chroma[k],
4793 &v->dummyinteger8,
4794 &v->dummyinteger9,
4795 &v->dummyinteger19,
4796 &v->dummyinteger20,
4797 &v->dummyinteger17,
4798 &v->dummyinteger10,
4799 &v->dummyinteger11);
4800
4801 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines(
4802 mode_lib,
4803 v->VRatioChroma[k],
4804 v->VTAPsChroma[k],
4805 v->Interlace[k],
4806 v->ProgressiveToInterlaceUnitInOPP,
4807 v->SwathHeightCThisState[k],
4808 v->ViewportYStartC[k],
4809 &v->PrefillC[k],
4810 &v->MaxNumSwC[k]);
4811 } else {
4812 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma;
4813 v->PTEBufferSizeInRequestsForChroma = 0;
4814 v->PDEAndMetaPTEBytesPerFrameC = 0.0;
4815 v->MetaRowBytesC = 0.0;
4816 v->DPTEBytesPerRowC = 0.0;
4817 v->PrefetchLinesC[i][j][k] = 0.0;
4818 v->PTEBufferSizeNotExceededC[i][j][k] = true;
4819 }
4820 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes(
4821 mode_lib,
4822 v->DCCEnable[k],
4823 v->Read256BlockHeightY[k],
4824 v->Read256BlockWidthY[k],
4825 v->SourcePixelFormat[k],
4826 v->SurfaceTiling[k],
4827 v->BytePerPixelY[k],
4828 v->SourceScan[k],
4829 v->SwathWidthYThisState[k],
4830 v->ViewportHeight[k],
4831 v->GPUVMEnable,
4832 v->HostVMEnable,
4833 v->HostVMMaxNonCachedPageTableLevels,
4834 v->GPUVMMinPageSize,
4835 v->HostVMMinPageSize,
4836 v->PTEBufferSizeInRequestsForLuma,
4837 v->PitchY[k],
4838 v->DCCMetaPitchY[k],
4839 &v->MacroTileWidthY[k],
4840 &v->MetaRowBytesY,
4841 &v->DPTEBytesPerRowY,
4842 &v->PTEBufferSizeNotExceededY[i][j][k],
4843 &v->dummyinteger7,
4844 &v->dpte_row_height[k],
4845 &v->dummyinteger29,
4846 &v->dummyinteger27,
4847 &v->dummyinteger24,
4848 &v->meta_row_height[k],
4849 &v->dummyinteger25,
4850 &v->dpte_group_bytes[k],
4851 &v->dummyinteger21,
4852 &v->dummyinteger22,
4853 &v->dummyinteger18,
4854 &v->dummyinteger5,
4855 &v->dummyinteger6);
4856 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines(
4857 mode_lib,
4858 v->VRatio[k],
4859 v->vtaps[k],
4860 v->Interlace[k],
4861 v->ProgressiveToInterlaceUnitInOPP,
4862 v->SwathHeightYThisState[k],
4863 v->ViewportYStartY[k],
4864 &v->PrefillY[k],
4865 &v->MaxNumSwY[k]);
4866 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC;
4867 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC;
4868 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC;
4869
4870 CalculateRowBandwidth(
4871 v->GPUVMEnable,
4872 v->SourcePixelFormat[k],
4873 v->VRatio[k],
4874 v->VRatioChroma[k],
4875 v->DCCEnable[k],
4876 v->HTotal[k] / v->PixelClock[k],
4877 v->MetaRowBytesY,
4878 v->MetaRowBytesC,
4879 v->meta_row_height[k],
4880 v->meta_row_height_chroma[k],
4881 v->DPTEBytesPerRowY,
4882 v->DPTEBytesPerRowC,
4883 v->dpte_row_height[k],
4884 v->dpte_row_height_chroma[k],
4885 &v->meta_row_bandwidth[i][j][k],
4886 &v->dpte_row_bandwidth[i][j][k]);
4887 }
4888 /*DCCMetaBufferSizeSupport(i, j) = True
4889 For k = 0 To NumberOfActivePlanes - 1
4890 If MetaRowBytes(i, j, k) > 24064 Then
4891 DCCMetaBufferSizeSupport(i, j) = False
4892 End If
4893 Next k*/
4894 v->DCCMetaBufferSizeSupport[i][j] = true;
4895 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4896 if (v->MetaRowBytes[i][j][k] > 24064)
4897 v->DCCMetaBufferSizeSupport[i][j] = false;
4898 }
4899 v->UrgLatency[i] = CalculateUrgentLatency(
4900 v->UrgentLatencyPixelDataOnly,
4901 v->UrgentLatencyPixelMixedWithVMData,
4902 v->UrgentLatencyVMDataOnly,
4903 v->DoUrgentLatencyAdjustment,
4904 v->UrgentLatencyAdjustmentFabricClockComponent,
4905 v->UrgentLatencyAdjustmentFabricClockReference,
4906 v->FabricClockPerState[i]);
4907
4908 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4909 CalculateUrgentBurstFactor(
4910 v->swath_width_luma_ub_this_state[k],
4911 v->swath_width_chroma_ub_this_state[k],
4912 v->SwathHeightYThisState[k],
4913 v->SwathHeightCThisState[k],
4914 v->HTotal[k] / v->PixelClock[k],
4915 v->UrgLatency[i],
4916 v->CursorBufferSize,
4917 v->CursorWidth[k][0],
4918 v->CursorBPP[k][0],
4919 v->VRatio[k],
4920 v->VRatioChroma[k],
4921 v->BytePerPixelInDETY[k],
4922 v->BytePerPixelInDETC[k],
4923 v->DETBufferSizeYThisState[k],
4924 v->DETBufferSizeCThisState[k],
4925 &v->UrgentBurstFactorCursor[k],
4926 &v->UrgentBurstFactorLuma[k],
4927 &v->UrgentBurstFactorChroma[k],
4928 &NotUrgentLatencyHiding[k]);
4929 }
4930
4931 v->NotEnoughUrgentLatencyHidingA[i][j] = false;
4932 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4933 if (NotUrgentLatencyHiding[k]) {
4934 v->NotEnoughUrgentLatencyHidingA[i][j] = true;
4935 }
4936 }
4937
4938 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4939 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k]
4940 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k];
4941 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k];
4942 }
4943
4944 v->TotalVActivePixelBandwidth[i][j] = 0;
4945 v->TotalVActiveCursorBandwidth[i][j] = 0;
4946 v->TotalMetaRowBandwidth[i][j] = 0;
4947 v->TotalDPTERowBandwidth[i][j] = 0;
4948 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
4949 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k];
4950 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k];
4951 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k];
4952 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k];
4953 }
4954 }
4955 }
4956
4957 //Calculate Return BW
4958 for (i = 0; i < v->soc.num_states; ++i) {
4959 for (j = 0; j <= 1; ++j) {
4960 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4961 if (v->BlendingAndTiming[k] == k) {
4962 if (v->WritebackEnable[k] == true) {
4963 v->WritebackDelayTime[k] = v->WritebackLatency
4964 + CalculateWriteBackDelay(
4965 v->WritebackPixelFormat[k],
4966 v->WritebackHRatio[k],
4967 v->WritebackVRatio[k],
4968 v->WritebackVTaps[k],
4969 v->WritebackDestinationWidth[k],
4970 v->WritebackDestinationHeight[k],
4971 v->WritebackSourceHeight[k],
4972 v->HTotal[k]) / v->RequiredDISPCLK[i][j];
4973 } else {
4974 v->WritebackDelayTime[k] = 0.0;
4975 }
4976 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4977 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) {
4978 v->WritebackDelayTime[k] = dml_max(
4979 v->WritebackDelayTime[k],
4980 v->WritebackLatency
4981 + CalculateWriteBackDelay(
4982 v->WritebackPixelFormat[m],
4983 v->WritebackHRatio[m],
4984 v->WritebackVRatio[m],
4985 v->WritebackVTaps[m],
4986 v->WritebackDestinationWidth[m],
4987 v->WritebackDestinationHeight[m],
4988 v->WritebackSourceHeight[m],
4989 v->HTotal[m]) / v->RequiredDISPCLK[i][j]);
4990 }
4991 }
4992 }
4993 }
4994 for (k = 0; k < v->NumberOfActivePlanes; k++) {
4995 for (m = 0; m < v->NumberOfActivePlanes; m++) {
4996 if (v->BlendingAndTiming[k] == m) {
4997 v->WritebackDelayTime[k] = v->WritebackDelayTime[m];
4998 }
4999 }
5000 }
5001 v->MaxMaxVStartup[i][j] = 0;
5002 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5003 v->MaximumVStartup[i][j][k] =
5004 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ?
5005 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) :
5006 v->VTotal[k] - v->VActive[k]
5007 - dml_max(
5008 1.0,
5009 dml_ceil(
5010 1.0 * v->WritebackDelayTime[k]
5011 / (v->HTotal[k]
5012 / v->PixelClock[k]),
5013 1.0));
5014 if (v->MaximumVStartup[i][j][k] > 1023)
5015 v->MaximumVStartup[i][j][k] = 1023;
5016 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]);
5017 }
5018 }
5019 }
5020
5021 ReorderingBytes = v->NumberOfChannels
5022 * dml_max3(
5023 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly,
5024 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData,
5025 v->UrgentOutOfOrderReturnPerChannelVMDataOnly);
5026
5027 for (i = 0; i < v->soc.num_states; ++i) {
5028 for (j = 0; j <= 1; ++j) {
5029 v->DCFCLKState[i][j] = v->DCFCLKPerState[i];
5030 }
5031 }
5032
5033 if (v->UseMinimumRequiredDCFCLK == true)
5034 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
5035
5036 for (i = 0; i < v->soc.num_states; ++i) {
5037 for (j = 0; j <= 1; ++j) {
5038 double IdealFabricAndSDPPortBandwidthPerState = dml_min(
5039 v->ReturnBusWidth * v->DCFCLKState[i][j],
5040 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn);
5041 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth;
5042 double PixelDataOnlyReturnBWPerState = dml_min(
5043 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5044 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
5045 double PixelMixedWithVMDataReturnBWPerState = dml_min(
5046 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5047 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0);
5048
5049 if (v->HostVMEnable != true) {
5050 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState;
5051 } else {
5052 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState;
5053 }
5054 }
5055 }
5056
5057 //Re-ordering Buffer Support Check
5058 for (i = 0; i < v->soc.num_states; ++i) {
5059 for (j = 0; j <= 1; ++j) {
5060 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j]
5061 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) {
5062 v->ROBSupport[i][j] = true;
5063 } else {
5064 v->ROBSupport[i][j] = false;
5065 }
5066 }
5067 }
5068
5069 //Vertical Active BW support check
5070
5071 MaxTotalVActiveRDBandwidth = 0;
5072 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5073 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k];
5074 }
5075
5076 for (i = 0; i < v->soc.num_states; ++i) {
5077 for (j = 0; j <= 1; ++j) {
5078 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min(
5079 dml_min(
5080 v->ReturnBusWidth * v->DCFCLKState[i][j],
5081 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5082 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100,
5083 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5084 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100);
5085
5086 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) {
5087 v->TotalVerticalActiveBandwidthSupport[i][j] = true;
5088 } else {
5089 v->TotalVerticalActiveBandwidthSupport[i][j] = false;
5090 }
5091 }
5092 }
5093
5094 v->UrgentLatency = CalculateUrgentLatency(
5095 v->UrgentLatencyPixelDataOnly,
5096 v->UrgentLatencyPixelMixedWithVMData,
5097 v->UrgentLatencyVMDataOnly,
5098 v->DoUrgentLatencyAdjustment,
5099 v->UrgentLatencyAdjustmentFabricClockComponent,
5100 v->UrgentLatencyAdjustmentFabricClockReference,
5101 v->FabricClock);
5102 //Prefetch Check
5103 for (i = 0; i < v->soc.num_states; ++i) {
5104 for (j = 0; j <= 1; ++j) {
5105 double VMDataOnlyReturnBWPerState;
5106 double HostVMInefficiencyFactor = 1;
5107 int NextPrefetchModeState = MinPrefetchMode;
5108 bool UnboundedRequestEnabledThisState = false;
5109 int CompressedBufferSizeInkByteThisState = 0;
5110 double dummy;
5111
5112 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j];
5113
5114 v->BandwidthWithoutPrefetchSupported[i][j] = true;
5115 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]
5116 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) {
5117 v->BandwidthWithoutPrefetchSupported[i][j] = false;
5118 }
5119
5120 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5121 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k];
5122 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k];
5123 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k];
5124 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k];
5125 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k];
5126 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k];
5127 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k];
5128 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k];
5129 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k];
5130 }
5131
5132 VMDataOnlyReturnBWPerState = dml_min(
5133 dml_min(
5134 v->ReturnBusWidth * v->DCFCLKState[i][j],
5135 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn)
5136 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
5137 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth
5138 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0);
5139 if (v->GPUVMEnable && v->HostVMEnable)
5140 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState;
5141
5142 v->ExtraLatency = CalculateExtraLatency(
5143 v->RoundTripPingLatencyCycles,
5144 ReorderingBytes,
5145 v->DCFCLKState[i][j],
5146 v->TotalNumberOfActiveDPP[i][j],
5147 v->PixelChunkSizeInKByte,
5148 v->TotalNumberOfDCCActiveDPP[i][j],
5149 v->MetaChunkSize,
5150 v->ReturnBWPerState[i][j],
5151 v->GPUVMEnable,
5152 v->HostVMEnable,
5153 v->NumberOfActivePlanes,
5154 v->NoOfDPPThisState,
5155 v->dpte_group_bytes,
5156 HostVMInefficiencyFactor,
5157 v->HostVMMinPageSize,
5158 v->HostVMMaxNonCachedPageTableLevels);
5159
5160 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5161 do {
5162 v->PrefetchModePerState[i][j] = NextPrefetchModeState;
5163 v->MaxVStartup = v->NextMaxVStartup;
5164
5165 v->TWait = CalculateTWait(
5166 v->PrefetchModePerState[i][j],
5167 v->DRAMClockChangeLatency,
5168 v->UrgLatency[i],
5169 v->SREnterPlusExitTime);
5170
5171 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5172 CalculatePrefetchSchedulePerPlane(mode_lib,
5173 HostVMInefficiencyFactor,
5174 i, j, k);
5175 }
5176
5177 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5178 CalculateUrgentBurstFactor(
5179 v->swath_width_luma_ub_this_state[k],
5180 v->swath_width_chroma_ub_this_state[k],
5181 v->SwathHeightYThisState[k],
5182 v->SwathHeightCThisState[k],
5183 v->HTotal[k] / v->PixelClock[k],
5184 v->UrgLatency[i],
5185 v->CursorBufferSize,
5186 v->CursorWidth[k][0],
5187 v->CursorBPP[k][0],
5188 v->VRatioPreY[i][j][k],
5189 v->VRatioPreC[i][j][k],
5190 v->BytePerPixelInDETY[k],
5191 v->BytePerPixelInDETC[k],
5192 v->DETBufferSizeYThisState[k],
5193 v->DETBufferSizeCThisState[k],
5194 &v->UrgentBurstFactorCursorPre[k],
5195 &v->UrgentBurstFactorLumaPre[k],
5196 &v->UrgentBurstFactorChromaPre[k],
5197 &v->NotUrgentLatencyHidingPre[k]);
5198 }
5199
5200 v->MaximumReadBandwidthWithPrefetch = 0.0;
5201 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5202 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0
5203 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k];
5204
5205 v->MaximumReadBandwidthWithPrefetch =
5206 v->MaximumReadBandwidthWithPrefetch
5207 + dml_max3(
5208 v->VActivePixelBandwidth[i][j][k]
5209 + v->VActiveCursorBandwidth[i][j][k]
5210 + v->NoOfDPP[i][j][k]
5211 * (v->meta_row_bandwidth[i][j][k]
5212 + v->dpte_row_bandwidth[i][j][k]),
5213 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5214 v->NoOfDPP[i][j][k]
5215 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5216 * v->UrgentBurstFactorLumaPre[k]
5217 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5218 * v->UrgentBurstFactorChromaPre[k])
5219 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5220 }
5221
5222 v->NotEnoughUrgentLatencyHidingPre = false;
5223 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5224 if (v->NotUrgentLatencyHidingPre[k] == true) {
5225 v->NotEnoughUrgentLatencyHidingPre = true;
5226 }
5227 }
5228
5229 v->PrefetchSupported[i][j] = true;
5230 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j]
5231 || v->NotEnoughUrgentLatencyHidingPre == 1) {
5232 v->PrefetchSupported[i][j] = false;
5233 }
5234 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5235 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0
5236 || v->NoTimeForPrefetch[i][j][k] == true) {
5237 v->PrefetchSupported[i][j] = false;
5238 }
5239 }
5240
5241 v->DynamicMetadataSupported[i][j] = true;
5242 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5243 if (v->NoTimeForDynamicMetadata[i][j][k] == true) {
5244 v->DynamicMetadataSupported[i][j] = false;
5245 }
5246 }
5247
5248 v->VRatioInPrefetchSupported[i][j] = true;
5249 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5250 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) {
5251 v->VRatioInPrefetchSupported[i][j] = false;
5252 }
5253 }
5254 v->AnyLinesForVMOrRowTooLarge = false;
5255 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5256 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) {
5257 v->AnyLinesForVMOrRowTooLarge = true;
5258 }
5259 }
5260
5261 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5262
5263 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) {
5264 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j];
5265 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5266 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip
5267 - dml_max(
5268 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k],
5269 v->NoOfDPP[i][j][k]
5270 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5271 * v->UrgentBurstFactorLumaPre[k]
5272 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5273 * v->UrgentBurstFactorChromaPre[k])
5274 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5275 }
5276 v->TotImmediateFlipBytes = 0.0;
5277 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5278 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes
5279 + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k]
5280 + v->DPTEBytesPerRow[i][j][k]);
5281 }
5282
5283 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5284 CalculateFlipSchedule(
5285 mode_lib,
5286 k,
5287 HostVMInefficiencyFactor,
5288 v->ExtraLatency,
5289 v->UrgLatency[i],
5290 v->PDEAndMetaPTEBytesPerFrame[i][j][k],
5291 v->MetaRowBytes[i][j][k],
5292 v->DPTEBytesPerRow[i][j][k]);
5293 }
5294 v->total_dcn_read_bw_with_flip = 0.0;
5295 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5296 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip
5297 + dml_max3(
5298 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k],
5299 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k]
5300 + v->VActiveCursorBandwidth[i][j][k],
5301 v->NoOfDPP[i][j][k]
5302 * (v->final_flip_bw[k]
5303 + v->RequiredPrefetchPixelDataBWLuma[i][j][k]
5304 * v->UrgentBurstFactorLumaPre[k]
5305 + v->RequiredPrefetchPixelDataBWChroma[i][j][k]
5306 * v->UrgentBurstFactorChromaPre[k])
5307 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]);
5308 }
5309 v->ImmediateFlipSupportedForState[i][j] = true;
5310 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) {
5311 v->ImmediateFlipSupportedForState[i][j] = false;
5312 }
5313 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5314 if (v->ImmediateFlipSupportedForPipe[k] == false) {
5315 v->ImmediateFlipSupportedForState[i][j] = false;
5316 }
5317 }
5318 } else {
5319 v->ImmediateFlipSupportedForState[i][j] = false;
5320 }
5321
5322 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) {
5323 v->NextMaxVStartup = v->MaxMaxVStartup[i][j];
5324 NextPrefetchModeState = NextPrefetchModeState + 1;
5325 } else {
5326 v->NextMaxVStartup = v->NextMaxVStartup - 1;
5327 }
5328 v->NextPrefetchMode = v->NextPrefetchMode + 1;
5329 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5330 && ((v->HostVMEnable == false &&
5331 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5332 || v->ImmediateFlipSupportedForState[i][j] == true))
5333 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode)));
5334
5335 CalculateUnboundedRequestAndCompressedBufferSize(
5336 v->DETBufferSizeInKByte[0],
5337 v->ConfigReturnBufferSizeInKByte,
5338 v->UseUnboundedRequesting,
5339 v->TotalNumberOfActiveDPP[i][j],
5340 NoChroma,
5341 v->MaxNumDPP,
5342 v->CompressedBufferSegmentSizeInkByte,
5343 v->Output,
5344 &UnboundedRequestEnabledThisState,
5345 &CompressedBufferSizeInkByteThisState);
5346
5347 CalculateWatermarksAndDRAMSpeedChangeSupport(
5348 mode_lib,
5349 v->PrefetchModePerState[i][j],
5350 v->DCFCLKState[i][j],
5351 v->ReturnBWPerState[i][j],
5352 v->UrgLatency[i],
5353 v->ExtraLatency,
5354 v->SOCCLKPerState[i],
5355 v->ProjectedDCFCLKDeepSleep[i][j],
5356 v->DETBufferSizeYThisState,
5357 v->DETBufferSizeCThisState,
5358 v->SwathHeightYThisState,
5359 v->SwathHeightCThisState,
5360 v->SwathWidthYThisState,
5361 v->SwathWidthCThisState,
5362 v->NoOfDPPThisState,
5363 v->BytePerPixelInDETY,
5364 v->BytePerPixelInDETC,
5365 UnboundedRequestEnabledThisState,
5366 CompressedBufferSizeInkByteThisState,
5367 &v->DRAMClockChangeSupport[i][j],
5368 &dummy,
5369 &dummy,
5370 &dummy,
5371 &dummy);
5372 }
5373 }
5374
5375 /*PTE Buffer Size Check*/
5376 for (i = 0; i < v->soc.num_states; i++) {
5377 for (j = 0; j < 2; j++) {
5378 v->PTEBufferSizeNotExceeded[i][j] = true;
5379 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5380 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) {
5381 v->PTEBufferSizeNotExceeded[i][j] = false;
5382 }
5383 }
5384 }
5385 }
5386
5387 /*Cursor Support Check*/
5388 v->CursorSupport = true;
5389 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5390 if (v->CursorWidth[k][0] > 0.0) {
5391 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) {
5392 v->CursorSupport = false;
5393 }
5394 }
5395 }
5396
5397 /*Valid Pitch Check*/
5398 v->PitchSupport = true;
5399 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5400 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]);
5401 if (v->DCCEnable[k] == true) {
5402 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]);
5403 } else {
5404 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k];
5405 }
5406 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16
5407 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe
5408 && v->SourcePixelFormat[k] != dm_mono_8) {
5409 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]);
5410 if (v->DCCEnable[k] == true) {
5411 v->AlignedDCCMetaPitchC[k] = dml_ceil(
5412 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]),
5413 64.0 * v->Read256BlockWidthC[k]);
5414 } else {
5415 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5416 }
5417 } else {
5418 v->AlignedCPitch[k] = v->PitchC[k];
5419 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k];
5420 }
5421 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k]
5422 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) {
5423 v->PitchSupport = false;
5424 }
5425 }
5426
5427 for (k = 0; k < v->NumberOfActivePlanes; k++) {
5428 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) {
5429 ViewportExceedsSurface = true;
5430 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32
5431 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8
5432 && v->SourcePixelFormat[k] != dm_rgbe) {
5433 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k]
5434 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) {
5435 ViewportExceedsSurface = true;
5436 }
5437 }
5438 }
5439 }
5440
5441 /*Mode Support, Voltage State and SOC Configuration*/
5442 for (i = v->soc.num_states - 1; i >= 0; i--) {
5443 for (j = 0; j < 2; j++) {
5444 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true
5445 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP
5446 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false
5447 && v->DTBCLKRequiredMoreThanSupported[i] == false
5448 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true
5449 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true
5450 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true
5451 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false
5452 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true
5453 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true
5454 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false
5455 && ((v->HostVMEnable == false
5456 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5457 || v->ImmediateFlipSupportedForState[i][j] == true)
5458 && FMTBufferExceeded == false) {
5459 v->ModeSupport[i][j] = true;
5460 } else {
5461 v->ModeSupport[i][j] = false;
5462 #ifdef __DML_VBA_DEBUG__
5463 if (v->ScaleRatioAndTapsSupport == false)
5464 dml_print("DML SUPPORT: ScaleRatioAndTapsSupport failed");
5465 if (v->SourceFormatPixelAndScanSupport == false)
5466 dml_print("DML SUPPORT: SourceFormatPixelAndScanSupport failed");
5467 if (v->ViewportSizeSupport[i][j] == false)
5468 dml_print("DML SUPPORT: ViewportSizeSupport failed");
5469 if (v->LinkCapacitySupport[i] == false)
5470 dml_print("DML SUPPORT: LinkCapacitySupport failed");
5471 if (v->ODMCombine4To1SupportCheckOK[i] == false)
5472 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5473 if (v->NotEnoughDSCUnits[i] == true)
5474 dml_print("DML SUPPORT: NotEnoughDSCUnits");
5475 if (v->DTBCLKRequiredMoreThanSupported[i] == true)
5476 dml_print("DML SUPPORT: DTBCLKRequiredMoreThanSupported");
5477 if (v->ROBSupport[i][j] == false)
5478 dml_print("DML SUPPORT: ROBSupport failed");
5479 if (v->DISPCLK_DPPCLK_Support[i][j] == false)
5480 dml_print("DML SUPPORT: DISPCLK_DPPCLK_Support failed");
5481 if (v->TotalAvailablePipesSupport[i][j] == false)
5482 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5483 if (EnoughWritebackUnits == false)
5484 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5485 if (v->WritebackLatencySupport == false)
5486 dml_print("DML SUPPORT: WritebackLatencySupport failed");
5487 if (v->WritebackScaleRatioAndTapsSupport == false)
5488 dml_print("DML SUPPORT: DSC422NativeNotSupported ");
5489 if (v->CursorSupport == false)
5490 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5491 if (v->PitchSupport == false)
5492 dml_print("DML SUPPORT: PitchSupport failed");
5493 if (ViewportExceedsSurface == true)
5494 dml_print("DML SUPPORT: ViewportExceedsSurface failed");
5495 if (v->PrefetchSupported[i][j] == false)
5496 dml_print("DML SUPPORT: PrefetchSupported failed");
5497 if (v->DynamicMetadataSupported[i][j] == false)
5498 dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
5499 if (v->TotalVerticalActiveBandwidthSupport[i][j] == false)
5500 dml_print("DML SUPPORT: TotalVerticalActiveBandwidthSupport failed");
5501 if (v->VRatioInPrefetchSupported[i][j] == false)
5502 dml_print("DML SUPPORT: VRatioInPrefetchSupported failed");
5503 if (v->PTEBufferSizeNotExceeded[i][j] == false)
5504 dml_print("DML SUPPORT: PTEBufferSizeNotExceeded failed");
5505 if (v->NonsupportedDSCInputBPC == true)
5506 dml_print("DML SUPPORT: NonsupportedDSCInputBPC failed");
5507 if (!((v->HostVMEnable == false
5508 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
5509 || v->ImmediateFlipSupportedForState[i][j] == true))
5510 dml_print("DML SUPPORT: ImmediateFlipRequirement failed");
5511 if (FMTBufferExceeded == true)
5512 dml_print("DML SUPPORT: FMTBufferExceeded failed");
5513 #endif
5514 }
5515 }
5516 }
5517
5518 {
5519 unsigned int MaximumMPCCombine = 0;
5520 for (i = v->soc.num_states; i >= 0; i--) {
5521 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
5522 v->VoltageLevel = i;
5523 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
5524 if (v->ModeSupport[i][0] == true) {
5525 MaximumMPCCombine = 0;
5526 } else {
5527 MaximumMPCCombine = 1;
5528 }
5529 }
5530 }
5531 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
5532 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
5533 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
5534 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
5535 }
5536 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
5537 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
5538 v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
5539 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
5540 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
5541 v->maxMpcComb = MaximumMPCCombine;
5542 }
5543 }
5544
5545 static void CalculateWatermarksAndDRAMSpeedChangeSupport(
5546 struct display_mode_lib *mode_lib,
5547 unsigned int PrefetchMode,
5548 double DCFCLK,
5549 double ReturnBW,
5550 double UrgentLatency,
5551 double ExtraLatency,
5552 double SOCCLK,
5553 double DCFCLKDeepSleep,
5554 unsigned int DETBufferSizeY[],
5555 unsigned int DETBufferSizeC[],
5556 unsigned int SwathHeightY[],
5557 unsigned int SwathHeightC[],
5558 double SwathWidthY[],
5559 double SwathWidthC[],
5560 unsigned int DPPPerPlane[],
5561 double BytePerPixelDETY[],
5562 double BytePerPixelDETC[],
5563 bool UnboundedRequestEnabled,
5564 int unsigned CompressedBufferSizeInkByte,
5565 enum clock_change_support *DRAMClockChangeSupport,
5566 double *StutterExitWatermark,
5567 double *StutterEnterPlusExitWatermark,
5568 double *Z8StutterExitWatermark,
5569 double *Z8StutterEnterPlusExitWatermark)
5570 {
5571 struct vba_vars_st *v = &mode_lib->vba;
5572 double EffectiveLBLatencyHidingY;
5573 double EffectiveLBLatencyHidingC;
5574 double LinesInDETY[DC__NUM_DPP__MAX];
5575 double LinesInDETC;
5576 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
5577 unsigned int LinesInDETCRoundedDownToSwath;
5578 double FullDETBufferingTimeY;
5579 double FullDETBufferingTimeC;
5580 double ActiveDRAMClockChangeLatencyMarginY;
5581 double ActiveDRAMClockChangeLatencyMarginC;
5582 double WritebackDRAMClockChangeLatencyMargin;
5583 double PlaneWithMinActiveDRAMClockChangeMargin;
5584 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
5585 double WritebackDRAMClockChangeLatencyHiding;
5586 double TotalPixelBW = 0.0;
5587 int k, j;
5588
5589 v->UrgentWatermark = UrgentLatency + ExtraLatency;
5590
5591 #ifdef __DML_VBA_DEBUG__
5592 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
5593 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
5594 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
5595 #endif
5596
5597 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;
5598
5599 #ifdef __DML_VBA_DEBUG__
5600 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
5601 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
5602 #endif
5603
5604 v->TotalActiveWriteback = 0;
5605 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5606 if (v->WritebackEnable[k] == true) {
5607 v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
5608 }
5609 }
5610
5611 if (v->TotalActiveWriteback <= 1) {
5612 v->WritebackUrgentWatermark = v->WritebackLatency;
5613 } else {
5614 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5615 }
5616
5617 if (v->TotalActiveWriteback <= 1) {
5618 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
5619 } else {
5620 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
5621 }
5622
5623 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5624 TotalPixelBW = TotalPixelBW
5625 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
5626 / (v->HTotal[k] / v->PixelClock[k]);
5627 }
5628
5629 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5630 double EffectiveDETBufferSizeY = DETBufferSizeY[k];
5631
5632 v->LBLatencyHidingSourceLinesY = dml_min(
5633 (double) v->MaxLineBufferLines,
5634 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);
5635
5636 v->LBLatencyHidingSourceLinesC = dml_min(
5637 (double) v->MaxLineBufferLines,
5638 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);
5639
5640 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);
5641
5642 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);
5643
5644 if (UnboundedRequestEnabled) {
5645 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
5646 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
5647 }
5648
5649 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
5650 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
5651 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
5652 if (BytePerPixelDETC[k] > 0) {
5653 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
5654 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
5655 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
5656 } else {
5657 LinesInDETC = 0;
5658 FullDETBufferingTimeC = 999999;
5659 }
5660
5661 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
5662 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5663
5664 if (v->NumberOfActivePlanes > 1) {
5665 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
5666 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
5667 }
5668
5669 if (BytePerPixelDETC[k] > 0) {
5670 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
5671 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;
5672
5673 if (v->NumberOfActivePlanes > 1) {
5674 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
5675 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
5676 }
5677 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
5678 } else {
5679 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
5680 }
5681
5682 if (v->WritebackEnable[k] == true) {
5683 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
5684 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
5685 if (v->WritebackPixelFormat[k] == dm_444_64) {
5686 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
5687 }
5688 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
5689 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
5690 }
5691 }
5692
5693 v->MinActiveDRAMClockChangeMargin = 999999;
5694 PlaneWithMinActiveDRAMClockChangeMargin = 0;
5695 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5696 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
5697 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
5698 if (v->BlendingAndTiming[k] == k) {
5699 PlaneWithMinActiveDRAMClockChangeMargin = k;
5700 } else {
5701 for (j = 0; j < v->NumberOfActivePlanes; ++j) {
5702 if (v->BlendingAndTiming[k] == j) {
5703 PlaneWithMinActiveDRAMClockChangeMargin = j;
5704 }
5705 }
5706 }
5707 }
5708 }
5709
5710 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;
5711
5712 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
5713 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5714 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
5715 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
5716 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
5717 }
5718 }
5719
5720 v->TotalNumberOfActiveOTG = 0;
5721
5722 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
5723 if (v->BlendingAndTiming[k] == k) {
5724 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
5725 }
5726 }
5727
5728 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
5729 *DRAMClockChangeSupport = dm_dram_clock_change_vactive;
5730 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
5731 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
5732 *DRAMClockChangeSupport = dm_dram_clock_change_vblank;
5733 } else {
5734 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
5735 }
5736
5737 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
5738 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
5739 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5740 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
5741
5742 #ifdef __DML_VBA_DEBUG__
5743 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
5744 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
5745 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
5746 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
5747 #endif
5748 }
5749
5750 static void CalculateDCFCLKDeepSleep(
5751 struct display_mode_lib *mode_lib,
5752 unsigned int NumberOfActivePlanes,
5753 int BytePerPixelY[],
5754 int BytePerPixelC[],
5755 double VRatio[],
5756 double VRatioChroma[],
5757 double SwathWidthY[],
5758 double SwathWidthC[],
5759 unsigned int DPPPerPlane[],
5760 double HRatio[],
5761 double HRatioChroma[],
5762 double PixelClock[],
5763 double PSCL_THROUGHPUT[],
5764 double PSCL_THROUGHPUT_CHROMA[],
5765 double DPPCLK[],
5766 double ReadBandwidthLuma[],
5767 double ReadBandwidthChroma[],
5768 int ReturnBusWidth,
5769 double *DCFCLKDeepSleep)
5770 {
5771 struct vba_vars_st *v = &mode_lib->vba;
5772 double DisplayPipeLineDeliveryTimeLuma;
5773 double DisplayPipeLineDeliveryTimeChroma;
5774 double ReadBandwidth = 0.0;
5775 int k;
5776
5777 for (k = 0; k < NumberOfActivePlanes; ++k) {
5778
5779 if (VRatio[k] <= 1) {
5780 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5781 } else {
5782 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5783 }
5784 if (BytePerPixelC[k] == 0) {
5785 DisplayPipeLineDeliveryTimeChroma = 0;
5786 } else {
5787 if (VRatioChroma[k] <= 1) {
5788 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5789 } else {
5790 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5791 }
5792 }
5793
5794 if (BytePerPixelC[k] > 0) {
5795 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
5796 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
5797 } else {
5798 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
5799 }
5800 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);
5801
5802 }
5803
5804 for (k = 0; k < NumberOfActivePlanes; ++k) {
5805 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
5806 }
5807
5808 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);
5809
5810 for (k = 0; k < NumberOfActivePlanes; ++k) {
5811 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
5812 }
5813 }
5814
/*
 * Turn a buffer hiding time into an urgent burst factor.
 *
 * Returns BufferSizeInTime / (BufferSizeInTime - UrgentLatency). When the
 * buffer does not cover more than UrgentLatency, the factor is 0 and
 * *NotEnoughUrgentLatencyHiding is set.
 */
static double UrgentBurstFactorFromBufferTime(
		double BufferSizeInTime,
		double UrgentLatency,
		bool *NotEnoughUrgentLatencyHiding)
{
	if (BufferSizeInTime - UrgentLatency <= 0) {
		*NotEnoughUrgentLatencyHiding = true;
		return 0;
	}

	return BufferSizeInTime / (BufferSizeInTime - UrgentLatency);
}

/*
 * CalculateUrgentBurstFactor - urgent burst factors for cursor, luma and chroma.
 *
 * Outputs:
 *   *NotEnoughUrgentLatencyHiding  cleared on entry, set when any evaluated
 *                                  buffer time is not larger than UrgentLatency.
 *   *UrgentBurstFactorCursor       written only when CursorWidth > 0.
 *   *UrgentBurstFactorLuma         always written.
 *   *UrgentBurstFactorChroma       written only when BytePerPixelInDETC > 0.
 *
 * A factor is 1 when the relevant vertical ratio is not positive, 0 when the
 * buffer cannot hide UrgentLatency, and time / (time - UrgentLatency) otherwise.
 *
 * The chroma factor is derived from VRatioC. The luma VRatio is only used for
 * the cursor and luma factors.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma;
	double LinesInDETChroma;
	unsigned int LinesInCursorBuffer;
	double CursorBufferSizeInTime;
	double DETBufferSizeInTimeLuma;
	double DETBufferSizeInTimeChroma;

	*NotEnoughUrgentLatencyHiding = false;

	if (CursorWidth > 0) {
		/* Cursor line count is rounded down to a power of two. */
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			*UrgentBurstFactorCursor = UrgentBurstFactorFromBufferTime(CursorBufferSizeInTime,
					UrgentLatency, NotEnoughUrgentLatencyHiding);
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		/* Only whole swaths count towards the hiding time. */
		DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio;
		*UrgentBurstFactorLuma = UrgentBurstFactorFromBufferTime(DETBufferSizeInTimeLuma,
				UrgentLatency, NotEnoughUrgentLatencyHiding);
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
		/* The chroma path is gated and scaled by VRatioC, not by VRatio. */
		if (VRatioC > 0) {
			DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatioC;
			*UrgentBurstFactorChroma = UrgentBurstFactorFromBufferTime(DETBufferSizeInTimeChroma,
					UrgentLatency, NotEnoughUrgentLatencyHiding);
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
5888
5889 static void CalculatePixelDeliveryTimes(
5890 unsigned int NumberOfActivePlanes,
5891 double VRatio[],
5892 double VRatioChroma[],
5893 double VRatioPrefetchY[],
5894 double VRatioPrefetchC[],
5895 unsigned int swath_width_luma_ub[],
5896 unsigned int swath_width_chroma_ub[],
5897 unsigned int DPPPerPlane[],
5898 double HRatio[],
5899 double HRatioChroma[],
5900 double PixelClock[],
5901 double PSCL_THROUGHPUT[],
5902 double PSCL_THROUGHPUT_CHROMA[],
5903 double DPPCLK[],
5904 int BytePerPixelC[],
5905 enum scan_direction_class SourceScan[],
5906 unsigned int NumberOfCursors[],
5907 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
5908 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
5909 unsigned int BlockWidth256BytesY[],
5910 unsigned int BlockHeight256BytesY[],
5911 unsigned int BlockWidth256BytesC[],
5912 unsigned int BlockHeight256BytesC[],
5913 double DisplayPipeLineDeliveryTimeLuma[],
5914 double DisplayPipeLineDeliveryTimeChroma[],
5915 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
5916 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
5917 double DisplayPipeRequestDeliveryTimeLuma[],
5918 double DisplayPipeRequestDeliveryTimeChroma[],
5919 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
5920 double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
5921 double CursorRequestDeliveryTime[],
5922 double CursorRequestDeliveryTimePrefetch[])
5923 {
5924 double req_per_swath_ub;
5925 int k;
5926
5927 for (k = 0; k < NumberOfActivePlanes; ++k) {
5928 if (VRatio[k] <= 1) {
5929 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5930 } else {
5931 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5932 }
5933
5934 if (BytePerPixelC[k] == 0) {
5935 DisplayPipeLineDeliveryTimeChroma[k] = 0;
5936 } else {
5937 if (VRatioChroma[k] <= 1) {
5938 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5939 } else {
5940 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5941 }
5942 }
5943
5944 if (VRatioPrefetchY[k] <= 1) {
5945 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
5946 } else {
5947 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
5948 }
5949
5950 if (BytePerPixelC[k] == 0) {
5951 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
5952 } else {
5953 if (VRatioPrefetchC[k] <= 1) {
5954 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
5955 } else {
5956 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
5957 }
5958 }
5959 }
5960
5961 for (k = 0; k < NumberOfActivePlanes; ++k) {
5962 if (SourceScan[k] != dm_vert) {
5963 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k];
5964 } else {
5965 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k];
5966 }
5967 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub;
5968 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub;
5969 if (BytePerPixelC[k] == 0) {
5970 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
5971 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
5972 } else {
5973 if (SourceScan[k] != dm_vert) {
5974 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
5975 } else {
5976 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k];
5977 }
5978 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub;
5979 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub;
5980 }
5981 #ifdef __DML_VBA_DEBUG__
5982 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
5983 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
5984 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
5985 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
5986 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
5987 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
5988 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
5989 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
5990 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
5991 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
5992 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
5993 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
5994 #endif
5995 }
5996
5997 for (k = 0; k < NumberOfActivePlanes; ++k) {
5998 int cursor_req_per_width;
5999 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);
6000 if (NumberOfCursors[k] > 0) {
6001 if (VRatio[k] <= 1) {
6002 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6003 } else {
6004 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6005 }
6006 if (VRatioPrefetchY[k] <= 1) {
6007 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width;
6008 } else {
6009 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width;
6010 }
6011 } else {
6012 CursorRequestDeliveryTime[k] = 0;
6013 CursorRequestDeliveryTimePrefetch[k] = 0;
6014 }
6015 #ifdef __DML_VBA_DEBUG__
6016 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
6017 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
6018 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
6019 #endif
6020 }
6021 }
6022
6023 static void CalculateMetaAndPTETimes(
6024 int NumberOfActivePlanes,
6025 bool GPUVMEnable,
6026 int MetaChunkSize,
6027 int MinMetaChunkSizeBytes,
6028 int HTotal[],
6029 double VRatio[],
6030 double VRatioChroma[],
6031 double DestinationLinesToRequestRowInVBlank[],
6032 double DestinationLinesToRequestRowInImmediateFlip[],
6033 bool DCCEnable[],
6034 double PixelClock[],
6035 int BytePerPixelY[],
6036 int BytePerPixelC[],
6037 enum scan_direction_class SourceScan[],
6038 int dpte_row_height[],
6039 int dpte_row_height_chroma[],
6040 int meta_row_width[],
6041 int meta_row_width_chroma[],
6042 int meta_row_height[],
6043 int meta_row_height_chroma[],
6044 int meta_req_width[],
6045 int meta_req_width_chroma[],
6046 int meta_req_height[],
6047 int meta_req_height_chroma[],
6048 int dpte_group_bytes[],
6049 int PTERequestSizeY[],
6050 int PTERequestSizeC[],
6051 int PixelPTEReqWidthY[],
6052 int PixelPTEReqHeightY[],
6053 int PixelPTEReqWidthC[],
6054 int PixelPTEReqHeightC[],
6055 int dpte_row_width_luma_ub[],
6056 int dpte_row_width_chroma_ub[],
6057 double DST_Y_PER_PTE_ROW_NOM_L[],
6058 double DST_Y_PER_PTE_ROW_NOM_C[],
6059 double DST_Y_PER_META_ROW_NOM_L[],
6060 double DST_Y_PER_META_ROW_NOM_C[],
6061 double TimePerMetaChunkNominal[],
6062 double TimePerChromaMetaChunkNominal[],
6063 double TimePerMetaChunkVBlank[],
6064 double TimePerChromaMetaChunkVBlank[],
6065 double TimePerMetaChunkFlip[],
6066 double TimePerChromaMetaChunkFlip[],
6067 double time_per_pte_group_nom_luma[],
6068 double time_per_pte_group_vblank_luma[],
6069 double time_per_pte_group_flip_luma[],
6070 double time_per_pte_group_nom_chroma[],
6071 double time_per_pte_group_vblank_chroma[],
6072 double time_per_pte_group_flip_chroma[])
6073 {
6074 unsigned int meta_chunk_width;
6075 unsigned int min_meta_chunk_width;
6076 unsigned int meta_chunk_per_row_int;
6077 unsigned int meta_row_remainder;
6078 unsigned int meta_chunk_threshold;
6079 unsigned int meta_chunks_per_row_ub;
6080 unsigned int meta_chunk_width_chroma;
6081 unsigned int min_meta_chunk_width_chroma;
6082 unsigned int meta_chunk_per_row_int_chroma;
6083 unsigned int meta_row_remainder_chroma;
6084 unsigned int meta_chunk_threshold_chroma;
6085 unsigned int meta_chunks_per_row_ub_chroma;
6086 unsigned int dpte_group_width_luma;
6087 unsigned int dpte_groups_per_row_luma_ub;
6088 unsigned int dpte_group_width_chroma;
6089 unsigned int dpte_groups_per_row_chroma_ub;
6090 int k;
6091
6092 for (k = 0; k < NumberOfActivePlanes; ++k) {
6093 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
6094 if (BytePerPixelC[k] == 0) {
6095 DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
6096 } else {
6097 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k];
6098 }
6099 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
6100 if (BytePerPixelC[k] == 0) {
6101 DST_Y_PER_META_ROW_NOM_C[k] = 0;
6102 } else {
6103 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k];
6104 }
6105 }
6106
6107 for (k = 0; k < NumberOfActivePlanes; ++k) {
6108 if (DCCEnable[k] == true) {
6109 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
6110 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
6111 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width;
6112 meta_row_remainder = meta_row_width[k] % meta_chunk_width;
6113 if (SourceScan[k] != dm_vert) {
6114 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k];
6115 } else {
6116 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k];
6117 }
6118 if (meta_row_remainder <= meta_chunk_threshold) {
6119 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
6120 } else {
6121 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
6122 }
6123 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6124 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6125 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub;
6126 if (BytePerPixelC[k] == 0) {
6127 TimePerChromaMetaChunkNominal[k] = 0;
6128 TimePerChromaMetaChunkVBlank[k] = 0;
6129 TimePerChromaMetaChunkFlip[k] = 0;
6130 } else {
6131 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6132 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
6133 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma;
6134 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma;
6135 if (SourceScan[k] != dm_vert) {
6136 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k];
6137 } else {
6138 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k];
6139 }
6140 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
6141 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
6142 } else {
6143 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
6144 }
6145 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6146 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6147 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma;
6148 }
6149 } else {
6150 TimePerMetaChunkNominal[k] = 0;
6151 TimePerMetaChunkVBlank[k] = 0;
6152 TimePerMetaChunkFlip[k] = 0;
6153 TimePerChromaMetaChunkNominal[k] = 0;
6154 TimePerChromaMetaChunkVBlank[k] = 0;
6155 TimePerChromaMetaChunkFlip[k] = 0;
6156 }
6157 }
6158
6159 for (k = 0; k < NumberOfActivePlanes; ++k) {
6160 if (GPUVMEnable == true) {
6161 if (SourceScan[k] != dm_vert) {
6162 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k];
6163 } else {
6164 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k];
6165 }
6166 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1);
6167 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6168 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6169 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub;
6170 if (BytePerPixelC[k] == 0) {
6171 time_per_pte_group_nom_chroma[k] = 0;
6172 time_per_pte_group_vblank_chroma[k] = 0;
6173 time_per_pte_group_flip_chroma[k] = 0;
6174 } else {
6175 if (SourceScan[k] != dm_vert) {
6176 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k];
6177 } else {
6178 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k];
6179 }
6180 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1);
6181 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6182 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6183 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub;
6184 }
6185 } else {
6186 time_per_pte_group_nom_luma[k] = 0;
6187 time_per_pte_group_vblank_luma[k] = 0;
6188 time_per_pte_group_flip_luma[k] = 0;
6189 time_per_pte_group_nom_chroma[k] = 0;
6190 time_per_pte_group_vblank_chroma[k] = 0;
6191 time_per_pte_group_flip_chroma[k] = 0;
6192 }
6193 }
6194 }
6195
/*
 * CalculateVMGroupAndRequestTimes - per-plane time per VM group and VM request.
 *
 * A plane gets non-zero results only when GPUVMEnable is set and either
 * DCCEnable[k] is set or GPUVMMaxPageTableLevels > 1; otherwise all four
 * outputs for that plane are 0.
 *
 * The group and request counts are built from:
 *   - DCC off:                 the dpde0 byte counts,
 *   - DCC on, one table level: the meta PTE byte counts,
 *   - DCC on otherwise:        both, plus 1 extra group (2 with chroma).
 * Chroma byte counts are included only when BytePerPixelC[k] > 0. Groups are
 * bytes / vm_group_bytes[k] rounded up; requests are bytes / 64 (integer
 * division).
 *
 * All four times are halved when GPUVMMaxPageTableLevels > 2.
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are not read here.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		int GroupsPerStage;
		int RequestsPerStage;
		bool HasChroma;

		/* Nothing to schedule for this plane: report zero times. */
		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		HasChroma = BytePerPixelC[k] > 0;

		if (!DCCEnable[k]) {
			/* Page directory entries only. */
			if (HasChroma) {
				GroupsPerStage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				RequestsPerStage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64;
			} else {
				GroupsPerStage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				RequestsPerStage = dpde0_bytes_per_frame_ub_l[k] / 64;
			}
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* Meta PTEs only. */
			if (HasChroma) {
				GroupsPerStage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				RequestsPerStage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				GroupsPerStage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				RequestsPerStage = meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		} else {
			/* Both kinds, plus the fixed extra groups. */
			if (HasChroma) {
				GroupsPerStage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
				RequestsPerStage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
						+ meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64;
			} else {
				GroupsPerStage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
						+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
				RequestsPerStage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
			}
		}

		TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / GroupsPerStage;
		TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / GroupsPerStage;
		TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / RequestsPerStage;
		TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / RequestsPerStage;

		/* More than two page table levels: every time budget is halved. */
		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
			TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
			TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
			TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
		}
	}
}
6295
6296 static void CalculateStutterEfficiency(
6297 struct display_mode_lib *mode_lib,
6298 int CompressedBufferSizeInkByte,
6299 bool UnboundedRequestEnabled,
6300 int ConfigReturnBufferSizeInKByte,
6301 int MetaFIFOSizeInKEntries,
6302 int ZeroSizeBufferEntries,
6303 int NumberOfActivePlanes,
6304 int ROBBufferSizeInKByte,
6305 double TotalDataReadBandwidth,
6306 double DCFCLK,
6307 double ReturnBW,
6308 double COMPBUF_RESERVED_SPACE_64B,
6309 double COMPBUF_RESERVED_SPACE_ZS,
6310 double SRExitTime,
6311 double SRExitZ8Time,
6312 bool SynchronizedVBlank,
6313 double Z8StutterEnterPlusExitWatermark,
6314 double StutterEnterPlusExitWatermark,
6315 bool ProgressiveToInterlaceUnitInOPP,
6316 bool Interlace[],
6317 double MinTTUVBlank[],
6318 int DPPPerPlane[],
6319 unsigned int DETBufferSizeY[],
6320 int BytePerPixelY[],
6321 double BytePerPixelDETY[],
6322 double SwathWidthY[],
6323 int SwathHeightY[],
6324 int SwathHeightC[],
6325 double NetDCCRateLuma[],
6326 double NetDCCRateChroma[],
6327 double DCCFractionOfZeroSizeRequestsLuma[],
6328 double DCCFractionOfZeroSizeRequestsChroma[],
6329 int HTotal[],
6330 int VTotal[],
6331 double PixelClock[],
6332 double VRatio[],
6333 enum scan_direction_class SourceScan[],
6334 int BlockHeight256BytesY[],
6335 int BlockWidth256BytesY[],
6336 int BlockHeight256BytesC[],
6337 int BlockWidth256BytesC[],
6338 int DCCYMaxUncompressedBlock[],
6339 int DCCCMaxUncompressedBlock[],
6340 int VActive[],
6341 bool DCCEnable[],
6342 bool WritebackEnable[],
6343 double ReadBandwidthPlaneLuma[],
6344 double ReadBandwidthPlaneChroma[],
6345 double meta_row_bw[],
6346 double dpte_row_bw[],
6347 double *StutterEfficiencyNotIncludingVBlank,
6348 double *StutterEfficiency,
6349 int *NumberOfStutterBurstsPerFrame,
6350 double *Z8StutterEfficiencyNotIncludingVBlank,
6351 double *Z8StutterEfficiency,
6352 int *Z8NumberOfStutterBurstsPerFrame,
6353 double *StutterPeriod)
6354 {
6355 struct vba_vars_st *v = &mode_lib->vba;
6356
6357 double DETBufferingTimeY;
6358 double SwathWidthYCriticalPlane = 0;
6359 double VActiveTimeCriticalPlane = 0;
6360 double FrameTimeCriticalPlane = 0;
6361 int BytePerPixelYCriticalPlane = 0;
6362 double LinesToFinishSwathTransferStutterCriticalPlane = 0;
6363 double MinTTUVBlankCriticalPlane = 0;
6364 double TotalCompressedReadBandwidth;
6365 double TotalRowReadBandwidth;
6366 double AverageDCCCompressionRate;
6367 double EffectiveCompressedBufferSize;
6368 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer;
6369 double StutterBurstTime;
6370 int TotalActiveWriteback;
6371 double LinesInDETY;
6372 double LinesInDETYRoundedDownToSwath;
6373 double MaximumEffectiveCompressionLuma;
6374 double MaximumEffectiveCompressionChroma;
6375 double TotalZeroSizeRequestReadBandwidth;
6376 double TotalZeroSizeCompressedReadBandwidth;
6377 double AverageDCCZeroSizeFraction;
6378 double AverageZeroSizeCompressionRate;
6379 int TotalNumberOfActiveOTG = 0;
6380 double LastStutterPeriod = 0.0;
6381 double LastZ8StutterPeriod = 0.0;
6382 int k;
6383
6384 TotalZeroSizeRequestReadBandwidth = 0;
6385 TotalZeroSizeCompressedReadBandwidth = 0;
6386 TotalRowReadBandwidth = 0;
6387 TotalCompressedReadBandwidth = 0;
6388
6389 for (k = 0; k < NumberOfActivePlanes; ++k) {
6390 if (DCCEnable[k] == true) {
6391 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k])
6392 || DCCYMaxUncompressedBlock[k] < 256) {
6393 MaximumEffectiveCompressionLuma = 2;
6394 } else {
6395 MaximumEffectiveCompressionLuma = 4;
6396 }
6397 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma);
6398 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k];
6399 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6400 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma;
6401 if (ReadBandwidthPlaneChroma[k] > 0) {
6402 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k])
6403 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) {
6404 MaximumEffectiveCompressionChroma = 2;
6405 } else {
6406 MaximumEffectiveCompressionChroma = 4;
6407 }
6408 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth
6409 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma);
6410 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k];
6411 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth
6412 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma;
6413 }
6414 } else {
6415 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k];
6416 }
6417 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]);
6418 }
6419
6420 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth;
6421 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth;
6422
6423 #ifdef __DML_VBA_DEBUG__
6424 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth);
6425 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth);
6426 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth);
6427 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma);
6428 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma);
6429 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6430 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction);
6431 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte);
6432 #endif
6433
6434 if (AverageDCCZeroSizeFraction == 1) {
6435 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6436 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate;
6437 } else if (AverageDCCZeroSizeFraction > 0) {
6438 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth;
6439 EffectiveCompressedBufferSize = dml_min(
6440 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6441 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate))
6442 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate,
6443 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6444 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6445 dml_print(
6446 "DML::%s: min 2 = %f\n",
6447 __func__,
6448 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate));
6449 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate);
6450 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate));
6451 } else {
6452 EffectiveCompressedBufferSize = dml_min(
6453 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate,
6454 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate;
6455 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate);
6456 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate);
6457 }
6458
6459 #ifdef __DML_VBA_DEBUG__
6460 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries);
6461 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate);
6462 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6463 #endif
6464
6465 *StutterPeriod = 0;
6466 for (k = 0; k < NumberOfActivePlanes; ++k) {
6467 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth)
6468 / BytePerPixelDETY[k] / SwathWidthY[k];
6469 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]);
6470 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k];
6471 #ifdef __DML_VBA_DEBUG__
6472 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]);
6473 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]);
6474 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]);
6475 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]);
6476 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth);
6477 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY);
6478 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath);
6479 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]);
6480 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6481 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]);
6482 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY);
6483 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]);
6484 #endif
6485
6486 if (k == 0 || DETBufferingTimeY < *StutterPeriod) {
6487 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP;
6488
6489 *StutterPeriod = DETBufferingTimeY;
6490 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k];
6491 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k];
6492 BytePerPixelYCriticalPlane = BytePerPixelY[k];
6493 SwathWidthYCriticalPlane = SwathWidthY[k];
6494 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath);
6495 MinTTUVBlankCriticalPlane = MinTTUVBlank[k];
6496
6497 #ifdef __DML_VBA_DEBUG__
6498 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6499 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane);
6500 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane);
6501 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6502 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane);
6503 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane);
6504 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane);
6505 #endif
6506 }
6507 }
6508
6509 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize);
6510 #ifdef __DML_VBA_DEBUG__
6511 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte);
6512 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate);
6513 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth);
6514 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize);
6515 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize);
6516 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer);
6517 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW);
6518 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth);
6519 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth);
6520 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK);
6521 #endif
6522
6523 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW
6524 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)
6525 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW;
6526 #ifdef __DML_VBA_DEBUG__
6527 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW);
6528 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth));
6529 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64));
6530 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW);
6531 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6532 #endif
6533 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6534
6535 dml_print(
6536 "DML::%s: Time to finish residue swath=%f\n",
6537 __func__,
6538 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW);
6539
6540 TotalActiveWriteback = 0;
6541 for (k = 0; k < NumberOfActivePlanes; ++k) {
6542 if (WritebackEnable[k]) {
6543 TotalActiveWriteback = TotalActiveWriteback + 1;
6544 }
6545 }
6546
6547 if (TotalActiveWriteback == 0) {
6548 #ifdef __DML_VBA_DEBUG__
6549 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime);
6550 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time);
6551 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime);
6552 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6553 #endif
6554 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100;
6555 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100;
6556 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6557 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0);
6558 } else {
6559 *StutterEfficiencyNotIncludingVBlank = 0.;
6560 *Z8StutterEfficiencyNotIncludingVBlank = 0.;
6561 *NumberOfStutterBurstsPerFrame = 0;
6562 *Z8NumberOfStutterBurstsPerFrame = 0;
6563 }
6564 #ifdef __DML_VBA_DEBUG__
6565 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane);
6566 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6567 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank);
6568 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame);
6569 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6570 #endif
6571
6572 for (k = 0; k < NumberOfActivePlanes; ++k) {
6573 if (v->BlendingAndTiming[k] == k) {
6574 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
6575 }
6576 }
6577
6578 if (*StutterEfficiencyNotIncludingVBlank > 0) {
6579 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6580
6581 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) {
6582 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane
6583 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6584 } else {
6585 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
6586 }
6587 } else {
6588 *StutterEfficiency = 0;
6589 }
6590
6591 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
6592 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
6593 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
6594 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane
6595 / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
6596 } else {
6597 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
6598 }
6599 } else {
6600 *Z8StutterEfficiency = 0.;
6601 }
6602
6603 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
6604 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
6605 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
6606 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
6607 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
6608 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
6609 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
6610 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
6611 }
6612
6613 static void CalculateSwathAndDETConfiguration(
6614 bool ForceSingleDPP,
6615 int NumberOfActivePlanes,
6616 bool DETSharedByAllDPP,
6617 unsigned int DETBufferSizeInKByteA[],
6618 double MaximumSwathWidthLuma[],
6619 double MaximumSwathWidthChroma[],
6620 enum scan_direction_class SourceScan[],
6621 enum source_format_class SourcePixelFormat[],
6622 enum dm_swizzle_mode SurfaceTiling[],
6623 int ViewportWidth[],
6624 int ViewportHeight[],
6625 int SurfaceWidthY[],
6626 int SurfaceWidthC[],
6627 int SurfaceHeightY[],
6628 int SurfaceHeightC[],
6629 int Read256BytesBlockHeightY[],
6630 int Read256BytesBlockHeightC[],
6631 int Read256BytesBlockWidthY[],
6632 int Read256BytesBlockWidthC[],
6633 enum odm_combine_mode ODMCombineEnabled[],
6634 int BlendingAndTiming[],
6635 int BytePerPixY[],
6636 int BytePerPixC[],
6637 double BytePerPixDETY[],
6638 double BytePerPixDETC[],
6639 int HActive[],
6640 double HRatio[],
6641 double HRatioChroma[],
6642 int DPPPerPlane[],
6643 int swath_width_luma_ub[],
6644 int swath_width_chroma_ub[],
6645 double SwathWidth[],
6646 double SwathWidthChroma[],
6647 int SwathHeightY[],
6648 int SwathHeightC[],
6649 unsigned int DETBufferSizeY[],
6650 unsigned int DETBufferSizeC[],
6651 bool ViewportSizeSupportPerPlane[],
6652 bool *ViewportSizeSupport)
6653 {
6654 int MaximumSwathHeightY[DC__NUM_DPP__MAX];
6655 int MaximumSwathHeightC[DC__NUM_DPP__MAX];
6656 int MinimumSwathHeightY;
6657 int MinimumSwathHeightC;
6658 int RoundedUpMaxSwathSizeBytesY;
6659 int RoundedUpMaxSwathSizeBytesC;
6660 int RoundedUpMinSwathSizeBytesY;
6661 int RoundedUpMinSwathSizeBytesC;
6662 int RoundedUpSwathSizeBytesY;
6663 int RoundedUpSwathSizeBytesC;
6664 double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
6665 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
6666 int k;
6667
6668 CalculateSwathWidth(
6669 ForceSingleDPP,
6670 NumberOfActivePlanes,
6671 SourcePixelFormat,
6672 SourceScan,
6673 ViewportWidth,
6674 ViewportHeight,
6675 SurfaceWidthY,
6676 SurfaceWidthC,
6677 SurfaceHeightY,
6678 SurfaceHeightC,
6679 ODMCombineEnabled,
6680 BytePerPixY,
6681 BytePerPixC,
6682 Read256BytesBlockHeightY,
6683 Read256BytesBlockHeightC,
6684 Read256BytesBlockWidthY,
6685 Read256BytesBlockWidthC,
6686 BlendingAndTiming,
6687 HActive,
6688 HRatio,
6689 DPPPerPlane,
6690 SwathWidthSingleDPP,
6691 SwathWidthSingleDPPChroma,
6692 SwathWidth,
6693 SwathWidthChroma,
6694 MaximumSwathHeightY,
6695 MaximumSwathHeightC,
6696 swath_width_luma_ub,
6697 swath_width_chroma_ub);
6698
6699 *ViewportSizeSupport = true;
6700 for (k = 0; k < NumberOfActivePlanes; ++k) {
6701 unsigned int DETBufferSizeInKByte = DETBufferSizeInKByteA[k];
6702
6703 if (DETSharedByAllDPP && DPPPerPlane[k])
6704 DETBufferSizeInKByte /= DPPPerPlane[k];
6705 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
6706 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
6707 if (SurfaceTiling[k] == dm_sw_linear
6708 || (SourcePixelFormat[k] == dm_444_64
6709 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
6710 && SourceScan[k] != dm_vert)) {
6711 MinimumSwathHeightY = MaximumSwathHeightY[k];
6712 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
6713 MinimumSwathHeightY = MaximumSwathHeightY[k];
6714 } else {
6715 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6716 }
6717 MinimumSwathHeightC = MaximumSwathHeightC[k];
6718 } else {
6719 if (SurfaceTiling[k] == dm_sw_linear) {
6720 MinimumSwathHeightY = MaximumSwathHeightY[k];
6721 MinimumSwathHeightC = MaximumSwathHeightC[k];
6722 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
6723 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6724 MinimumSwathHeightC = MaximumSwathHeightC[k];
6725 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
6726 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6727 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6728 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
6729 MinimumSwathHeightY = MaximumSwathHeightY[k];
6730 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6731 } else {
6732 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
6733 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
6734 }
6735 }
6736
6737 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
6738 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
6739 if (SourcePixelFormat[k] == dm_420_10) {
6740 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
6741 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
6742 }
6743 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
6744 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
6745 if (SourcePixelFormat[k] == dm_420_10) {
6746 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
6747 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
6748 }
6749
6750 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6751 SwathHeightY[k] = MaximumSwathHeightY[k];
6752 SwathHeightC[k] = MaximumSwathHeightC[k];
6753 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6754 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6755 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
6756 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6757 SwathHeightY[k] = MinimumSwathHeightY;
6758 SwathHeightC[k] = MaximumSwathHeightC[k];
6759 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6760 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
6761 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
6762 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
6763 SwathHeightY[k] = MaximumSwathHeightY[k];
6764 SwathHeightC[k] = MinimumSwathHeightC;
6765 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
6766 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6767 } else {
6768 SwathHeightY[k] = MinimumSwathHeightY;
6769 SwathHeightC[k] = MinimumSwathHeightC;
6770 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
6771 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
6772 }
6773 {
6774 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
6775 if (SwathHeightC[k] == 0) {
6776 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
6777 DETBufferSizeC[k] = 0;
6778 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
6779 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
6780 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
6781 } else {
6782 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
6783 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
6784 }
6785
6786 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
6787 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
6788 *ViewportSizeSupport = false;
6789 ViewportSizeSupportPerPlane[k] = false;
6790 } else {
6791 ViewportSizeSupportPerPlane[k] = true;
6792 }
6793 }
6794 }
6795 }
6796
6797 static void CalculateSwathWidth(
6798 bool ForceSingleDPP,
6799 int NumberOfActivePlanes,
6800 enum source_format_class SourcePixelFormat[],
6801 enum scan_direction_class SourceScan[],
6802 int ViewportWidth[],
6803 int ViewportHeight[],
6804 int SurfaceWidthY[],
6805 int SurfaceWidthC[],
6806 int SurfaceHeightY[],
6807 int SurfaceHeightC[],
6808 enum odm_combine_mode ODMCombineEnabled[],
6809 int BytePerPixY[],
6810 int BytePerPixC[],
6811 int Read256BytesBlockHeightY[],
6812 int Read256BytesBlockHeightC[],
6813 int Read256BytesBlockWidthY[],
6814 int Read256BytesBlockWidthC[],
6815 int BlendingAndTiming[],
6816 int HActive[],
6817 double HRatio[],
6818 int DPPPerPlane[],
6819 double SwathWidthSingleDPPY[],
6820 double SwathWidthSingleDPPC[],
6821 double SwathWidthY[],
6822 double SwathWidthC[],
6823 int MaximumSwathHeightY[],
6824 int MaximumSwathHeightC[],
6825 int swath_width_luma_ub[],
6826 int swath_width_chroma_ub[])
6827 {
6828 enum odm_combine_mode MainPlaneODMCombine;
6829 int j, k;
6830
6831 #ifdef __DML_VBA_DEBUG__
6832 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
6833 #endif
6834
6835 for (k = 0; k < NumberOfActivePlanes; ++k) {
6836 if (SourceScan[k] != dm_vert) {
6837 SwathWidthSingleDPPY[k] = ViewportWidth[k];
6838 } else {
6839 SwathWidthSingleDPPY[k] = ViewportHeight[k];
6840 }
6841
6842 #ifdef __DML_VBA_DEBUG__
6843 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
6844 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
6845 #endif
6846
6847 MainPlaneODMCombine = ODMCombineEnabled[k];
6848 for (j = 0; j < NumberOfActivePlanes; ++j) {
6849 if (BlendingAndTiming[k] == j) {
6850 MainPlaneODMCombine = ODMCombineEnabled[j];
6851 }
6852 }
6853
6854 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
6855 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
6856 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
6857 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
6858 } else if (DPPPerPlane[k] == 2) {
6859 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
6860 } else {
6861 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6862 }
6863
6864 #ifdef __DML_VBA_DEBUG__
6865 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
6866 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
6867 #endif
6868
6869 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
6870 SwathWidthC[k] = SwathWidthY[k] / 2;
6871 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
6872 } else {
6873 SwathWidthC[k] = SwathWidthY[k];
6874 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
6875 }
6876
6877 if (ForceSingleDPP == true) {
6878 SwathWidthY[k] = SwathWidthSingleDPPY[k];
6879 SwathWidthC[k] = SwathWidthSingleDPPC[k];
6880 }
6881 {
6882 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
6883 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
6884
6885 #ifdef __DML_VBA_DEBUG__
6886 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
6887 #endif
6888
6889 if (SourceScan[k] != dm_vert) {
6890 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
6891 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
6892 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
6893 if (BytePerPixC[k] > 0) {
6894 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
6895
6896 swath_width_chroma_ub[k] = dml_min(
6897 surface_width_ub_c,
6898 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
6899 } else {
6900 swath_width_chroma_ub[k] = 0;
6901 }
6902 } else {
6903 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
6904 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
6905 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
6906 if (BytePerPixC[k] > 0) {
6907 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
6908
6909 swath_width_chroma_ub[k] = dml_min(
6910 surface_height_ub_c,
6911 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
6912 } else {
6913 swath_width_chroma_ub[k] = 0;
6914 }
6915 }
6916 }
6917 }
6918 }
6919
6920 static double CalculateExtraLatency(
6921 int RoundTripPingLatencyCycles,
6922 int ReorderingBytes,
6923 double DCFCLK,
6924 int TotalNumberOfActiveDPP,
6925 int PixelChunkSizeInKByte,
6926 int TotalNumberOfDCCActiveDPP,
6927 int MetaChunkSize,
6928 double ReturnBW,
6929 bool GPUVMEnable,
6930 bool HostVMEnable,
6931 int NumberOfActivePlanes,
6932 int NumberOfDPP[],
6933 int dpte_group_bytes[],
6934 double HostVMInefficiencyFactor,
6935 double HostVMMinPageSize,
6936 int HostVMMaxNonCachedPageTableLevels)
6937 {
6938 double ExtraLatencyBytes;
6939 double ExtraLatency;
6940
6941 ExtraLatencyBytes = CalculateExtraLatencyBytes(
6942 ReorderingBytes,
6943 TotalNumberOfActiveDPP,
6944 PixelChunkSizeInKByte,
6945 TotalNumberOfDCCActiveDPP,
6946 MetaChunkSize,
6947 GPUVMEnable,
6948 HostVMEnable,
6949 NumberOfActivePlanes,
6950 NumberOfDPP,
6951 dpte_group_bytes,
6952 HostVMInefficiencyFactor,
6953 HostVMMinPageSize,
6954 HostVMMaxNonCachedPageTableLevels);
6955
6956 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;
6957
6958 #ifdef __DML_VBA_DEBUG__
6959 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
6960 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
6961 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
6962 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
6963 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
6964 #endif
6965
6966 return ExtraLatency;
6967 }
6968
/*
 * CalculateExtraLatencyBytes - byte count used by the extra-latency formulas.
 *
 * The base amount is ReorderingBytes plus 1024 times
 * (TotalNumberOfActiveDPP * PixelChunkSizeInKByte +
 *  TotalNumberOfDCCActiveDPP * MetaChunkSize).
 *
 * When GPUVMEnable is set, every active plane additionally contributes
 * NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) *
 * HostVMInefficiencyFactor.  HostVMDynamicLevels is 0 unless both GPUVMEnable
 * and HostVMEnable are set; in that case it is
 * HostVMMaxNonCachedPageTableLevels reduced by 0, 1 or 2 (clamped at 0 for
 * the latter two) depending on HostVMMinPageSize.
 *
 * Returns the accumulated byte count.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	double TotalBytes;
	int HostVMDynamicLevels = 0;
	int k;

	if (GPUVMEnable && HostVMEnable) {
		/* Thresholds are compared against HostVMMinPageSize as given. */
		if (HostVMMinPageSize < 2048)
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize < 1048576)
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	}

	TotalBytes = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Without GPUVM there is no per-plane dpte contribution. */
	if (!GPUVMEnable)
		return TotalBytes;

	for (k = 0; k < NumberOfActivePlanes; ++k)
		TotalBytes += NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;

	return TotalBytes;
}
7008
/*
 * CalculateUrgentLatency - largest of the three urgent latency inputs, with
 * an optional fabric-clock dependent adjustment.
 *
 * When DoUrgentLatencyAdjustment is set,
 * UrgentLatencyAdjustmentFabricClockComponent *
 * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1)
 * is added to the maximum.
 *
 * Returns the resulting latency.
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double Latency = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);

	if (DoUrgentLatencyAdjustment)
		Latency += UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);

	return Latency;
}
7026
7027 static noinline_for_stack void UseMinimumDCFCLK(
7028 struct display_mode_lib *mode_lib,
7029 int MaxPrefetchMode,
7030 int ReorderingBytes)
7031 {
7032 struct vba_vars_st *v = &mode_lib->vba;
7033 int dummy1, i, j, k;
7034 double NormalEfficiency, dummy2, dummy3;
7035 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
7036
7037 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
7038 for (i = 0; i < v->soc.num_states; ++i) {
7039 for (j = 0; j <= 1; ++j) {
7040 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
7041 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
7042 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
7043 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
7044 double MinimumTWait;
7045 double NonDPTEBandwidth;
7046 double DPTEBandwidth;
7047 double DCFCLKRequiredForAverageBandwidth;
7048 double ExtraLatencyBytes;
7049 double ExtraLatencyCycles;
7050 double DCFCLKRequiredForPeakBandwidth;
7051 int NoOfDPPState[DC__NUM_DPP__MAX];
7052 double MinimumTvmPlus2Tr0;
7053
7054 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
7055 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7056 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
7057 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
7058 }
7059
7060 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7061 NoOfDPPState[k] = v->NoOfDPP[i][j][k];
7062 }
7063
7064 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
7065 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
7066 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
7067 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
7068 DCFCLKRequiredForAverageBandwidth = dml_max3(
7069 v->ProjectedDCFCLKDeepSleep[i][j],
7070 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
7071 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
7072 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
7073
7074 ExtraLatencyBytes = CalculateExtraLatencyBytes(
7075 ReorderingBytes,
7076 v->TotalNumberOfActiveDPP[i][j],
7077 v->PixelChunkSizeInKByte,
7078 v->TotalNumberOfDCCActiveDPP[i][j],
7079 v->MetaChunkSize,
7080 v->GPUVMEnable,
7081 v->HostVMEnable,
7082 v->NumberOfActivePlanes,
7083 NoOfDPPState,
7084 v->dpte_group_bytes,
7085 1,
7086 v->HostVMMinPageSize,
7087 v->HostVMMaxNonCachedPageTableLevels);
7088 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
7089 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7090 double DCFCLKCyclesRequiredInPrefetch;
7091 double ExpectedPrefetchBWAcceleration;
7092 double PrefetchTime;
7093
7094 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
7095 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
7096 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
7097 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
7098 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
7099 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
7100 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
7101 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
7102 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
7103 DynamicMetadataVMExtraLatency[k] =
7104 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
7105 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
7106 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
7107 - v->UrgLatency[i]
7108 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
7109 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
7110 - DynamicMetadataVMExtraLatency[k];
7111
7112 if (PrefetchTime > 0) {
7113 double ExpectedVRatioPrefetch;
7114 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
7115 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
7116 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
7117 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
7118 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
7119 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
7120 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
7121 }
7122 } else {
7123 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7124 }
7125 if (v->DynamicMetadataEnable[k] == true) {
7126 double TSetupPipe;
7127 double TdmbfPipe;
7128 double TdmsksPipe;
7129 double TdmecPipe;
7130 double AllowedTimeForUrgentExtraLatency;
7131
7132 CalculateVupdateAndDynamicMetadataParameters(
7133 v->MaxInterDCNTileRepeaters,
7134 v->RequiredDPPCLK[i][j][k],
7135 v->RequiredDISPCLK[i][j],
7136 v->ProjectedDCFCLKDeepSleep[i][j],
7137 v->PixelClock[k],
7138 v->HTotal[k],
7139 v->VTotal[k] - v->VActive[k],
7140 v->DynamicMetadataTransmittedBytes[k],
7141 v->DynamicMetadataLinesBeforeActiveRequired[k],
7142 v->Interlace[k],
7143 v->ProgressiveToInterlaceUnitInOPP,
7144 &TSetupPipe,
7145 &TdmbfPipe,
7146 &TdmecPipe,
7147 &TdmsksPipe,
7148 &dummy1,
7149 &dummy2,
7150 &dummy3);
7151 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
7152 - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
7153 if (AllowedTimeForUrgentExtraLatency > 0) {
7154 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
7155 DCFCLKRequiredForPeakBandwidthPerPlane[k],
7156 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
7157 } else {
7158 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
7159 }
7160 }
7161 }
7162 DCFCLKRequiredForPeakBandwidth = 0;
7163 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
7164 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
7165 }
7166 MinimumTvmPlus2Tr0 = v->UrgLatency[i]
7167 * (v->GPUVMEnable == true ?
7168 (v->HostVMEnable == true ?
7169 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
7170 0);
7171 for (k = 0; k < v->NumberOfActivePlanes; ++k) {
7172 double MaximumTvmPlus2Tr0PlusTsw;
7173 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
7174 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
7175 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
7176 } else {
7177 DCFCLKRequiredForPeakBandwidth = dml_max3(
7178 DCFCLKRequiredForPeakBandwidth,
7179 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
7180 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
7181 }
7182 }
7183 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
7184 }
7185 }
7186 }
7187
7188 static void CalculateUnboundedRequestAndCompressedBufferSize(
7189 unsigned int DETBufferSizeInKByte,
7190 int ConfigReturnBufferSizeInKByte,
7191 enum unbounded_requesting_policy UseUnboundedRequestingFinal,
7192 int TotalActiveDPP,
7193 bool NoChromaPlanes,
7194 int MaxNumDPP,
7195 int CompressedBufferSegmentSizeInkByteFinal,
7196 enum output_encoder_class *Output,
7197 bool *UnboundedRequestEnabled,
7198 int *CompressedBufferSizeInkByte)
7199 {
7200 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
7201
7202 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
7203 *CompressedBufferSizeInkByte = (
7204 *UnboundedRequestEnabled == true ?
7205 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
7206 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
7207 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;
7208
7209 #ifdef __DML_VBA_DEBUG__
7210 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
7211 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
7212 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
7213 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
7214 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
7215 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
7216 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
7217 #endif
7218 }
7219
7220 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
7221 {
7222 bool ret_val = false;
7223
7224 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
7225 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) {
7226 ret_val = false;
7227 }
7228 return (ret_val);
7229 }
7230
7231