/* * Copyright 2017 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * Authors: AMD * */ #include "dc.h" #include "../display_mode_lib.h" #include "../dcn30/display_mode_vba_30.h" #include "display_mode_vba_31.h" #include "../dml_inline_defs.h" /* * NOTE: * This file is gcc-parsable HW gospel, coming straight from HW engineers. * * It doesn't adhere to Linux kernel style and sometimes will do things in odd * ways. Unless there is something clearly wrong with it the code should * remain as-is as it provides us with a guarantee from HW that it is correct. 
*/

#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
#define DCN31_MAX_DSC_IMAGE_WIDTH 5184
#define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096
#define DCN3_15_MIN_COMPBUF_SIZE_KB 128
#define DCN3_15_MAX_DET_SIZE 384

// For DML-C changes that haven't been propagated to VBA yet
//#define __DML_VBA_ALLOW_DELTA__

// Move these to ip parameters/constants

// The vstartup value at which DML starts trying whether the mode can be supported
#define __DML_VBA_MIN_VSTARTUP__ 9

// Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
#define __DML_ARB_TO_RET_DELAY__ (7 + 95)

// fudge factor for min dcfclk calculation
#define __DML_MIN_DCFCLK_FACTOR__ 1.15

/*
 * Per-pipe inputs handed to CalculatePrefetchSchedule() through its
 * "Pipe *myPipe" parameter.
 */
typedef struct {
	/*
	 * CalculatePrefetchSchedule() returns true immediately when DPPCLK or
	 * DISPCLK is 0.0; otherwise both are used as divisors.
	 */
	double DPPCLK;
	double DISPCLK;
	/* CalculatePrefetchSchedule() computes LineTime as HTotal / PixelClock. */
	double PixelClock;
	double DCFCLKDeepSleep;
	/* Used as a divisor for prefetch_bw_pr in CalculatePrefetchSchedule(). */
	unsigned int DPPPerPlane;
	/* Selects DPPCLKDelaySCL (true) or DPPCLKDelaySCLLBOnly (false). */
	bool ScalerEnabled;
	double VRatio;
	double VRatioChroma;
	enum scan_direction_class SourceScan;
	unsigned int BlockWidth256BytesY;
	unsigned int BlockHeight256BytesY;
	unsigned int BlockWidth256BytesC;
	unsigned int BlockHeight256BytesC;
	unsigned int InterlaceEnable;
	/* Multiplied by DPPCLKDelayCNVCCursor when accumulating DPPCycles. */
	unsigned int NumberOfCursors;
	unsigned int VBlank;
	unsigned int HTotal;
	unsigned int DCCEnable;
	/* When set, CalculatePrefetchSchedule() adds 18 to DSTXAfterScaler. */
	bool ODMCombineIsEnabled;
	/* For dm_420_8/10/12 the chroma bytes-per-pixel term is divided by 4. */
	enum source_format_class SourcePixelFormat;
	int BytePerPixelY;
	int BytePerPixelC;
	bool ProgressiveToInterlaceUnitInOPP;
} Pipe;

/*
 * NOTE(review): these two repeat, with identical values, the definitions at
 * the top of this group; the repetition is redundant.
 */
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff

/* Forward declarations of file-local helpers. */
static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib);
static unsigned int dscceComputeDelay(
		unsigned int bpc,
		double BPP,
		unsigned int sliceWidth,
		unsigned int numSlices,
		enum output_format_class pixelFormat,
		enum output_encoder_class Output);
static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output);
static bool CalculatePrefetchSchedule(
		struct display_mode_lib *mode_lib,
		double HostVMInefficiencyFactor,
		Pipe *myPipe,
		unsigned int
DSCDelay, double DPPCLKDelaySubtotalPlusCNVCFormater, double DPPCLKDelaySCL, double DPPCLKDelaySCLLBOnly, double DPPCLKDelayCNVCCursor, double DISPCLKDelaySubtotal, unsigned int DPP_RECOUT_WIDTH, enum output_format_class OutputFormat, unsigned int MaxInterDCNTileRepeaters, unsigned int VStartup, unsigned int MaxVStartup, unsigned int GPUVMPageTableLevels, bool GPUVMEnable, bool HostVMEnable, unsigned int HostVMMaxNonCachedPageTableLevels, double HostVMMinPageSize, bool DynamicMetadataEnable, bool DynamicMetadataVMEnabled, int DynamicMetadataLinesBeforeActiveRequired, unsigned int DynamicMetadataTransmittedBytes, double UrgentLatency, double UrgentExtraLatency, double TCalc, unsigned int PDEAndMetaPTEBytesFrame, unsigned int MetaRowByte, unsigned int PixelPTEBytesPerRow, double PrefetchSourceLinesY, unsigned int SwathWidthY, double VInitPreFillY, unsigned int MaxNumSwathY, double PrefetchSourceLinesC, unsigned int SwathWidthC, double VInitPreFillC, unsigned int MaxNumSwathC, int swath_width_luma_ub, int swath_width_chroma_ub, unsigned int SwathHeightY, unsigned int SwathHeightC, double TWait, double *DSTXAfterScaler, double *DSTYAfterScaler, double *DestinationLinesForPrefetch, double *PrefetchBandwidth, double *DestinationLinesToRequestVMInVBlank, double *DestinationLinesToRequestRowInVBlank, double *VRatioPrefetchY, double *VRatioPrefetchC, double *RequiredPrefetchPixDataBWLuma, double *RequiredPrefetchPixDataBWChroma, bool *NotEnoughTimeForDynamicMetadata, double *Tno_bw, double *prefetch_vmrow_bw, double *Tdmdl_vm, double *Tdmdl, double *TSetup, int *VUpdateOffsetPix, double *VUpdateWidthPix, double *VReadyOffsetPix); static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); static void CalculateDCCConfiguration( bool DCCEnabled, bool DCCProgrammingAssumesScanDirectionUnknown, enum source_format_class SourcePixelFormat, unsigned int SurfaceWidthLuma, unsigned int 
SurfaceWidthChroma, unsigned int SurfaceHeightLuma, unsigned int SurfaceHeightChroma, double DETBufferSize, unsigned int RequestHeight256ByteLuma, unsigned int RequestHeight256ByteChroma, enum dm_swizzle_mode TilingFormat, unsigned int BytePerPixelY, unsigned int BytePerPixelC, double BytePerPixelDETY, double BytePerPixelDETC, enum scan_direction_class ScanOrientation, unsigned int *MaxUncompressedBlockLuma, unsigned int *MaxUncompressedBlockChroma, unsigned int *MaxCompressedBlockLuma, unsigned int *MaxCompressedBlockChroma, unsigned int *IndependentBlockLuma, unsigned int *IndependentBlockChroma); static double CalculatePrefetchSourceLines( struct display_mode_lib *mode_lib, double VRatio, double vtaps, bool Interlace, bool ProgressiveToInterlaceUnitInOPP, unsigned int SwathHeight, unsigned int ViewportYStart, double *VInitPreFill, unsigned int *MaxNumSwath); static unsigned int CalculateVMAndRowBytes( struct display_mode_lib *mode_lib, bool DCCEnable, unsigned int BlockHeight256Bytes, unsigned int BlockWidth256Bytes, enum source_format_class SourcePixelFormat, unsigned int SurfaceTiling, unsigned int BytePerPixel, enum scan_direction_class ScanDirection, unsigned int SwathWidth, unsigned int ViewportHeight, bool GPUVMEnable, bool HostVMEnable, unsigned int HostVMMaxNonCachedPageTableLevels, unsigned int GPUVMMinPageSize, unsigned int HostVMMinPageSize, unsigned int PTEBufferSizeInRequests, unsigned int Pitch, unsigned int DCCMetaPitch, unsigned int *MacroTileWidth, unsigned int *MetaRowByte, unsigned int *PixelPTEBytesPerRow, bool *PTEBufferSizeNotExceeded, int *dpte_row_width_ub, unsigned int *dpte_row_height, unsigned int *MetaRequestWidth, unsigned int *MetaRequestHeight, unsigned int *meta_row_width, unsigned int *meta_row_height, int *vm_group_bytes, unsigned int *dpte_group_bytes, unsigned int *PixelPTEReqWidth, unsigned int *PixelPTEReqHeight, unsigned int *PTERequestSize, int *DPDE0BytesFrame, int *MetaPTEBytesFrame); static double 
CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime); static void CalculateRowBandwidth( bool GPUVMEnable, enum source_format_class SourcePixelFormat, double VRatio, double VRatioChroma, bool DCCEnable, double LineTime, unsigned int MetaRowByteLuma, unsigned int MetaRowByteChroma, unsigned int meta_row_height_luma, unsigned int meta_row_height_chroma, unsigned int PixelPTEBytesPerRowLuma, unsigned int PixelPTEBytesPerRowChroma, unsigned int dpte_row_height_luma, unsigned int dpte_row_height_chroma, double *meta_row_bw, double *dpte_row_bw); static void CalculateFlipSchedule( struct display_mode_lib *mode_lib, unsigned int k, double HostVMInefficiencyFactor, double UrgentExtraLatency, double UrgentLatency, double PDEAndMetaPTEBytesPerFrame, double MetaRowBytes, double DPTEBytesPerRow); static double CalculateWriteBackDelay( enum source_format_class WritebackPixelFormat, double WritebackHRatio, double WritebackVRatio, unsigned int WritebackVTaps, int WritebackDestinationWidth, int WritebackDestinationHeight, int WritebackSourceHeight, unsigned int HTotal); static void CalculateVupdateAndDynamicMetadataParameters( int MaxInterDCNTileRepeaters, double DPPCLK, double DISPCLK, double DCFClkDeepSleep, double PixelClock, int HTotal, int VBlank, int DynamicMetadataTransmittedBytes, int DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP, double *TSetup, double *Tdmbf, double *Tdmec, double *Tdmsks, int *VUpdateOffsetPix, double *VUpdateWidthPix, double *VReadyOffsetPix); static void CalculateWatermarksAndDRAMSpeedChangeSupport( struct display_mode_lib *mode_lib, unsigned int PrefetchMode, double DCFCLK, double ReturnBW, double UrgentLatency, double ExtraLatency, double SOCCLK, double DCFCLKDeepSleep, unsigned int DETBufferSizeY[], unsigned int DETBufferSizeC[], unsigned int SwathHeightY[], unsigned int SwathHeightC[], double SwathWidthY[], double 
SwathWidthC[], unsigned int DPPPerPlane[], double BytePerPixelDETY[], double BytePerPixelDETC[], bool UnboundedRequestEnabled, int unsigned CompressedBufferSizeInkByte, enum clock_change_support *DRAMClockChangeSupport, double *StutterExitWatermark, double *StutterEnterPlusExitWatermark, double *Z8StutterExitWatermark, double *Z8StutterEnterPlusExitWatermark); static void CalculateDCFCLKDeepSleep( struct display_mode_lib *mode_lib, unsigned int NumberOfActivePlanes, int BytePerPixelY[], int BytePerPixelC[], double VRatio[], double VRatioChroma[], double SwathWidthY[], double SwathWidthC[], unsigned int DPPPerPlane[], double HRatio[], double HRatioChroma[], double PixelClock[], double PSCL_THROUGHPUT[], double PSCL_THROUGHPUT_CHROMA[], double DPPCLK[], double ReadBandwidthLuma[], double ReadBandwidthChroma[], int ReturnBusWidth, double *DCFCLKDeepSleep); static void CalculateUrgentBurstFactor( int swath_width_luma_ub, int swath_width_chroma_ub, unsigned int SwathHeightY, unsigned int SwathHeightC, double LineTime, double UrgentLatency, double CursorBufferSize, unsigned int CursorWidth, unsigned int CursorBPP, double VRatio, double VRatioC, double BytePerPixelInDETY, double BytePerPixelInDETC, double DETBufferSizeY, double DETBufferSizeC, double *UrgentBurstFactorCursor, double *UrgentBurstFactorLuma, double *UrgentBurstFactorChroma, bool *NotEnoughUrgentLatencyHiding); static void UseMinimumDCFCLK( struct display_mode_lib *mode_lib, int MaxPrefetchMode, int ReorderingBytes); static void CalculatePixelDeliveryTimes( unsigned int NumberOfActivePlanes, double VRatio[], double VRatioChroma[], double VRatioPrefetchY[], double VRatioPrefetchC[], unsigned int swath_width_luma_ub[], unsigned int swath_width_chroma_ub[], unsigned int DPPPerPlane[], double HRatio[], double HRatioChroma[], double PixelClock[], double PSCL_THROUGHPUT[], double PSCL_THROUGHPUT_CHROMA[], double DPPCLK[], int BytePerPixelC[], enum scan_direction_class SourceScan[], unsigned int NumberOfCursors[], 
unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], unsigned int BlockWidth256BytesY[], unsigned int BlockHeight256BytesY[], unsigned int BlockWidth256BytesC[], unsigned int BlockHeight256BytesC[], double DisplayPipeLineDeliveryTimeLuma[], double DisplayPipeLineDeliveryTimeChroma[], double DisplayPipeLineDeliveryTimeLumaPrefetch[], double DisplayPipeLineDeliveryTimeChromaPrefetch[], double DisplayPipeRequestDeliveryTimeLuma[], double DisplayPipeRequestDeliveryTimeChroma[], double DisplayPipeRequestDeliveryTimeLumaPrefetch[], double DisplayPipeRequestDeliveryTimeChromaPrefetch[], double CursorRequestDeliveryTime[], double CursorRequestDeliveryTimePrefetch[]); static void CalculateMetaAndPTETimes( int NumberOfActivePlanes, bool GPUVMEnable, int MetaChunkSize, int MinMetaChunkSizeBytes, int HTotal[], double VRatio[], double VRatioChroma[], double DestinationLinesToRequestRowInVBlank[], double DestinationLinesToRequestRowInImmediateFlip[], bool DCCEnable[], double PixelClock[], int BytePerPixelY[], int BytePerPixelC[], enum scan_direction_class SourceScan[], int dpte_row_height[], int dpte_row_height_chroma[], int meta_row_width[], int meta_row_width_chroma[], int meta_row_height[], int meta_row_height_chroma[], int meta_req_width[], int meta_req_width_chroma[], int meta_req_height[], int meta_req_height_chroma[], int dpte_group_bytes[], int PTERequestSizeY[], int PTERequestSizeC[], int PixelPTEReqWidthY[], int PixelPTEReqHeightY[], int PixelPTEReqWidthC[], int PixelPTEReqHeightC[], int dpte_row_width_luma_ub[], int dpte_row_width_chroma_ub[], double DST_Y_PER_PTE_ROW_NOM_L[], double DST_Y_PER_PTE_ROW_NOM_C[], double DST_Y_PER_META_ROW_NOM_L[], double DST_Y_PER_META_ROW_NOM_C[], double TimePerMetaChunkNominal[], double TimePerChromaMetaChunkNominal[], double TimePerMetaChunkVBlank[], double TimePerChromaMetaChunkVBlank[], double TimePerMetaChunkFlip[], double TimePerChromaMetaChunkFlip[], double time_per_pte_group_nom_luma[], 
double time_per_pte_group_vblank_luma[], double time_per_pte_group_flip_luma[], double time_per_pte_group_nom_chroma[], double time_per_pte_group_vblank_chroma[], double time_per_pte_group_flip_chroma[]); static void CalculateVMGroupAndRequestTimes( unsigned int NumberOfActivePlanes, bool GPUVMEnable, unsigned int GPUVMMaxPageTableLevels, unsigned int HTotal[], int BytePerPixelC[], double DestinationLinesToRequestVMInVBlank[], double DestinationLinesToRequestVMInImmediateFlip[], bool DCCEnable[], double PixelClock[], int dpte_row_width_luma_ub[], int dpte_row_width_chroma_ub[], int vm_group_bytes[], unsigned int dpde0_bytes_per_frame_ub_l[], unsigned int dpde0_bytes_per_frame_ub_c[], int meta_pte_bytes_per_frame_ub_l[], int meta_pte_bytes_per_frame_ub_c[], double TimePerVMGroupVBlank[], double TimePerVMGroupFlip[], double TimePerVMRequestVBlank[], double TimePerVMRequestFlip[]); static void CalculateStutterEfficiency( struct display_mode_lib *mode_lib, int CompressedBufferSizeInkByte, bool UnboundedRequestEnabled, int ConfigReturnBufferSizeInKByte, int MetaFIFOSizeInKEntries, int ZeroSizeBufferEntries, int NumberOfActivePlanes, int ROBBufferSizeInKByte, double TotalDataReadBandwidth, double DCFCLK, double ReturnBW, double COMPBUF_RESERVED_SPACE_64B, double COMPBUF_RESERVED_SPACE_ZS, double SRExitTime, double SRExitZ8Time, bool SynchronizedVBlank, double Z8StutterEnterPlusExitWatermark, double StutterEnterPlusExitWatermark, bool ProgressiveToInterlaceUnitInOPP, bool Interlace[], double MinTTUVBlank[], int DPPPerPlane[], unsigned int DETBufferSizeY[], int BytePerPixelY[], double BytePerPixelDETY[], double SwathWidthY[], int SwathHeightY[], int SwathHeightC[], double NetDCCRateLuma[], double NetDCCRateChroma[], double DCCFractionOfZeroSizeRequestsLuma[], double DCCFractionOfZeroSizeRequestsChroma[], int HTotal[], int VTotal[], double PixelClock[], double VRatio[], enum scan_direction_class SourceScan[], int BlockHeight256BytesY[], int BlockWidth256BytesY[], int 
BlockHeight256BytesC[], int BlockWidth256BytesC[], int DCCYMaxUncompressedBlock[], int DCCCMaxUncompressedBlock[], int VActive[], bool DCCEnable[], bool WritebackEnable[], double ReadBandwidthPlaneLuma[], double ReadBandwidthPlaneChroma[], double meta_row_bw[], double dpte_row_bw[], double *StutterEfficiencyNotIncludingVBlank, double *StutterEfficiency, int *NumberOfStutterBurstsPerFrame, double *Z8StutterEfficiencyNotIncludingVBlank, double *Z8StutterEfficiency, int *Z8NumberOfStutterBurstsPerFrame, double *StutterPeriod); static void CalculateSwathAndDETConfiguration( bool ForceSingleDPP, int NumberOfActivePlanes, bool DETSharedByAllDPP, unsigned int DETBufferSizeInKByte[], double MaximumSwathWidthLuma[], double MaximumSwathWidthChroma[], enum scan_direction_class SourceScan[], enum source_format_class SourcePixelFormat[], enum dm_swizzle_mode SurfaceTiling[], int ViewportWidth[], int ViewportHeight[], int SurfaceWidthY[], int SurfaceWidthC[], int SurfaceHeightY[], int SurfaceHeightC[], int Read256BytesBlockHeightY[], int Read256BytesBlockHeightC[], int Read256BytesBlockWidthY[], int Read256BytesBlockWidthC[], enum odm_combine_mode ODMCombineEnabled[], int BlendingAndTiming[], int BytePerPixY[], int BytePerPixC[], double BytePerPixDETY[], double BytePerPixDETC[], int HActive[], double HRatio[], double HRatioChroma[], int DPPPerPlane[], int swath_width_luma_ub[], int swath_width_chroma_ub[], double SwathWidth[], double SwathWidthChroma[], int SwathHeightY[], int SwathHeightC[], unsigned int DETBufferSizeY[], unsigned int DETBufferSizeC[], bool ViewportSizeSupportPerPlane[], bool *ViewportSizeSupport); static void CalculateSwathWidth( bool ForceSingleDPP, int NumberOfActivePlanes, enum source_format_class SourcePixelFormat[], enum scan_direction_class SourceScan[], int ViewportWidth[], int ViewportHeight[], int SurfaceWidthY[], int SurfaceWidthC[], int SurfaceHeightY[], int SurfaceHeightC[], enum odm_combine_mode ODMCombineEnabled[], int BytePerPixY[], int 
BytePerPixC[], int Read256BytesBlockHeightY[], int Read256BytesBlockHeightC[], int Read256BytesBlockWidthY[], int Read256BytesBlockWidthC[], int BlendingAndTiming[], int HActive[], double HRatio[], int DPPPerPlane[], double SwathWidthSingleDPPY[], double SwathWidthSingleDPPC[], double SwathWidthY[], double SwathWidthC[], int MaximumSwathHeightY[], int MaximumSwathHeightC[], int swath_width_luma_ub[], int swath_width_chroma_ub[]); static double CalculateExtraLatency( int RoundTripPingLatencyCycles, int ReorderingBytes, double DCFCLK, int TotalNumberOfActiveDPP, int PixelChunkSizeInKByte, int TotalNumberOfDCCActiveDPP, int MetaChunkSize, double ReturnBW, bool GPUVMEnable, bool HostVMEnable, int NumberOfActivePlanes, int NumberOfDPP[], int dpte_group_bytes[], double HostVMInefficiencyFactor, double HostVMMinPageSize, int HostVMMaxNonCachedPageTableLevels); static double CalculateExtraLatencyBytes( int ReorderingBytes, int TotalNumberOfActiveDPP, int PixelChunkSizeInKByte, int TotalNumberOfDCCActiveDPP, int MetaChunkSize, bool GPUVMEnable, bool HostVMEnable, int NumberOfActivePlanes, int NumberOfDPP[], int dpte_group_bytes[], double HostVMInefficiencyFactor, double HostVMMinPageSize, int HostVMMaxNonCachedPageTableLevels); static double CalculateUrgentLatency( double UrgentLatencyPixelDataOnly, double UrgentLatencyPixelMixedWithVMData, double UrgentLatencyVMDataOnly, bool DoUrgentLatencyAdjustment, double UrgentLatencyAdjustmentFabricClockComponent, double UrgentLatencyAdjustmentFabricClockReference, double FabricClockSingle); static void CalculateUnboundedRequestAndCompressedBufferSize( unsigned int DETBufferSizeInKByte, int ConfigReturnBufferSizeInKByte, enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalActiveDPP, bool NoChromaPlanes, int MaxNumDPP, int CompressedBufferSegmentSizeInkByteFinal, enum output_encoder_class *Output, bool *UnboundedRequestEnabled, int *CompressedBufferSizeInkByte); static bool UnboundedRequest(enum 
unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output); void dml31_recalculate(struct display_mode_lib *mode_lib) { ModeSupportAndSystemConfiguration(mode_lib); PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); DisplayPipeConfiguration(mode_lib); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__); #endif DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); } static unsigned int dscceComputeDelay( unsigned int bpc, double BPP, unsigned int sliceWidth, unsigned int numSlices, enum output_format_class pixelFormat, enum output_encoder_class Output) { // valid bpc = source bits per component in the set of {8, 10, 12} // valid bpp = increments of 1/16 of a bit // min = 6/7/8 in N420/N422/444, respectively // max = such that compression is 1:1 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} // fixed value unsigned int rcModelSize = 8192; // N422/N420 operate at 2 pixels per clock unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels; if (pixelFormat == dm_420) pixelsPerClock = 2; else if (pixelFormat == dm_444) pixelsPerClock = 1; else if (pixelFormat == dm_n422) pixelsPerClock = 2; // #all other modes operate at 1 pixel per clock else pixelsPerClock = 1; //initial transmit delay as per PPS initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); //compute ssm delay if (bpc == 8) D = 81; else if (bpc == 10) D = 89; else D = 113; //divide by pixel per cycle to compute slice width as seen by DSC w = sliceWidth / 
pixelsPerClock; //422 mode has an additional cycle of delay if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) s = 0; else s = 1; //main calculation for the dscce ix = initalXmitDelay + 45; wx = (w + 2) / 3; P = 3 * wx - w; l0 = ix / w; a = ix + P * l0; ax = (a + 2) / 3 + D + 6 + 1; L = (ax + wx - 1) / wx; if ((ix % w) == 0 && P != 0) lstall = 1; else lstall = 0; Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels pixels = Delay * 3 * pixelsPerClock; return pixels; } static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) { unsigned int Delay = 0; if (pixelFormat == dm_420) { // sfr Delay = Delay + 2; // dsccif Delay = Delay + 0; // dscc - input deserializer Delay = Delay + 3; // dscc gets pixels every other cycle Delay = Delay + 2; // dscc - input cdc fifo Delay = Delay + 12; // dscc gets pixels every other cycle Delay = Delay + 13; // dscc - cdc uncertainty Delay = Delay + 2; // dscc - output cdc fifo Delay = Delay + 7; // dscc gets pixels every other cycle Delay = Delay + 3; // dscc - cdc uncertainty Delay = Delay + 2; // dscc - output serializer Delay = Delay + 1; // sft Delay = Delay + 1; } else if (pixelFormat == dm_n422) { // sfr Delay = Delay + 2; // dsccif Delay = Delay + 1; // dscc - input deserializer Delay = Delay + 5; // dscc - input cdc fifo Delay = Delay + 25; // dscc - cdc uncertainty Delay = Delay + 2; // dscc - output cdc fifo Delay = Delay + 10; // dscc - cdc uncertainty Delay = Delay + 2; // dscc - output serializer Delay = Delay + 1; // sft Delay = Delay + 1; } else { // sfr Delay = Delay + 2; // dsccif Delay = Delay + 0; // dscc - input deserializer Delay = Delay + 3; // dscc - input cdc fifo Delay = Delay + 12; // dscc - cdc uncertainty Delay = Delay + 2; // dscc - output cdc fifo Delay = Delay + 7; // dscc - output serializer Delay = Delay + 1; // dscc - cdc uncertainty 
Delay = Delay + 2; // sft Delay = Delay + 1; } return Delay; } static bool CalculatePrefetchSchedule( struct display_mode_lib *mode_lib, double HostVMInefficiencyFactor, Pipe *myPipe, unsigned int DSCDelay, double DPPCLKDelaySubtotalPlusCNVCFormater, double DPPCLKDelaySCL, double DPPCLKDelaySCLLBOnly, double DPPCLKDelayCNVCCursor, double DISPCLKDelaySubtotal, unsigned int DPP_RECOUT_WIDTH, enum output_format_class OutputFormat, unsigned int MaxInterDCNTileRepeaters, unsigned int VStartup, unsigned int MaxVStartup, unsigned int GPUVMPageTableLevels, bool GPUVMEnable, bool HostVMEnable, unsigned int HostVMMaxNonCachedPageTableLevels, double HostVMMinPageSize, bool DynamicMetadataEnable, bool DynamicMetadataVMEnabled, int DynamicMetadataLinesBeforeActiveRequired, unsigned int DynamicMetadataTransmittedBytes, double UrgentLatency, double UrgentExtraLatency, double TCalc, unsigned int PDEAndMetaPTEBytesFrame, unsigned int MetaRowByte, unsigned int PixelPTEBytesPerRow, double PrefetchSourceLinesY, unsigned int SwathWidthY, double VInitPreFillY, unsigned int MaxNumSwathY, double PrefetchSourceLinesC, unsigned int SwathWidthC, double VInitPreFillC, unsigned int MaxNumSwathC, int swath_width_luma_ub, int swath_width_chroma_ub, unsigned int SwathHeightY, unsigned int SwathHeightC, double TWait, double *DSTXAfterScaler, double *DSTYAfterScaler, double *DestinationLinesForPrefetch, double *PrefetchBandwidth, double *DestinationLinesToRequestVMInVBlank, double *DestinationLinesToRequestRowInVBlank, double *VRatioPrefetchY, double *VRatioPrefetchC, double *RequiredPrefetchPixDataBWLuma, double *RequiredPrefetchPixDataBWChroma, bool *NotEnoughTimeForDynamicMetadata, double *Tno_bw, double *prefetch_vmrow_bw, double *Tdmdl_vm, double *Tdmdl, double *TSetup, int *VUpdateOffsetPix, double *VUpdateWidthPix, double *VReadyOffsetPix) { bool MyError = false; unsigned int DPPCycles, DISPCLKCycles; double DSTTotalPixelsAfterScaler; double LineTime; double dst_y_prefetch_equ; #ifdef 
__DML_VBA_DEBUG__ double Tsw_oto; #endif double prefetch_bw_oto; double prefetch_bw_pr; double Tvm_oto; double Tr0_oto; double Tvm_oto_lines; double Tr0_oto_lines; double dst_y_prefetch_oto; double TimeForFetchingMetaPTE = 0; double TimeForFetchingRowInVBlank = 0; double LinesToRequestPrefetchPixelData = 0; unsigned int HostVMDynamicLevelsTrips; double trip_to_mem; double Tvm_trips; double Tr0_trips; double Tvm_trips_rounded; double Tr0_trips_rounded; double Lsw_oto; double Tpre_rounded; double prefetch_bw_equ; double Tvm_equ; double Tr0_equ; double Tdmbf; double Tdmec; double Tdmsks; double prefetch_sw_bytes; double bytes_pp; double dep_bytes; int max_vratio_pre = 4; double min_Lsw; double Tsw_est1 = 0; double Tsw_est3 = 0; double max_Tsw = 0; if (GPUVMEnable == true && HostVMEnable == true) { HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; } else { HostVMDynamicLevelsTrips = 0; } #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor); #endif CalculateVupdateAndDynamicMetadataParameters( MaxInterDCNTileRepeaters, myPipe->DPPCLK, myPipe->DISPCLK, myPipe->DCFCLKDeepSleep, myPipe->PixelClock, myPipe->HTotal, myPipe->VBlank, DynamicMetadataTransmittedBytes, DynamicMetadataLinesBeforeActiveRequired, myPipe->InterlaceEnable, myPipe->ProgressiveToInterlaceUnitInOPP, TSetup, &Tdmbf, &Tdmec, &Tdmsks, VUpdateOffsetPix, VUpdateWidthPix, VReadyOffsetPix); LineTime = myPipe->HTotal / myPipe->PixelClock; trip_to_mem = UrgentLatency; Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); #ifdef __DML_VBA_ALLOW_DELTA__ if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) { #else if (DynamicMetadataVMEnabled == true) { #endif *Tdmdl = TWait + Tvm_trips + trip_to_mem; } else { *Tdmdl = TWait + UrgentExtraLatency; } #ifdef __DML_VBA_ALLOW_DELTA__ if (DynamicMetadataEnable == false) { *Tdmdl = 
0.0; } #endif if (DynamicMetadataEnable == true) { if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { *NotEnoughTimeForDynamicMetadata = true; dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks); dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl); } else { *NotEnoughTimeForDynamicMetadata = false; } } else { *NotEnoughTimeForDynamicMetadata = false; } *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0); if (myPipe->ScalerEnabled) DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; else DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; DISPCLKCycles = DISPCLKDelaySubtotal; if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) return true; *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK); dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK); dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled); #endif *DSTXAfterScaler = *DSTXAfterScaler + 
((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) *DSTYAfterScaler = 1; else *DSTYAfterScaler = 0; DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); #endif MyError = false; Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime; Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime; #ifdef __DML_VBA_ALLOW_DELTA__ if (!myPipe->DCCEnable) { Tr0_trips = 0.0; Tr0_trips_rounded = 0.0; } #endif if (!GPUVMEnable) { Tvm_trips = 0.0; Tvm_trips_rounded = 0.0; } if (GPUVMEnable) { if (GPUVMPageTableLevels >= 3) { *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1); } else { *Tno_bw = 0; } } else if (!myPipe->DCCEnable) { *Tno_bw = LineTime; } else { *Tno_bw = LineTime / 4; } if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12) bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; else bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; /*rev 99*/ prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane; prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr; max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw); min_Lsw = dml_max(1, 
dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre); Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4; #ifdef __DML_VBA_DEBUG__ Tsw_oto = Lsw_oto * LineTime; #endif #ifdef __DML_VBA_DEBUG__ dml_print("DML: HTotal: %d\n", myPipe->HTotal); dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto); dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY); dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub); dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY); dml_print("DML: Tsw_oto: %f\n", Tsw_oto); #endif if (GPUVMEnable == true) Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0); else Tvm_oto = LineTime / 4.0; if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term) LineTime - Tvm_oto, LineTime / 4); } else { Tr0_oto = (LineTime - Tvm_oto) / 2.0; } #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte); dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); #endif Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) 
/ LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; Tpre_rounded = dst_y_prefetch_equ * LineTime; dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); if (prefetch_sw_bytes < dep_bytes) prefetch_sw_bytes = 2 * dep_bytes; dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto); dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines); dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines); dml_print("DML: Lsw_oto: %f\n", Lsw_oto); dml_print("DML: LineTime: %f\n", LineTime); dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ); dml_print("DML: LineTime: %f\n", LineTime); dml_print("DML: VStartup: %d\n", VStartup); dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime); dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup); dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc); dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait); dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf); dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec); dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks); dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm); dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl); dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler); dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler); *PrefetchBandwidth = 0; *DestinationLinesToRequestVMInVBlank = 0; 
*DestinationLinesToRequestRowInVBlank = 0; *VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; if (dst_y_prefetch_equ > 1) { double PrefetchBandwidth1; double PrefetchBandwidth2; double PrefetchBandwidth3; double PrefetchBandwidth4; if (Tpre_rounded - *Tno_bw > 0) { PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; } else { PrefetchBandwidth1 = 0; } if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); } if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); else PrefetchBandwidth2 = 0; if (Tpre_rounded - Tvm_trips_rounded > 0) { PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; } else { PrefetchBandwidth3 = 0; } #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); #endif if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) { PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); } 
if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); else PrefetchBandwidth4 = 0; { bool Case1OK; bool Case2OK; bool Case3OK; if (PrefetchBandwidth1 > 0) { if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) { Case1OK = true; } else { Case1OK = false; } } else { Case1OK = false; } if (PrefetchBandwidth2 > 0) { if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) { Case2OK = true; } else { Case2OK = false; } } else { Case2OK = false; } if (PrefetchBandwidth3 > 0) { if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) { Case3OK = true; } else { Case3OK = false; } } else { Case3OK = false; } if (Case1OK) { prefetch_bw_equ = PrefetchBandwidth1; } else if (Case2OK) { prefetch_bw_equ = PrefetchBandwidth2; } else if (Case3OK) { prefetch_bw_equ = PrefetchBandwidth3; } else { prefetch_bw_equ = PrefetchBandwidth4; } #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); #endif if (prefetch_bw_equ > 0) { if (GPUVMEnable == true) { Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); } else { Tvm_equ = LineTime / 4; } if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { Tr0_equ = dml_max4( (MetaRowByte + 
PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ, Tr0_trips, (LineTime - Tvm_equ) / 2, LineTime / 4); } else { Tr0_equ = (LineTime - Tvm_equ) / 2; } } else { Tvm_equ = 0; Tr0_equ = 0; dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__); } } if (dst_y_prefetch_oto < dst_y_prefetch_equ) { *DestinationLinesForPrefetch = dst_y_prefetch_oto; TimeForFetchingMetaPTE = Tvm_oto; TimeForFetchingRowInVBlank = Tr0_oto; *PrefetchBandwidth = prefetch_bw_oto; } else { *DestinationLinesForPrefetch = dst_y_prefetch_equ; TimeForFetchingMetaPTE = Tvm_equ; TimeForFetchingRowInVBlank = Tr0_equ; *PrefetchBandwidth = prefetch_bw_equ; } *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; #ifdef __DML_VBA_ALLOW_DELTA__ LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch // See note above dated 5/30/2018 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ? - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this?? 
#else LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; #endif #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); #endif if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) { *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY); #endif if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { *VRatioPrefetchY = dml_max( (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData, (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0)); *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); } else { MyError = true; dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); *VRatioPrefetchY = 0; } #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); dml_print("DML::%s: MaxNumSwathY = 
%d\n", __func__, MaxNumSwathY); #endif } *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC); #endif if ((SwathHeightC > 4) || VInitPreFillC > 3) { if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { *VRatioPrefetchC = dml_max( *VRatioPrefetchC, (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0)); *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); } else { MyError = true; dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); *VRatioPrefetchC = 0; } #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); #endif } #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); #endif *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma); #endif *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub / LineTime; } else { MyError = true; dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData); 
*VRatioPrefetchY = 0; *VRatioPrefetchC = 0; *RequiredPrefetchPixDataBWLuma = 0; *RequiredPrefetchPixDataBWChroma = 0; } dml_print( "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank); dml_print( "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double) LinesToRequestPrefetchPixelData * LineTime); dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); } else { MyError = true; dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); } { double prefetch_vm_bw; double prefetch_row_bw; if (PDEAndMetaPTEBytesFrame == 0) { prefetch_vm_bw = 0; } else if (*DestinationLinesToRequestVMInVBlank > 0) { #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); #endif 
prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
#endif
		} else {
			/* VM bytes are non-zero but no lines were assigned to request them: error. */
			prefetch_vm_bw = 0;
			MyError = true;
			dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
		}

		/* Same three-way split for the row (meta row + PTE row) bandwidth. */
		if (MetaRowByte + PixelPTEBytesPerRow == 0) {
			prefetch_row_bw = 0;
		} else if (*DestinationLinesToRequestRowInVBlank > 0) {
			prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime);
#ifdef __DML_VBA_DEBUG__
			dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte);
			dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow);
			dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank);
			dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
#endif
		} else {
			prefetch_row_bw = 0;
			MyError = true;
			dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__);
		}

		/* The reported VM/row bandwidth is the larger of the two. */
		*prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
	}

	/* On error, clear the prefetch outputs and the local timing values before returning. */
	if (MyError) {
		*PrefetchBandwidth = 0;
		TimeForFetchingMetaPTE = 0;
		TimeForFetchingRowInVBlank = 0;
		*DestinationLinesToRequestVMInVBlank = 0;
		*DestinationLinesToRequestRowInVBlank = 0;
		*DestinationLinesForPrefetch = 0;
		LinesToRequestPrefetchPixelData = 0;
		*VRatioPrefetchY = 0;
		*VRatioPrefetchC = 0;
		*RequiredPrefetchPixDataBWLuma = 0;
		*RequiredPrefetchPixDataBWChroma = 0;
	}

	return MyError;
}

/*
 * Returns (VCOSpeed * 4) divided by the floored ratio (VCOSpeed * 4 / Clock).
 *
 * NOTE(review): presumably dml_floor(x, 1) rounds x down to a whole number, so
 * the divider shrinks and the result is not below Clock - confirm against
 * dml_inline_defs.h. There is no guard here for a divider of zero.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1);
}

/*
 * Counterpart of RoundToDFSGranularityUp(): same formula, but the ratio
 * (VCOSpeed * 4.0 / Clock) goes through dml_ceil() instead of dml_floor().
 *
 * NOTE(review): presumably this yields a value not above Clock - confirm
 * against the dml_ceil() definition.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4.0 / Clock, 1);
}

/*
 * CalculateDCCConfiguration - pick a request type for luma and for chroma and
 * report the matching block sizes through the six output pointers.
 *
 * Steps, in order:
 *   1. derive viewport limits from DETBufferSize and the 256-byte request
 *      heights, then clamp the luma surface size to them;
 *   2. compute full-swath byte counts for horizontal and vertical access;
 *   3. compare those counts against DETBufferSize to set the req128_* flags;
 *   4. derive the segment_order_* flags from bytes-per-pixel and TilingFormat;
 *   5. choose RequestLuma / RequestChroma from the flags, using both
 *      directions when DCCProgrammingAssumesScanDirectionUnknown is set and
 *      otherwise only the direction selected by ScanOrientation;
 *   6. translate the request types into the output values.
 *
 * Outputs: *MaxUncompressedBlock{Luma,Chroma}, *MaxCompressedBlock{Luma,Chroma}
 * and *IndependentBlock{Luma,Chroma} are always written. The chroma outputs are
 * zeroed when DCCEnabled is not true or BytePerPixelC == 0; the luma outputs
 * are zeroed when DCCEnabled is not true.
 *
 * SurfaceWidthChroma, SurfaceHeightChroma, BytePerPixelDETY and
 * BytePerPixelDETC are not referenced in the body.
 */
static void CalculateDCCConfiguration(
		bool DCCEnabled,
		bool DCCProgrammingAssumesScanDirectionUnknown,
		enum source_format_class SourcePixelFormat,
		unsigned int SurfaceWidthLuma,
		unsigned int SurfaceWidthChroma,
		unsigned int SurfaceHeightLuma,
		unsigned int SurfaceHeightChroma,
		double DETBufferSize,
		unsigned int RequestHeight256ByteLuma,
		unsigned int RequestHeight256ByteChroma,
		enum dm_swizzle_mode TilingFormat,
		unsigned int BytePerPixelY,
		unsigned int BytePerPixelC,
		double BytePerPixelDETY,
		double BytePerPixelDETC,
		enum scan_direction_class ScanOrientation,
		unsigned int *MaxUncompressedBlockLuma,
		unsigned int *MaxUncompressedBlockChroma,
		unsigned int *MaxCompressedBlockLuma,
		unsigned int *MaxCompressedBlockChroma,
		unsigned int *IndependentBlockLuma,
		unsigned int *IndependentBlockChroma)
{
	int yuv420;
	int horz_div_l;
	int horz_div_c;
	int vert_div_l;
	int vert_div_c;
	int swath_buf_size;
	double detile_buf_vp_horz_limit;
	double detile_buf_vp_vert_limit;
	int MAS_vp_horz_limit;
	int MAS_vp_vert_limit;
	int max_vp_horz_width;
	int max_vp_vert_height;
	int eff_surf_width_l;
	int eff_surf_width_c;
	int eff_surf_height_l;
	int eff_surf_height_c;
	int full_swath_bytes_horz_wc_l;
	int full_swath_bytes_horz_wc_c;
	int full_swath_bytes_vert_wc_l;
	int full_swath_bytes_vert_wc_c;
	int req128_horz_wc_l;
	int req128_horz_wc_c;
	int req128_vert_wc_l;
	int req128_vert_wc_c;
	int segment_order_horz_contiguous_luma;
	int segment_order_horz_contiguous_chroma;
	int segment_order_vert_contiguous_luma;
	int segment_order_vert_contiguous_chroma;

	/* REQ_NA is declared but never assigned in this function. */
	typedef enum {
		REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
	} RequestType;
	RequestType RequestLuma;
	RequestType RequestChroma;

	/* 1 for the three 4:2:0 source formats, 0 otherwise; used below as a divisor term (1 + yuv420). */
	yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);

	/* Divider terms default to 1 and are cleared for the specific bpp / tiling cases below. */
	horz_div_l = 1;
	horz_div_c = 1;
	vert_div_l = 1;
	vert_div_c = 1;

	if (BytePerPixelY == 1)
		vert_div_l = 0;
	if (BytePerPixelC == 1)
		vert_div_c = 0;
	if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
		horz_div_l = 0;
	if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
		horz_div_c = 0;

	/*
	 * Viewport limits implied by half of DETBufferSize, less a fixed
	 * reservation (2 * 256 without chroma, 2 * 2 * 256 with chroma).
	 * Note the double result is truncated when stored in the int
	 * swath_buf_size.
	 */
	if (BytePerPixelC == 0) {
		swath_buf_size = DETBufferSize / 2 - 2 * 256;
		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
	} else {
		swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
		detile_buf_vp_horz_limit = (double) swath_buf_size
				/ ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
						+ (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
		detile_buf_vp_vert_limit = (double) swath_buf_size
				/ (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
	}

	/* dm_420_10 gets both limits scaled by 1.5 (and the swath bytes by 2/3 further down). */
	if (SourcePixelFormat == dm_420_10) {
		detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
		detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
	}

	detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
	detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);

	/* Fixed caps, combined with the buffer-derived limits via dml_min(). */
	MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
	MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
	max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
	max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);

	/* Effective surface size: luma clamped to the limits, chroma derived from luma. */
	eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
	eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
	eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
	eff_surf_height_c = eff_surf_height_l / (1 + yuv420);

	/* Full-swath byte counts for horizontal and vertical access. */
	full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
	full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
	if (BytePerPixelC > 0) {
		full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
		full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
	} else {
		full_swath_bytes_horz_wc_c = 0;
		full_swath_bytes_vert_wc_c = 0;
	}

	/* The "* 2 / 3" below is integer arithmetic; the quotient is truncated before dml_ceil() sees it. */
	if (SourcePixelFormat == dm_420_10) {
		full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
		full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
		full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
		full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
	}

	/*
	 * Horizontal access: if two swaths of each plane fit in DETBufferSize no
	 * plane needs 128-byte requests; otherwise try halving one plane (chosen
	 * by the 1.5x luma/chroma comparison), and fall back to both.
	 */
	if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
		req128_horz_wc_l = 0;
		req128_horz_wc_c = 0;
	} else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
		req128_horz_wc_l = 0;
		req128_horz_wc_c = 1;
	} else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
		req128_horz_wc_l = 1;
		req128_horz_wc_c = 0;
	} else {
		req128_horz_wc_l = 1;
		req128_horz_wc_c = 1;
	}

	/* Vertical access: same decision ladder using the vertical swath byte counts. */
	if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
		req128_vert_wc_l = 0;
		req128_vert_wc_c = 0;
	} else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
		req128_vert_wc_l = 0;
		req128_vert_wc_c = 1;
	} else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
		req128_vert_wc_l = 1;
		req128_vert_wc_c = 0;
	} else {
		req128_vert_wc_l = 1;
		req128_vert_wc_c = 1;
	}

	/*
	 * segment_order_* flags: a value of 0 makes a 128-byte request in that
	 * direction map to REQ_128BytesNonContiguous further down.
	 */
	if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
		segment_order_horz_contiguous_luma = 0;
	} else {
		segment_order_horz_contiguous_luma = 1;
	}
	if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
			|| (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
		segment_order_vert_contiguous_luma = 0;
	} else {
		segment_order_vert_contiguous_luma = 1;
	}
	if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
		segment_order_horz_contiguous_chroma = 0;
	} else {
		segment_order_horz_contiguous_chroma = 1;
	}
	if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
			|| (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
		segment_order_vert_contiguous_chroma = 0;
	} else {
		segment_order_vert_contiguous_chroma = 1;
	}

	if (DCCProgrammingAssumesScanDirectionUnknown == true) {
		/* Direction unknown: both the horizontal and the vertical flags are considered. */
		if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
			RequestLuma = REQ_256Bytes;
		} else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
			RequestLuma = REQ_128BytesNonContiguous;
		} else {
			RequestLuma = REQ_128BytesContiguous;
		}
		if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
			RequestChroma = REQ_256Bytes;
		} else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
			RequestChroma = REQ_128BytesNonContiguous;
		} else {
			RequestChroma = REQ_128BytesContiguous;
		}
	} else if (ScanOrientation != dm_vert) {
		/* Known non-vertical scan: only the horizontal flags matter. */
		if (req128_horz_wc_l == 0) {
			RequestLuma = REQ_256Bytes;
		} else if (segment_order_horz_contiguous_luma == 0) {
			RequestLuma = REQ_128BytesNonContiguous;
		} else {
			RequestLuma = REQ_128BytesContiguous;
		}
		if (req128_horz_wc_c == 0) {
			RequestChroma = REQ_256Bytes;
		} else if (segment_order_horz_contiguous_chroma == 0) {
			RequestChroma = REQ_128BytesNonContiguous;
		} else {
			RequestChroma = REQ_128BytesContiguous;
		}
	} else {
		/* Known vertical scan: only the vertical flags matter. */
		if (req128_vert_wc_l == 0) {
			RequestLuma = REQ_256Bytes;
		} else if (segment_order_vert_contiguous_luma == 0) {
			RequestLuma = REQ_128BytesNonContiguous;
		} else {
			RequestLuma = REQ_128BytesContiguous;
		}
		if (req128_vert_wc_c == 0) {
			RequestChroma = REQ_256Bytes;
		} else if (segment_order_vert_contiguous_chroma == 0) {
			RequestChroma = REQ_128BytesNonContiguous;
		} else {
			RequestChroma = REQ_128BytesContiguous;
		}
	}

	/* Request type -> (max uncompressed, max compressed, independent) block sizes. */
	if (RequestLuma == REQ_256Bytes) {
		*MaxUncompressedBlockLuma = 256;
		*MaxCompressedBlockLuma = 256;
		*IndependentBlockLuma = 0;
	} else if (RequestLuma == REQ_128BytesContiguous) {
		*MaxUncompressedBlockLuma = 256;
		*MaxCompressedBlockLuma = 128;
		*IndependentBlockLuma = 128;
	} else {
		*MaxUncompressedBlockLuma = 256;
		*MaxCompressedBlockLuma = 64;
		*IndependentBlockLuma = 64;
	}

	if (RequestChroma == REQ_256Bytes) {
		*MaxUncompressedBlockChroma = 256;
		*MaxCompressedBlockChroma = 256;
		*IndependentBlockChroma = 0;
	} else if (RequestChroma == REQ_128BytesContiguous) {
		*MaxUncompressedBlockChroma = 256;
		*MaxCompressedBlockChroma = 128;
		*IndependentBlockChroma = 128;
	} else {
		*MaxUncompressedBlockChroma = 256;
		*MaxCompressedBlockChroma = 64;
		*IndependentBlockChroma = 64;
	}

	/* Final overrides: no DCC (or no chroma plane) means the matching outputs are reported as 0. */
	if (DCCEnabled != true || BytePerPixelC == 0) {
		*MaxUncompressedBlockChroma = 0;
		*MaxCompressedBlockChroma = 0;
		*IndependentBlockChroma = 0;
	}

	if (DCCEnabled != true) {
		*MaxUncompressedBlockLuma = 0;
		*MaxCompressedBlockLuma = 0;
		*IndependentBlockLuma = 0;
	}
}

/*
 * CalculatePrefetchSourceLines - compute *VInitPreFill and *MaxNumSwath and
 * return *MaxNumSwath * SwathHeight + MaxPartialSwath.
 *
 * mode_lib is only read for vba.IgnoreViewportPositioning, which selects one
 * of two formulas for the swath counts. ViewportYStart is only used to emit a
 * warning when it is non-zero while viewport positioning is being ignored.
 *
 * SwathHeight is used as a divisor and modulus without a zero check.
 */
static double CalculatePrefetchSourceLines(
		struct display_mode_lib *mode_lib,
		double VRatio,
		double vtaps,
		bool Interlace,
		bool ProgressiveToInterlaceUnitInOPP,
		unsigned int SwathHeight,
		unsigned int ViewportYStart,
		double *VInitPreFill,
		unsigned int *MaxNumSwath)
{
	struct vba_vars_st *v = &mode_lib->vba;
	unsigned int MaxPartialSwath;

	/* The Interlace term is only added when ProgressiveToInterlaceUnitInOPP is false. */
	if (ProgressiveToInterlaceUnitInOPP)
		*VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
	else
		*VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);

	if (!v->IgnoreViewportPositioning) {

		*MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;

		/* The cast binds to the parenthesised expression; the modulo is done on unsigned int. */
		if (*VInitPreFill > 1.0)
			MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
		else
			MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
		/* In this branch the partial swath is never reported as less than 1. */
		MaxPartialSwath = dml_max(1U, MaxPartialSwath);

	} else {

		if (ViewportYStart != 0)
			dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");

		*MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);

		if (*VInitPreFill > 1.0)
			MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
		else
			MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
	dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
	dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
	dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
	dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
	dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
	dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
	dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
	dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
#endif
	return *MaxNumSwath * SwathHeight + MaxPartialSwath;
}

/*
 * CalculateVMAndRowBytes - compute the meta-row, PTE-row and page-table byte
 * counts for the surface described by the arguments.
 *
 * Returns PDEAndMetaPTEBytesFrame: the sum of *MetaPTEBytesFrame,
 * MPDEBytesFrame, *DPDE0BytesFrame and ExtraDPDEBytesFrame, multiplied by
 * (1 + 8 * HostVMDynamicLevels) when HostVMEnable is true.
 *
 * Written through pointers: MacroTileWidth, MetaRowByte, PixelPTEBytesPerRow,
 * PTEBufferSizeNotExceeded, dpte_row_width_ub, dpte_row_height,
 * MetaRequestWidth, MetaRequestHeight, meta_row_width, meta_row_height,
 * vm_group_bytes, dpte_group_bytes, PixelPTEReqWidth, PixelPTEReqHeight,
 * PTERequestSize, DPDE0BytesFrame, MetaPTEBytesFrame.
 *
 * mode_lib is only read for vba.GPUVMMaxPageTableLevels. SourcePixelFormat is
 * not referenced in the body. BytePerPixel and Pitch are used as divisors
 * without a zero check.
 */
static unsigned int CalculateVMAndRowBytes(
		struct display_mode_lib *mode_lib,
		bool DCCEnable,
		unsigned int BlockHeight256Bytes,
		unsigned int BlockWidth256Bytes,
		enum source_format_class SourcePixelFormat,
		unsigned int SurfaceTiling,
		unsigned int BytePerPixel,
		enum scan_direction_class ScanDirection,
		unsigned int SwathWidth,
		unsigned int ViewportHeight,
		bool GPUVMEnable,
		bool HostVMEnable,
		unsigned int HostVMMaxNonCachedPageTableLevels,
		unsigned int GPUVMMinPageSize,
		unsigned int HostVMMinPageSize,
		unsigned int PTEBufferSizeInRequests,
		unsigned int Pitch,
		unsigned int DCCMetaPitch,
		unsigned int *MacroTileWidth,
		unsigned int *MetaRowByte,
		unsigned int *PixelPTEBytesPerRow,
		bool *PTEBufferSizeNotExceeded,
		int *dpte_row_width_ub,
		unsigned int *dpte_row_height,
		unsigned int *MetaRequestWidth,
		unsigned int *MetaRequestHeight,
		unsigned int *meta_row_width,
		unsigned int *meta_row_height,
		int *vm_group_bytes,
		unsigned int *dpte_group_bytes,
		unsigned int *PixelPTEReqWidth,
		unsigned int *PixelPTEReqHeight,
		unsigned int *PTERequestSize,
		int *DPDE0BytesFrame,
		int *MetaPTEBytesFrame)
{
	struct vba_vars_st *v = &mode_lib->vba;
	unsigned int MPDEBytesFrame;
	unsigned int DCCMetaSurfaceBytes;
	unsigned int MacroTileSizeBytes;
	unsigned int MacroTileHeight;
	unsigned int ExtraDPDEBytesFrame;
	unsigned int PDEAndMetaPTEBytesFrame;
	unsigned int PixelPTEReqHeightPTEs = 0;
	unsigned int HostVMDynamicLevels = 0;
	double FractionOfPTEReturnDrop;

	/*
	 * HostVMDynamicLevels stays 0 unless both GPUVM and HostVM are enabled;
	 * then it is the max non-cached level count reduced by 0, 1 or 2
	 * (floored at 0) depending on HostVMMinPageSize.
	 */
	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048) {
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		} else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		} else {
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
		}
	}

	/* A meta request spans 8 x 8 of the 256-byte blocks. */
	*MetaRequestHeight = 8 * BlockHeight256Bytes;
	*MetaRequestWidth = 8 * BlockWidth256Bytes;
	/* For dm_vert the roles of request width and height are swapped. */
	if (ScanDirection != dm_vert) {
		*meta_row_height = *MetaRequestHeight;
		*meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth;
		*MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0;
	} else {
		*meta_row_height = *MetaRequestWidth;
		*meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight;
		*MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0;
	}
	DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256;
	if (GPUVMEnable == true) {
		*MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64;
		MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1);
	} else {
		*MetaPTEBytesFrame = 0;
		MPDEBytesFrame = 0;
	}

	/* Without DCC every meta quantity computed above is discarded. */
	if (DCCEnable != true) {
		*MetaPTEBytesFrame = 0;
		MPDEBytesFrame = 0;
		*MetaRowByte = 0;
	}

	/* Macro tile: 256 bytes for linear surfaces, 65536 bytes otherwise. */
	if (SurfaceTiling == dm_sw_linear) {
		MacroTileSizeBytes = 256;
		MacroTileHeight = BlockHeight256Bytes;
	} else {
		MacroTileSizeBytes = 65536;
		MacroTileHeight = 16 * BlockHeight256Bytes;
	}
	*MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight;

	/* The two DPDE0 formulas differ only in using ViewportHeight vs. SwathWidth. */
	if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) {
		if (ScanDirection != dm_vert) {
			*DPDE0BytesFrame = 64
					* (dml_ceil(
							((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
									/ (8 * 2097152),
							1) + 1);
		} else {
			*DPDE0BytesFrame = 64
					* (dml_ceil(
							((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes)
									/ (8 * 2097152),
							1) + 1);
		}
		ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2);
	} else {
		*DPDE0BytesFrame = 0;
		ExtraDPDEBytesFrame = 0;
	}

	PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame);
	dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame);
	dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame);
	dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame);
	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
#endif

	/* HostVM scales the frame total by (1 + 8 * HostVMDynamicLevels). */
	if (HostVMEnable == true) {
		PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels);
	}
#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame);
#endif

	/* PTE request geometry; FractionOfPTEReturnDrop is 0 in all three branches. */
	if (SurfaceTiling == dm_sw_linear) {
		PixelPTEReqHeightPTEs = 1;
		*PixelPTEReqHeight = 1;
		*PixelPTEReqWidth = 32768.0 / BytePerPixel;
		*PTERequestSize = 64;
		FractionOfPTEReturnDrop = 0;
	} else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) {
		PixelPTEReqHeightPTEs = 16;
		*PixelPTEReqHeight = 16 * BlockHeight256Bytes;
		*PixelPTEReqWidth = 16 * BlockWidth256Bytes;
		*PTERequestSize = 128;
		FractionOfPTEReturnDrop = 0;
	} else {
		PixelPTEReqHeightPTEs = 1;
		*PixelPTEReqHeight = MacroTileHeight;
		*PixelPTEReqWidth = 8 * *MacroTileWidth;
		*PTERequestSize = 64;
		FractionOfPTEReturnDrop = 0;
	}

	/* DPTE row height / width and the PTE bytes per row, per tiling mode and scan direction. */
	if (SurfaceTiling == dm_sw_linear) {
		*dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1));
		*dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
	} else if (ScanDirection != dm_vert) {
		*dpte_row_height = *PixelPTEReqHeight;
		*dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth;
		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize;
	} else {
		*dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth);
		*dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight;
		*PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize;
	}

	/* Buffer check is done on the row bytes before the HostVM scaling applied further down. */
	if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) {
		*PTEBufferSizeNotExceeded = true;
	} else {
		*PTEBufferSizeNotExceeded = false;
	}

	/* Without GPUVM there are no PTE row bytes and the buffer check passes trivially. */
	if (GPUVMEnable != true) {
		*PixelPTEBytesPerRow = 0;
		*PTEBufferSizeNotExceeded = true;
	}

	dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame);

	if (HostVMEnable == true) {
		*PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels);
	}

	/* Group sizes: 512 with HostVM; with GPUVM only, 2048 except for the vertical-scan, single-PTE-height tiled case. */
	if (HostVMEnable == true) {
		*vm_group_bytes = 512;
		*dpte_group_bytes = 512;
	} else if (GPUVMEnable == true) {
		*vm_group_bytes = 2048;
		if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) {
			*dpte_group_bytes = 512;
		} else {
			*dpte_group_bytes = 2048;
		}
	} else {
		*vm_group_bytes = 0;
		*dpte_group_bytes = 0;
	}
	return PDEAndMetaPTEBytesFrame;
}

static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib)
{
	struct vba_vars_st *v = &mode_lib->vba;
	unsigned int j, k;
	double HostVMInefficiencyFactor = 1.0;
	bool NoChromaPlanes = true;
	int ReorderBytes;
	double VMDataOnlyReturnBW;
	double MaxTotalRDBandwidth = 0;
	int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb];

	v->WritebackDISPCLK = 0.0;
	v->DISPCLKWithRamping = 0;
	v->DISPCLKWithoutRamping = 0;
	v->GlobalDPPCLK = 0.0;
	/* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */
	{
		double IdealFabricAndSDPPortBandwidthPerState = dml_min(
			v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb],
			v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn);
		double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth;

		if (v->HostVMEnable != true) {
			v->ReturnBW = dml_min(
				IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
				IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0);
		} else {
			v->ReturnBW = dml_min(
				IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0,
				IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData /
100.0); } } /* End DAL custom code */ // DISPCLK and DPPCLK Calculation // for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->WritebackEnable[k]) { v->WritebackDISPCLK = dml_max( v->WritebackDISPCLK, dml31_CalculateWriteBackDISPCLK( v->WritebackPixelFormat[k], v->PixelClock[k], v->WritebackHRatio[k], v->WritebackVRatio[k], v->WritebackHTaps[k], v->WritebackVTaps[k], v->WritebackSourceWidth[k], v->WritebackDestinationWidth[k], v->HTotal[k], v->WritebackLineBufferSize)); } } for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->HRatio[k] > 1) { v->PSCL_THROUGHPUT_LUMA[k] = dml_min( v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1)); } else { v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); } v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k] * dml_max( v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0)); if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) { v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k]; } if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) { v->PSCL_THROUGHPUT_CHROMA[k] = 0.0; v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma; } else { if (v->HRatioChroma[k] > 1) { v->PSCL_THROUGHPUT_CHROMA[k] = dml_min( v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); } else { v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); } v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k] * dml_max3( v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 1.0); if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * 
v->PixelClock[k]) { v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k]; } v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma); } } for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->BlendingAndTiming[k] != k) continue; if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) { v->DISPCLKWithRamping = dml_max( v->DISPCLKWithRamping, v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100)); v->DISPCLKWithoutRamping = dml_max( v->DISPCLKWithoutRamping, v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { v->DISPCLKWithRamping = dml_max( v->DISPCLKWithRamping, v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100)); v->DISPCLKWithoutRamping = dml_max( v->DISPCLKWithoutRamping, v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); } else { v->DISPCLKWithRamping = dml_max( v->DISPCLKWithRamping, v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100)); v->DISPCLKWithoutRamping = dml_max( v->DISPCLKWithoutRamping, v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); } } v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK); v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK); ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0); v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed); v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed); v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz, v->DISPCLKDPPCLKVCOSpeed); if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 
v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity; } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity; } else { v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity; } v->DISPCLK = v->DISPCLK_calculated; DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated); for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]); } v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed); for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1); DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]); } for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->DPPCLK[k] = v->DPPCLK_calculated[k]; } // Urgent and B P-State/DRAM Clock Change Watermark DTRACE(" dcfclk_mhz = %f", v->DCFCLK); DTRACE(" return_bus_bw = %f", v->ReturnBW); for (k = 0; k < v->NumberOfActivePlanes; ++k) { dml30_CalculateBytePerPixelAnd256BBlockSizes( v->SourcePixelFormat[k], v->SurfaceTiling[k], &v->BytePerPixelY[k], &v->BytePerPixelC[k], &v->BytePerPixelDETY[k], &v->BytePerPixelDETC[k], &v->BlockHeight256BytesY[k], &v->BlockHeight256BytesC[k], &v->BlockWidth256BytesY[k], &v->BlockWidth256BytesC[k]); } CalculateSwathWidth( false, v->NumberOfActivePlanes, v->SourcePixelFormat, v->SourceScan, v->ViewportWidth, v->ViewportHeight, v->SurfaceWidthY, v->SurfaceWidthC, v->SurfaceHeightY, v->SurfaceHeightC, v->ODMCombineEnabled, v->BytePerPixelY, v->BytePerPixelC, v->BlockHeight256BytesY, v->BlockHeight256BytesC, v->BlockWidth256BytesY, v->BlockWidth256BytesC, v->BlendingAndTiming, v->HActive, v->HRatio, v->DPPPerPlane, v->SwathWidthSingleDPPY, 
v->SwathWidthSingleDPPC, v->SwathWidthY, v->SwathWidthC, v->dummyinteger3, v->dummyinteger4, v->swath_width_luma_ub, v->swath_width_chroma_ub); for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioChroma[k]; DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]); } // DCFCLK Deep Sleep CalculateDCFCLKDeepSleep( mode_lib, v->NumberOfActivePlanes, v->BytePerPixelY, v->BytePerPixelC, v->VRatio, v->VRatioChroma, v->SwathWidthY, v->SwathWidthC, v->DPPPerPlane, v->HRatio, v->HRatioChroma, v->PixelClock, v->PSCL_THROUGHPUT_LUMA, v->PSCL_THROUGHPUT_CHROMA, v->DPPCLK, v->ReadBandwidthPlaneLuma, v->ReadBandwidthPlaneChroma, v->ReturnBusWidth, &v->DCFCLKDeepSleep); // DSCCLK for (k = 0; k < v->NumberOfActivePlanes; ++k) { if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) { v->DSCCLK_calculated[k] = 0.0; } else { if (v->OutputFormat[k] == dm_420) v->DSCFormatFactor = 2; else if (v->OutputFormat[k] == dm_444) v->DSCFormatFactor = 1; else if (v->OutputFormat[k] == dm_n422) v->DSCFormatFactor = 2; else v->DSCFormatFactor = 1; if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); else v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); } } // DSC Delay for (k = 0; k < v->NumberOfActivePlanes; ++k) { double BPP = v->OutputBpp[k]; if (v->DSCEnabled[k] && BPP != 0) { if (v->ODMCombineEnabled[k] == 
dm_odm_combine_mode_disabled) { v->DSCDelay[k] = dscceComputeDelay( v->DSCInputBitPerComponent[k], BPP, dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), v->NumberOfDSCSlices[k], v->OutputFormat[k], v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { v->DSCDelay[k] = 2 * (dscceComputeDelay( v->DSCInputBitPerComponent[k], BPP, dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), v->NumberOfDSCSlices[k] / 2.0, v->OutputFormat[k], v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); } else { v->DSCDelay[k] = 4 * (dscceComputeDelay( v->DSCInputBitPerComponent[k], BPP, dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), v->NumberOfDSCSlices[k] / 4.0, v->OutputFormat[k], v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); } v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; } else { v->DSCDelay[k] = 0; } } for (k = 0; k < v->NumberOfActivePlanes; ++k) for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j]) v->DSCDelay[k] = v->DSCDelay[j]; // Prefetch for (k = 0; k < v->NumberOfActivePlanes; ++k) { unsigned int PDEAndMetaPTEBytesFrameY; unsigned int PixelPTEBytesPerRowY; unsigned int MetaRowByteY; unsigned int MetaRowByteC; unsigned int PDEAndMetaPTEBytesFrameC; unsigned int PixelPTEBytesPerRowC; bool PTEBufferSizeNotExceededY; bool PTEBufferSizeNotExceededC; if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) { v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2; v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; } else { 
v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; } PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes( mode_lib, v->DCCEnable[k], v->BlockHeight256BytesC[k], v->BlockWidth256BytesC[k], v->SourcePixelFormat[k], v->SurfaceTiling[k], v->BytePerPixelC[k], v->SourceScan[k], v->SwathWidthC[k], v->ViewportHeightChroma[k], v->GPUVMEnable, v->HostVMEnable, v->HostVMMaxNonCachedPageTableLevels, v->GPUVMMinPageSize, v->HostVMMinPageSize, v->PTEBufferSizeInRequestsForChroma, v->PitchC[k], v->DCCMetaPitchC[k], &v->MacroTileWidthC[k], &MetaRowByteC, &PixelPTEBytesPerRowC, &PTEBufferSizeNotExceededC, &v->dpte_row_width_chroma_ub[k], &v->dpte_row_height_chroma[k], &v->meta_req_width_chroma[k], &v->meta_req_height_chroma[k], &v->meta_row_width_chroma[k], &v->meta_row_height_chroma[k], &v->dummyinteger1, &v->dummyinteger2, &v->PixelPTEReqWidthC[k], &v->PixelPTEReqHeightC[k], &v->PTERequestSizeC[k], &v->dpde0_bytes_per_frame_ub_c[k], &v->meta_pte_bytes_per_frame_ub_c[k]); v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( mode_lib, v->VRatioChroma[k], v->VTAPsChroma[k], v->Interlace[k], v->ProgressiveToInterlaceUnitInOPP, v->SwathHeightC[k], v->ViewportYStartC[k], &v->VInitPreFillC[k], &v->MaxNumSwathC[k]); } else { v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; v->PTEBufferSizeInRequestsForChroma = 0; PixelPTEBytesPerRowC = 0; PDEAndMetaPTEBytesFrameC = 0; MetaRowByteC = 0; v->MaxNumSwathC[k] = 0; v->PrefetchSourceLinesC[k] = 0; } PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( mode_lib, v->DCCEnable[k], v->BlockHeight256BytesY[k], v->BlockWidth256BytesY[k], v->SourcePixelFormat[k], v->SurfaceTiling[k], v->BytePerPixelY[k], v->SourceScan[k], v->SwathWidthY[k], v->ViewportHeight[k], v->GPUVMEnable, v->HostVMEnable, v->HostVMMaxNonCachedPageTableLevels, v->GPUVMMinPageSize, v->HostVMMinPageSize, v->PTEBufferSizeInRequestsForLuma, 
v->PitchY[k], v->DCCMetaPitchY[k], &v->MacroTileWidthY[k], &MetaRowByteY, &PixelPTEBytesPerRowY, &PTEBufferSizeNotExceededY, &v->dpte_row_width_luma_ub[k], &v->dpte_row_height[k], &v->meta_req_width[k], &v->meta_req_height[k], &v->meta_row_width[k], &v->meta_row_height[k], &v->vm_group_bytes[k], &v->dpte_group_bytes[k], &v->PixelPTEReqWidthY[k], &v->PixelPTEReqHeightY[k], &v->PTERequestSizeY[k], &v->dpde0_bytes_per_frame_ub_l[k], &v->meta_pte_bytes_per_frame_ub_l[k]); v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( mode_lib, v->VRatio[k], v->vtaps[k], v->Interlace[k], v->ProgressiveToInterlaceUnitInOPP, v->SwathHeightY[k], v->ViewportYStartY[k], &v->VInitPreFillY[k], &v->MaxNumSwathY[k]); v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC; CalculateRowBandwidth( v->GPUVMEnable, v->SourcePixelFormat[k], v->VRatio[k], v->VRatioChroma[k], v->DCCEnable[k], v->HTotal[k] / v->PixelClock[k], MetaRowByteY, MetaRowByteC, v->meta_row_height[k], v->meta_row_height_chroma[k], PixelPTEBytesPerRowY, PixelPTEBytesPerRowC, v->dpte_row_height[k], v->dpte_row_height_chroma[k], &v->meta_row_bw[k], &v->dpte_row_bw[k]); } v->TotalDCCActiveDPP = 0; v->TotalActiveDPP = 0; for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k]; if (v->DCCEnable[k]) v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k]; if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) NoChromaPlanes = false; } ReorderBytes = v->NumberOfChannels * dml_max3( v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, v->UrgentOutOfOrderReturnPerChannelVMDataOnly); VMDataOnlyReturnBW = dml_min( dml_min(v->ReturnBusWidth * v->DCFCLK, 
v->FabricClock * v->FabricDatapathToDCNDataReturn) * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth); dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK); dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock); dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn); dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency); dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed); dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels); dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth); dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly); dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW); #endif if (v->GPUVMEnable && v->HostVMEnable) HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW; v->UrgentExtraLatency = CalculateExtraLatency( v->RoundTripPingLatencyCycles, ReorderBytes, v->DCFCLK, v->TotalActiveDPP, v->PixelChunkSizeInKByte, v->TotalDCCActiveDPP, v->MetaChunkSize, v->ReturnBW, v->GPUVMEnable, v->HostVMEnable, v->NumberOfActivePlanes, v->DPPPerPlane, v->dpte_group_bytes, HostVMInefficiencyFactor, v->HostVMMinPageSize, v->HostVMMaxNonCachedPageTableLevels); v->TCalc = 24.0 / v->DCFCLKDeepSleep; for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->BlendingAndTiming[k] == k) { if (v->WritebackEnable[k] == true) { v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency + 
CalculateWriteBackDelay( v->WritebackPixelFormat[k], v->WritebackHRatio[k], v->WritebackVRatio[k], v->WritebackVTaps[k], v->WritebackDestinationWidth[k], v->WritebackDestinationHeight[k], v->WritebackSourceHeight[k], v->HTotal[k]) / v->DISPCLK; } else v->WritebackDelay[v->VoltageLevel][k] = 0; for (j = 0; j < v->NumberOfActivePlanes; ++j) { if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) { v->WritebackDelay[v->VoltageLevel][k] = dml_max( v->WritebackDelay[v->VoltageLevel][k], v->WritebackLatency + CalculateWriteBackDelay( v->WritebackPixelFormat[j], v->WritebackHRatio[j], v->WritebackVRatio[j], v->WritebackVTaps[j], v->WritebackDestinationWidth[j], v->WritebackDestinationHeight[j], v->WritebackSourceHeight[j], v->HTotal[k]) / v->DISPCLK); } } } } for (k = 0; k < v->NumberOfActivePlanes; ++k) for (j = 0; j < v->NumberOfActivePlanes; ++j) if (v->BlendingAndTiming[k] == j) v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j]; for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->MaxVStartupLines[k] = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ? 
dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) : v->VTotal[k] - v->VActive[k] - dml_max( 1.0, dml_ceil( (double) v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1)); if (v->MaxVStartupLines[k] > 1023) v->MaxVStartupLines[k] = 1023; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel); dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]); #endif } v->MaximumMaxVStartupLines = 0; for (k = 0; k < v->NumberOfActivePlanes; ++k) v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]); // VBA_DELTA // We don't really care to iterate between the various prefetch modes //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode); v->UrgentLatency = CalculateUrgentLatency( v->UrgentLatencyPixelDataOnly, v->UrgentLatencyPixelMixedWithVMData, v->UrgentLatencyVMDataOnly, v->DoUrgentLatencyAdjustment, v->UrgentLatencyAdjustmentFabricClockComponent, v->UrgentLatencyAdjustmentFabricClockReference, v->FabricClock); v->FractionOfUrgentBandwidth = 0.0; v->FractionOfUrgentBandwidthImmediateFlip = 0.0; v->VStartupLines = __DML_VBA_MIN_VSTARTUP__; do { double MaxTotalRDBandwidthNoUrgentBurst = 0.0; bool DestinationLineTimesForPrefetchLessThan2 = false; bool VRatioPrefetchMoreThan4 = false; double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime); MaxTotalRDBandwidth = 0; dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines); for (k = 0; k < v->NumberOfActivePlanes; ++k) { Pipe myPipe; myPipe.DPPCLK = v->DPPCLK[k]; myPipe.DISPCLK = v->DISPCLK; myPipe.PixelClock = v->PixelClock[k]; myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep; myPipe.DPPPerPlane = v->DPPPerPlane[k]; 
myPipe.ScalerEnabled = v->ScalerEnabled[k]; myPipe.VRatio = v->VRatio[k]; myPipe.VRatioChroma = v->VRatioChroma[k]; myPipe.SourceScan = v->SourceScan[k]; myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; myPipe.InterlaceEnable = v->Interlace[k]; myPipe.NumberOfCursors = v->NumberOfCursors[k]; myPipe.VBlank = v->VTotal[k] - v->VActive[k]; myPipe.HTotal = v->HTotal[k]; myPipe.DCCEnable = v->DCCEnable[k]; myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1; myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; myPipe.BytePerPixelY = v->BytePerPixelY[k]; myPipe.BytePerPixelC = v->BytePerPixelC[k]; myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; v->ErrorResult[k] = CalculatePrefetchSchedule( mode_lib, HostVMInefficiencyFactor, &myPipe, v->DSCDelay[k], v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, v->DPPCLKDelaySCL, v->DPPCLKDelaySCLLBOnly, v->DPPCLKDelayCNVCCursor, v->DISPCLKDelaySubtotal, (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), v->OutputFormat[k], v->MaxInterDCNTileRepeaters, dml_min(v->VStartupLines, v->MaxVStartupLines[k]), v->MaxVStartupLines[k], v->GPUVMMaxPageTableLevels, v->GPUVMEnable, v->HostVMEnable, v->HostVMMaxNonCachedPageTableLevels, v->HostVMMinPageSize, v->DynamicMetadataEnable[k], v->DynamicMetadataVMEnabled, v->DynamicMetadataLinesBeforeActiveRequired[k], v->DynamicMetadataTransmittedBytes[k], v->UrgentLatency, v->UrgentExtraLatency, v->TCalc, v->PDEAndMetaPTEBytesFrame[k], v->MetaRowByte[k], v->PixelPTEBytesPerRow[k], v->PrefetchSourceLinesY[k], v->SwathWidthY[k], v->VInitPreFillY[k], v->MaxNumSwathY[k], v->PrefetchSourceLinesC[k], v->SwathWidthC[k], v->VInitPreFillC[k], v->MaxNumSwathC[k], v->swath_width_luma_ub[k], v->swath_width_chroma_ub[k], 
v->SwathHeightY[k], v->SwathHeightC[k], TWait, &v->DSTXAfterScaler[k], &v->DSTYAfterScaler[k], &v->DestinationLinesForPrefetch[k], &v->PrefetchBandwidth[k], &v->DestinationLinesToRequestVMInVBlank[k], &v->DestinationLinesToRequestRowInVBlank[k], &v->VRatioPrefetchY[k], &v->VRatioPrefetchC[k], &v->RequiredPrefetchPixDataBWLuma[k], &v->RequiredPrefetchPixDataBWChroma[k], &v->NotEnoughTimeForDynamicMetadata[k], &v->Tno_bw[k], &v->prefetch_vmrow_bw[k], &v->Tdmdl_vm[k], &v->Tdmdl[k], &v->TSetup[k], &v->VUpdateOffsetPix[k], &v->VUpdateWidthPix[k], &v->VReadyOffsetPix[k]); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]); #endif v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]); } v->NoEnoughUrgentLatencyHiding = false; v->NoEnoughUrgentLatencyHidingPre = false; for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k]; CalculateUrgentBurstFactor( v->swath_width_luma_ub[k], v->swath_width_chroma_ub[k], v->SwathHeightY[k], v->SwathHeightC[k], v->HTotal[k] / v->PixelClock[k], v->UrgentLatency, v->CursorBufferSize, v->CursorWidth[k][0], v->CursorBPP[k][0], v->VRatio[k], v->VRatioChroma[k], v->BytePerPixelDETY[k], v->BytePerPixelDETC[k], v->DETBufferSizeY[k], v->DETBufferSizeC[k], &v->UrgBurstFactorCursor[k], &v->UrgBurstFactorLuma[k], &v->UrgBurstFactorChroma[k], &v->NoUrgentLatencyHiding[k]); CalculateUrgentBurstFactor( v->swath_width_luma_ub[k], v->swath_width_chroma_ub[k], v->SwathHeightY[k], v->SwathHeightC[k], v->HTotal[k] / v->PixelClock[k], v->UrgentLatency, v->CursorBufferSize, v->CursorWidth[k][0], v->CursorBPP[k][0], v->VRatioPrefetchY[k], v->VRatioPrefetchC[k], v->BytePerPixelDETY[k], 
v->BytePerPixelDETC[k], v->DETBufferSizeY[k], v->DETBufferSizeC[k], &v->UrgBurstFactorCursorPre[k], &v->UrgBurstFactorLumaPre[k], &v->UrgBurstFactorChromaPre[k], &v->NoUrgentLatencyHidingPre[k]); MaxTotalRDBandwidth = MaxTotalRDBandwidth + dml_max3( v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] + v->cursor_bw[k] * v->UrgBurstFactorCursor[k] + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst + dml_max3( v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k] + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]); dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]); dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]); dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]); dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]); dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]); dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]); dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]); dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]); dml_print("DML::%s: k=%0d 
ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]); dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]); dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]); dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]); dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]); dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]); dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]); dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst); #endif if (v->DestinationLinesForPrefetch[k] < 2) DestinationLineTimesForPrefetchLessThan2 = true; if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4) VRatioPrefetchMoreThan4 = true; if (v->NoUrgentLatencyHiding[k] == true) v->NoEnoughUrgentLatencyHiding = true; if (v->NoUrgentLatencyHidingPre[k] == true) v->NoEnoughUrgentLatencyHidingPre = true; } v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst); dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW); dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth); #endif if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2) v->PrefetchModeSupported = true; else { v->PrefetchModeSupported = false; dml_print("DML::%s: ***failed***. Bandwidth violation. 
Results are NOT valid\n", __func__); dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW); dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not"); dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not"); } // PREVIOUS_ERROR // This error result check was done after the PrefetchModeSupported. So we will // still try to calculate flip schedule even prefetch mode not supported for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) { v->PrefetchModeSupported = false; dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__); } } if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) { v->BandwidthAvailableForImmediateFlip = v->ReturnBW; for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip - dml_max( v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); } v->TotImmediateFlipBytes = 0; for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]); } for (k = 0; k < v->NumberOfActivePlanes; ++k) { CalculateFlipSchedule( mode_lib, k, HostVMInefficiencyFactor, v->UrgentExtraLatency, v->UrgentLatency, v->PDEAndMetaPTEBytesFrame[k], v->MetaRowByte[k], v->PixelPTEBytesPerRow[k]); } v->total_dcn_read_bw_with_flip = 0.0; 
v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0; for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip + dml_max3( v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k] + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k] + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], v->DPPPerPlane[k] * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst + dml_max3( v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k], v->DPPPerPlane[k] * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]); } v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW; v->ImmediateFlipSupported = true; if (v->total_dcn_read_bw_with_flip > v->ReturnBW) { #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip); #endif v->ImmediateFlipSupported = false; v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth; } for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->ImmediateFlipSupportedForPipe[k] == false) { #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k); #endif v->ImmediateFlipSupported = false; } } } else { v->ImmediateFlipSupported = false; } v->PrefetchAndImmediateFlipSupported = (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || 
v->ImmediateFlipSupported)) ? true : false; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported); dml_print("DML::%s: ImmediateFlipRequirement[0] %d\n", __func__, v->ImmediateFlipRequirement[0] == dm_immediate_flip_required); dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported); dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport); dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable); dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported); #endif dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines); v->VStartupLines = v->VStartupLines + 1; } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines); ASSERT(v->PrefetchAndImmediateFlipSupported); // Unbounded Request Enabled CalculateUnboundedRequestAndCompressedBufferSize( v->DETBufferSizeInKByte[0], v->ConfigReturnBufferSizeInKByte, v->UseUnboundedRequesting, v->TotalActiveDPP, NoChromaPlanes, v->MaxNumDPP, v->CompressedBufferSegmentSizeInkByte, v->Output, &v->UnboundedRequestEnabled, &v->CompressedBufferSizeInkByte); //Watermarks and NB P-State/DRAM Clock Change Support { enum clock_change_support DRAMClockChangeSupport; // dummy CalculateWatermarksAndDRAMSpeedChangeSupport( mode_lib, PrefetchMode, v->DCFCLK, v->ReturnBW, v->UrgentLatency, v->UrgentExtraLatency, v->SOCCLK, v->DCFCLKDeepSleep, v->DETBufferSizeY, v->DETBufferSizeC, v->SwathHeightY, v->SwathHeightC, v->SwathWidthY, v->SwathWidthC, v->DPPPerPlane, v->BytePerPixelDETY, v->BytePerPixelDETC, v->UnboundedRequestEnabled, v->CompressedBufferSizeInkByte, &DRAMClockChangeSupport, &v->StutterExitWatermark, &v->StutterEnterPlusExitWatermark, &v->Z8StutterExitWatermark, &v->Z8StutterEnterPlusExitWatermark); for (k = 0; k < v->NumberOfActivePlanes; ++k) { if 
(v->WritebackEnable[k] == true) { v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max( 0, v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark); } else { v->WritebackAllowDRAMClockChangeEndPosition[k] = 0; } } } //Display Pipeline Delivery Time in Prefetch, Groups CalculatePixelDeliveryTimes( v->NumberOfActivePlanes, v->VRatio, v->VRatioChroma, v->VRatioPrefetchY, v->VRatioPrefetchC, v->swath_width_luma_ub, v->swath_width_chroma_ub, v->DPPPerPlane, v->HRatio, v->HRatioChroma, v->PixelClock, v->PSCL_THROUGHPUT_LUMA, v->PSCL_THROUGHPUT_CHROMA, v->DPPCLK, v->BytePerPixelC, v->SourceScan, v->NumberOfCursors, v->CursorWidth, v->CursorBPP, v->BlockWidth256BytesY, v->BlockHeight256BytesY, v->BlockWidth256BytesC, v->BlockHeight256BytesC, v->DisplayPipeLineDeliveryTimeLuma, v->DisplayPipeLineDeliveryTimeChroma, v->DisplayPipeLineDeliveryTimeLumaPrefetch, v->DisplayPipeLineDeliveryTimeChromaPrefetch, v->DisplayPipeRequestDeliveryTimeLuma, v->DisplayPipeRequestDeliveryTimeChroma, v->DisplayPipeRequestDeliveryTimeLumaPrefetch, v->DisplayPipeRequestDeliveryTimeChromaPrefetch, v->CursorRequestDeliveryTime, v->CursorRequestDeliveryTimePrefetch); CalculateMetaAndPTETimes( v->NumberOfActivePlanes, v->GPUVMEnable, v->MetaChunkSize, v->MinMetaChunkSizeBytes, v->HTotal, v->VRatio, v->VRatioChroma, v->DestinationLinesToRequestRowInVBlank, v->DestinationLinesToRequestRowInImmediateFlip, v->DCCEnable, v->PixelClock, v->BytePerPixelY, v->BytePerPixelC, v->SourceScan, v->dpte_row_height, v->dpte_row_height_chroma, v->meta_row_width, v->meta_row_width_chroma, v->meta_row_height, v->meta_row_height_chroma, v->meta_req_width, v->meta_req_width_chroma, v->meta_req_height, v->meta_req_height_chroma, v->dpte_group_bytes, v->PTERequestSizeY, v->PTERequestSizeC, v->PixelPTEReqWidthY, v->PixelPTEReqHeightY, v->PixelPTEReqWidthC, v->PixelPTEReqHeightC, v->dpte_row_width_luma_ub, v->dpte_row_width_chroma_ub, v->DST_Y_PER_PTE_ROW_NOM_L, 
v->DST_Y_PER_PTE_ROW_NOM_C, v->DST_Y_PER_META_ROW_NOM_L, v->DST_Y_PER_META_ROW_NOM_C, v->TimePerMetaChunkNominal, v->TimePerChromaMetaChunkNominal, v->TimePerMetaChunkVBlank, v->TimePerChromaMetaChunkVBlank, v->TimePerMetaChunkFlip, v->TimePerChromaMetaChunkFlip, v->time_per_pte_group_nom_luma, v->time_per_pte_group_vblank_luma, v->time_per_pte_group_flip_luma, v->time_per_pte_group_nom_chroma, v->time_per_pte_group_vblank_chroma, v->time_per_pte_group_flip_chroma); CalculateVMGroupAndRequestTimes( v->NumberOfActivePlanes, v->GPUVMEnable, v->GPUVMMaxPageTableLevels, v->HTotal, v->BytePerPixelC, v->DestinationLinesToRequestVMInVBlank, v->DestinationLinesToRequestVMInImmediateFlip, v->DCCEnable, v->PixelClock, v->dpte_row_width_luma_ub, v->dpte_row_width_chroma_ub, v->vm_group_bytes, v->dpde0_bytes_per_frame_ub_l, v->dpde0_bytes_per_frame_ub_c, v->meta_pte_bytes_per_frame_ub_l, v->meta_pte_bytes_per_frame_ub_c, v->TimePerVMGroupVBlank, v->TimePerVMGroupFlip, v->TimePerVMRequestVBlank, v->TimePerVMRequestFlip); // Min TTUVBlank for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (PrefetchMode == 0) { v->AllowDRAMClockChangeDuringVBlank[k] = true; v->AllowDRAMSelfRefreshDuringVBlank[k] = true; v->MinTTUVBlank[k] = dml_max( v->DRAMClockChangeWatermark, dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark)); } else if (PrefetchMode == 1) { v->AllowDRAMClockChangeDuringVBlank[k] = false; v->AllowDRAMSelfRefreshDuringVBlank[k] = true; v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark); } else { v->AllowDRAMClockChangeDuringVBlank[k] = false; v->AllowDRAMSelfRefreshDuringVBlank[k] = false; v->MinTTUVBlank[k] = v->UrgentWatermark; } if (!v->DynamicMetadataEnable[k]) v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k]; } // DCC Configuration v->ActiveDPPs = 0; for (k = 0; k < v->NumberOfActivePlanes; ++k) { CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown, 
v->SourcePixelFormat[k], v->SurfaceWidthY[k], v->SurfaceWidthC[k], v->SurfaceHeightY[k], v->SurfaceHeightC[k], v->DETBufferSizeInKByte[k] * 1024, v->BlockHeight256BytesY[k], v->BlockHeight256BytesC[k], v->SurfaceTiling[k], v->BytePerPixelY[k], v->BytePerPixelC[k], v->BytePerPixelDETY[k], v->BytePerPixelDETC[k], v->SourceScan[k], &v->DCCYMaxUncompressedBlock[k], &v->DCCCMaxUncompressedBlock[k], &v->DCCYMaxCompressedBlock[k], &v->DCCCMaxCompressedBlock[k], &v->DCCYIndependentBlock[k], &v->DCCCIndependentBlock[k]); } // VStartup Adjustment for (k = 0; k < v->NumberOfActivePlanes; ++k) { bool isInterlaceTiming; double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k]; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]); #endif v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin); dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]); #endif v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin; if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) { v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin; } isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP); v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k]) - v->VFrontPorch[k]) + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0)) + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0; v->VStartup[k] = (isInterlaceTiming ? 
(2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]); if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k]) <= (isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) : (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) { v->VREADY_AT_OR_AFTER_VSYNC[k] = true; } else { v->VREADY_AT_OR_AFTER_VSYNC[k] = false; } #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]); dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]); dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]); dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]); dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]); dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]); dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]); dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]); dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]); dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]); #endif } { //Maximum Bandwidth Used double TotalWRBandwidth = 0; double MaxPerPlaneVActiveWRBandwidth = 0; double WRBandwidth = 0; for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) { WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4; } else if (v->WritebackEnable[k] == true) { WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8; } 
TotalWRBandwidth = TotalWRBandwidth + WRBandwidth; MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth); } v->TotalDataReadBandwidth = 0; for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]; } } // Stutter Efficiency CalculateStutterEfficiency( mode_lib, v->CompressedBufferSizeInkByte, v->UnboundedRequestEnabled, v->ConfigReturnBufferSizeInKByte, v->MetaFIFOSizeInKEntries, v->ZeroSizeBufferEntries, v->NumberOfActivePlanes, v->ROBBufferSizeInKByte, v->TotalDataReadBandwidth, v->DCFCLK, v->ReturnBW, v->COMPBUF_RESERVED_SPACE_64B, v->COMPBUF_RESERVED_SPACE_ZS, v->SRExitTime, v->SRExitZ8Time, v->SynchronizedVBlank, v->StutterEnterPlusExitWatermark, v->Z8StutterEnterPlusExitWatermark, v->ProgressiveToInterlaceUnitInOPP, v->Interlace, v->MinTTUVBlank, v->DPPPerPlane, v->DETBufferSizeY, v->BytePerPixelY, v->BytePerPixelDETY, v->SwathWidthY, v->SwathHeightY, v->SwathHeightC, v->DCCRateLuma, v->DCCRateChroma, v->DCCFractionOfZeroSizeRequestsLuma, v->DCCFractionOfZeroSizeRequestsChroma, v->HTotal, v->VTotal, v->PixelClock, v->VRatio, v->SourceScan, v->BlockHeight256BytesY, v->BlockWidth256BytesY, v->BlockHeight256BytesC, v->BlockWidth256BytesC, v->DCCYMaxUncompressedBlock, v->DCCCMaxUncompressedBlock, v->VActive, v->DCCEnable, v->WritebackEnable, v->ReadBandwidthPlaneLuma, v->ReadBandwidthPlaneChroma, v->meta_row_bw, v->dpte_row_bw, &v->StutterEfficiencyNotIncludingVBlank, &v->StutterEfficiency, &v->NumberOfStutterBurstsPerFrame, &v->Z8StutterEfficiencyNotIncludingVBlank, &v->Z8StutterEfficiency, &v->Z8NumberOfStutterBurstsPerFrame, &v->StutterPeriod); } static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) { struct vba_vars_st *v = &mode_lib->vba; // Display Pipe Configuration double BytePerPixDETY[DC__NUM_DPP__MAX]; double BytePerPixDETC[DC__NUM_DPP__MAX]; int BytePerPixY[DC__NUM_DPP__MAX]; int 
BytePerPixC[DC__NUM_DPP__MAX]; int Read256BytesBlockHeightY[DC__NUM_DPP__MAX]; int Read256BytesBlockHeightC[DC__NUM_DPP__MAX]; int Read256BytesBlockWidthY[DC__NUM_DPP__MAX]; int Read256BytesBlockWidthC[DC__NUM_DPP__MAX]; double dummy1[DC__NUM_DPP__MAX]; double dummy2[DC__NUM_DPP__MAX]; double dummy3[DC__NUM_DPP__MAX]; double dummy4[DC__NUM_DPP__MAX]; int dummy5[DC__NUM_DPP__MAX]; int dummy6[DC__NUM_DPP__MAX]; bool dummy7[DC__NUM_DPP__MAX]; bool dummysinglestring; unsigned int k; for (k = 0; k < v->NumberOfActivePlanes; ++k) { dml30_CalculateBytePerPixelAnd256BBlockSizes( v->SourcePixelFormat[k], v->SurfaceTiling[k], &BytePerPixY[k], &BytePerPixC[k], &BytePerPixDETY[k], &BytePerPixDETC[k], &Read256BytesBlockHeightY[k], &Read256BytesBlockHeightC[k], &Read256BytesBlockWidthY[k], &Read256BytesBlockWidthC[k]); } CalculateSwathAndDETConfiguration( false, v->NumberOfActivePlanes, mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0], v->DETBufferSizeInKByte, dummy1, dummy2, v->SourceScan, v->SourcePixelFormat, v->SurfaceTiling, v->ViewportWidth, v->ViewportHeight, v->SurfaceWidthY, v->SurfaceWidthC, v->SurfaceHeightY, v->SurfaceHeightC, Read256BytesBlockHeightY, Read256BytesBlockHeightC, Read256BytesBlockWidthY, Read256BytesBlockWidthC, v->ODMCombineEnabled, v->BlendingAndTiming, BytePerPixY, BytePerPixC, BytePerPixDETY, BytePerPixDETC, v->HActive, v->HRatio, v->HRatioChroma, v->DPPPerPlane, dummy5, dummy6, dummy3, dummy4, v->SwathHeightY, v->SwathHeightC, v->DETBufferSizeY, v->DETBufferSizeC, dummy7, &dummysinglestring); } static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime) { if (PrefetchMode == 0) { return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency)); } else if (PrefetchMode == 1) { return dml_max(SREnterPlusExitTime, UrgentLatency); } else { return UrgentLatency; } } double dml31_CalculateWriteBackDISPCLK( enum 
source_format_class WritebackPixelFormat, double PixelClock, double WritebackHRatio, double WritebackVRatio, unsigned int WritebackHTaps, unsigned int WritebackVTaps, long WritebackSourceWidth, long WritebackDestinationWidth, unsigned int HTotal, unsigned int WritebackLineBufferSize) { double DISPCLK_H, DISPCLK_V, DISPCLK_HB; DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB); } static double CalculateWriteBackDelay( enum source_format_class WritebackPixelFormat, double WritebackHRatio, double WritebackVRatio, unsigned int WritebackVTaps, int WritebackDestinationWidth, int WritebackDestinationHeight, int WritebackSourceHeight, unsigned int HTotal) { double CalculateWriteBackDelay; double Line_length; double Output_lines_last_notclamped; double WritebackVInit; WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps); Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1); if (Output_lines_last_notclamped < 0) { CalculateWriteBackDelay = 0; } else { CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; } return CalculateWriteBackDelay; } static void CalculateVupdateAndDynamicMetadataParameters( int MaxInterDCNTileRepeaters, double DPPCLK, double DISPCLK, double DCFClkDeepSleep, double PixelClock, int HTotal, int VBlank, int DynamicMetadataTransmittedBytes, int DynamicMetadataLinesBeforeActiveRequired, int InterlaceEnable, bool ProgressiveToInterlaceUnitInOPP, double *TSetup, 
double *Tdmbf, double *Tdmec, double *Tdmsks, int *VUpdateOffsetPix, double *VUpdateWidthPix, double *VReadyOffsetPix) { double TotalRepeaterDelayTime; TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK); *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0); *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0); *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1); *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK; *Tdmec = HTotal / PixelClock; if (DynamicMetadataLinesBeforeActiveRequired == 0) { *Tdmsks = VBlank * HTotal / PixelClock / 2.0; } else { *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; } if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) { *Tdmsks = *Tdmsks / 2; } #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix); dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix); dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix); #endif } static void CalculateRowBandwidth( bool GPUVMEnable, enum source_format_class SourcePixelFormat, double VRatio, double VRatioChroma, bool DCCEnable, double LineTime, unsigned int MetaRowByteLuma, unsigned int MetaRowByteChroma, unsigned int meta_row_height_luma, unsigned int meta_row_height_chroma, unsigned int PixelPTEBytesPerRowLuma, unsigned int PixelPTEBytesPerRowChroma, unsigned int dpte_row_height_luma, unsigned int dpte_row_height_chroma, double *meta_row_bw, double *dpte_row_bw) { if (DCCEnable != true) { *meta_row_bw = 0; } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * 
LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime);
	} else {
		/* Luma-only formats: no chroma contribution. */
		*meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime);
	}

	/* Same three-way selection for the page table (DPTE) rows. */
	if (GPUVMEnable != true) {
		*dpte_row_bw = 0;
	} else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) {
		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
	} else {
		*dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
	}
}

/*
 * CalculateFlipSchedule() - immediate flip schedule for plane k.
 *
 * Writes, for index k of mode_lib->vba:
 *   - DestinationLinesToRequestVMInImmediateFlip
 *   - DestinationLinesToRequestRowInImmediateFlip
 *   - final_flip_bw
 *   - ImmediateFlipSupportedForPipe
 *
 * Reads v->BandwidthAvailableForImmediateFlip and v->TotImmediateFlipBytes,
 * so both must have been computed by the caller before this is invoked.
 */
static void CalculateFlipSchedule(
		struct display_mode_lib *mode_lib,
		unsigned int k,
		double HostVMInefficiencyFactor,
		double UrgentExtraLatency,
		double UrgentLatency,
		double PDEAndMetaPTEBytesPerFrame,
		double MetaRowBytes,
		double DPTEBytesPerRow)
{
	struct vba_vars_st *v = &mode_lib->vba;
	double min_row_time = 0.0;
	unsigned int HostVMDynamicLevelsTrips;
	double TimeForFetchingMetaPTEImmediateFlip;
	double TimeForFetchingRowInVBlankImmediateFlip;
	double ImmediateFlipBW = 1.0; /* placeholder; only used when GPUVM or DCC is on */
	double LineTime = v->HTotal[k] / v->PixelClock[k];

	/* Extra page-table walk trips only apply when both GPUVM and HostVM are on. */
	if (v->GPUVMEnable == true && v->HostVMEnable == true) {
		HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels;
	} else {
		HostVMDynamicLevelsTrips = 0;
	}

	/* This plane's share of the flip bandwidth, proportional to its flip bytes. */
	if (v->GPUVMEnable == true || v->DCCEnable[k] == true) {
		ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes;
	}

	if (v->GPUVMEnable == true) {
		TimeForFetchingMetaPTEImmediateFlip = dml_max3(
				v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW,
				UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1),
				LineTime / 4.0);
	} else {
		TimeForFetchingMetaPTEImmediateFlip = 0;
	}

	/* Convert to destination lines, rounded up to a quarter line. */
	v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0;
	if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
		TimeForFetchingRowInVBlankImmediateFlip = dml_max3(
				(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW,
				UrgentLatency * (HostVMDynamicLevelsTrips + 1),
				LineTime / 4);
	} else {
		TimeForFetchingRowInVBlankImmediateFlip = 0;
	}

	/* Same quarter-line rounding for the row fetch. */
	v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0;

	if (v->GPUVMEnable == true) {
		v->final_flip_bw[k] = dml_max(
				PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime),
				(MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime));
	} else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) {
		/* GPUVMEnable is already known to be false here, so this is the DCC-only case. */
		v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime);
	} else {
		v->final_flip_bw[k] = 0;
	}

	/*
	 * min_row_time: shortest time one DPTE/meta row lasts on screen.
	 * NOTE(review): this format list omits dm_420_12, while the list in
	 * CalculateRowBandwidth() includes it - confirm this is intentional.
	 */
	if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) {
		if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
			min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
		} else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
			min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
		} else {
			min_row_time = dml_min4(
					v->dpte_row_height[k] * LineTime / v->VRatio[k],
					v->meta_row_height[k] * LineTime / v->VRatio[k],
					v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k],
					v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]);
		}
	} else {
		if (v->GPUVMEnable == true && v->DCCEnable[k] != true) {
			min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k];
		} else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) {
			min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k];
		} else {
			min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]);
		}
	}

	/*
	 * The pipe cannot do an immediate flip if either request takes too many
	 * lines (>= 32 for VM, >= 16 for rows) or the combined fetch time
	 * exceeds the time one row lasts.
	 */
	if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16
			|| TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) {
		v->ImmediateFlipSupportedForPipe[k] = false;
	} else {
		v->ImmediateFlipSupportedForPipe[k] = true;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]);
	dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]);
	dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip);
	dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip);
	dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time);
	dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]);
#endif

}

/*
 * TruncToValidBPP() - validate or choose an output bits-per-pixel value.
 *
 * With DesiredBPP == 0 a value is chosen from the link capacity; otherwise
 * DesiredBPP is returned if it is acceptable for the format / DSC setting.
 * BPP_INVALID is returned when no acceptable value exists.
 *
 * HTotal, HActive, DSCSlices, AudioRate and AudioLayout are part of the
 * signature but are not referenced in the body.
 */
static double TruncToValidBPP(
		double LinkBitRate,
		int Lanes,
		int HTotal,
		int HActive,
		double PixelClock,
		double DesiredBPP,
		bool DSCEnable,
		enum output_encoder_class Output,
		enum output_format_class Format,
		unsigned int DSCInputBitPerComponent,
		int DSCSlices,
		int AudioRate,
		int AudioLayout,
		enum odm_combine_mode ODMCombine)
{
	double MaxLinkBPP;
	int MinDSCBPP;
	double MaxDSCBPP;
	int NonDSCBPP0;
	int NonDSCBPP1;
	int NonDSCBPP2;

	/* Per-format tables: three non-DSC bpp steps plus the DSC bpp range. */
	if (Format == dm_420) {
		NonDSCBPP0 = 12;
		NonDSCBPP1 = 15;
		NonDSCBPP2 = 18;
		MinDSCBPP = 6;
		MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1.0 / 16;
	} else if (Format == dm_444) {
		NonDSCBPP0 = 24;
		NonDSCBPP1 = 30;
		NonDSCBPP2 = 36;
		MinDSCBPP = 8;
		MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16;
	} else {

		NonDSCBPP0 = 16;
		NonDSCBPP1 = 20;
		NonDSCBPP2 = 24;

		if
(Format == dm_n422) {
			MinDSCBPP = 7;
			MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0;
		} else {
			MinDSCBPP = 8;
			MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0;
		}
	}

	/* Link capacity in bits per pixel; the DSC-over-DP case is derated by 2.4%. */
	if (DSCEnable && Output == dm_dp) {
		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100);
	} else {
		MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock;
	}

	/* ODM combine caps the usable bpp: 16 for 4:1, 32 for 2:1. */
	if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) {
		MaxLinkBPP = 16;
	} else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) {
		MaxLinkBPP = 32;
	}

	if (DesiredBPP == 0) {
		/* No request from the caller: pick the best value the link allows. */
		if (DSCEnable) {
			if (MaxLinkBPP < MinDSCBPP) {
				return BPP_INVALID;
			} else if (MaxLinkBPP >= MaxDSCBPP) {
				return MaxDSCBPP;
			} else {
				/* Round down to a multiple of 1/16 bpp. */
				return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0;
			}
		} else {
			if (MaxLinkBPP >= NonDSCBPP2) {
				return NonDSCBPP2;
			} else if (MaxLinkBPP >= NonDSCBPP1) {
				return NonDSCBPP1;
			} else if (MaxLinkBPP >= NonDSCBPP0) {
				/*
				 * NOTE(review): returns the literal 16.0 rather than
				 * NonDSCBPP0 (12 for dm_420, 24 for dm_444, 16
				 * otherwise) - confirm this is intended.
				 */
				return 16.0;
			} else {
				return BPP_INVALID;
			}
		}
	} else {
		/*
		 * Explicit request: without DSC it must equal NonDSCBPP2 or
		 * NonDSCBPP1, or be at most NonDSCBPP0; with DSC it must lie in
		 * [MinDSCBPP, MaxDSCBPP].
		 */
		if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0)) ||
				(DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
			return BPP_INVALID;
		} else {
			return DesiredBPP;
		}
	}
}

/*
 * CalculatePrefetchSchedulePerPlane() - run CalculatePrefetchSchedule() for
 * plane k at state indices [i][j].
 *
 * Packs the per-plane inputs into a local Pipe descriptor, then calls
 * CalculatePrefetchSchedule() with the per-state ([i][j][k]) and per-plane
 * ([k]) fields of mode_lib->vba. The boolean result is stored in
 * v->NoTimeForPrefetch[i][j][k].
 */
static noinline void CalculatePrefetchSchedulePerPlane(
		struct display_mode_lib *mode_lib,
		double HostVMInefficiencyFactor,
		int i,
		unsigned j,
		unsigned k)
{
	struct vba_vars_st *v = &mode_lib->vba;
	Pipe myPipe;

	/* Clocks and DPP count come from the state being evaluated. */
	myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
	myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
	myPipe.PixelClock = v->PixelClock[k];
	myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
	myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
	myPipe.ScalerEnabled = v->ScalerEnabled[k];
	myPipe.VRatio = mode_lib->vba.VRatio[k];
	myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k];
	myPipe.SourceScan = v->SourceScan[k];
	myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
	myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
	myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
	myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
	myPipe.InterlaceEnable = v->Interlace[k];
	myPipe.NumberOfCursors = v->NumberOfCursors[k];
	myPipe.VBlank = v->VTotal[k] - v->VActive[k];
	myPipe.HTotal = v->HTotal[k];
	myPipe.DCCEnable = v->DCCEnable[k];
	/* True for either the 4:1 or the 2:1 ODM combine mode. */
	myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
		|| v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
	myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
	myPipe.BytePerPixelY = v->BytePerPixelY[k];
	myPipe.BytePerPixelC = v->BytePerPixelC[k];
	myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
	/* Arguments are positional; the order must match the callee exactly. */
	v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
			mode_lib,
			HostVMInefficiencyFactor,
			&myPipe,
			v->DSCDelayPerState[i][k],
			v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
			v->DPPCLKDelaySCL,
			v->DPPCLKDelaySCLLBOnly,
			v->DPPCLKDelayCNVCCursor,
			v->DISPCLKDelaySubtotal,
			v->SwathWidthYThisState[k] / v->HRatio[k],
			v->OutputFormat[k],
			v->MaxInterDCNTileRepeaters,
			dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
			v->MaximumVStartup[i][j][k],
			v->GPUVMMaxPageTableLevels,
			v->GPUVMEnable,
			v->HostVMEnable,
			v->HostVMMaxNonCachedPageTableLevels,
			v->HostVMMinPageSize,
			v->DynamicMetadataEnable[k],
			v->DynamicMetadataVMEnabled,
			v->DynamicMetadataLinesBeforeActiveRequired[k],
			v->DynamicMetadataTransmittedBytes[k],
			v->UrgLatency[i],
			v->ExtraLatency,
			v->TimeCalc,
			v->PDEAndMetaPTEBytesPerFrame[i][j][k],
			v->MetaRowBytes[i][j][k],
			v->DPTEBytesPerRow[i][j][k],
			v->PrefetchLinesY[i][j][k],
			v->SwathWidthYThisState[k],
			v->PrefillY[k],
			v->MaxNumSwY[k],
			v->PrefetchLinesC[i][j][k],
			v->SwathWidthCThisState[k],
			v->PrefillC[k],
			v->MaxNumSwC[k],
			v->swath_width_luma_ub_this_state[k],
			v->swath_width_chroma_ub_this_state[k],
			v->SwathHeightYThisState[k],
			v->SwathHeightCThisState[k],
			v->TWait,
			&v->DSTXAfterScaler[k],
			&v->DSTYAfterScaler[k],
			&v->LineTimesForPrefetch[k],
			&v->PrefetchBW[k],
			&v->LinesForMetaPTE[k],
			&v->LinesForMetaAndDPTERow[k],
			&v->VRatioPreY[i][j][k],
			&v->VRatioPreC[i][j][k],
&v->RequiredPrefetchPixelDataBWLuma[i][j][k], &v->RequiredPrefetchPixelDataBWChroma[i][j][k], &v->NoTimeForDynamicMetadata[i][j][k], &v->Tno_bw[k], &v->prefetch_vmrow_bw[k], &v->dummy7[k], &v->dummy8[k], &v->dummy13[k], &v->VUpdateOffsetPix[k], &v->VUpdateWidthPix[k], &v->VReadyOffsetPix[k]); } static void PatchDETBufferSizeInKByte(unsigned int NumberOfActivePlanes, int NoOfDPPThisState[], unsigned int config_return_buffer_size_in_kbytes, unsigned int DETBufferSizeInKByte[]) { int i, total_pipes = 0; for (i = 0; i < NumberOfActivePlanes; i++) total_pipes += NoOfDPPThisState[i]; DETBufferSizeInKByte[0] = ((config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB) / 64 / total_pipes) * 64; if (DETBufferSizeInKByte[0] > DCN3_15_MAX_DET_SIZE) DETBufferSizeInKByte[0] = DCN3_15_MAX_DET_SIZE; for (i = 1; i < NumberOfActivePlanes; i++) DETBufferSizeInKByte[i] = DETBufferSizeInKByte[0]; } void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) { struct vba_vars_st *v = &mode_lib->vba; int i, j; unsigned int k, m; int ReorderingBytes; int MinPrefetchMode = 0, MaxPrefetchMode = 2; bool NoChroma = true; bool EnoughWritebackUnits = true; bool P2IWith420 = false; bool DSCOnlyIfNecessaryWithBPP = false; bool DSC422NativeNotSupported = false; double MaxTotalVActiveRDBandwidth; bool ViewportExceedsSurface = false; bool FMTBufferExceeded = false; /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ CalculateMinAndMaxPrefetchMode( mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &MinPrefetchMode, &MaxPrefetchMode); /*Scale Ratio, taps Support Check*/ v->ScaleRatioAndTapsSupport = true; for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->ScalerEnabled[k] == false && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe && v->SourcePixelFormat[k] != 
dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) { v->ScaleRatioAndTapsSupport = false; } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k] || v->VRatio[k] > v->vtaps[k] || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1) || v->HRatioChroma[k] > v->MaxHSCLRatio || v->VRatioChroma[k] > v->MaxVSCLRatio || v->HRatioChroma[k] > v->HTAPsChroma[k] || v->VRatioChroma[k] > v->VTAPsChroma[k]))) { v->ScaleRatioAndTapsSupport = false; } } /*Source Format, Pixel Format and Scan Support Check*/ v->SourceFormatPixelAndScanSupport = true; for (k = 0; k < v->NumberOfActivePlanes; k++) { if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) { v->SourceFormatPixelAndScanSupport = false; } } /*Bandwidth Support Check*/ for (k = 0; k < v->NumberOfActivePlanes; k++) { dml30_CalculateBytePerPixelAnd256BBlockSizes( v->SourcePixelFormat[k], v->SurfaceTiling[k], &v->BytePerPixelY[k], &v->BytePerPixelC[k], &v->BytePerPixelInDETY[k], &v->BytePerPixelInDETC[k], &v->Read256BlockHeightY[k], &v->Read256BlockHeightC[k], &v->Read256BlockWidthY[k], &v->Read256BlockWidthC[k]); } for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->SourceScan[k] != dm_vert) { v->SwathWidthYSingleDPP[k] = 
v->ViewportWidth[k]; v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k]; } else { v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k]; v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k]; } } for (k = 0; k < v->NumberOfActivePlanes; k++) { v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0; } for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) { v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0; } else if (v->WritebackEnable[k] == true) { v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0; } else { v->WriteBandwidth[k] = 0.0; } } /*Writeback Latency support check*/ v->WritebackLatencySupport = true; for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) { v->WritebackLatencySupport = false; } } /*Writeback Mode Support Check*/ v->TotalNumberOfActiveWriteback = 0; for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->WritebackEnable[k] == true) { v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1; } } if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) { EnoughWritebackUnits = false; } /*Writeback Scale Ratio and Taps Support Check*/ v->WritebackScaleRatioAndTapsSupport = true; for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->WritebackEnable[k] == true) { if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio || 
v->WritebackHRatio[k] < v->WritebackMinHSCLRatio || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k] || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) { v->WritebackScaleRatioAndTapsSupport = false; } if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) { v->WritebackScaleRatioAndTapsSupport = false; } } } /*Maximum DISPCLK/DPPCLK Support check*/ v->WritebackRequiredDISPCLK = 0.0; for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->WritebackEnable[k] == true) { v->WritebackRequiredDISPCLK = dml_max( v->WritebackRequiredDISPCLK, dml31_CalculateWriteBackDISPCLK( v->WritebackPixelFormat[k], v->PixelClock[k], v->WritebackHRatio[k], v->WritebackVRatio[k], v->WritebackHTaps[k], v->WritebackVTaps[k], v->WritebackSourceWidth[k], v->WritebackDestinationWidth[k], v->HTotal[k], v->WritebackLineBufferSize)); } } for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->HRatio[k] > 1.0) { v->PSCL_FACTOR[k] = dml_min( v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0)); } else { v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); } if (v->BytePerPixelC[k] == 0.0) { v->PSCL_FACTOR_CHROMA[k] = 0.0; v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] * dml_max3( v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 1.0); if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; } } else { if (v->HRatioChroma[k] > 1.0) { v->PSCL_FACTOR_CHROMA[k] = dml_min( v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); } else { 
v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); } v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] * dml_max5( v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k], 1.0); if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; } } } for (k = 0; k < v->NumberOfActivePlanes; k++) { int MaximumSwathWidthSupportLuma; int MaximumSwathWidthSupportChroma; if (v->SurfaceTiling[k] == dm_sw_linear) { MaximumSwathWidthSupportLuma = 8192.0; } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) { MaximumSwathWidthSupportLuma = 2880.0; } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) { MaximumSwathWidthSupportLuma = 3840.0; } else { MaximumSwathWidthSupportLuma = 5760.0; } if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) { MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0; } else { MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma; } v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k] / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0)); if (v->BytePerPixelC[k] == 0.0) { v->MaximumSwathWidthInLineBufferChroma = 0; } else { v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k] / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0)); } v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma); v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma); 
} CalculateSwathAndDETConfiguration( true, v->NumberOfActivePlanes, mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0], v->DETBufferSizeInKByte, v->MaximumSwathWidthLuma, v->MaximumSwathWidthChroma, v->SourceScan, v->SourcePixelFormat, v->SurfaceTiling, v->ViewportWidth, v->ViewportHeight, v->SurfaceWidthY, v->SurfaceWidthC, v->SurfaceHeightY, v->SurfaceHeightC, v->Read256BlockHeightY, v->Read256BlockHeightC, v->Read256BlockWidthY, v->Read256BlockWidthC, v->odm_combine_dummy, v->BlendingAndTiming, v->BytePerPixelY, v->BytePerPixelC, v->BytePerPixelInDETY, v->BytePerPixelInDETC, v->HActive, v->HRatio, v->HRatioChroma, v->NoOfDPPThisState, v->swath_width_luma_ub_this_state, v->swath_width_chroma_ub_this_state, v->SwathWidthYThisState, v->SwathWidthCThisState, v->SwathHeightYThisState, v->SwathHeightCThisState, v->DETBufferSizeYThisState, v->DETBufferSizeCThisState, v->SingleDPPViewportSizeSupportPerPlane, &v->ViewportSizeSupport[0][0]); for (i = 0; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed); v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed); v->RequiredDISPCLK[i][j] = 0.0; v->DISPCLK_DPPCLK_Support[i][j] = true; for (k = 0; k < v->NumberOfActivePlanes; k++) { v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1.0 + v->DISPCLKRampingMargin / 100.0); if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); } v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + v->DISPCLKRampingMargin / 100.0); if 
((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); } v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1 + v->DISPCLKRampingMargin / 100.0); if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); } if (v->ODMCombinePolicy == dm_odm_combine_policy_none || !(v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0 || v->Output[k] == dm_edp)) { v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) FMTBufferExceeded = true; } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) { v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) { v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) { v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; } else { v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; } if 
(v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) { v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; } else { v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; } } if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { if (v->Output[k] == dm_hdmi) { FMTBufferExceeded = true; } else if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) { v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH) FMTBufferExceeded = true; } else { v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; } } if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { v->MPCCombine[i][j][k] = false; v->NoOfDPP[i][j][k] = 4; v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4; } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { v->MPCCombine[i][j][k] = false; v->NoOfDPP[i][j][k] = 2; v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2; } else if ((v->WhenToDoMPCCombine == dm_mpc_never || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) { v->MPCCombine[i][j][k] = false; v->NoOfDPP[i][j][k] = 1; v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); } else { 
v->MPCCombine[i][j][k] = true; v->NoOfDPP[i][j][k] = 2; v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; } v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { v->DISPCLK_DPPCLK_Support[i][j] = false; } if (mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[k] > DCN3_15_MAX_DET_SIZE && v->NoOfDPP[i][j][k] < 2) { v->MPCCombine[i][j][k] = true; v->NoOfDPP[i][j][k] = 2; } } v->TotalNumberOfActiveDPP[i][j] = 0; v->TotalNumberOfSingleDPPPlanes[i][j] = 0; for (k = 0; k < v->NumberOfActivePlanes; k++) { v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; if (v->NoOfDPP[i][j][k] == 1) v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1; if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) NoChroma = false; } // UPTO if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) { while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) { double BWOfNonSplitPlaneOfMaximumBandwidth; unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; BWOfNonSplitPlaneOfMaximumBandwidth = 0; NumberOfNonSplitPlaneOfMaximumBandwidth = 0; for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) { BWOfNonSplitPlaneOfMaximumBandwidth = 
v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; NumberOfNonSplitPlaneOfMaximumBandwidth = k; } } v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true; v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1; v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1; } } if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) { v->RequiredDISPCLK[i][j] = 0.0; v->DISPCLK_DPPCLK_Support[i][j] = true; for (k = 0; k < v->NumberOfActivePlanes; k++) { v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) { v->MPCCombine[i][j][k] = true; v->NoOfDPP[i][j][k] = 2; v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; } else { v->MPCCombine[i][j][k] = false; v->NoOfDPP[i][j][k] = 1; v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); } if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) * (1.0 + v->DISPCLKRampingMargin / 100.0); } else { v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); } v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) > v->MaxDppclkRoundedDownToDFSGranularity) || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 
v->DISPCLK_DPPCLK_Support[i][j] = false; } } v->TotalNumberOfActiveDPP[i][j] = 0.0; for (k = 0; k < v->NumberOfActivePlanes; k++) { v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; } } v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK); if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) { v->DISPCLK_DPPCLK_Support[i][j] = false; } } } /*Total Available Pipes Support Check*/ for (i = 0; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) { v->TotalAvailablePipesSupport[i][j] = true; } else { v->TotalAvailablePipesSupport[i][j] = false; } } } /*Display IO and DSC Support Check*/ v->NonsupportedDSCInputBPC = false; for (k = 0; k < v->NumberOfActivePlanes; k++) { if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0) || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) { v->NonsupportedDSCInputBPC = true; } } /*Number Of DSC Slices*/ for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->BlendingAndTiming[k] == k) { if (v->PixelClockBackEnd[k] > 3200) { v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0); } else if (v->PixelClockBackEnd[k] > 1360) { v->NumberOfDSCSlices[k] = 8; } else if (v->PixelClockBackEnd[k] > 680) { v->NumberOfDSCSlices[k] = 4; } else if (v->PixelClockBackEnd[k] > 340) { v->NumberOfDSCSlices[k] = 2; } else { v->NumberOfDSCSlices[k] = 1; } } else { v->NumberOfDSCSlices[k] = 0; } } for (i = 0; i < v->soc.num_states; i++) { for (k = 0; k < v->NumberOfActivePlanes; k++) { v->RequiresDSC[i][k] = false; v->RequiresFEC[i][k] = false; if (v->BlendingAndTiming[k] == k) { if (v->Output[k] == dm_hdmi) { v->RequiresDSC[i][k] = false; v->RequiresFEC[i][k] = false; v->OutputBppPerState[i][k] = TruncToValidBPP( dml_min(600.0, v->PHYCLKPerState[i]) * 10, 3, v->HTotal[k], v->HActive[k], 
v->PixelClockBackEnd[k], v->ForcedOutputLinkBPP[k], false, v->Output[k], v->OutputFormat[k], v->DSCInputBitPerComponent[k], v->NumberOfDSCSlices[k], v->AudioSampleRate[k], v->AudioSampleLayout[k], v->ODMCombineEnablePerState[i][k]); } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) { if (v->DSCEnable[k] == true) { v->RequiresDSC[i][k] = true; v->LinkDSCEnable = true; if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) { v->RequiresFEC[i][k] = true; } else { v->RequiresFEC[i][k] = false; } } else { v->RequiresDSC[i][k] = false; v->LinkDSCEnable = false; if (v->Output[k] == dm_dp2p0) { v->RequiresFEC[i][k] = true; } else { v->RequiresFEC[i][k] = false; } } if (v->Output[k] == dm_dp2p0) { v->Outbpp = BPP_INVALID; if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) && v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) { v->Outbpp = TruncToValidBPP( (1.0 - v->Downspreading / 100.0) * 10000, v->OutputLinkDPLanes[k], v->HTotal[k], v->HActive[k], v->PixelClockBackEnd[k], v->ForcedOutputLinkBPP[k], v->LinkDSCEnable, v->Output[k], v->OutputFormat[k], v->DSCInputBitPerComponent[k], v->NumberOfDSCSlices[k], v->AudioSampleRate[k], v->AudioSampleLayout[k], v->ODMCombineEnablePerState[i][k]); if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 && v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) { v->RequiresDSC[i][k] = true; v->LinkDSCEnable = true; v->Outbpp = TruncToValidBPP( (1.0 - v->Downspreading / 100.0) * 10000, v->OutputLinkDPLanes[k], v->HTotal[k], v->HActive[k], v->PixelClockBackEnd[k], v->ForcedOutputLinkBPP[k], v->LinkDSCEnable, v->Output[k], v->OutputFormat[k], v->DSCInputBitPerComponent[k], v->NumberOfDSCSlices[k], v->AudioSampleRate[k], v->AudioSampleLayout[k], v->ODMCombineEnablePerState[i][k]); } v->OutputBppPerState[i][k] = v->Outbpp; // TODO: Need some other way to handle this nonsense // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10" } if 
(v->Outbpp == BPP_INVALID && (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) && v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) { v->Outbpp = TruncToValidBPP( (1.0 - v->Downspreading / 100.0) * 13500, v->OutputLinkDPLanes[k], v->HTotal[k], v->HActive[k], v->PixelClockBackEnd[k], v->ForcedOutputLinkBPP[k], v->LinkDSCEnable, v->Output[k], v->OutputFormat[k], v->DSCInputBitPerComponent[k], v->NumberOfDSCSlices[k], v->AudioSampleRate[k], v->AudioSampleLayout[k], v->ODMCombineEnablePerState[i][k]); if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 && v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) { v->RequiresDSC[i][k] = true; v->LinkDSCEnable = true; v->Outbpp = TruncToValidBPP( (1.0 - v->Downspreading / 100.0) * 13500, v->OutputLinkDPLanes[k], v->HTotal[k], v->HActive[k], v->PixelClockBackEnd[k], v->ForcedOutputLinkBPP[k], v->LinkDSCEnable, v->Output[k], v->OutputFormat[k], v->DSCInputBitPerComponent[k], v->NumberOfDSCSlices[k], v->AudioSampleRate[k], v->AudioSampleLayout[k], v->ODMCombineEnablePerState[i][k]); } v->OutputBppPerState[i][k] = v->Outbpp; // TODO: Need some other way to handle this nonsense // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5" } if (v->Outbpp == BPP_INVALID && (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) && v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) { v->Outbpp = TruncToValidBPP( (1.0 - v->Downspreading / 100.0) * 20000, v->OutputLinkDPLanes[k], v->HTotal[k], v->HActive[k], v->PixelClockBackEnd[k], v->ForcedOutputLinkBPP[k], v->LinkDSCEnable, v->Output[k], v->OutputFormat[k], v->DSCInputBitPerComponent[k], v->NumberOfDSCSlices[k], v->AudioSampleRate[k], v->AudioSampleLayout[k], v->ODMCombineEnablePerState[i][k]); if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) { v->RequiresDSC[i][k] = true; v->LinkDSCEnable = true; v->Outbpp = TruncToValidBPP( (1.0 - v->Downspreading 
/ 100.0) * 20000, v->OutputLinkDPLanes[k], v->HTotal[k], v->HActive[k], v->PixelClockBackEnd[k], v->ForcedOutputLinkBPP[k], v->LinkDSCEnable, v->Output[k], v->OutputFormat[k], v->DSCInputBitPerComponent[k], v->NumberOfDSCSlices[k], v->AudioSampleRate[k], v->AudioSampleLayout[k], v->ODMCombineEnablePerState[i][k]); } v->OutputBppPerState[i][k] = v->Outbpp; // TODO: Need some other way to handle this nonsense // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20" } } else { v->Outbpp = BPP_INVALID; if (v->PHYCLKPerState[i] >= 270.0) { v->Outbpp = TruncToValidBPP( (1.0 - v->Downspreading / 100.0) * 2700, v->OutputLinkDPLanes[k], v->HTotal[k], v->HActive[k], v->PixelClockBackEnd[k], v->ForcedOutputLinkBPP[k], v->LinkDSCEnable, v->Output[k], v->OutputFormat[k], v->DSCInputBitPerComponent[k], v->NumberOfDSCSlices[k], v->AudioSampleRate[k], v->AudioSampleLayout[k], v->ODMCombineEnablePerState[i][k]); v->OutputBppPerState[i][k] = v->Outbpp; // TODO: Need some other way to handle this nonsense // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR" } if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) { v->Outbpp = TruncToValidBPP( (1.0 - v->Downspreading / 100.0) * 5400, v->OutputLinkDPLanes[k], v->HTotal[k], v->HActive[k], v->PixelClockBackEnd[k], v->ForcedOutputLinkBPP[k], v->LinkDSCEnable, v->Output[k], v->OutputFormat[k], v->DSCInputBitPerComponent[k], v->NumberOfDSCSlices[k], v->AudioSampleRate[k], v->AudioSampleLayout[k], v->ODMCombineEnablePerState[i][k]); v->OutputBppPerState[i][k] = v->Outbpp; // TODO: Need some other way to handle this nonsense // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2" } if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) { v->Outbpp = TruncToValidBPP( (1.0 - v->Downspreading / 100.0) * 8100, v->OutputLinkDPLanes[k], v->HTotal[k], v->HActive[k], v->PixelClockBackEnd[k], v->ForcedOutputLinkBPP[k], v->LinkDSCEnable, v->Output[k], v->OutputFormat[k], v->DSCInputBitPerComponent[k], 
v->NumberOfDSCSlices[k], v->AudioSampleRate[k], v->AudioSampleLayout[k], v->ODMCombineEnablePerState[i][k]); v->OutputBppPerState[i][k] = v->Outbpp; // TODO: Need some other way to handle this nonsense // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3" } } } } else { v->OutputBppPerState[i][k] = 0; } } } for (i = 0; i < v->soc.num_states; i++) { v->LinkCapacitySupport[i] = true; for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) { v->LinkCapacitySupport[i] = false; } } } // UPTO 2172 for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->BlendingAndTiming[k] == k && (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)) { if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) { P2IWith420 = true; } if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422 && !v->DSC422NativeSupport) { DSC422NativeNotSupported = true; } } } for (i = 0; i < v->soc.num_states; ++i) { v->ODMCombine4To1SupportCheckOK[i] = true; for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_hdmi)) { v->ODMCombine4To1SupportCheckOK[i] = false; } } } /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */ for (i = 0; i < v->soc.num_states; i++) { v->NotEnoughDSCUnits[i] = false; v->TotalDSCUnitsRequired = 0.0; for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->RequiresDSC[i][k] == true) { if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0; } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { v->TotalDSCUnitsRequired = 
v->TotalDSCUnitsRequired + 2.0; } else { v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0; } } } if (v->TotalDSCUnitsRequired > v->NumberOfDSC) { v->NotEnoughDSCUnits[i] = true; } } /*DSC Delay per state*/ for (i = 0; i < v->soc.num_states; i++) { for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->OutputBppPerState[i][k] == BPP_INVALID) { v->BPP = 0.0; } else { v->BPP = v->OutputBppPerState[i][k]; } if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) { if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { v->DSCDelayPerState[i][k] = dscceComputeDelay( v->DSCInputBitPerComponent[k], v->BPP, dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), v->NumberOfDSCSlices[k], v->OutputFormat[k], v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { v->DSCDelayPerState[i][k] = 2.0 * (dscceComputeDelay( v->DSCInputBitPerComponent[k], v->BPP, dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), v->NumberOfDSCSlices[k] / 2, v->OutputFormat[k], v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); } else { v->DSCDelayPerState[i][k] = 4.0 * (dscceComputeDelay( v->DSCInputBitPerComponent[k], v->BPP, dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), v->NumberOfDSCSlices[k] / 4, v->OutputFormat[k], v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); } v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; } else { v->DSCDelayPerState[i][k] = 0.0; } } for (k = 0; k < v->NumberOfActivePlanes; k++) { for (m = 0; m < v->NumberOfActivePlanes; m++) { if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) { v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m]; } } } } //Calculate Swath, DET Configuration, DCFCLKDeepSleep // for (i = 0; i < v->soc.num_states; ++i) { for (j = 0; j <= 1; ++j) { for (k = 0; k < v->NumberOfActivePlanes; ++k) { 
v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k]; v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k]; } if (v->NumberOfActivePlanes > 1 && mode_lib->project == DML_PROJECT_DCN315 && !v->DETSizeOverride[0]) PatchDETBufferSizeInKByte(v->NumberOfActivePlanes, v->NoOfDPPThisState, v->ip.config_return_buffer_size_in_kbytes, v->DETBufferSizeInKByte); CalculateSwathAndDETConfiguration( false, v->NumberOfActivePlanes, mode_lib->project == DML_PROJECT_DCN315 && v->DETSizeOverride[0], v->DETBufferSizeInKByte, v->MaximumSwathWidthLuma, v->MaximumSwathWidthChroma, v->SourceScan, v->SourcePixelFormat, v->SurfaceTiling, v->ViewportWidth, v->ViewportHeight, v->SurfaceWidthY, v->SurfaceWidthC, v->SurfaceHeightY, v->SurfaceHeightC, v->Read256BlockHeightY, v->Read256BlockHeightC, v->Read256BlockWidthY, v->Read256BlockWidthC, v->ODMCombineEnableThisState, v->BlendingAndTiming, v->BytePerPixelY, v->BytePerPixelC, v->BytePerPixelInDETY, v->BytePerPixelInDETC, v->HActive, v->HRatio, v->HRatioChroma, v->NoOfDPPThisState, v->swath_width_luma_ub_this_state, v->swath_width_chroma_ub_this_state, v->SwathWidthYThisState, v->SwathWidthCThisState, v->SwathHeightYThisState, v->SwathHeightCThisState, v->DETBufferSizeYThisState, v->DETBufferSizeCThisState, v->dummystring, &v->ViewportSizeSupport[i][j]); CalculateDCFCLKDeepSleep( mode_lib, v->NumberOfActivePlanes, v->BytePerPixelY, v->BytePerPixelC, v->VRatio, v->VRatioChroma, v->SwathWidthYThisState, v->SwathWidthCThisState, v->NoOfDPPThisState, v->HRatio, v->HRatioChroma, v->PixelClock, v->PSCL_FACTOR, v->PSCL_FACTOR_CHROMA, v->RequiredDPPCLKThisState, v->ReadBandwidthLuma, v->ReadBandwidthChroma, v->ReturnBusWidth, &v->ProjectedDCFCLKDeepSleep[i][j]); for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k]; v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k]; 
v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k]; v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k]; v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k]; v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k]; v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k]; v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k]; } } } for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; } for (i = 0; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX]; for (k = 0; k < v->NumberOfActivePlanes; k++) { v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; } v->TotalNumberOfDCCActiveDPP[i][j] = 0; for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->DCCEnable[k] == true) { v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k]; } } for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) { v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2; 
v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; } else { v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; } v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( mode_lib, v->DCCEnable[k], v->Read256BlockHeightC[k], v->Read256BlockWidthC[k], v->SourcePixelFormat[k], v->SurfaceTiling[k], v->BytePerPixelC[k], v->SourceScan[k], v->SwathWidthCThisState[k], v->ViewportHeightChroma[k], v->GPUVMEnable, v->HostVMEnable, v->HostVMMaxNonCachedPageTableLevels, v->GPUVMMinPageSize, v->HostVMMinPageSize, v->PTEBufferSizeInRequestsForChroma, v->PitchC[k], 0.0, &v->MacroTileWidthC[k], &v->MetaRowBytesC, &v->DPTEBytesPerRowC, &v->PTEBufferSizeNotExceededC[i][j][k], &v->dummyinteger7, &v->dpte_row_height_chroma[k], &v->dummyinteger28, &v->dummyinteger26, &v->dummyinteger23, &v->meta_row_height_chroma[k], &v->dummyinteger8, &v->dummyinteger9, &v->dummyinteger19, &v->dummyinteger20, &v->dummyinteger17, &v->dummyinteger10, &v->dummyinteger11); v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines( mode_lib, v->VRatioChroma[k], v->VTAPsChroma[k], v->Interlace[k], v->ProgressiveToInterlaceUnitInOPP, v->SwathHeightCThisState[k], v->ViewportYStartC[k], &v->PrefillC[k], &v->MaxNumSwC[k]); } else { v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; v->PTEBufferSizeInRequestsForChroma = 0; v->PDEAndMetaPTEBytesPerFrameC = 0.0; v->MetaRowBytesC = 0.0; v->DPTEBytesPerRowC = 0.0; v->PrefetchLinesC[i][j][k] = 0.0; v->PTEBufferSizeNotExceededC[i][j][k] = true; } v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( mode_lib, v->DCCEnable[k], v->Read256BlockHeightY[k], v->Read256BlockWidthY[k], v->SourcePixelFormat[k], v->SurfaceTiling[k], v->BytePerPixelY[k], v->SourceScan[k], v->SwathWidthYThisState[k], v->ViewportHeight[k], v->GPUVMEnable, v->HostVMEnable, v->HostVMMaxNonCachedPageTableLevels, v->GPUVMMinPageSize, 
v->HostVMMinPageSize, v->PTEBufferSizeInRequestsForLuma, v->PitchY[k], v->DCCMetaPitchY[k], &v->MacroTileWidthY[k], &v->MetaRowBytesY, &v->DPTEBytesPerRowY, &v->PTEBufferSizeNotExceededY[i][j][k], &v->dummyinteger7, &v->dpte_row_height[k], &v->dummyinteger29, &v->dummyinteger27, &v->dummyinteger24, &v->meta_row_height[k], &v->dummyinteger25, &v->dpte_group_bytes[k], &v->dummyinteger21, &v->dummyinteger22, &v->dummyinteger18, &v->dummyinteger5, &v->dummyinteger6); v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines( mode_lib, v->VRatio[k], v->vtaps[k], v->Interlace[k], v->ProgressiveToInterlaceUnitInOPP, v->SwathHeightYThisState[k], v->ViewportYStartY[k], &v->PrefillY[k], &v->MaxNumSwY[k]); v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC; v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC; v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC; CalculateRowBandwidth( v->GPUVMEnable, v->SourcePixelFormat[k], v->VRatio[k], v->VRatioChroma[k], v->DCCEnable[k], v->HTotal[k] / v->PixelClock[k], v->MetaRowBytesY, v->MetaRowBytesC, v->meta_row_height[k], v->meta_row_height_chroma[k], v->DPTEBytesPerRowY, v->DPTEBytesPerRowC, v->dpte_row_height[k], v->dpte_row_height_chroma[k], &v->meta_row_bandwidth[i][j][k], &v->dpte_row_bandwidth[i][j][k]); } /*DCCMetaBufferSizeSupport(i, j) = True For k = 0 To NumberOfActivePlanes - 1 If MetaRowBytes(i, j, k) > 24064 Then DCCMetaBufferSizeSupport(i, j) = False End If Next k*/ v->DCCMetaBufferSizeSupport[i][j] = true; for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->MetaRowBytes[i][j][k] > 24064) v->DCCMetaBufferSizeSupport[i][j] = false; } v->UrgLatency[i] = CalculateUrgentLatency( v->UrgentLatencyPixelDataOnly, v->UrgentLatencyPixelMixedWithVMData, v->UrgentLatencyVMDataOnly, v->DoUrgentLatencyAdjustment, v->UrgentLatencyAdjustmentFabricClockComponent, v->UrgentLatencyAdjustmentFabricClockReference, v->FabricClockPerState[i]); for (k = 0; 
k < v->NumberOfActivePlanes; ++k) { CalculateUrgentBurstFactor( v->swath_width_luma_ub_this_state[k], v->swath_width_chroma_ub_this_state[k], v->SwathHeightYThisState[k], v->SwathHeightCThisState[k], v->HTotal[k] / v->PixelClock[k], v->UrgLatency[i], v->CursorBufferSize, v->CursorWidth[k][0], v->CursorBPP[k][0], v->VRatio[k], v->VRatioChroma[k], v->BytePerPixelInDETY[k], v->BytePerPixelInDETC[k], v->DETBufferSizeYThisState[k], v->DETBufferSizeCThisState[k], &v->UrgentBurstFactorCursor[k], &v->UrgentBurstFactorLuma[k], &v->UrgentBurstFactorChroma[k], &NotUrgentLatencyHiding[k]); } v->NotEnoughUrgentLatencyHidingA[i][j] = false; for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (NotUrgentLatencyHiding[k]) { v->NotEnoughUrgentLatencyHidingA[i][j] = true; } } for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k]; v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k]; } v->TotalVActivePixelBandwidth[i][j] = 0; v->TotalVActiveCursorBandwidth[i][j] = 0; v->TotalMetaRowBandwidth[i][j] = 0; v->TotalDPTERowBandwidth[i][j] = 0; for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k]; v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k]; v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k]; v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k]; } } } //Calculate Return BW for (i = 0; i < v->soc.num_states; ++i) { for (j = 0; j <= 1; ++j) { for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->BlendingAndTiming[k] == k) { if (v->WritebackEnable[k] == true) { v->WritebackDelayTime[k] = v->WritebackLatency + 
CalculateWriteBackDelay( v->WritebackPixelFormat[k], v->WritebackHRatio[k], v->WritebackVRatio[k], v->WritebackVTaps[k], v->WritebackDestinationWidth[k], v->WritebackDestinationHeight[k], v->WritebackSourceHeight[k], v->HTotal[k]) / v->RequiredDISPCLK[i][j]; } else { v->WritebackDelayTime[k] = 0.0; } for (m = 0; m < v->NumberOfActivePlanes; m++) { if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) { v->WritebackDelayTime[k] = dml_max( v->WritebackDelayTime[k], v->WritebackLatency + CalculateWriteBackDelay( v->WritebackPixelFormat[m], v->WritebackHRatio[m], v->WritebackVRatio[m], v->WritebackVTaps[m], v->WritebackDestinationWidth[m], v->WritebackDestinationHeight[m], v->WritebackSourceHeight[m], v->HTotal[m]) / v->RequiredDISPCLK[i][j]); } } } } for (k = 0; k < v->NumberOfActivePlanes; k++) { for (m = 0; m < v->NumberOfActivePlanes; m++) { if (v->BlendingAndTiming[k] == m) { v->WritebackDelayTime[k] = v->WritebackDelayTime[m]; } } } v->MaxMaxVStartup[i][j] = 0; for (k = 0; k < v->NumberOfActivePlanes; k++) { v->MaximumVStartup[i][j][k] = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ? 
dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) : v->VTotal[k] - v->VActive[k] - dml_max( 1.0, dml_ceil( 1.0 * v->WritebackDelayTime[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0)); if (v->MaximumVStartup[i][j][k] > 1023) v->MaximumVStartup[i][j][k] = 1023; v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]); } } } ReorderingBytes = v->NumberOfChannels * dml_max3( v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, v->UrgentOutOfOrderReturnPerChannelVMDataOnly); for (i = 0; i < v->soc.num_states; ++i) { for (j = 0; j <= 1; ++j) { v->DCFCLKState[i][j] = v->DCFCLKPerState[i]; } } if (v->UseMinimumRequiredDCFCLK == true) UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes); for (i = 0; i < v->soc.num_states; ++i) { for (j = 0; j <= 1; ++j) { double IdealFabricAndSDPPortBandwidthPerState = dml_min( v->ReturnBusWidth * v->DCFCLKState[i][j], v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn); double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth; double PixelDataOnlyReturnBWPerState = dml_min( IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); double PixelMixedWithVMDataReturnBWPerState = dml_min( IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); if (v->HostVMEnable != true) { v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState; } else { v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState; } } } //Re-ordering Buffer Support Check for (i = 0; i < v->soc.num_states; ++i) { for (j = 0; j <= 1; ++j) { if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / 
v->ReturnBWPerState[i][j] > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) { v->ROBSupport[i][j] = true; } else { v->ROBSupport[i][j] = false; } } } //Vertical Active BW support check MaxTotalVActiveRDBandwidth = 0; for (k = 0; k < v->NumberOfActivePlanes; ++k) { MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; } for (i = 0; i < v->soc.num_states; ++i) { for (j = 0; j <= 1; ++j) { v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min( dml_min( v->ReturnBusWidth * v->DCFCLKState[i][j], v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100, v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100); if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) { v->TotalVerticalActiveBandwidthSupport[i][j] = true; } else { v->TotalVerticalActiveBandwidthSupport[i][j] = false; } } } v->UrgentLatency = CalculateUrgentLatency( v->UrgentLatencyPixelDataOnly, v->UrgentLatencyPixelMixedWithVMData, v->UrgentLatencyVMDataOnly, v->DoUrgentLatencyAdjustment, v->UrgentLatencyAdjustmentFabricClockComponent, v->UrgentLatencyAdjustmentFabricClockReference, v->FabricClock); //Prefetch Check for (i = 0; i < v->soc.num_states; ++i) { for (j = 0; j <= 1; ++j) { double VMDataOnlyReturnBWPerState; double HostVMInefficiencyFactor = 1; int NextPrefetchModeState = MinPrefetchMode; bool UnboundedRequestEnabledThisState = false; int CompressedBufferSizeInkByteThisState = 0; double dummy; v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j]; v->BandwidthWithoutPrefetchSupported[i][j] = true; if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] + v->TotalDPTERowBandwidth[i][j] > 
v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) { v->BandwidthWithoutPrefetchSupported[i][j] = false; } for (k = 0; k < v->NumberOfActivePlanes; ++k) { v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; } VMDataOnlyReturnBWPerState = dml_min( dml_min( v->ReturnBusWidth * v->DCFCLKState[i][j], v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); if (v->GPUVMEnable && v->HostVMEnable) HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState; v->ExtraLatency = CalculateExtraLatency( v->RoundTripPingLatencyCycles, ReorderingBytes, v->DCFCLKState[i][j], v->TotalNumberOfActiveDPP[i][j], v->PixelChunkSizeInKByte, v->TotalNumberOfDCCActiveDPP[i][j], v->MetaChunkSize, v->ReturnBWPerState[i][j], v->GPUVMEnable, v->HostVMEnable, v->NumberOfActivePlanes, v->NoOfDPPThisState, v->dpte_group_bytes, HostVMInefficiencyFactor, v->HostVMMinPageSize, v->HostVMMaxNonCachedPageTableLevels); v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; do { v->PrefetchModePerState[i][j] = NextPrefetchModeState; v->MaxVStartup = v->NextMaxVStartup; v->TWait = CalculateTWait( v->PrefetchModePerState[i][j], v->DRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime); for (k = 0; k < 
v->NumberOfActivePlanes; k++) { CalculatePrefetchSchedulePerPlane(mode_lib, HostVMInefficiencyFactor, i, j, k); } for (k = 0; k < v->NumberOfActivePlanes; k++) { CalculateUrgentBurstFactor( v->swath_width_luma_ub_this_state[k], v->swath_width_chroma_ub_this_state[k], v->SwathHeightYThisState[k], v->SwathHeightCThisState[k], v->HTotal[k] / v->PixelClock[k], v->UrgLatency[i], v->CursorBufferSize, v->CursorWidth[k][0], v->CursorBPP[k][0], v->VRatioPreY[i][j][k], v->VRatioPreC[i][j][k], v->BytePerPixelInDETY[k], v->BytePerPixelInDETC[k], v->DETBufferSizeYThisState[k], v->DETBufferSizeCThisState[k], &v->UrgentBurstFactorCursorPre[k], &v->UrgentBurstFactorLumaPre[k], &v->UrgentBurstFactorChromaPre[k], &v->NotUrgentLatencyHidingPre[k]); } v->MaximumReadBandwidthWithPrefetch = 0.0; for (k = 0; k < v->NumberOfActivePlanes; k++) { v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k]; v->MaximumReadBandwidthWithPrefetch = v->MaximumReadBandwidthWithPrefetch + dml_max3( v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k] + v->NoOfDPP[i][j][k] * (v->meta_row_bandwidth[i][j][k] + v->dpte_row_bandwidth[i][j][k]), v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], v->NoOfDPP[i][j][k] * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k] + v->RequiredPrefetchPixelDataBWChroma[i][j][k] * v->UrgentBurstFactorChromaPre[k]) + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); } v->NotEnoughUrgentLatencyHidingPre = false; for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->NotUrgentLatencyHidingPre[k] == true) { v->NotEnoughUrgentLatencyHidingPre = true; } } v->PrefetchSupported[i][j] = true; if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingPre == 1) { v->PrefetchSupported[i][j] = false; } for (k = 0; k < v->NumberOfActivePlanes; 
k++) { if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0 || v->NoTimeForPrefetch[i][j][k] == true) { v->PrefetchSupported[i][j] = false; } } v->DynamicMetadataSupported[i][j] = true; for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->NoTimeForDynamicMetadata[i][j][k] == true) { v->DynamicMetadataSupported[i][j] = false; } } v->VRatioInPrefetchSupported[i][j] = true; for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) { v->VRatioInPrefetchSupported[i][j] = false; } } v->AnyLinesForVMOrRowTooLarge = false; for (k = 0; k < v->NumberOfActivePlanes; ++k) { if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) { v->AnyLinesForVMOrRowTooLarge = true; } } v->NextPrefetchMode = v->NextPrefetchMode + 1; if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) { v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j]; for (k = 0; k < v->NumberOfActivePlanes; k++) { v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip - dml_max( v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k], v->NoOfDPP[i][j][k] * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k] + v->RequiredPrefetchPixelDataBWChroma[i][j][k] * v->UrgentBurstFactorChromaPre[k]) + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); } v->TotImmediateFlipBytes = 0.0; for (k = 0; k < v->NumberOfActivePlanes; k++) { v->TotImmediateFlipBytes = v->TotImmediateFlipBytes + v->NoOfDPP[i][j][k] * (v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k] + v->DPTEBytesPerRow[i][j][k]); } for (k = 0; k < v->NumberOfActivePlanes; k++) { CalculateFlipSchedule( mode_lib, k, HostVMInefficiencyFactor, v->ExtraLatency, v->UrgLatency[i], v->PDEAndMetaPTEBytesPerFrame[i][j][k], v->MetaRowBytes[i][j][k], v->DPTEBytesPerRow[i][j][k]); } 
v->total_dcn_read_bw_with_flip = 0.0; for (k = 0; k < v->NumberOfActivePlanes; k++) { v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip + dml_max3( v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k], v->NoOfDPP[i][j][k] * (v->final_flip_bw[k] + v->RequiredPrefetchPixelDataBWLuma[i][j][k] * v->UrgentBurstFactorLumaPre[k] + v->RequiredPrefetchPixelDataBWChroma[i][j][k] * v->UrgentBurstFactorChromaPre[k]) + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); } v->ImmediateFlipSupportedForState[i][j] = true; if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) { v->ImmediateFlipSupportedForState[i][j] = false; } for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->ImmediateFlipSupportedForPipe[k] == false) { v->ImmediateFlipSupportedForState[i][j] = false; } } } else { v->ImmediateFlipSupportedForState[i][j] = false; } if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) { v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; NextPrefetchModeState = NextPrefetchModeState + 1; } else { v->NextMaxVStartup = v->NextMaxVStartup - 1; } v->NextPrefetchMode = v->NextPrefetchMode + 1; } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true && ((v->HostVMEnable == false && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || v->ImmediateFlipSupportedForState[i][j] == true)) || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode))); CalculateUnboundedRequestAndCompressedBufferSize( v->DETBufferSizeInKByte[0], v->ConfigReturnBufferSizeInKByte, v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->MaxNumDPP, v->CompressedBufferSegmentSizeInkByte, v->Output, &UnboundedRequestEnabledThisState, &CompressedBufferSizeInkByteThisState); 
CalculateWatermarksAndDRAMSpeedChangeSupport( mode_lib, v->PrefetchModePerState[i][j], v->DCFCLKState[i][j], v->ReturnBWPerState[i][j], v->UrgLatency[i], v->ExtraLatency, v->SOCCLKPerState[i], v->ProjectedDCFCLKDeepSleep[i][j], v->DETBufferSizeYThisState, v->DETBufferSizeCThisState, v->SwathHeightYThisState, v->SwathHeightCThisState, v->SwathWidthYThisState, v->SwathWidthCThisState, v->NoOfDPPThisState, v->BytePerPixelInDETY, v->BytePerPixelInDETC, UnboundedRequestEnabledThisState, CompressedBufferSizeInkByteThisState, &v->DRAMClockChangeSupport[i][j], &dummy, &dummy, &dummy, &dummy); } } /*PTE Buffer Size Check*/ for (i = 0; i < v->soc.num_states; i++) { for (j = 0; j < 2; j++) { v->PTEBufferSizeNotExceeded[i][j] = true; for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) { v->PTEBufferSizeNotExceeded[i][j] = false; } } } } /*Cursor Support Check*/ v->CursorSupport = true; for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->CursorWidth[k][0] > 0.0) { if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) { v->CursorSupport = false; } } } /*Valid Pitch Check*/ v->PitchSupport = true; for (k = 0; k < v->NumberOfActivePlanes; k++) { v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]); if (v->DCCEnable[k] == true) { v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]); } else { v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k]; } if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe && v->SourcePixelFormat[k] != dm_mono_8) { v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]); if (v->DCCEnable[k] == true) { v->AlignedDCCMetaPitchC[k] = dml_ceil( 
dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 64.0 * v->Read256BlockWidthC[k]); } else { v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; } } else { v->AlignedCPitch[k] = v->PitchC[k]; v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; } if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) { v->PitchSupport = false; } } for (k = 0; k < v->NumberOfActivePlanes; k++) { if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) { ViewportExceedsSurface = true; if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 && v->SourcePixelFormat[k] != dm_rgbe) { if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) { ViewportExceedsSurface = true; } } } } /*Mode Support, Voltage State and SOC Configuration*/ for (i = v->soc.num_states - 1; i >= 0; i--) { for (j = 0; j < 2; j++) { if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false && v->DTBCLKRequiredMoreThanSupported[i] == false && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true && 
v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false && ((v->HostVMEnable == false && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || v->ImmediateFlipSupportedForState[i][j] == true) && FMTBufferExceeded == false) { v->ModeSupport[i][j] = true; } else { v->ModeSupport[i][j] = false; #ifdef __DML_VBA_DEBUG__ if (v->ScaleRatioAndTapsSupport == false) dml_print("DML SUPPORT: ScaleRatioAndTapsSupport failed"); if (v->SourceFormatPixelAndScanSupport == false) dml_print("DML SUPPORT: SourceFormatPixelAndScanSupport failed"); if (v->ViewportSizeSupport[i][j] == false) dml_print("DML SUPPORT: ViewportSizeSupport failed"); if (v->LinkCapacitySupport[i] == false) dml_print("DML SUPPORT: LinkCapacitySupport failed"); if (v->ODMCombine4To1SupportCheckOK[i] == false) dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); if (v->NotEnoughDSCUnits[i] == true) dml_print("DML SUPPORT: NotEnoughDSCUnits"); if (v->DTBCLKRequiredMoreThanSupported[i] == true) dml_print("DML SUPPORT: DTBCLKRequiredMoreThanSupported"); if (v->ROBSupport[i][j] == false) dml_print("DML SUPPORT: ROBSupport failed"); if (v->DISPCLK_DPPCLK_Support[i][j] == false) dml_print("DML SUPPORT: DISPCLK_DPPCLK_Support failed"); if (v->TotalAvailablePipesSupport[i][j] == false) dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); if (EnoughWritebackUnits == false) dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); if (v->WritebackLatencySupport == false) dml_print("DML SUPPORT: WritebackLatencySupport failed"); if (v->WritebackScaleRatioAndTapsSupport == false) dml_print("DML SUPPORT: DSC422NativeNotSupported "); if (v->CursorSupport == false) dml_print("DML SUPPORT: DSC422NativeNotSupported failed"); if (v->PitchSupport == false) dml_print("DML SUPPORT: PitchSupport failed"); if (ViewportExceedsSurface == true) dml_print("DML SUPPORT: ViewportExceedsSurface failed"); if (v->PrefetchSupported[i][j] == false) dml_print("DML SUPPORT: PrefetchSupported 
failed");
				if (v->DynamicMetadataSupported[i][j] == false)
					dml_print("DML SUPPORT: DSC422NativeNotSupported failed");
				if (v->TotalVerticalActiveBandwidthSupport[i][j] == false)
					dml_print("DML SUPPORT: TotalVerticalActiveBandwidthSupport failed");
				if (v->VRatioInPrefetchSupported[i][j] == false)
					dml_print("DML SUPPORT: VRatioInPrefetchSupported failed");
				if (v->PTEBufferSizeNotExceeded[i][j] == false)
					dml_print("DML SUPPORT: PTEBufferSizeNotExceeded failed");
				if (v->NonsupportedDSCInputBPC == true)
					dml_print("DML SUPPORT: NonsupportedDSCInputBPC failed");
				if (!((v->HostVMEnable == false && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
						|| v->ImmediateFlipSupportedForState[i][j] == true))
					dml_print("DML SUPPORT: ImmediateFlipRequirement failed");
				if (FMTBufferExceeded == true)
					dml_print("DML SUPPORT: FMTBufferExceeded failed");
#endif
			}
		}
	}

	{
		unsigned int MaximumMPCCombine = 0;

		for (i = v->soc.num_states; i >= 0; i--) {
			if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) {
				v->VoltageLevel = i;
				v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true;
				if (v->ModeSupport[i][0] == true) {
					MaximumMPCCombine = 0;
				} else {
					MaximumMPCCombine = 1;
				}
			}
		}
		v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine];
		for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) {
			v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k];
			v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k];
		}
		v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine];
		v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel];
		v->FabricClock = v->FabricClockPerState[v->VoltageLevel];
		v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel];
		v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine];
		v->maxMpcComb = MaximumMPCCombine;
	}
}

/*
 * CalculateWatermarksAndDRAMSpeedChangeSupport - compute the urgent, DRAM
 * clock change, writeback and stutter watermarks and decide which kind of
 * DRAM clock change the configuration supports.
 *
 * Results written into mode_lib->vba: UrgentWatermark,
 * DRAMClockChangeWatermark, TotalActiveWriteback, WritebackUrgentWatermark,
 * WritebackDRAMClockChangeWatermark, LBLatencyHidingSourceLinesY/C,
 * ActiveDRAMClockChangeLatencyMargin[], MinActiveDRAMClockChangeMargin,
 * MinActiveDRAMClockChangeLatencySupported and TotalNumberOfActiveOTG.
 *
 * Results written through pointers: *DRAMClockChangeSupport and the four
 * stutter watermarks (*StutterExitWatermark, *StutterEnterPlusExitWatermark,
 * *Z8StutterExitWatermark, *Z8StutterEnterPlusExitWatermark).
 *
 * The per-plane array parameters (DETBufferSizeY, SwathHeightY/C,
 * SwathWidthY/C, DPPPerPlane, BytePerPixelDETY/C) are indexed by plane for
 * k in [0, v->NumberOfActivePlanes).
 *
 * NOTE(review): the DCFCLK, ReturnBW and DETBufferSizeC parameters are not
 * referenced anywhere in this body. In particular the chroma DET computation
 * reads v->DETBufferSizeC[k] rather than the DETBufferSizeC[] argument,
 * whereas luma uses the DETBufferSizeY[] argument. Confirm with the HW
 * formula owners whether that asymmetry is intended before changing it.
 */
static void CalculateWatermarksAndDRAMSpeedChangeSupport(
		struct display_mode_lib *mode_lib,
		unsigned int PrefetchMode,
		double DCFCLK,
		double ReturnBW,
		double UrgentLatency,
		double ExtraLatency,
		double SOCCLK,
		double DCFCLKDeepSleep,
		unsigned int DETBufferSizeY[],
		unsigned int DETBufferSizeC[],
		unsigned int SwathHeightY[],
		unsigned int SwathHeightC[],
		double SwathWidthY[],
		double SwathWidthC[],
		unsigned int DPPPerPlane[],
		double BytePerPixelDETY[],
		double BytePerPixelDETC[],
		bool UnboundedRequestEnabled,
		int unsigned CompressedBufferSizeInkByte,
		enum clock_change_support *DRAMClockChangeSupport,
		double *StutterExitWatermark,
		double *StutterEnterPlusExitWatermark,
		double *Z8StutterExitWatermark,
		double *Z8StutterEnterPlusExitWatermark)
{
	struct vba_vars_st *v = &mode_lib->vba;
	double EffectiveLBLatencyHidingY;
	double EffectiveLBLatencyHidingC;
	double LinesInDETY[DC__NUM_DPP__MAX];
	double LinesInDETC;
	unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX];
	unsigned int LinesInDETCRoundedDownToSwath;
	double FullDETBufferingTimeY;
	double FullDETBufferingTimeC;
	double ActiveDRAMClockChangeLatencyMarginY;
	double ActiveDRAMClockChangeLatencyMarginC;
	double WritebackDRAMClockChangeLatencyMargin;
	/* Holds a plane index (assigned from k / j below) even though it is a double. */
	double PlaneWithMinActiveDRAMClockChangeMargin;
	double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank;
	double WritebackDRAMClockChangeLatencyHiding;
	double TotalPixelBW = 0.0;
	int k, j;

	v->UrgentWatermark = UrgentLatency + ExtraLatency;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
	dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency);
	dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark);
#endif

	v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency);
	dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark);
#endif

	/* Count the planes that have writeback enabled. */
	v->TotalActiveWriteback = 0;
	for (k = 0; k < v->NumberOfActivePlanes; ++k) {
		if (v->WritebackEnable[k] == true) {
			v->TotalActiveWriteback = v->TotalActiveWriteback + 1;
		}
	}

	/*
	 * With more than one active writeback, both writeback watermarks get an
	 * extra WritebackChunkSize * 1024.0 / 32.0 / SOCCLK term.
	 */
	if (v->TotalActiveWriteback <= 1) {
		v->WritebackUrgentWatermark = v->WritebackLatency;
	} else {
		v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
	}

	if (v->TotalActiveWriteback <= 1) {
		v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency;
	} else {
		v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK;
	}

	/* Sum of luma + chroma pixel bandwidth over all planes and their DPPs. */
	for (k = 0; k < v->NumberOfActivePlanes; ++k) {
		TotalPixelBW = TotalPixelBW
				+ DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k])
						/ (v->HTotal[k] / v->PixelClock[k]);
	}

	/* Per-plane DRAM clock change latency margin. */
	for (k = 0; k < v->NumberOfActivePlanes; ++k) {
		double EffectiveDETBufferSizeY = DETBufferSizeY[k];

		/* Line buffer lines available, capped at MaxLineBufferLines, minus (taps - 1). */
		v->LBLatencyHidingSourceLinesY = dml_min(
				(double) v->MaxLineBufferLines,
				dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1);

		v->LBLatencyHidingSourceLinesC = dml_min(
				(double) v->MaxLineBufferLines,
				dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1);

		/* Convert the hidden source lines into time using the line time HTotal / PixelClock. */
		EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]);

		EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]);

		/*
		 * With unbounded requesting, the luma DET size is increased by this
		 * plane's luma-bandwidth share (relative to TotalPixelBW) of the
		 * compressed buffer.
		 */
		if (UnboundedRequestEnabled) {
			EffectiveDETBufferSizeY = EffectiveDETBufferSizeY
					+ CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW;
		}

		LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k];
		LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]);
		FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k];
		if (BytePerPixelDETC[k] > 0) {
			/* NOTE(review): reads v->DETBufferSizeC[k], not the DETBufferSizeC[] argument. */
			LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k];
			LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]);
			FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k];
		} else {
			/* No chroma plane: use a large placeholder buffering time. */
			LinesInDETC = 0;
			FullDETBufferingTimeC = 999999;
		}

		ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY
				- ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;

		/* With several planes, subtract a (1 - 1/NumberOfActivePlanes) fraction of one swath time. */
		if (v->NumberOfActivePlanes > 1) {
			ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY
					- (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k];
		}

		if (BytePerPixelDETC[k] > 0) {
			ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC
					- ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark;

			if (v->NumberOfActivePlanes > 1) {
				ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC
						- (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k];
			}
			/* The plane margin is the smaller of the luma and chroma margins. */
			v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC);
		} else {
			v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY;
		}

		/* A writeback-enabled plane is additionally limited by its writeback margin. */
		if (v->WritebackEnable[k] == true) {
			WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024
					/ (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4);
			/* dm_444_64 writeback halves the hiding time. */
			if (v->WritebackPixelFormat[k] == dm_444_64) {
				WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2;
			}
			WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark;
			v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin);
		}
	}

	/*
	 * Find the smallest per-plane margin and record the plane that
	 * BlendingAndTiming[] maps the minimum-margin plane to.
	 */
	v->MinActiveDRAMClockChangeMargin = 999999;
	PlaneWithMinActiveDRAMClockChangeMargin = 0;
	for (k = 0; k < v->NumberOfActivePlanes; ++k) {
		if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) {
			v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k];
			if (v->BlendingAndTiming[k] == k) {
				PlaneWithMinActiveDRAMClockChangeMargin = k;
			} else {
				for (j = 0; j < v->NumberOfActivePlanes; ++j) {
					if (v->BlendingAndTiming[k] == j) {
						PlaneWithMinActiveDRAMClockChangeMargin = j;
					}
				}
			}
		}
	}

	v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ;

	/*
	 * Smallest margin among the planes that are neither the plane recorded
	 * above nor mapped to it by BlendingAndTiming[].
	 */
	SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999;
	for (k = 0; k < v->NumberOfActivePlanes; ++k) {
		if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin)
				&& v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) {
			SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k];
		}
	}

	/* Count the planes that map to themselves in BlendingAndTiming[]. */
	v->TotalNumberOfActiveOTG = 0;

	for (k = 0; k < v->NumberOfActivePlanes; ++k) {
		if (v->BlendingAndTiming[k] == k) {
			v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1;
		}
	}

	/*
	 * Both supported outcomes require PrefetchMode == 0:
	 *  - vactive when the minimum margin is positive;
	 *  - vblank when vblanks are synchronized, there is a single active OTG,
	 *    or the second-smallest margin is positive;
	 * anything else is reported as unsupported.
	 */
	if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) {
		*DRAMClockChangeSupport = dm_dram_clock_change_vactive;
	} else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1
			|| SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) {
		*DRAMClockChangeSupport = dm_dram_clock_change_vblank;
	} else {
		*DRAMClockChangeSupport = dm_dram_clock_change_unsupported;
	}

	/* Each stutter watermark is its SR time + ExtraLatency + 10 / DCFCLKDeepSleep. */
	*StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep;
	*StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep);
	*Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;
	*Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark);
	dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark);
	dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark);
	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark);
#endif
}

/*
 * CalculateDCFCLKDeepSleep - compute the deep-sleep DCFCLK value.
 *
 * For each of the NumberOfActivePlanes planes a per-plane value is stored in
 * mode_lib->vba.DCFCLKDeepSleepPerPlane[k]; it is derived from the swath
 * width, bytes per pixel and display pipe line delivery time, scaled by
 * __DML_MIN_DCFCLK_FACTOR__, and is never lower than PixelClock[k] / 16.
 *
 * *DCFCLKDeepSleep receives the maximum of:
 *   - 8.0,
 *   - __DML_MIN_DCFCLK_FACTOR__ * (sum of ReadBandwidthLuma[] and
 *     ReadBandwidthChroma[]) / ReturnBusWidth,
 *   - every per-plane value.
 *
 * All array parameters are indexed by plane.
 */
static void CalculateDCFCLKDeepSleep(
		struct display_mode_lib *mode_lib,
		unsigned int NumberOfActivePlanes,
		int BytePerPixelY[],
		int BytePerPixelC[],
		double VRatio[],
		double VRatioChroma[],
		double SwathWidthY[],
		double SwathWidthC[],
		unsigned int DPPPerPlane[],
		double HRatio[],
		double HRatioChroma[],
		double PixelClock[],
		double PSCL_THROUGHPUT[],
		double PSCL_THROUGHPUT_CHROMA[],
		double DPPCLK[],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		int ReturnBusWidth,
		double *DCFCLKDeepSleep)
{
	struct vba_vars_st *v = &mode_lib->vba;
	double DisplayPipeLineDeliveryTimeLuma;
	double DisplayPipeLineDeliveryTimeChroma;
	double ReadBandwidth = 0.0;
	int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {

		/* Line delivery time is pixel-clock based for VRatio <= 1, DPPCLK based otherwise. */
		if (VRatio[k] <= 1) {
			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k];
		} else {
			DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];
		}
		if (BytePerPixelC[k] == 0) {
			DisplayPipeLineDeliveryTimeChroma = 0;
		} else {
			if (VRatioChroma[k] <= 1) {
				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k];
			} else {
				DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
			}
		}

		/*
		 * Planes with chroma take the larger of the luma and chroma
		 * requirement and divide by 32.0; luma-only planes divide by 64.0.
		 */
		if (BytePerPixelC[k] > 0) {
			v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
			__DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
		} else {
			v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
		}
		/* Floor the per-plane value at PixelClock / 16. */
		v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16);

	}

	/* Total read bandwidth over all planes. */
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
	}

	*DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth);

	/* The final value must also cover the most demanding plane. */
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		*DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]);
	}
}

static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LinesInDETLuma;
	double LinesInDETChroma;
	unsigned int LinesInCursorBuffer;
	double CursorBufferSizeInTime;
	double DETBufferSizeInTimeLuma;
	double DETBufferSizeInTimeChroma;

	*NotEnoughUrgentLatencyHiding = 0;

	if (CursorWidth > 0) {
		LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);
		if (VRatio > 0) {
			CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio;
			if (CursorBufferSizeInTime - UrgentLatency <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency);
} } else { *UrgentBurstFactorCursor = 1; } } LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub; if (VRatio > 0) { DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { *NotEnoughUrgentLatencyHiding = 1; *UrgentBurstFactorLuma = 0; } else { *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); } } else { *UrgentBurstFactorLuma = 1; } if (BytePerPixelInDETC > 0) { LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub; if (VRatio > 0) { DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { *NotEnoughUrgentLatencyHiding = 1; *UrgentBurstFactorChroma = 0; } else { *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); } } else { *UrgentBurstFactorChroma = 1; } } } static void CalculatePixelDeliveryTimes( unsigned int NumberOfActivePlanes, double VRatio[], double VRatioChroma[], double VRatioPrefetchY[], double VRatioPrefetchC[], unsigned int swath_width_luma_ub[], unsigned int swath_width_chroma_ub[], unsigned int DPPPerPlane[], double HRatio[], double HRatioChroma[], double PixelClock[], double PSCL_THROUGHPUT[], double PSCL_THROUGHPUT_CHROMA[], double DPPCLK[], int BytePerPixelC[], enum scan_direction_class SourceScan[], unsigned int NumberOfCursors[], unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], unsigned int BlockWidth256BytesY[], unsigned int BlockHeight256BytesY[], unsigned int BlockWidth256BytesC[], unsigned int BlockHeight256BytesC[], double DisplayPipeLineDeliveryTimeLuma[], double DisplayPipeLineDeliveryTimeChroma[], double DisplayPipeLineDeliveryTimeLumaPrefetch[], double DisplayPipeLineDeliveryTimeChromaPrefetch[], double DisplayPipeRequestDeliveryTimeLuma[], double 
DisplayPipeRequestDeliveryTimeChroma[], double DisplayPipeRequestDeliveryTimeLumaPrefetch[], double DisplayPipeRequestDeliveryTimeChromaPrefetch[], double CursorRequestDeliveryTime[], double CursorRequestDeliveryTimePrefetch[]) { double req_per_swath_ub; int k; for (k = 0; k < NumberOfActivePlanes; ++k) { if (VRatio[k] <= 1) { DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; } else { DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; } if (BytePerPixelC[k] == 0) { DisplayPipeLineDeliveryTimeChroma[k] = 0; } else { if (VRatioChroma[k] <= 1) { DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; } else { DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; } } if (VRatioPrefetchY[k] <= 1) { DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; } else { DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; } if (BytePerPixelC[k] == 0) { DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; } else { if (VRatioPrefetchC[k] <= 1) { DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; } else { DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; } } } for (k = 0; k < NumberOfActivePlanes; ++k) { if (SourceScan[k] != dm_vert) { req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; } else { req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; } DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; if (BytePerPixelC[k] == 0) { 
DisplayPipeRequestDeliveryTimeChroma[k] = 0; DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; } else { if (SourceScan[k] != dm_vert) { req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; } else { req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; } DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; } #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); #endif } for (k = 0; k < NumberOfActivePlanes; ++k) { int cursor_req_per_width; cursor_req_per_width = 
dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1); if (NumberOfCursors[k] > 0) { if (VRatio[k] <= 1) { CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; } else { CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; } if (VRatioPrefetchY[k] <= 1) { CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; } else { CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; } } else { CursorRequestDeliveryTime[k] = 0; CursorRequestDeliveryTimePrefetch[k] = 0; } #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]); dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]); dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]); #endif } } static void CalculateMetaAndPTETimes( int NumberOfActivePlanes, bool GPUVMEnable, int MetaChunkSize, int MinMetaChunkSizeBytes, int HTotal[], double VRatio[], double VRatioChroma[], double DestinationLinesToRequestRowInVBlank[], double DestinationLinesToRequestRowInImmediateFlip[], bool DCCEnable[], double PixelClock[], int BytePerPixelY[], int BytePerPixelC[], enum scan_direction_class SourceScan[], int dpte_row_height[], int dpte_row_height_chroma[], int meta_row_width[], int meta_row_width_chroma[], int meta_row_height[], int meta_row_height_chroma[], int meta_req_width[], int meta_req_width_chroma[], int meta_req_height[], int meta_req_height_chroma[], int dpte_group_bytes[], int PTERequestSizeY[], int PTERequestSizeC[], int PixelPTEReqWidthY[], int PixelPTEReqHeightY[], int PixelPTEReqWidthC[], int PixelPTEReqHeightC[], int dpte_row_width_luma_ub[], int dpte_row_width_chroma_ub[], double DST_Y_PER_PTE_ROW_NOM_L[], double DST_Y_PER_PTE_ROW_NOM_C[], 
double DST_Y_PER_META_ROW_NOM_L[], double DST_Y_PER_META_ROW_NOM_C[], double TimePerMetaChunkNominal[], double TimePerChromaMetaChunkNominal[], double TimePerMetaChunkVBlank[], double TimePerChromaMetaChunkVBlank[], double TimePerMetaChunkFlip[], double TimePerChromaMetaChunkFlip[], double time_per_pte_group_nom_luma[], double time_per_pte_group_vblank_luma[], double time_per_pte_group_flip_luma[], double time_per_pte_group_nom_chroma[], double time_per_pte_group_vblank_chroma[], double time_per_pte_group_flip_chroma[]) { unsigned int meta_chunk_width; unsigned int min_meta_chunk_width; unsigned int meta_chunk_per_row_int; unsigned int meta_row_remainder; unsigned int meta_chunk_threshold; unsigned int meta_chunks_per_row_ub; unsigned int meta_chunk_width_chroma; unsigned int min_meta_chunk_width_chroma; unsigned int meta_chunk_per_row_int_chroma; unsigned int meta_row_remainder_chroma; unsigned int meta_chunk_threshold_chroma; unsigned int meta_chunks_per_row_ub_chroma; unsigned int dpte_group_width_luma; unsigned int dpte_groups_per_row_luma_ub; unsigned int dpte_group_width_chroma; unsigned int dpte_groups_per_row_chroma_ub; int k; for (k = 0; k < NumberOfActivePlanes; ++k) { DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; if (BytePerPixelC[k] == 0) { DST_Y_PER_PTE_ROW_NOM_C[k] = 0; } else { DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; } DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; if (BytePerPixelC[k] == 0) { DST_Y_PER_META_ROW_NOM_C[k] = 0; } else { DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; } } for (k = 0; k < NumberOfActivePlanes; ++k) { if (DCCEnable[k] == true) { meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; meta_row_remainder = meta_row_width[k] % meta_chunk_width; if 
(SourceScan[k] != dm_vert) { meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; } else { meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; } if (meta_row_remainder <= meta_chunk_threshold) { meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; } else { meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; } TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; if (BytePerPixelC[k] == 0) { TimePerChromaMetaChunkNominal[k] = 0; TimePerChromaMetaChunkVBlank[k] = 0; TimePerChromaMetaChunkFlip[k] = 0; } else { meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma; meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; if (SourceScan[k] != dm_vert) { meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k]; } else { meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k]; } if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) { meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; } else { meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; } TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; TimePerChromaMetaChunkFlip[k] = 
DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; } } else { TimePerMetaChunkNominal[k] = 0; TimePerMetaChunkVBlank[k] = 0; TimePerMetaChunkFlip[k] = 0; TimePerChromaMetaChunkNominal[k] = 0; TimePerChromaMetaChunkVBlank[k] = 0; TimePerChromaMetaChunkFlip[k] = 0; } } for (k = 0; k < NumberOfActivePlanes; ++k) { if (GPUVMEnable == true) { if (SourceScan[k] != dm_vert) { dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k]; } else { dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k]; } dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1); time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; if (BytePerPixelC[k] == 0) { time_per_pte_group_nom_chroma[k] = 0; time_per_pte_group_vblank_chroma[k] = 0; time_per_pte_group_flip_chroma[k] = 0; } else { if (SourceScan[k] != dm_vert) { dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k]; } else { dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k]; } dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1); time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / 
dpte_groups_per_row_chroma_ub; } } else { time_per_pte_group_nom_luma[k] = 0; time_per_pte_group_vblank_luma[k] = 0; time_per_pte_group_flip_luma[k] = 0; time_per_pte_group_nom_chroma[k] = 0; time_per_pte_group_vblank_chroma[k] = 0; time_per_pte_group_flip_chroma[k] = 0; } } } static void CalculateVMGroupAndRequestTimes( unsigned int NumberOfActivePlanes, bool GPUVMEnable, unsigned int GPUVMMaxPageTableLevels, unsigned int HTotal[], int BytePerPixelC[], double DestinationLinesToRequestVMInVBlank[], double DestinationLinesToRequestVMInImmediateFlip[], bool DCCEnable[], double PixelClock[], int dpte_row_width_luma_ub[], int dpte_row_width_chroma_ub[], int vm_group_bytes[], unsigned int dpde0_bytes_per_frame_ub_l[], unsigned int dpde0_bytes_per_frame_ub_c[], int meta_pte_bytes_per_frame_ub_l[], int meta_pte_bytes_per_frame_ub_c[], double TimePerVMGroupVBlank[], double TimePerVMGroupFlip[], double TimePerVMRequestVBlank[], double TimePerVMRequestFlip[]) { int num_group_per_lower_vm_stage; int num_req_per_lower_vm_stage; int k; for (k = 0; k < NumberOfActivePlanes; ++k) { if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { if (DCCEnable[k] == false) { if (BytePerPixelC[k] > 0) { num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); } else { num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); } } else { if (GPUVMMaxPageTableLevels == 1) { if (BytePerPixelC[k] > 0) { num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); } else { num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); } } else { if 
(BytePerPixelC[k] > 0) { num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); } else { num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); } } } if (DCCEnable[k] == false) { if (BytePerPixelC[k] > 0) { num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64; } else { num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64; } } else { if (GPUVMMaxPageTableLevels == 1) { if (BytePerPixelC[k] > 0) { num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; } else { num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64; } } else { if (BytePerPixelC[k] > 0) { num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; } else { num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64; } } } TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; if 
(GPUVMMaxPageTableLevels > 2) { TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; } } else { TimePerVMGroupVBlank[k] = 0; TimePerVMGroupFlip[k] = 0; TimePerVMRequestVBlank[k] = 0; TimePerVMRequestFlip[k] = 0; } } } static void CalculateStutterEfficiency( struct display_mode_lib *mode_lib, int CompressedBufferSizeInkByte, bool UnboundedRequestEnabled, int ConfigReturnBufferSizeInKByte, int MetaFIFOSizeInKEntries, int ZeroSizeBufferEntries, int NumberOfActivePlanes, int ROBBufferSizeInKByte, double TotalDataReadBandwidth, double DCFCLK, double ReturnBW, double COMPBUF_RESERVED_SPACE_64B, double COMPBUF_RESERVED_SPACE_ZS, double SRExitTime, double SRExitZ8Time, bool SynchronizedVBlank, double Z8StutterEnterPlusExitWatermark, double StutterEnterPlusExitWatermark, bool ProgressiveToInterlaceUnitInOPP, bool Interlace[], double MinTTUVBlank[], int DPPPerPlane[], unsigned int DETBufferSizeY[], int BytePerPixelY[], double BytePerPixelDETY[], double SwathWidthY[], int SwathHeightY[], int SwathHeightC[], double NetDCCRateLuma[], double NetDCCRateChroma[], double DCCFractionOfZeroSizeRequestsLuma[], double DCCFractionOfZeroSizeRequestsChroma[], int HTotal[], int VTotal[], double PixelClock[], double VRatio[], enum scan_direction_class SourceScan[], int BlockHeight256BytesY[], int BlockWidth256BytesY[], int BlockHeight256BytesC[], int BlockWidth256BytesC[], int DCCYMaxUncompressedBlock[], int DCCCMaxUncompressedBlock[], int VActive[], bool DCCEnable[], bool WritebackEnable[], double ReadBandwidthPlaneLuma[], double ReadBandwidthPlaneChroma[], double meta_row_bw[], double dpte_row_bw[], double *StutterEfficiencyNotIncludingVBlank, double *StutterEfficiency, int *NumberOfStutterBurstsPerFrame, double *Z8StutterEfficiencyNotIncludingVBlank, double *Z8StutterEfficiency, int *Z8NumberOfStutterBurstsPerFrame, double 
*StutterPeriod) { struct vba_vars_st *v = &mode_lib->vba; double DETBufferingTimeY; double SwathWidthYCriticalPlane = 0; double VActiveTimeCriticalPlane = 0; double FrameTimeCriticalPlane = 0; int BytePerPixelYCriticalPlane = 0; double LinesToFinishSwathTransferStutterCriticalPlane = 0; double MinTTUVBlankCriticalPlane = 0; double TotalCompressedReadBandwidth; double TotalRowReadBandwidth; double AverageDCCCompressionRate; double EffectiveCompressedBufferSize; double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; double StutterBurstTime; int TotalActiveWriteback; double LinesInDETY; double LinesInDETYRoundedDownToSwath; double MaximumEffectiveCompressionLuma; double MaximumEffectiveCompressionChroma; double TotalZeroSizeRequestReadBandwidth; double TotalZeroSizeCompressedReadBandwidth; double AverageDCCZeroSizeFraction; double AverageZeroSizeCompressionRate; int TotalNumberOfActiveOTG = 0; double LastStutterPeriod = 0.0; double LastZ8StutterPeriod = 0.0; int k; TotalZeroSizeRequestReadBandwidth = 0; TotalZeroSizeCompressedReadBandwidth = 0; TotalRowReadBandwidth = 0; TotalCompressedReadBandwidth = 0; for (k = 0; k < NumberOfActivePlanes; ++k) { if (DCCEnable[k] == true) { if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k]) || DCCYMaxUncompressedBlock[k] < 256) { MaximumEffectiveCompressionLuma = 2; } else { MaximumEffectiveCompressionLuma = 4; } TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma); TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma; if (ReadBandwidthPlaneChroma[k] > 0) { if 
((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) { MaximumEffectiveCompressionChroma = 2; } else { MaximumEffectiveCompressionChroma = 4; } TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma); TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k]; TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma; } } else { TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k]; } TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]); } AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth); dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, 
CompressedBufferSizeInkByte); #endif if (AverageDCCZeroSizeFraction == 1) { AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate; } else if (AverageDCCZeroSizeFraction > 0) { AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; EffectiveCompressedBufferSize = dml_min( CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate, (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); dml_print( "DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)); dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate); dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); } else { EffectiveCompressedBufferSize = dml_min( CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate; dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * 
AverageDCCCompressionRate); } #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); #endif *StutterPeriod = 0; for (k = 0; k < NumberOfActivePlanes; ++k) { LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth) / BytePerPixelDETY[k] / SwathWidthY[k]; LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k]; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]); dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]); dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]); dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY); dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath); dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]); dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]); dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); #endif if (k == 0 || DETBufferingTimeY < *StutterPeriod) { bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; *StutterPeriod = DETBufferingTimeY; FrameTimeCriticalPlane = 
(isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k]; VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k]; BytePerPixelYCriticalPlane = BytePerPixelY[k]; SwathWidthYCriticalPlane = SwathWidthY[k]; LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath); MinTTUVBlankCriticalPlane = MinTTUVBlank[k]; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane); dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane); dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane); dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane); dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane); #endif } } PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize); #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth); dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize); dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); dml_print("DML::%s: 
PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); #endif StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW); dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth)); dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); #endif StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); dml_print( "DML::%s: Time to finish residue swath=%f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); TotalActiveWriteback = 0; for (k = 0; k < NumberOfActivePlanes; ++k) { if (WritebackEnable[k]) { TotalActiveWriteback = TotalActiveWriteback + 1; } } if (TotalActiveWriteback == 0) { #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 
dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); #endif *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); } else { *StutterEfficiencyNotIncludingVBlank = 0.; *Z8StutterEfficiencyNotIncludingVBlank = 0.; *NumberOfStutterBurstsPerFrame = 0; *Z8NumberOfStutterBurstsPerFrame = 0; } #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank); dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); #endif for (k = 0; k < NumberOfActivePlanes; ++k) { if (v->BlendingAndTiming[k] == k) { TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; } } if (*StutterEfficiencyNotIncludingVBlank > 0) { LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) { *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * 
VActiveTimeCriticalPlane / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
		} else {
			*StutterEfficiency = *StutterEfficiencyNotIncludingVBlank;
		}
	} else {
		*StutterEfficiency = 0;
	}

	if (*Z8StutterEfficiencyNotIncludingVBlank > 0) {
		LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod;
		if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) {
			*Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane / *StutterPeriod) / FrameTimeCriticalPlane) * 100;
		} else {
			*Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank;
		}
	} else {
		*Z8StutterEfficiency = 0.;
	}

	dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
	dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark);
	dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime);
	dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod);
	dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency);
	dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency);
	dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank);
	dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame);
}

/*
 * CalculateSwathAndDETConfiguration - pick swath heights and split the DET
 * buffer between luma and chroma for every active plane.
 *
 * Inputs are per-plane arrays indexed 0..NumberOfActivePlanes-1.
 *
 * Steps performed for each plane k:
 *  1. CalculateSwathWidth() fills SwathWidth[], SwathWidthChroma[],
 *     swath_width_luma_ub[], swath_width_chroma_ub[] and the local
 *     MaximumSwathHeightY/C[] arrays.
 *  2. The DET size for the plane is DETBufferSizeInKByteA[k], divided by
 *     DPPPerPlane[k] when DETSharedByAllDPP is set and DPPPerPlane[k] != 0.
 *  3. A minimum swath height (full or half of the maximum) is chosen from the
 *     pixel format, surface tiling and scan direction.
 *  4. SwathHeightY[k]/SwathHeightC[k] are set to the tallest combination whose
 *     luma + chroma swath bytes fit in half of the DET buffer; if none fits,
 *     the minimum heights are used.
 *  5. DETBufferSizeY[k]/DETBufferSizeC[k] receive the luma/chroma split.
 *
 * Outputs: swath_width_luma_ub[], swath_width_chroma_ub[], SwathWidth[],
 * SwathWidthChroma[], SwathHeightY[], SwathHeightC[], DETBufferSizeY[],
 * DETBufferSizeC[], ViewportSizeSupportPerPlane[] and *ViewportSizeSupport
 * (cleared as soon as any single plane fails the check, never set back).
 *
 * HRatioChroma[], MaximumSwathWidthLuma/Chroma[] aside, note that
 * HRatioChroma[] is accepted but not read in this function.
 */
static void CalculateSwathAndDETConfiguration(
		bool ForceSingleDPP,
		int NumberOfActivePlanes,
		bool DETSharedByAllDPP,
		unsigned int DETBufferSizeInKByteA[],
		double MaximumSwathWidthLuma[],
		double MaximumSwathWidthChroma[],
		enum scan_direction_class SourceScan[],
		enum source_format_class SourcePixelFormat[],
		enum dm_swizzle_mode SurfaceTiling[],
		int ViewportWidth[],
		int ViewportHeight[],
		int SurfaceWidthY[],
		int SurfaceWidthC[],
		int SurfaceHeightY[],
		int SurfaceHeightC[],
		int Read256BytesBlockHeightY[],
		int Read256BytesBlockHeightC[],
		int Read256BytesBlockWidthY[],
		int Read256BytesBlockWidthC[],
		enum odm_combine_mode ODMCombineEnabled[],
		int BlendingAndTiming[],
		int BytePerPixY[],
		int BytePerPixC[],
		double BytePerPixDETY[],
		double BytePerPixDETC[],
		int HActive[],
		double HRatio[],
		double HRatioChroma[],
		int DPPPerPlane[],
		int swath_width_luma_ub[],
		int swath_width_chroma_ub[],
		double SwathWidth[],
		double SwathWidthChroma[],
		int SwathHeightY[],
		int SwathHeightC[],
		unsigned int DETBufferSizeY[],
		unsigned int DETBufferSizeC[],
		bool ViewportSizeSupportPerPlane[],
		bool *ViewportSizeSupport)
{
	int MaximumSwathHeightY[DC__NUM_DPP__MAX];
	int MaximumSwathHeightC[DC__NUM_DPP__MAX];
	int MinimumSwathHeightY;
	int MinimumSwathHeightC;
	int RoundedUpMaxSwathSizeBytesY;
	int RoundedUpMaxSwathSizeBytesC;
	int RoundedUpMinSwathSizeBytesY;
	int RoundedUpMinSwathSizeBytesC;
	int RoundedUpSwathSizeBytesY;
	int RoundedUpSwathSizeBytesC;
	double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
	double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
	int k;

	/* Swath widths and maximum swath heights for every plane. */
	CalculateSwathWidth(
			ForceSingleDPP,
			NumberOfActivePlanes,
			SourcePixelFormat,
			SourceScan,
			ViewportWidth,
			ViewportHeight,
			SurfaceWidthY,
			SurfaceWidthC,
			SurfaceHeightY,
			SurfaceHeightC,
			ODMCombineEnabled,
			BytePerPixY,
			BytePerPixC,
			Read256BytesBlockHeightY,
			Read256BytesBlockHeightC,
			Read256BytesBlockWidthY,
			Read256BytesBlockWidthC,
			BlendingAndTiming,
			HActive,
			HRatio,
			DPPPerPlane,
			SwathWidthSingleDPP,
			SwathWidthSingleDPPChroma,
			SwathWidth,
			SwathWidthChroma,
			MaximumSwathHeightY,
			MaximumSwathHeightC,
			swath_width_luma_ub,
			swath_width_chroma_ub);

	*ViewportSizeSupport = true;
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		unsigned int DETBufferSizeInKByte = DETBufferSizeInKByteA[k];

		/* Shared DET: each DPP of the plane gets an equal share (guarded against /0). */
		if (DETSharedByAllDPP && DPPPerPlane[k])
			DETBufferSizeInKByte /= DPPPerPlane[k];
		/* Formats listed here keep the full chroma swath height. */
		if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
				|| SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
			if (SurfaceTiling[k] == dm_sw_linear
					|| (SourcePixelFormat[k] == dm_444_64 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
							&& SourceScan[k] != dm_vert)) {
				MinimumSwathHeightY = MaximumSwathHeightY[k];
			/*
			 * NOTE(review): dm_444_8 is not part of the enclosing format
			 * list, so as written this branch cannot be taken. Left as-is
			 * per the file header; confirm intent with HW.
			 */
			} else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
				MinimumSwathHeightY = MaximumSwathHeightY[k];
			} else {
				MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
			}
			MinimumSwathHeightC = MaximumSwathHeightC[k];
		} else {
			/* All remaining formats: linear keeps full height, otherwise halve Y and/or C. */
			if (SurfaceTiling[k] == dm_sw_linear) {
				MinimumSwathHeightY = MaximumSwathHeightY[k];
				MinimumSwathHeightC = MaximumSwathHeightC[k];
			} else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
				MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
				MinimumSwathHeightC = MaximumSwathHeightC[k];
			} else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
				MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
				MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
			} else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
				MinimumSwathHeightY = MaximumSwathHeightY[k];
				MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
			} else {
				MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
				MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
			}
		}

		/* Swath size in bytes = upper-bound width * DET bytes per pixel * height (truncated to int). */
		RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
		RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
		/* Only dm_420_10 gets the extra dml_ceil(..., 256) step (presumably round-up to 256 bytes). */
		if (SourcePixelFormat[k] == dm_420_10) {
			RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
			RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
		}
		RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
		RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
		if (SourcePixelFormat[k] == dm_420_10) {
			RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
			RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
		}

		/*
		 * Choose swath heights. One luma + one chroma swath must fit in half
		 * of the (unrounded) DET buffer. Preference order: max/max, then
		 * shrink whichever of Y or C dominates (1.5x ratio), then min/min
		 * unconditionally.
		 */
		if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
			SwathHeightY[k] = MaximumSwathHeightY[k];
			SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
		} else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
				&& RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
			SwathHeightY[k] = MinimumSwathHeightY;
			SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
		} else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
				&& RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
			SwathHeightY[k] = MaximumSwathHeightY[k];
			SwathHeightC[k] = MinimumSwathHeightC;
			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
			RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
		} else {
			SwathHeightY[k] = MinimumSwathHeightY;
			SwathHeightC[k] = MinimumSwathHeightC;
			RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
			RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
		}
		{
		/* From here on the DET size passed through dml_ceil(..., 64) is used, not the raw one. */
		double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);

		/* Luma/chroma split: all luma, half/half, or 2/3 (floored with granularity 1024) + 1/3. */
		if (SwathHeightC[k] == 0) {
			DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
			DETBufferSizeC[k] = 0;
		} else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
			DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
			DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
		} else {
			DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
			DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
		}

		/* Unsupported if even the minimum swaths overflow half the DET, or a swath is wider than allowed. */
		if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
				|| (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
			*ViewportSizeSupport = false;
			ViewportSizeSupportPerPlane[k] = false;
		} else {
			ViewportSizeSupportPerPlane[k] = true;
		}
		}
	}
}

/*
 * CalculateSwathWidth - derive per-plane swath widths and maximum swath
 * heights.
 *
 * For each plane k in 0..NumberOfActivePlanes-1 this writes:
 *  - SwathWidthSingleDPPY/C[k]: viewport width (or viewport height when
 *    SourceScan[k] == dm_vert); chroma is half of luma for the dm_420_*
 *    formats, otherwise equal to luma.
 *  - SwathWidthY/C[k]: the width handled per DPP. With ODM combine (looked up
 *    on the plane that BlendingAndTiming[k] points to) it is capped at
 *    HActive/4 or HActive/2 scaled by HRatio; without ODM it is halved when
 *    DPPPerPlane[k] == 2. ForceSingleDPP overrides both with the single-DPP
 *    widths.
 *  - MaximumSwathHeightY/C[k]: the 256-byte block height, or the block width
 *    for vertical scan.
 *  - swath_width_luma_ub[k] / swath_width_chroma_ub[k]: swath width padded to
 *    the block dimension and clamped to the padded surface dimension;
 *    chroma is 0 when BytePerPixC[k] is not positive.
 *
 * BytePerPixY[] is accepted but not read in this function.
 */
static void CalculateSwathWidth(
		bool ForceSingleDPP,
		int NumberOfActivePlanes,
		enum source_format_class SourcePixelFormat[],
		enum scan_direction_class SourceScan[],
		int ViewportWidth[],
		int ViewportHeight[],
		int SurfaceWidthY[],
		int SurfaceWidthC[],
		int SurfaceHeightY[],
		int SurfaceHeightC[],
		enum odm_combine_mode ODMCombineEnabled[],
		int BytePerPixY[],
		int BytePerPixC[],
		int Read256BytesBlockHeightY[],
		int Read256BytesBlockHeightC[],
		int Read256BytesBlockWidthY[],
		int Read256BytesBlockWidthC[],
		int BlendingAndTiming[],
		int HActive[],
		double HRatio[],
		int DPPPerPlane[],
		double SwathWidthSingleDPPY[],
		double SwathWidthSingleDPPC[],
		double SwathWidthY[],
		double SwathWidthC[],
		int MaximumSwathHeightY[],
		int MaximumSwathHeightC[],
		int swath_width_luma_ub[],
		int swath_width_chroma_ub[])
{
	enum odm_combine_mode MainPlaneODMCombine;
	int j, k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
#endif

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		/* Vertical scan walks the viewport along its height. */
		if (SourceScan[k] != dm_vert) {
			SwathWidthSingleDPPY[k] = ViewportWidth[k];
		} else {
			SwathWidthSingleDPPY[k] = ViewportHeight[k];
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
		dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
#endif

		/* Use the ODM mode of the plane referenced by BlendingAndTiming[k]; defaults to the plane's own mode. */
		MainPlaneODMCombine = ODMCombineEnabled[k];
		for (j = 0; j < NumberOfActivePlanes; ++j) {
			if (BlendingAndTiming[k] == j) {
				MainPlaneODMCombine = ODMCombineEnabled[j];
			}
		}

		if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
			SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
		} else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
			SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
		} else if (DPPPerPlane[k] == 2) {
			SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
		} else {
			SwathWidthY[k] = SwathWidthSingleDPPY[k];
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
		dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
#endif

		/* 4:2:0 formats: chroma width is half of luma. */
		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
			SwathWidthC[k] = SwathWidthY[k] / 2;
			SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
		} else {
			SwathWidthC[k] = SwathWidthY[k];
			SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
		}

		/* Caller asked for a single DPP: discard the ODM / 2-DPP split computed above. */
		if (ForceSingleDPP == true) {
			SwathWidthY[k] = SwathWidthSingleDPPY[k];
			SwathWidthC[k] = SwathWidthSingleDPPC[k];
		}
		{
		int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
		int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
#endif

		if (SourceScan[k] != dm_vert) {
			MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
			MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
			/* Pad (width - 1) to the block width plus one extra block, then clamp to the padded surface. */
			swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
			if (BytePerPixC[k] > 0) {
				int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);

				swath_width_chroma_ub[k] = dml_min(
						surface_width_ub_c,
						(int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
			} else {
				swath_width_chroma_ub[k] = 0;
			}
		} else {
			/* Vertical scan: block width and height swap roles. */
			MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
			MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
			swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
			if (BytePerPixC[k] > 0) {
				int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);
swath_width_chroma_ub[k] = dml_min( surface_height_ub_c, (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]); } else { swath_width_chroma_ub[k] = 0; } } } } } static double CalculateExtraLatency( int RoundTripPingLatencyCycles, int ReorderingBytes, double DCFCLK, int TotalNumberOfActiveDPP, int PixelChunkSizeInKByte, int TotalNumberOfDCCActiveDPP, int MetaChunkSize, double ReturnBW, bool GPUVMEnable, bool HostVMEnable, int NumberOfActivePlanes, int NumberOfDPP[], int dpte_group_bytes[], double HostVMInefficiencyFactor, double HostVMMinPageSize, int HostVMMaxNonCachedPageTableLevels) { double ExtraLatencyBytes; double ExtraLatency; ExtraLatencyBytes = CalculateExtraLatencyBytes( ReorderingBytes, TotalNumberOfActiveDPP, PixelChunkSizeInKByte, TotalNumberOfDCCActiveDPP, MetaChunkSize, GPUVMEnable, HostVMEnable, NumberOfActivePlanes, NumberOfDPP, dpte_group_bytes, HostVMInefficiencyFactor, HostVMMinPageSize, HostVMMaxNonCachedPageTableLevels); ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes); dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); #endif return ExtraLatency; } static double CalculateExtraLatencyBytes( int ReorderingBytes, int TotalNumberOfActiveDPP, int PixelChunkSizeInKByte, int TotalNumberOfDCCActiveDPP, int MetaChunkSize, bool GPUVMEnable, bool HostVMEnable, int NumberOfActivePlanes, int NumberOfDPP[], int dpte_group_bytes[], double HostVMInefficiencyFactor, double HostVMMinPageSize, int HostVMMaxNonCachedPageTableLevels) { double ret; int HostVMDynamicLevels = 0, k; if (GPUVMEnable == true && HostVMEnable == true) { if (HostVMMinPageSize < 
2048) { HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); } else { HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); } } else { HostVMDynamicLevels = 0; } ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; if (GPUVMEnable == true) { for (k = 0; k < NumberOfActivePlanes; ++k) { ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; } } return ret; } static double CalculateUrgentLatency( double UrgentLatencyPixelDataOnly, double UrgentLatencyPixelMixedWithVMData, double UrgentLatencyVMDataOnly, bool DoUrgentLatencyAdjustment, double UrgentLatencyAdjustmentFabricClockComponent, double UrgentLatencyAdjustmentFabricClockReference, double FabricClock) { double ret; ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); if (DoUrgentLatencyAdjustment == true) { ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); } return ret; } static noinline_for_stack void UseMinimumDCFCLK( struct display_mode_lib *mode_lib, int MaxPrefetchMode, int ReorderingBytes) { struct vba_vars_st *v = &mode_lib->vba; int dummy1, i, j, k; double NormalEfficiency, dummy2, dummy3; double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0; for (i = 0; i < v->soc.num_states; ++i) { for (j = 0; j <= 1; ++j) { double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX]; double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; double MinimumTWait; double NonDPTEBandwidth; 
double DPTEBandwidth;
			double DCFCLKRequiredForAverageBandwidth;
			double ExtraLatencyBytes;
			double ExtraLatencyCycles;
			double DCFCLKRequiredForPeakBandwidth;
			int NoOfDPPState[DC__NUM_DPP__MAX];
			double MinimumTvmPlus2Tr0;

			/*
			 * DPTE row bandwidth if every plane fetched its rows within
			 * 15.75 line times.
			 * NOTE(review): 15.75 is a HW-supplied constant; its derivation
			 * is not visible here.
			 */
			TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
			for (k = 0; k < v->NumberOfActivePlanes; ++k) {
				TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
						+ v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
			}

			/* Flat per-plane copy of NoOfDPP[i][j][] for CalculateExtraLatencyBytes(). */
			for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
				NoOfDPPState[k] = v->NoOfDPP[i][j][k];
			}

			MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
			NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
			/* With HostVM or required immediate flip, use the prefetch/flip DPTE figure computed above. */
			DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
					TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
			/* Average requirement: largest of the deep-sleep clock and two bandwidth-derived clocks. */
			DCFCLKRequiredForAverageBandwidth = dml_max3(
					v->ProjectedDCFCLKDeepSleep[i][j],
					(NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
							/ (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
					(NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);

			/* HostVMInefficiencyFactor is passed as the literal 1 here. */
			ExtraLatencyBytes = CalculateExtraLatencyBytes(
					ReorderingBytes,
					v->TotalNumberOfActiveDPP[i][j],
					v->PixelChunkSizeInKByte,
					v->TotalNumberOfDCCActiveDPP[i][j],
					v->MetaChunkSize,
					v->GPUVMEnable,
					v->HostVMEnable,
					v->NumberOfActivePlanes,
					NoOfDPPState,
					v->dpte_group_bytes,
					1,
					v->HostVMMinPageSize,
					v->HostVMMaxNonCachedPageTableLevels);
			/* Same fixed delay as CalculateExtraLatency(), but kept in DCFCLK cycles. */
			ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
			/* Per-plane peak (prefetch) requirement. */
			for (k = 0; k < v->NumberOfActivePlanes; ++k) {
				double DCFCLKCyclesRequiredInPrefetch;
				double ExpectedPrefetchBWAcceleration;
				double PrefetchTime;

				/* Cycles to return the prefetched luma + chroma pixel data. */
				PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
						+ v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency
						/ v->ReturnBusWidth;
				/*
				 * Pixel cycles plus extra-latency, PDE/meta PTE (only when
				 * GPUVMMaxPageTableLevels > 2), DPTE row and meta row cycles.
				 * NoOfDPPState[k] is a divisor and is not checked for zero.
				 */
				DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
						+ v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
						+ 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
						+ 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
				PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
				ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
						/ (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
				/* Non-zero only when GPUVM, per-plane dynamic metadata and dynamic-metadata VM are all enabled. */
				DynamicMetadataVMExtraLatency[k] =
						(v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
								v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
				/* Time left for prefetch after TWait, page-table walks and metadata latency are subtracted from the VStartup window. */
				PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
						- v->UrgLatency[i]
								* ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
										* (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
						- DynamicMetadataVMExtraLatency[k];

				if (PrefetchTime > 0) {
					double ExpectedVRatioPrefetch;

					ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
							/ (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
					DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
							* dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
					if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
						DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
								+ NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
					}
				} else {
					/* No time left for prefetch: require the state's full DCFCLK for this plane. */
					DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
				}

				if (v->DynamicMetadataEnable[k] == true) {
					double TSetupPipe;
					double TdmbfPipe;
					double TdmsksPipe;
					double TdmecPipe;
					double AllowedTimeForUrgentExtraLatency;

					/* Only the four T* outputs are used; dummy1..dummy3 receive outputs that are ignored here. */
					CalculateVupdateAndDynamicMetadataParameters(
							v->MaxInterDCNTileRepeaters,
							v->RequiredDPPCLK[i][j][k],
							v->RequiredDISPCLK[i][j],
							v->ProjectedDCFCLKDeepSleep[i][j],
							v->PixelClock[k],
							v->HTotal[k],
							v->VTotal[k] - v->VActive[k],
							v->DynamicMetadataTransmittedBytes[k],
							v->DynamicMetadataLinesBeforeActiveRequired[k],
							v->Interlace[k],
							v->ProgressiveToInterlaceUnitInOPP,
							&TSetupPipe,
							&TdmbfPipe,
							&TdmecPipe,
							&TdmsksPipe,
							&dummy1,
							&dummy2,
							&dummy3);
					AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe
							- TdmecPipe - TdmsksPipe - DynamicMetadataVMExtraLatency[k];
					if (AllowedTimeForUrgentExtraLatency > 0) {
						DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
								DCFCLKRequiredForPeakBandwidthPerPlane[k],
								ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
					} else {
						DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
					}
				}
			}
			/* Total peak requirement is the sum over all planes. */
			DCFCLKRequiredForPeakBandwidth = 0;
			for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
				DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
			}
			MinimumTvmPlus2Tr0 = v->UrgLatency[i]
					* (v->GPUVMEnable == true ?
							(v->HostVMEnable == true ?
									(v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
							0);
			/*
			 * Second pass: a plane whose time budget is too small replaces
			 * the running value with DCFCLKPerState[i]; otherwise the value
			 * can only be raised. The result depends on plane order, since
			 * later planes see the value left by earlier ones.
			 */
			for (k = 0; k < v->NumberOfActivePlanes; ++k) {
				double MaximumTvmPlus2Tr0PlusTsw;

				MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
						- DynamicMetadataVMExtraLatency[k];
				if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
					DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
				} else {
					DCFCLKRequiredForPeakBandwidth = dml_max3(
							DCFCLKRequiredForPeakBandwidth,
							2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
							(2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
				}
			}
			/* 5% margin on the larger requirement, capped at what the state can deliver. */
			v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
		}
	}
}

/*
 * CalculateUnboundedRequestAndCompressedBufferSize - decide on unbounded
 * requesting and size the compressed buffer.
 *
 * *UnboundedRequestEnabled is the result of UnboundedRequest() evaluated with
 * Output[0] only. *CompressedBufferSizeInkByte is what remains of
 * ConfigReturnBufferSizeInKByte after subtracting the DET size (passed
 * through dml_ceil(..., 64)) once per active DPP when unbounded requesting is
 * enabled, or once per MaxNumDPP otherwise; that remainder is then scaled by
 * CompressedBufferSegmentSizeInkByteFinal / 64.
 */
static void CalculateUnboundedRequestAndCompressedBufferSize(
		unsigned int DETBufferSizeInKByte,
		int ConfigReturnBufferSizeInKByte,
		enum unbounded_requesting_policy UseUnboundedRequestingFinal,
		int TotalActiveDPP,
		bool NoChromaPlanes,
		int MaxNumDPP,
		int CompressedBufferSegmentSizeInkByteFinal,
		enum output_encoder_class *Output,
		bool *UnboundedRequestEnabled,
		int *CompressedBufferSizeInkByte)
{
	double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);

	*UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
	*CompressedBufferSizeInkByte = (
			*UnboundedRequestEnabled == true ?
ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte : ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte); *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; #ifdef __DML_VBA_DEBUG__ dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte); dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte); dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); #endif } static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output) { bool ret_val = false; ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma); if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) { ret_val = false; } return (ret_val); }