1 // SPDX-License-Identifier: MIT
2 //
3 // Copyright 2024 Advanced Micro Devices, Inc.
4
5
6 #include "dml2_internal_shared_types.h"
7 #include "dml2_core_dcn4_calcs.h"
8 #include "dml2_debug.h"
9 #include "lib_float_math.h"
10 #include "dml_top_types.h"
11
12 #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096
13 #define DML_MAX_NUM_OF_SLICES_PER_DSC 4
14
dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type)15 const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type)
16 {
17 switch (bw_type) {
18 case (dml2_core_internal_bw_sdp):
19 return("dml2_core_internal_bw_sdp");
20 case (dml2_core_internal_bw_dram):
21 return("dml2_core_internal_bw_dram");
22 case (dml2_core_internal_bw_max):
23 return("dml2_core_internal_bw_max");
24 default:
25 return("dml2_core_internal_bw_unknown");
26 }
27 }
28
dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type)29 const char *dml2_core_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type)
30 {
31 switch (dml2_core_internal_soc_state_type) {
32 case (dml2_core_internal_soc_state_sys_idle):
33 return("dml2_core_internal_soc_state_sys_idle");
34 case (dml2_core_internal_soc_state_sys_active):
35 return("dml2_core_internal_soc_state_sys_active");
36 case (dml2_core_internal_soc_state_svp_prefetch):
37 return("dml2_core_internal_soc_state_svp_prefetch");
38 case dml2_core_internal_soc_state_max:
39 default:
40 return("dml2_core_internal_soc_state_unknown");
41 }
42 }
43
// Divides dividend by divisor in floating point and returns the quotient.
//
// *remainder is NOT an arithmetic remainder: it is set to 1 when the quotient
// has a positive fractional part and to 0 otherwise (including negative
// quotients, whose truncated difference is never > 0).
//
// The fractional part is obtained by truncating the quotient. Truncating
// through `int` is undefined once the quotient leaves the int range, so the
// truncation goes through `long long` and is skipped altogether for
// magnitudes of 2^53 and above, where a double cannot hold a fractional part
// anyway. A non-finite quotient (e.g. from divisor == 0) also reports 0.
static double dml2_core_div_rem(double dividend, unsigned int divisor, unsigned int *remainder)
{
	// 2^53: from this magnitude on every double is an integer
	const double exact_integer_limit = 9007199254740992.0;
	const double quotient = dividend / divisor;

	// Both comparisons are false for NaN, which routes it to the else branch.
	if (quotient > -exact_integer_limit && quotient < exact_integer_limit)
		*remainder = (quotient - (double)(long long)quotient) > 0;
	else
		*remainder = 0;

	return quotient;
}
49
dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info * support,bool fail_only)50 static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only)
51 {
52 dml2_printf("DML: ===================================== \n");
53 dml2_printf("DML: DML_MODE_SUPPORT_INFO_ST\n");
54 if (!fail_only || support->ScaleRatioAndTapsSupport == 0)
55 dml2_printf("DML: support: ScaleRatioAndTapsSupport = %d\n", support->ScaleRatioAndTapsSupport);
56 if (!fail_only || support->SourceFormatPixelAndScanSupport == 0)
57 dml2_printf("DML: support: SourceFormatPixelAndScanSupport = %d\n", support->SourceFormatPixelAndScanSupport);
58 if (!fail_only || support->ViewportSizeSupport == 0)
59 dml2_printf("DML: support: ViewportSizeSupport = %d\n", support->ViewportSizeSupport);
60 if (!fail_only || support->LinkRateDoesNotMatchDPVersion == 1)
61 dml2_printf("DML: support: LinkRateDoesNotMatchDPVersion = %d\n", support->LinkRateDoesNotMatchDPVersion);
62 if (!fail_only || support->LinkRateForMultistreamNotIndicated == 1)
63 dml2_printf("DML: support: LinkRateForMultistreamNotIndicated = %d\n", support->LinkRateForMultistreamNotIndicated);
64 if (!fail_only || support->BPPForMultistreamNotIndicated == 1)
65 dml2_printf("DML: support: BPPForMultistreamNotIndicated = %d\n", support->BPPForMultistreamNotIndicated);
66 if (!fail_only || support->MultistreamWithHDMIOreDP == 1)
67 dml2_printf("DML: support: MultistreamWithHDMIOreDP = %d\n", support->MultistreamWithHDMIOreDP);
68 if (!fail_only || support->ExceededMultistreamSlots == 1)
69 dml2_printf("DML: support: ExceededMultistreamSlots = %d\n", support->ExceededMultistreamSlots);
70 if (!fail_only || support->MSOOrODMSplitWithNonDPLink == 1)
71 dml2_printf("DML: support: MSOOrODMSplitWithNonDPLink = %d\n", support->MSOOrODMSplitWithNonDPLink);
72 if (!fail_only || support->NotEnoughLanesForMSO == 1)
73 dml2_printf("DML: support: NotEnoughLanesForMSO = %d\n", support->NotEnoughLanesForMSO);
74 if (!fail_only || support->P2IWith420 == 1)
75 dml2_printf("DML: support: P2IWith420 = %d\n", support->P2IWith420);
76 if (!fail_only || support->DSC422NativeNotSupported == 1)
77 dml2_printf("DML: support: DSC422NativeNotSupported = %d\n", support->DSC422NativeNotSupported);
78 if (!fail_only || support->DSCSlicesODMModeSupported == 0)
79 dml2_printf("DML: support: DSCSlicesODMModeSupported = %d\n", support->DSCSlicesODMModeSupported);
80 if (!fail_only || support->NotEnoughDSCUnits == 1)
81 dml2_printf("DML: support: NotEnoughDSCUnits = %d\n", support->NotEnoughDSCUnits);
82 if (!fail_only || support->NotEnoughDSCSlices == 1)
83 dml2_printf("DML: support: NotEnoughDSCSlices = %d\n", support->NotEnoughDSCSlices);
84 if (!fail_only || support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe == 1)
85 dml2_printf("DML: support: ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = %d\n", support->ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe);
86 if (!fail_only || support->InvalidCombinationOfMALLUseForPStateAndStaticScreen == 1)
87 dml2_printf("DML: support: InvalidCombinationOfMALLUseForPStateAndStaticScreen = %d\n", support->InvalidCombinationOfMALLUseForPStateAndStaticScreen);
88 if (!fail_only || support->DSCCLKRequiredMoreThanSupported == 1)
89 dml2_printf("DML: support: DSCCLKRequiredMoreThanSupported = %d\n", support->DSCCLKRequiredMoreThanSupported);
90 if (!fail_only || support->PixelsPerLinePerDSCUnitSupport == 0)
91 dml2_printf("DML: support: PixelsPerLinePerDSCUnitSupport = %d\n", support->PixelsPerLinePerDSCUnitSupport);
92 if (!fail_only || support->DTBCLKRequiredMoreThanSupported == 1)
93 dml2_printf("DML: support: DTBCLKRequiredMoreThanSupported = %d\n", support->DTBCLKRequiredMoreThanSupported);
94 if (!fail_only || support->InvalidCombinationOfMALLUseForPState == 1)
95 dml2_printf("DML: support: InvalidCombinationOfMALLUseForPState = %d\n", support->InvalidCombinationOfMALLUseForPState);
96 if (!fail_only || support->ROBSupport == 0)
97 dml2_printf("DML: support: ROBSupport = %d\n", support->ROBSupport);
98 if (!fail_only || support->OutstandingRequestsSupport == 0)
99 dml2_printf("DML: support: OutstandingRequestsSupport = %d\n", support->OutstandingRequestsSupport);
100 if (!fail_only || support->OutstandingRequestsUrgencyAvoidance == 0)
101 dml2_printf("DML: support: OutstandingRequestsUrgencyAvoidance = %d\n", support->OutstandingRequestsUrgencyAvoidance);
102 if (!fail_only || support->DISPCLK_DPPCLK_Support == 0)
103 dml2_printf("DML: support: DISPCLK_DPPCLK_Support = %d\n", support->DISPCLK_DPPCLK_Support);
104 if (!fail_only || support->TotalAvailablePipesSupport == 0)
105 dml2_printf("DML: support: TotalAvailablePipesSupport = %d\n", support->TotalAvailablePipesSupport);
106 if (!fail_only || support->NumberOfOTGSupport == 0)
107 dml2_printf("DML: support: NumberOfOTGSupport = %d\n", support->NumberOfOTGSupport);
108 if (!fail_only || support->NumberOfHDMIFRLSupport == 0)
109 dml2_printf("DML: support: NumberOfHDMIFRLSupport = %d\n", support->NumberOfHDMIFRLSupport);
110 if (!fail_only || support->NumberOfDP2p0Support == 0)
111 dml2_printf("DML: support: NumberOfDP2p0Support = %d\n", support->NumberOfDP2p0Support);
112 if (!fail_only || support->EnoughWritebackUnits == 0)
113 dml2_printf("DML: support: EnoughWritebackUnits = %d\n", support->EnoughWritebackUnits);
114 if (!fail_only || support->WritebackScaleRatioAndTapsSupport == 0)
115 dml2_printf("DML: support: WritebackScaleRatioAndTapsSupport = %d\n", support->WritebackScaleRatioAndTapsSupport);
116 if (!fail_only || support->WritebackLatencySupport == 0)
117 dml2_printf("DML: support: WritebackLatencySupport = %d\n", support->WritebackLatencySupport);
118 if (!fail_only || support->CursorSupport == 0)
119 dml2_printf("DML: support: CursorSupport = %d\n", support->CursorSupport);
120 if (!fail_only || support->PitchSupport == 0)
121 dml2_printf("DML: support: PitchSupport = %d\n", support->PitchSupport);
122 if (!fail_only || support->ViewportExceedsSurface == 1)
123 dml2_printf("DML: support: ViewportExceedsSurface = %d\n", support->ViewportExceedsSurface);
124 if (!fail_only || support->PrefetchSupported == 0)
125 dml2_printf("DML: support: PrefetchSupported = %d\n", support->PrefetchSupported);
126 if (!fail_only || support->EnoughUrgentLatencyHidingSupport == 0)
127 dml2_printf("DML: support: EnoughUrgentLatencyHidingSupport = %d\n", support->EnoughUrgentLatencyHidingSupport);
128 if (!fail_only || support->AvgBandwidthSupport == 0)
129 dml2_printf("DML: support: AvgBandwidthSupport = %d\n", support->AvgBandwidthSupport);
130 if (!fail_only || support->DynamicMetadataSupported == 0)
131 dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported);
132 if (!fail_only || support->VRatioInPrefetchSupported == 0)
133 dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported);
134 if (!fail_only || support->PTEBufferSizeNotExceeded == 1)
135 dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded);
136 if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 1)
137 dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded);
138 if (!fail_only || support->ExceededMALLSize == 1)
139 dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize);
140 if (!fail_only || support->g6_temp_read_support == 0)
141 dml2_printf("DML: support: g6_temp_read_support = %d\n", support->g6_temp_read_support);
142 if (!fail_only || support->ImmediateFlipSupport == 0)
143 dml2_printf("DML: support: ImmediateFlipSupport = %d\n", support->ImmediateFlipSupport);
144 if (!fail_only || support->LinkCapacitySupport == 0)
145 dml2_printf("DML: support: LinkCapacitySupport = %d\n", support->LinkCapacitySupport);
146
147 if (!fail_only || support->ModeSupport == 0)
148 dml2_printf("DML: support: ModeSupport = %d\n", support->ModeSupport);
149 dml2_printf("DML: ===================================== \n");
150 }
151
get_stream_output_bpp(double * out_bpp,const struct dml2_display_cfg * display_cfg)152 static void get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg)
153 {
154 for (unsigned int k = 0; k < display_cfg->num_planes; k++) {
155 double bpc = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.bpc;
156 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_disable) {
157 switch (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format) {
158 case dml2_444:
159 out_bpp[k] = bpc * 3;
160 break;
161 case dml2_s422:
162 out_bpp[k] = bpc * 2;
163 break;
164 case dml2_n422:
165 out_bpp[k] = bpc * 2;
166 break;
167 case dml2_420:
168 default:
169 out_bpp[k] = bpc * 1.5;
170 break;
171 }
172 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable) {
173 out_bpp[k] = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.dsc_compressed_bpp_x16 / 16;
174 } else {
175 out_bpp[k] = 0;
176 }
177 #ifdef __DML_VBA_DEBUG__
178 dml2_printf("DML::%s: k=%d bpc=%f\n", __func__, k, bpc);
179 dml2_printf("DML::%s: k=%d dsc.enable=%d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable);
180 dml2_printf("DML::%s: k=%d out_bpp=%f\n", __func__, k, out_bpp[k]);
181 #endif
182 }
183 }
184
// Rounds num to a multiple of `multiple`: upwards when `up` is true, downwards
// otherwise. num is returned unchanged when it already is a multiple or when
// `multiple` is 0.
static unsigned int dml_round_to_multiple(unsigned int num, unsigned int multiple, bool up)
{
	// distance of num above the previous multiple; 0 also covers multiple == 0
	const unsigned int excess = (multiple != 0) ? (num % multiple) : 0;

	if (excess == 0)
		return num;

	return up ? num + (multiple - excess) : num - excess;
}
201
dml_get_num_active_pipes(int unsigned num_planes,const struct core_display_cfg_support_info * cfg_support_info)202 static unsigned int dml_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info)
203 {
204 unsigned int num_active_pipes = 0;
205
206 for (unsigned int k = 0; k < num_planes; k++) {
207 num_active_pipes = num_active_pipes + (unsigned int)cfg_support_info->plane_support_info[k].dpps_used;
208 }
209
210 #ifdef __DML_VBA_DEBUG__
211 dml2_printf("DML::%s: num_active_pipes = %d\n", __func__, num_active_pipes);
212 #endif
213 return num_active_pipes;
214 }
215
dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info * cfg_support_info,unsigned int * pipe_plane)216 static void dml_calc_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane)
217 {
218 unsigned int pipe_idx = 0;
219
220 for (unsigned int k = 0; k < DML2_MAX_PLANES; ++k) {
221 pipe_plane[k] = __DML2_CALCS_PIPE_NO_PLANE__;
222 }
223
224 for (unsigned int plane_idx = 0; plane_idx < DML2_MAX_PLANES; plane_idx++) {
225 for (int i = 0; i < cfg_support_info->plane_support_info[plane_idx].dpps_used; i++) {
226 pipe_plane[pipe_idx] = plane_idx;
227 pipe_idx++;
228 }
229 }
230 }
231
dml_is_phantom_pipe(const struct dml2_plane_parameters * plane_cfg)232 static bool dml_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg)
233 {
234 bool is_phantom = false;
235
236 if (plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe ||
237 plane_cfg->overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) {
238 is_phantom = true;
239 }
240
241 return is_phantom;
242 }
243
dml_get_is_phantom_pipe(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,unsigned int pipe_idx)244 static bool dml_get_is_phantom_pipe(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx)
245 {
246 unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
247
248 bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_idx]);
249 dml2_printf("DML::%s: pipe_idx=%d legacy_svp_config=%0d is_phantom=%d\n", __func__, pipe_idx, display_cfg->plane_descriptors[plane_idx].overrides.legacy_svp_config, is_phantom);
250 return is_phantom;
251 }
252
253 #define dml_get_per_pipe_var_func(variable, type, interval_var) static type dml_get_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx) \
254 { \
255 unsigned int plane_idx; \
256 plane_idx = mode_lib->mp.pipe_plane[pipe_idx]; \
257 return (type) interval_var[plane_idx]; \
258 }
259
260 dml_get_per_pipe_var_func(dpte_group_size_in_bytes, unsigned int, mode_lib->mp.dpte_group_bytes);
261 dml_get_per_pipe_var_func(vm_group_size_in_bytes, unsigned int, mode_lib->mp.vm_group_bytes);
262 dml_get_per_pipe_var_func(swath_height_l, unsigned int, mode_lib->mp.SwathHeightY);
263 dml_get_per_pipe_var_func(swath_height_c, unsigned int, mode_lib->mp.SwathHeightC);
264 dml_get_per_pipe_var_func(dpte_row_height_linear_l, unsigned int, mode_lib->mp.dpte_row_height_linear);
265 dml_get_per_pipe_var_func(dpte_row_height_linear_c, unsigned int, mode_lib->mp.dpte_row_height_linear_chroma);
266
267 dml_get_per_pipe_var_func(vstartup_calculated, unsigned int, mode_lib->mp.VStartup);
268 dml_get_per_pipe_var_func(vupdate_offset, unsigned int, mode_lib->mp.VUpdateOffsetPix);
269 dml_get_per_pipe_var_func(vupdate_width, unsigned int, mode_lib->mp.VUpdateWidthPix);
270 dml_get_per_pipe_var_func(vready_offset, unsigned int, mode_lib->mp.VReadyOffsetPix);
271 dml_get_per_pipe_var_func(pstate_keepout_dst_lines, unsigned int, mode_lib->mp.pstate_keepout_dst_lines);
272 dml_get_per_pipe_var_func(det_stored_buffer_size_l_bytes, unsigned int, mode_lib->mp.DETBufferSizeY);
273 dml_get_per_pipe_var_func(det_stored_buffer_size_c_bytes, unsigned int, mode_lib->mp.DETBufferSizeC);
274 dml_get_per_pipe_var_func(det_buffer_size_kbytes, unsigned int, mode_lib->mp.DETBufferSizeInKByte);
275 dml_get_per_pipe_var_func(surface_size_in_mall_bytes, unsigned int, mode_lib->mp.SurfaceSizeInTheMALL);
276
277 #define dml_get_per_plane_var_func(variable, type, interval_var) static type dml_get_plane_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx) \
278 { \
279 return (type) interval_var[plane_idx]; \
280 }
281
282 dml_get_per_plane_var_func(num_mcaches_plane0, unsigned int, mode_lib->ms.num_mcaches_l);
283 dml_get_per_plane_var_func(mcache_row_bytes_plane0, unsigned int, mode_lib->ms.mcache_row_bytes_l);
284 dml_get_per_plane_var_func(mcache_shift_granularity_plane0, unsigned int, mode_lib->ms.mcache_shift_granularity_l);
285 dml_get_per_plane_var_func(num_mcaches_plane1, unsigned int, mode_lib->ms.num_mcaches_c);
286 dml_get_per_plane_var_func(mcache_row_bytes_plane1, unsigned int, mode_lib->ms.mcache_row_bytes_c);
287 dml_get_per_plane_var_func(mcache_shift_granularity_plane1, unsigned int, mode_lib->ms.mcache_shift_granularity_c);
288 dml_get_per_plane_var_func(mall_comb_mcache_l, unsigned int, mode_lib->ms.mall_comb_mcache_l);
289 dml_get_per_plane_var_func(mall_comb_mcache_c, unsigned int, mode_lib->ms.mall_comb_mcache_c);
290 dml_get_per_plane_var_func(lc_comb_mcache, unsigned int, mode_lib->ms.lc_comb_mcache);
291 dml_get_per_plane_var_func(subviewport_lines_needed_in_mall, unsigned int, mode_lib->ms.SubViewportLinesNeededInMALL);
292 dml_get_per_plane_var_func(max_vstartup_lines, unsigned int, mode_lib->ms.MaxVStartupLines);
293
294 #define dml_get_per_plane_array_var_func(variable, type, interval_var) static type dml_get_plane_array_##variable(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int plane_idx, unsigned int array_idx) \
295 { \
296 return (type) interval_var[plane_idx][array_idx]; \
297 }
298
299 dml_get_per_plane_array_var_func(mcache_offsets_plane0, unsigned int, mode_lib->ms.mcache_offsets_l);
300 dml_get_per_plane_array_var_func(mcache_offsets_plane1, unsigned int, mode_lib->ms.mcache_offsets_c);
301
302 #define dml_get_var_func(var, type, internal_var) static type dml_get_##var(const struct dml2_core_internal_display_mode_lib *mode_lib) \
303 { \
304 return (type) internal_var; \
305 }
306
307 dml_get_var_func(wm_urgent, double, mode_lib->mp.Watermark.UrgentWatermark);
308 dml_get_var_func(wm_stutter_exit, double, mode_lib->mp.Watermark.StutterExitWatermark);
309 dml_get_var_func(wm_stutter_enter_exit, double, mode_lib->mp.Watermark.StutterEnterPlusExitWatermark);
310 dml_get_var_func(wm_z8_stutter_exit, double, mode_lib->mp.Watermark.Z8StutterExitWatermark);
311 dml_get_var_func(wm_z8_stutter_enter_exit, double, mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark);
312 dml_get_var_func(wm_memory_trip, double, mode_lib->mp.UrgentLatency);
313 dml_get_var_func(meta_trip_memory_us, double, mode_lib->mp.MetaTripToMemory);
314
315 dml_get_var_func(wm_fclk_change, double, mode_lib->mp.Watermark.FCLKChangeWatermark);
316 dml_get_var_func(wm_usr_retraining, double, mode_lib->mp.Watermark.USRRetrainingWatermark);
317 dml_get_var_func(wm_g6_temp_read, double, mode_lib->mp.Watermark.g6_temp_read_watermark_us);
318 dml_get_var_func(wm_dram_clock_change, double, mode_lib->mp.Watermark.DRAMClockChangeWatermark);
319 dml_get_var_func(fraction_of_urgent_bandwidth, double, mode_lib->mp.FractionOfUrgentBandwidth);
320 dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, double, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip);
321 dml_get_var_func(fraction_of_urgent_bandwidth_mall, double, mode_lib->mp.FractionOfUrgentBandwidthMALL);
322 dml_get_var_func(urgent_latency, double, mode_lib->mp.UrgentLatency);
323 dml_get_var_func(wm_writeback_dram_clock_change, double, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
324 dml_get_var_func(wm_writeback_fclk_change, double, mode_lib->mp.Watermark.WritebackFCLKChangeWatermark);
325 dml_get_var_func(stutter_efficiency, double, mode_lib->mp.StutterEfficiency);
326 dml_get_var_func(stutter_efficiency_no_vblank, double, mode_lib->mp.StutterEfficiencyNotIncludingVBlank);
327 dml_get_var_func(stutter_num_bursts, double, mode_lib->mp.NumberOfStutterBurstsPerFrame);
328 dml_get_var_func(stutter_efficiency_z8, double, mode_lib->mp.Z8StutterEfficiency);
329 dml_get_var_func(stutter_num_bursts_z8, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrame);
330 dml_get_var_func(stutter_period, double, mode_lib->mp.StutterPeriod);
331 dml_get_var_func(stutter_efficiency_z8_bestcase, double, mode_lib->mp.Z8StutterEfficiencyBestCase);
332 dml_get_var_func(stutter_num_bursts_z8_bestcase, double, mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase);
333 dml_get_var_func(stutter_period_bestcase, double, mode_lib->mp.StutterPeriodBestCase);
334 dml_get_var_func(fclk_change_latency, double, mode_lib->mp.MaxActiveFCLKChangeLatencySupported);
335 dml_get_var_func(global_dppclk_khz, double, mode_lib->mp.GlobalDPPCLK * 1000.0);
336
337 dml_get_var_func(sys_active_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
338 dml_get_var_func(sys_active_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
339
340 dml_get_var_func(svp_prefetch_avg_bw_required_sdp, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
341 dml_get_var_func(svp_prefetch_avg_bw_required_dram, double, mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
342
343 dml_get_var_func(sys_active_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
344 dml_get_var_func(sys_active_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
345
346 dml_get_var_func(svp_prefetch_avg_bw_available_sdp, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
347 dml_get_var_func(svp_prefetch_avg_bw_available_dram, double, mode_lib->mp.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
348
349 dml_get_var_func(sys_active_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
350 dml_get_var_func(sys_active_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
351 dml_get_var_func(sys_active_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
352
353 dml_get_var_func(svp_prefetch_urg_bw_available_sdp, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
354 dml_get_var_func(svp_prefetch_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
355 dml_get_var_func(svp_prefetch_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_svp_prefetch]);
356
357 dml_get_var_func(max_urgent_latency_us, double, mode_lib->ms.support.max_urgent_latency_us);
358 dml_get_var_func(avg_non_urgent_latency_us, double, mode_lib->ms.support.avg_non_urgent_latency_us);
359 dml_get_var_func(avg_urgent_latency_us, double, mode_lib->ms.support.avg_urgent_latency_us);
360
361 dml_get_var_func(sys_active_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
362 dml_get_var_func(sys_active_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
363 dml_get_var_func(svp_prefetch_urg_bw_required_sdp, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
364 dml_get_var_func(svp_prefetch_urg_bw_required_dram, double, mode_lib->mp.urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
365
366 dml_get_var_func(sys_active_non_urg_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
367 dml_get_var_func(sys_active_non_urg_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
368 dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
369 dml_get_var_func(svp_prefetch_non_urg_bw_required_dram, double, mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
370
371 dml_get_var_func(sys_active_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
372 dml_get_var_func(sys_active_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
373 dml_get_var_func(svp_prefetch_urg_bw_required_sdp_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
374 dml_get_var_func(svp_prefetch_urg_bw_required_dram_flip, double, mode_lib->mp.urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
375
376 dml_get_var_func(sys_active_non_urg_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
377 dml_get_var_func(sys_active_non_urg_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
378 dml_get_var_func(svp_prefetch_non_urg_bw_required_sdp_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
379 dml_get_var_func(svp_prefetch_non_urg_bw_required_dram_flip, double, mode_lib->mp.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
380
381 dml_get_var_func(comp_buffer_size_kbytes, unsigned int, mode_lib->mp.CompressedBufferSizeInkByte);
382
383 dml_get_var_func(unbounded_request_enabled, bool, mode_lib->mp.UnboundedRequestEnabled);
384 dml_get_var_func(wm_writeback_urgent, double, mode_lib->mp.Watermark.WritebackUrgentWatermark);
385 dml_get_var_func(cstate_max_cap_mode, bool, mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
386 dml_get_var_func(compbuf_reserved_space_64b, unsigned int, mode_lib->mp.compbuf_reserved_space_64b);
387 dml_get_var_func(hw_debug5, bool, mode_lib->mp.hw_debug5);
388 dml_get_var_func(dcfclk_deep_sleep_hysteresis, unsigned int, mode_lib->mp.dcfclk_deep_sleep_hysteresis);
389
// Derives the DET / compressed-buffer split from the return buffer configuration.
//
// Outputs:
//  *MaxTotalDETInKByte             - with an MRQ present: 4/5 of (return buffer + ROB) passed
//                                    through math_ceil2(.., 64); otherwise the return buffer
//                                    size minus one return buffer segment.
//  *nomDETInKByte                  - *MaxTotalDETInKByte / MaxNumDPP passed through
//                                    math_floor2(.., segment size), replaced by
//                                    nomDETInKByteOverrideValue when the override is enabled.
//  *MinCompressedBufferSizeInKByte - return buffer size minus *MaxTotalDETInKByte.
//
// NOTE(review): math_ceil2/math_floor2 presumably round to a multiple of their
// second argument - confirm in lib_float_math.
static void CalculateMaxDETAndMinCompressedBufferSize(
	unsigned int ConfigReturnBufferSizeInKByte,
	unsigned int ConfigReturnBufferSegmentSizeInKByte,
	unsigned int ROBBufferSizeInKByte,
	unsigned int MaxNumDPP,
	unsigned int nomDETInKByteOverrideEnable, // VBA_DELTA, allow DV to override default DET size
	unsigned int nomDETInKByteOverrideValue, // VBA_DELTA
	bool is_mrq_present,

	// Output
	unsigned int *MaxTotalDETInKByte,
	unsigned int *nomDETInKByte,
	unsigned int *MinCompressedBufferSizeInKByte)
{
	*MaxTotalDETInKByte = is_mrq_present ?
		(unsigned int) math_ceil2((double)(ConfigReturnBufferSizeInKByte + ROBBufferSizeInKByte)*4/5, 64) :
		ConfigReturnBufferSizeInKByte - ConfigReturnBufferSegmentSizeInKByte;

	*nomDETInKByte = (unsigned int)(math_floor2((double)*MaxTotalDETInKByte / (double)MaxNumDPP, ConfigReturnBufferSegmentSizeInKByte));
	*MinCompressedBufferSizeInKByte = ConfigReturnBufferSizeInKByte - *MaxTotalDETInKByte;

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: is_mrq_present = %u\n", __func__, is_mrq_present);
	dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
	dml2_printf("DML::%s: ROBBufferSizeInKByte = %u\n", __func__, ROBBufferSizeInKByte);
	dml2_printf("DML::%s: MaxNumDPP = %u\n", __func__, MaxNumDPP);
	dml2_printf("DML::%s: MaxTotalDETInKByte = %u\n", __func__, *MaxTotalDETInKByte);
	dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, *nomDETInKByte);
	dml2_printf("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, *MinCompressedBufferSizeInKByte);
#endif

	// the computed nominal DET size stands unless an override was requested
	if (!nomDETInKByteOverrideEnable)
		return;

	*nomDETInKByte = nomDETInKByteOverrideValue;
	dml2_printf("DML::%s: nomDETInKByte = %u (overrided)\n", __func__, *nomDETInKByte);
}
427
PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg * display_cfg,bool ptoi_supported,double * PixelClockBackEnd)428 static void PixelClockAdjustmentForProgressiveToInterlaceUnit(const struct dml2_display_cfg *display_cfg, bool ptoi_supported, double *PixelClockBackEnd)
429 {
430 //unsigned int num_active_planes = display_cfg->num_planes;
431
432 //Progressive To Interlace Unit Effect
433 for (unsigned int k = 0; k < display_cfg->num_planes; ++k) {
434 PixelClockBackEnd[k] = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
435 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && ptoi_supported == true) {
436 // FIXME_STAGE2... can sw pass the pixel rate for interlaced directly
437 //display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz = 2 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz;
438 }
439 }
440 }
441
dml_is_420(enum dml2_source_format_class source_format)442 static bool dml_is_420(enum dml2_source_format_class source_format)
443 {
444 bool val = false;
445
446 switch (source_format) {
447 case dml2_444_8:
448 val = 0;
449 break;
450 case dml2_444_16:
451 val = 0;
452 break;
453 case dml2_444_32:
454 val = 0;
455 break;
456 case dml2_444_64:
457 val = 0;
458 break;
459 case dml2_420_8:
460 val = 1;
461 break;
462 case dml2_420_10:
463 val = 1;
464 break;
465 case dml2_420_12:
466 val = 1;
467 break;
468 case dml2_rgbe_alpha:
469 val = 0;
470 break;
471 case dml2_rgbe:
472 val = 0;
473 break;
474 case dml2_mono_8:
475 val = 0;
476 break;
477 case dml2_mono_16:
478 val = 0;
479 break;
480 default:
481 DML2_ASSERT(0);
482 break;
483 }
484 return val;
485 }
486
dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode)487 static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode)
488 {
489 switch (sw_mode) {
490 case (dml2_sw_linear):
491 return 256; break;
492 case (dml2_sw_256b_2d):
493 return 256; break;
494 case (dml2_sw_4kb_2d):
495 return 4096; break;
496 case (dml2_sw_64kb_2d):
497 return 65536; break;
498 case (dml2_sw_256kb_2d):
499 return 262144; break;
500 case (dml2_gfx11_sw_linear):
501 return 256; break;
502 case (dml2_gfx11_sw_64kb_d):
503 return 65536; break;
504 case (dml2_gfx11_sw_64kb_d_t):
505 return 65536; break;
506 case (dml2_gfx11_sw_64kb_d_x):
507 return 65536; break;
508 case (dml2_gfx11_sw_64kb_r_x):
509 return 65536; break;
510 case (dml2_gfx11_sw_256kb_d_x):
511 return 262144; break;
512 case (dml2_gfx11_sw_256kb_r_x):
513 return 262144; break;
514 default:
515 DML2_ASSERT(0);
516 return 256;
517 }
518 }
519
dml_is_vertical_rotation(enum dml2_rotation_angle Scan)520 static bool dml_is_vertical_rotation(enum dml2_rotation_angle Scan)
521 {
522 bool is_vert = false;
523 if (Scan == dml2_rotation_90 || Scan == dml2_rotation_270) {
524 is_vert = true;
525 } else {
526 is_vert = false;
527 }
528 return is_vert;
529 }
530
dml_get_gfx_version(enum dml2_swizzle_mode sw_mode)531 static int unsigned dml_get_gfx_version(enum dml2_swizzle_mode sw_mode)
532 {
533 int unsigned version = 0;
534
535 if (sw_mode == dml2_sw_linear ||
536 sw_mode == dml2_sw_256b_2d ||
537 sw_mode == dml2_sw_4kb_2d ||
538 sw_mode == dml2_sw_64kb_2d ||
539 sw_mode == dml2_sw_256kb_2d) {
540 version = 12;
541 } else if (sw_mode == dml2_gfx11_sw_linear ||
542 sw_mode == dml2_gfx11_sw_64kb_d ||
543 sw_mode == dml2_gfx11_sw_64kb_d_t ||
544 sw_mode == dml2_gfx11_sw_64kb_d_x ||
545 sw_mode == dml2_gfx11_sw_64kb_r_x ||
546 sw_mode == dml2_gfx11_sw_256kb_d_x ||
547 sw_mode == dml2_gfx11_sw_256kb_r_x) {
548 version = 11;
549 } else {
550 dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode);
551 DML2_ASSERT(0);
552 }
553
554 return version;
555 }
556
CalculateBytePerPixelAndBlockSizes(enum dml2_source_format_class SourcePixelFormat,enum dml2_swizzle_mode SurfaceTiling,unsigned int pitch_y,unsigned int pitch_c,unsigned int * BytePerPixelY,unsigned int * BytePerPixelC,double * BytePerPixelDETY,double * BytePerPixelDETC,unsigned int * BlockHeight256BytesY,unsigned int * BlockHeight256BytesC,unsigned int * BlockWidth256BytesY,unsigned int * BlockWidth256BytesC,unsigned int * MacroTileHeightY,unsigned int * MacroTileHeightC,unsigned int * MacroTileWidthY,unsigned int * MacroTileWidthC,bool * surf_linear128_l,bool * surf_linear128_c)557 static void CalculateBytePerPixelAndBlockSizes(
558 enum dml2_source_format_class SourcePixelFormat,
559 enum dml2_swizzle_mode SurfaceTiling,
560 unsigned int pitch_y,
561 unsigned int pitch_c,
562
563 // Output
564 unsigned int *BytePerPixelY,
565 unsigned int *BytePerPixelC,
566 double *BytePerPixelDETY,
567 double *BytePerPixelDETC,
568 unsigned int *BlockHeight256BytesY,
569 unsigned int *BlockHeight256BytesC,
570 unsigned int *BlockWidth256BytesY,
571 unsigned int *BlockWidth256BytesC,
572 unsigned int *MacroTileHeightY,
573 unsigned int *MacroTileHeightC,
574 unsigned int *MacroTileWidthY,
575 unsigned int *MacroTileWidthC,
576 bool *surf_linear128_l,
577 bool *surf_linear128_c)
578 {
579 *BytePerPixelDETY = 0;
580 *BytePerPixelDETC = 0;
581 *BytePerPixelY = 0;
582 *BytePerPixelC = 0;
583
584 if (SourcePixelFormat == dml2_444_64) {
585 *BytePerPixelDETY = 8;
586 *BytePerPixelDETC = 0;
587 *BytePerPixelY = 8;
588 *BytePerPixelC = 0;
589 } else if (SourcePixelFormat == dml2_444_32 || SourcePixelFormat == dml2_rgbe) {
590 *BytePerPixelDETY = 4;
591 *BytePerPixelDETC = 0;
592 *BytePerPixelY = 4;
593 *BytePerPixelC = 0;
594 } else if (SourcePixelFormat == dml2_444_16 || SourcePixelFormat == dml2_mono_16) {
595 *BytePerPixelDETY = 2;
596 *BytePerPixelDETC = 0;
597 *BytePerPixelY = 2;
598 *BytePerPixelC = 0;
599 } else if (SourcePixelFormat == dml2_444_8 || SourcePixelFormat == dml2_mono_8) {
600 *BytePerPixelDETY = 1;
601 *BytePerPixelDETC = 0;
602 *BytePerPixelY = 1;
603 *BytePerPixelC = 0;
604 } else if (SourcePixelFormat == dml2_rgbe_alpha) {
605 *BytePerPixelDETY = 4;
606 *BytePerPixelDETC = 1;
607 *BytePerPixelY = 4;
608 *BytePerPixelC = 1;
609 } else if (SourcePixelFormat == dml2_420_8) {
610 *BytePerPixelDETY = 1;
611 *BytePerPixelDETC = 2;
612 *BytePerPixelY = 1;
613 *BytePerPixelC = 2;
614 } else if (SourcePixelFormat == dml2_420_12) {
615 *BytePerPixelDETY = 2;
616 *BytePerPixelDETC = 4;
617 *BytePerPixelY = 2;
618 *BytePerPixelC = 4;
619 } else if (SourcePixelFormat == dml2_420_10) {
620 *BytePerPixelDETY = (double)(4.0 / 3);
621 *BytePerPixelDETC = (double)(8.0 / 3);
622 *BytePerPixelY = 2;
623 *BytePerPixelC = 4;
624 } else {
625 dml2_printf("ERROR: DML::%s: SourcePixelFormat = %u not supported!\n", __func__, SourcePixelFormat);
626 DML2_ASSERT(0);
627 }
628
629 #ifdef __DML_VBA_DEBUG__
630 dml2_printf("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat);
631 dml2_printf("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
632 dml2_printf("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
633 dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY);
634 dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC);
635 dml2_printf("DML::%s: pitch_y = %u\n", __func__, pitch_y);
636 dml2_printf("DML::%s: pitch_c = %u\n", __func__, pitch_c);
637 dml2_printf("DML::%s: surf_linear128_l = %u\n", __func__, *surf_linear128_l);
638 dml2_printf("DML::%s: surf_linear128_c = %u\n", __func__, *surf_linear128_c);
639 #endif
640
641 if (dml_get_gfx_version(SurfaceTiling) == 11) {
642 *surf_linear128_l = 0;
643 *surf_linear128_c = 0;
644 } else {
645 if (SurfaceTiling == dml2_sw_linear) {
646 *surf_linear128_l = (((pitch_y * *BytePerPixelY) % 256) != 0);
647
648 if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)
649 *surf_linear128_c = (((pitch_c * *BytePerPixelC) % 256) != 0);
650 }
651 }
652
653 if (!(dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha)) {
654 if (SurfaceTiling == dml2_sw_linear) {
655 *BlockHeight256BytesY = 1;
656 } else if (SourcePixelFormat == dml2_444_64) {
657 *BlockHeight256BytesY = 4;
658 } else if (SourcePixelFormat == dml2_444_8) {
659 *BlockHeight256BytesY = 16;
660 } else {
661 *BlockHeight256BytesY = 8;
662 }
663 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
664 *BlockHeight256BytesC = 0;
665 *BlockWidth256BytesC = 0;
666 } else { // dual plane
667 if (SurfaceTiling == dml2_sw_linear) {
668 *BlockHeight256BytesY = 1;
669 *BlockHeight256BytesC = 1;
670 } else if (SourcePixelFormat == dml2_rgbe_alpha) {
671 *BlockHeight256BytesY = 8;
672 *BlockHeight256BytesC = 16;
673 } else if (SourcePixelFormat == dml2_420_8) {
674 *BlockHeight256BytesY = 16;
675 *BlockHeight256BytesC = 8;
676 } else {
677 *BlockHeight256BytesY = 8;
678 *BlockHeight256BytesC = 8;
679 }
680 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
681 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
682 }
683 #ifdef __DML_VBA_DEBUG__
684 dml2_printf("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY);
685 dml2_printf("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY);
686 dml2_printf("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC);
687 dml2_printf("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC);
688 #endif
689
690 if (dml_get_gfx_version(SurfaceTiling) == 11) {
691 if (SurfaceTiling == dml2_gfx11_sw_linear) {
692 *MacroTileHeightY = *BlockHeight256BytesY;
693 *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
694 *MacroTileHeightC = *BlockHeight256BytesC;
695 if (*MacroTileHeightC == 0) {
696 *MacroTileWidthC = 0;
697 } else {
698 *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
699 }
700 } else if (SurfaceTiling == dml2_gfx11_sw_64kb_d || SurfaceTiling == dml2_gfx11_sw_64kb_d_t || SurfaceTiling == dml2_gfx11_sw_64kb_d_x || SurfaceTiling == dml2_gfx11_sw_64kb_r_x) {
701 *MacroTileHeightY = 16 * *BlockHeight256BytesY;
702 *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
703 *MacroTileHeightC = 16 * *BlockHeight256BytesC;
704 if (*MacroTileHeightC == 0) {
705 *MacroTileWidthC = 0;
706 } else {
707 *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
708 }
709 } else {
710 *MacroTileHeightY = 32 * *BlockHeight256BytesY;
711 *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
712 *MacroTileHeightC = 32 * *BlockHeight256BytesC;
713 if (*MacroTileHeightC == 0) {
714 *MacroTileWidthC = 0;
715 } else {
716 *MacroTileWidthC = 65536 * 4 / *BytePerPixelC / *MacroTileHeightC;
717 }
718 }
719 } else {
720 unsigned int macro_tile_size_bytes = dml_get_tile_block_size_bytes(SurfaceTiling);
721 unsigned int macro_tile_scale = 1; // macro tile to 256B req scaling
722
723 if (SurfaceTiling == dml2_sw_linear) {
724 macro_tile_scale = 1;
725 } else if (SurfaceTiling == dml2_sw_4kb_2d) {
726 macro_tile_scale = 4;
727 } else if (SurfaceTiling == dml2_sw_64kb_2d) {
728 macro_tile_scale = 16;
729 } else if (SurfaceTiling == dml2_sw_256kb_2d) {
730 macro_tile_scale = 32;
731 } else {
732 dml2_printf("ERROR: Invalid SurfaceTiling setting! val=%u\n", SurfaceTiling);
733 DML2_ASSERT(0);
734 }
735
736 *MacroTileHeightY = macro_tile_scale * *BlockHeight256BytesY;
737 *MacroTileWidthY = macro_tile_size_bytes / *BytePerPixelY / *MacroTileHeightY;
738 *MacroTileHeightC = macro_tile_scale * *BlockHeight256BytesC;
739 if (*MacroTileHeightC == 0) {
740 *MacroTileWidthC = 0;
741 } else {
742 *MacroTileWidthC = macro_tile_size_bytes / *BytePerPixelC / *MacroTileHeightC;
743 }
744 }
745
746 #ifdef __DML_VBA_DEBUG__
747 dml2_printf("DML::%s: MacroTileWidthY = %u\n", __func__, *MacroTileWidthY);
748 dml2_printf("DML::%s: MacroTileHeightY = %u\n", __func__, *MacroTileHeightY);
749 dml2_printf("DML::%s: MacroTileWidthC = %u\n", __func__, *MacroTileWidthC);
750 dml2_printf("DML::%s: MacroTileHeightC = %u\n", __func__, *MacroTileHeightC);
751 #endif
752 }
753
CalculateSinglePipeDPPCLKAndSCLThroughput(double HRatio,double HRatioChroma,double VRatio,double VRatioChroma,double MaxDCHUBToPSCLThroughput,double MaxPSCLToLBThroughput,double PixelClock,enum dml2_source_format_class SourcePixelFormat,unsigned int HTaps,unsigned int HTapsChroma,unsigned int VTaps,unsigned int VTapsChroma,double * PSCL_THROUGHPUT,double * PSCL_THROUGHPUT_CHROMA,double * DPPCLKUsingSingleDPP)754 static void CalculateSinglePipeDPPCLKAndSCLThroughput(
755 double HRatio,
756 double HRatioChroma,
757 double VRatio,
758 double VRatioChroma,
759 double MaxDCHUBToPSCLThroughput,
760 double MaxPSCLToLBThroughput,
761 double PixelClock,
762 enum dml2_source_format_class SourcePixelFormat,
763 unsigned int HTaps,
764 unsigned int HTapsChroma,
765 unsigned int VTaps,
766 unsigned int VTapsChroma,
767
768 // Output
769 double *PSCL_THROUGHPUT,
770 double *PSCL_THROUGHPUT_CHROMA,
771 double *DPPCLKUsingSingleDPP)
772 {
773 double DPPCLKUsingSingleDPPLuma;
774 double DPPCLKUsingSingleDPPChroma;
775
776 if (HRatio > 1) {
777 *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatio / math_ceil2((double)HTaps / 6.0, 1.0));
778 } else {
779 *PSCL_THROUGHPUT = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
780 }
781
782 DPPCLKUsingSingleDPPLuma = PixelClock * math_max3(VTaps / 6 * math_min2(1, HRatio), HRatio * VRatio / *PSCL_THROUGHPUT, 1);
783
784 if ((HTaps > 6 || VTaps > 6) && DPPCLKUsingSingleDPPLuma < 2 * PixelClock)
785 DPPCLKUsingSingleDPPLuma = 2 * PixelClock;
786
787 if (!dml_is_420(SourcePixelFormat) && SourcePixelFormat != dml2_rgbe_alpha) {
788 *PSCL_THROUGHPUT_CHROMA = 0;
789 *DPPCLKUsingSingleDPP = DPPCLKUsingSingleDPPLuma;
790 } else {
791 if (HRatioChroma > 1) {
792 *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput * HRatioChroma / math_ceil2((double)HTapsChroma / 6.0, 1.0));
793 } else {
794 *PSCL_THROUGHPUT_CHROMA = math_min2(MaxDCHUBToPSCLThroughput, MaxPSCLToLBThroughput);
795 }
796 DPPCLKUsingSingleDPPChroma = PixelClock * math_max3(VTapsChroma / 6 * math_min2(1, HRatioChroma),
797 HRatioChroma * VRatioChroma / *PSCL_THROUGHPUT_CHROMA, 1);
798 if ((HTapsChroma > 6 || VTapsChroma > 6) && DPPCLKUsingSingleDPPChroma < 2 * PixelClock)
799 DPPCLKUsingSingleDPPChroma = 2 * PixelClock;
800 *DPPCLKUsingSingleDPP = math_max2(DPPCLKUsingSingleDPPLuma, DPPCLKUsingSingleDPPChroma);
801 }
802 }
803
CalculateSwathWidth(const struct dml2_display_cfg * display_cfg,bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,enum dml2_odm_mode ODMMode[],unsigned int BytePerPixY[],unsigned int BytePerPixC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],bool surf_linear128_l[],bool surf_linear128_c[],unsigned int DPPPerSurface[],unsigned int req_per_swath_ub_l[],unsigned int req_per_swath_ub_c[],unsigned int SwathWidthSingleDPPY[],unsigned int SwathWidthSingleDPPC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],unsigned int MaximumSwathHeightY[],unsigned int MaximumSwathHeightC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[])804 static void CalculateSwathWidth(
805 const struct dml2_display_cfg *display_cfg,
806 bool ForceSingleDPP,
807 unsigned int NumberOfActiveSurfaces,
808 enum dml2_odm_mode ODMMode[],
809 unsigned int BytePerPixY[],
810 unsigned int BytePerPixC[],
811 unsigned int Read256BytesBlockHeightY[],
812 unsigned int Read256BytesBlockHeightC[],
813 unsigned int Read256BytesBlockWidthY[],
814 unsigned int Read256BytesBlockWidthC[],
815 bool surf_linear128_l[],
816 bool surf_linear128_c[],
817 unsigned int DPPPerSurface[],
818
819 // Output
820 unsigned int req_per_swath_ub_l[],
821 unsigned int req_per_swath_ub_c[],
822 unsigned int SwathWidthSingleDPPY[],
823 unsigned int SwathWidthSingleDPPC[],
824 unsigned int SwathWidthY[], // per-pipe
825 unsigned int SwathWidthC[], // per-pipe
826 unsigned int MaximumSwathHeightY[],
827 unsigned int MaximumSwathHeightC[],
828 unsigned int swath_width_luma_ub[], // per-pipe
829 unsigned int swath_width_chroma_ub[]) // per-pipe
830 {
831 enum dml2_odm_mode MainSurfaceODMMode;
832 double odm_hactive_factor = 1.0;
833 unsigned int req_width_horz_y;
834 unsigned int req_width_horz_c;
835 unsigned int surface_width_ub_l;
836 unsigned int surface_height_ub_l;
837 unsigned int surface_width_ub_c;
838 unsigned int surface_height_ub_c;
839
840 #ifdef __DML_VBA_DEBUG__
841 dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
842 dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
843 #endif
844
845 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
846 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
847 SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
848 } else {
849 SwathWidthSingleDPPY[k] = (unsigned int)display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
850 }
851
852 #ifdef __DML_VBA_DEBUG__
853 dml2_printf("DML::%s: k=%u ViewportWidth=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width);
854 dml2_printf("DML::%s: k=%u ViewportHeight=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height);
855 dml2_printf("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
856 #endif
857
858 MainSurfaceODMMode = ODMMode[k];
859
860 if (ForceSingleDPP) {
861 SwathWidthY[k] = SwathWidthSingleDPPY[k];
862 } else {
863 if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1)
864 odm_hactive_factor = 4.0;
865 else if (MainSurfaceODMMode == dml2_odm_mode_combine_3to1)
866 odm_hactive_factor = 3.0;
867 else if (MainSurfaceODMMode == dml2_odm_mode_combine_2to1)
868 odm_hactive_factor = 2.0;
869
870 if (MainSurfaceODMMode == dml2_odm_mode_combine_4to1 || MainSurfaceODMMode == dml2_odm_mode_combine_3to1 || MainSurfaceODMMode == dml2_odm_mode_combine_2to1) {
871 SwathWidthY[k] = (unsigned int)(math_min2((double)SwathWidthSingleDPPY[k], math_round((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active / odm_hactive_factor * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio)));
872 } else if (DPPPerSurface[k] == 2) {
873 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
874 } else {
875 SwathWidthY[k] = SwathWidthSingleDPPY[k];
876 }
877 }
878
879 #ifdef __DML_VBA_DEBUG__
880 dml2_printf("DML::%s: k=%u HActive=%u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active);
881 dml2_printf("DML::%s: k=%u HRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
882 dml2_printf("DML::%s: k=%u MainSurfaceODMMode=%u\n", __func__, k, MainSurfaceODMMode);
883 dml2_printf("DML::%s: k=%u SwathWidthSingleDPPY=%u\n", __func__, k, SwathWidthSingleDPPY[k]);
884 dml2_printf("DML::%s: k=%u SwathWidthY=%u\n", __func__, k, SwathWidthY[k]);
885 #endif
886
887 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
888 SwathWidthC[k] = SwathWidthY[k] / 2;
889 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
890 } else {
891 SwathWidthC[k] = SwathWidthY[k];
892 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
893 }
894
895 if (ForceSingleDPP == true) {
896 SwathWidthY[k] = SwathWidthSingleDPPY[k];
897 SwathWidthC[k] = SwathWidthSingleDPPC[k];
898 }
899
900 req_width_horz_y = Read256BytesBlockWidthY[k];
901 req_width_horz_c = Read256BytesBlockWidthC[k];
902
903 if (surf_linear128_l[k])
904 req_width_horz_y = req_width_horz_y / 2;
905
906 if (surf_linear128_c[k])
907 req_width_horz_c = req_width_horz_c / 2;
908
909 surface_width_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.width, req_width_horz_y);
910 surface_height_ub_l = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane0.height, Read256BytesBlockHeightY[k]);
911 surface_width_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.width, req_width_horz_c);
912 surface_height_ub_c = (unsigned int)math_ceil2((double)display_cfg->plane_descriptors[k].surface.plane1.height, Read256BytesBlockHeightC[k]);
913
914 #ifdef __DML_VBA_DEBUG__
915 dml2_printf("DML::%s: k=%u surface_width_ub_l=%u\n", __func__, k, surface_width_ub_l);
916 dml2_printf("DML::%s: k=%u surface_height_ub_l=%u\n", __func__, k, surface_height_ub_l);
917 dml2_printf("DML::%s: k=%u surface_width_ub_c=%u\n", __func__, k, surface_width_ub_c);
918 dml2_printf("DML::%s: k=%u surface_height_ub_c=%u\n", __func__, k, surface_height_ub_c);
919 dml2_printf("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y);
920 dml2_printf("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c);
921 dml2_printf("DML::%s: k=%u Read256BytesBlockWidthY=%u\n", __func__, k, Read256BytesBlockWidthY[k]);
922 dml2_printf("DML::%s: k=%u Read256BytesBlockHeightY=%u\n", __func__, k, Read256BytesBlockHeightY[k]);
923 dml2_printf("DML::%s: k=%u Read256BytesBlockWidthC=%u\n", __func__, k, Read256BytesBlockWidthC[k]);
924 dml2_printf("DML::%s: k=%u Read256BytesBlockHeightC=%u\n", __func__, k, Read256BytesBlockHeightC[k]);
925 dml2_printf("DML::%s: k=%u req_width_horz_y=%u\n", __func__, k, req_width_horz_y);
926 dml2_printf("DML::%s: k=%u req_width_horz_c=%u\n", __func__, k, req_width_horz_c);
927 dml2_printf("DML::%s: k=%u ViewportStationary=%u\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.stationary);
928 dml2_printf("DML::%s: k=%u DPPPerSurface=%u\n", __func__, k, DPPPerSurface[k]);
929 #endif
930
931 req_per_swath_ub_l[k] = 0;
932 req_per_swath_ub_c[k] = 0;
933 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
934 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
935 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
936 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
937 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start + SwathWidthY[k] + req_width_horz_y - 1, req_width_horz_y) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start, req_width_horz_y)));
938 } else {
939 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_width_ub_l, math_ceil2((double)SwathWidthY[k] - 1, req_width_horz_y) + req_width_horz_y));
940 }
941 req_per_swath_ub_l[k] = swath_width_luma_ub[k] / req_width_horz_y;
942
943 if (BytePerPixC[k] > 0) {
944 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
945 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + req_width_horz_c - 1, req_width_horz_c) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, req_width_horz_c)));
946 } else {
947 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_width_ub_c, math_ceil2((double)SwathWidthC[k] - 1, req_width_horz_c) + req_width_horz_c));
948 }
949 req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / req_width_horz_c;
950 } else {
951 swath_width_chroma_ub[k] = 0;
952 }
953 } else {
954 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
955 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
956
957 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
958 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start + SwathWidthY[k] + Read256BytesBlockHeightY[k] - 1, Read256BytesBlockHeightY[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start, Read256BytesBlockHeightY[k])));
959 } else {
960 swath_width_luma_ub[k] = (unsigned int)(math_min2(surface_height_ub_l, math_ceil2((double)SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]));
961 }
962 req_per_swath_ub_l[k] = swath_width_luma_ub[k] / Read256BytesBlockHeightY[k];
963 if (BytePerPixC[k] > 0) {
964 if (display_cfg->plane_descriptors[k].composition.viewport.stationary && DPPPerSurface[k] == 1) {
965 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start + SwathWidthC[k] + Read256BytesBlockHeightC[k] - 1, Read256BytesBlockHeightC[k]) - math_floor2(display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start, Read256BytesBlockHeightC[k])));
966 } else {
967 swath_width_chroma_ub[k] = (unsigned int)(math_min2(surface_height_ub_c, math_ceil2((double)SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]));
968 }
969 req_per_swath_ub_c[k] = swath_width_chroma_ub[k] / Read256BytesBlockHeightC[k];
970 } else {
971 swath_width_chroma_ub[k] = 0;
972 }
973 }
974
975 #ifdef __DML_VBA_DEBUG__
976 dml2_printf("DML::%s: k=%u swath_width_luma_ub=%u\n", __func__, k, swath_width_luma_ub[k]);
977 dml2_printf("DML::%s: k=%u swath_width_chroma_ub=%u\n", __func__, k, swath_width_chroma_ub[k]);
978 dml2_printf("DML::%s: k=%u MaximumSwathHeightY=%u\n", __func__, k, MaximumSwathHeightY[k]);
979 dml2_printf("DML::%s: k=%u MaximumSwathHeightC=%u\n", __func__, k, MaximumSwathHeightC[k]);
980 dml2_printf("DML::%s: k=%u req_per_swath_ub_l=%u\n", __func__, k, req_per_swath_ub_l[k]);
981 dml2_printf("DML::%s: k=%u req_per_swath_ub_c=%u\n", __func__, k, req_per_swath_ub_c[k]);
982 #endif
983
984 }
985 }
986
/*
 * Decide whether unbounded request is enabled.
 *
 * It is permitted only when exactly one DPP is active and NoChromaOrLinear
 * is set. When unb_req_force_en is set, unb_req_force_val must also be true
 * for the result to be true; the force can therefore only turn the feature
 * off, never on when it is not permitted.
 */
static bool UnboundedRequest(bool unb_req_force_en, bool unb_req_force_val, unsigned int TotalNumberOfActiveDPP, bool NoChromaOrLinear)
{
	const bool permitted = NoChromaOrLinear && (TotalNumberOfActiveDPP == 1);
	const bool enabled = unb_req_force_en ? (permitted && unb_req_force_val) : permitted;

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: unb_req_force_en = %u\n", __func__, unb_req_force_en);
	dml2_printf("DML::%s: unb_req_force_val = %u\n", __func__, unb_req_force_val);
	dml2_printf("DML::%s: unb_req_ok = %u\n", __func__, permitted);
	dml2_printf("DML::%s: unb_req_en = %u\n", __func__, enabled);
#endif
	return enabled;
}
1006
CalculateDETBufferSize(struct dml2_core_shared_CalculateDETBufferSize_locals * l,const struct dml2_display_cfg * display_cfg,bool ForceSingleDPP,unsigned int NumberOfActiveSurfaces,bool UnboundedRequestEnabled,unsigned int nomDETInKByte,unsigned int MaxTotalDETInKByte,unsigned int ConfigReturnBufferSizeInKByte,unsigned int MinCompressedBufferSizeInKByte,unsigned int ConfigReturnBufferSegmentSizeInkByte,unsigned int CompressedBufferSegmentSizeInkByte,double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int full_swath_bytes_l[],unsigned int full_swath_bytes_c[],unsigned int DPPPerSurface[],unsigned int DETBufferSizeInKByte[],unsigned int * CompressedBufferSizeInkByte)1007 static void CalculateDETBufferSize(
1008 struct dml2_core_shared_CalculateDETBufferSize_locals *l,
1009 const struct dml2_display_cfg *display_cfg,
1010 bool ForceSingleDPP,
1011 unsigned int NumberOfActiveSurfaces,
1012 bool UnboundedRequestEnabled,
1013 unsigned int nomDETInKByte,
1014 unsigned int MaxTotalDETInKByte,
1015 unsigned int ConfigReturnBufferSizeInKByte,
1016 unsigned int MinCompressedBufferSizeInKByte,
1017 unsigned int ConfigReturnBufferSegmentSizeInkByte,
1018 unsigned int CompressedBufferSegmentSizeInkByte,
1019 double ReadBandwidthLuma[],
1020 double ReadBandwidthChroma[],
1021 unsigned int full_swath_bytes_l[],
1022 unsigned int full_swath_bytes_c[],
1023 unsigned int DPPPerSurface[],
1024 // Output
1025 unsigned int DETBufferSizeInKByte[],
1026 unsigned int *CompressedBufferSizeInkByte)
1027 {
1028 memset(l, 0, sizeof(struct dml2_core_shared_CalculateDETBufferSize_locals));
1029
1030 bool DETPieceAssignedToThisSurfaceAlready[DML2_MAX_PLANES];
1031 bool NextPotentialSurfaceToAssignDETPieceFound;
1032 bool MinimizeReallocationSuccess = false;
1033
1034 #ifdef __DML_VBA_DEBUG__
1035 dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, ForceSingleDPP);
1036 dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
1037 dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
1038 dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, UnboundedRequestEnabled);
1039 dml2_printf("DML::%s: MaxTotalDETInKByte = %u\n", __func__, MaxTotalDETInKByte);
1040 dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, ConfigReturnBufferSizeInKByte);
1041 dml2_printf("DML::%s: MinCompressedBufferSizeInKByte = %u\n", __func__, MinCompressedBufferSizeInKByte);
1042 dml2_printf("DML::%s: CompressedBufferSegmentSizeInkByte = %u\n", __func__, CompressedBufferSegmentSizeInkByte);
1043 #endif
1044
1045 // Note: Will use default det size if that fits 2 swaths
1046 if (UnboundedRequestEnabled) {
1047 if (display_cfg->plane_descriptors[0].overrides.det_size_override_kb > 0) {
1048 DETBufferSizeInKByte[0] = display_cfg->plane_descriptors[0].overrides.det_size_override_kb;
1049 } else {
1050 DETBufferSizeInKByte[0] = (unsigned int)math_max2(128.0, math_ceil2(2.0 * ((double)full_swath_bytes_l[0] + (double)full_swath_bytes_c[0]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte));
1051 }
1052 *CompressedBufferSizeInkByte = ConfigReturnBufferSizeInKByte - DETBufferSizeInKByte[0];
1053 } else {
1054 l->DETBufferSizePoolInKByte = MaxTotalDETInKByte;
1055 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1056 DETBufferSizeInKByte[k] = 0;
1057 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
1058 l->max_minDET = nomDETInKByte - ConfigReturnBufferSegmentSizeInkByte;
1059 } else {
1060 l->max_minDET = nomDETInKByte;
1061 }
1062 l->minDET = 128;
1063 l->minDET_pipe = 0;
1064
1065 // add DET resource until can hold 2 full swaths
1066 while (l->minDET <= l->max_minDET && l->minDET_pipe == 0) {
1067 if (2.0 * ((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0 <= l->minDET)
1068 l->minDET_pipe = l->minDET;
1069 l->minDET = l->minDET + ConfigReturnBufferSegmentSizeInkByte;
1070 }
1071
1072 #ifdef __DML_VBA_DEBUG__
1073 dml2_printf("DML::%s: k=%u minDET = %u\n", __func__, k, l->minDET);
1074 dml2_printf("DML::%s: k=%u max_minDET = %u\n", __func__, k, l->max_minDET);
1075 dml2_printf("DML::%s: k=%u minDET_pipe = %u\n", __func__, k, l->minDET_pipe);
1076 dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, full_swath_bytes_l[k]);
1077 dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, full_swath_bytes_c[k]);
1078 #endif
1079
1080 if (l->minDET_pipe == 0) {
1081 l->minDET_pipe = (unsigned int)(math_max2(128, math_ceil2(((double)full_swath_bytes_l[k] + (double)full_swath_bytes_c[k]) / 1024.0, ConfigReturnBufferSegmentSizeInkByte)));
1082 #ifdef __DML_VBA_DEBUG__
1083 dml2_printf("DML::%s: k=%u minDET_pipe = %u (assume each plane take half DET)\n", __func__, k, l->minDET_pipe);
1084 #endif
1085 }
1086
1087 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1088 DETBufferSizeInKByte[k] = 0;
1089 } else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0) {
1090 DETBufferSizeInKByte[k] = display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
1091 l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * display_cfg->plane_descriptors[k].overrides.det_size_override_kb;
1092 } else if ((ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe <= l->DETBufferSizePoolInKByte) {
1093 DETBufferSizeInKByte[k] = l->minDET_pipe;
1094 l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - (ForceSingleDPP ? 1 : DPPPerSurface[k]) * l->minDET_pipe;
1095 }
1096
1097 #ifdef __DML_VBA_DEBUG__
1098 dml2_printf("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, DPPPerSurface[k]);
1099 dml2_printf("DML::%s: k=%u DETSizeOverride = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.det_size_override_kb);
1100 dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
1101 dml2_printf("DML::%s: DETBufferSizePoolInKByte = %u\n", __func__, l->DETBufferSizePoolInKByte);
1102 #endif
1103 }
1104
1105 if (display_cfg->minimize_det_reallocation) {
1106 MinimizeReallocationSuccess = true;
1107 // To minimize det reallocation, we don't distribute based on each surfaces bandwidth proportional to the global
1108 // but rather distribute DET across streams proportionally based on pixel rate, and only distribute based on
1109 // bandwidth between the planes on the same stream. This ensures that large scale re-distribution only on a
1110 // stream count and/or pixel rate change, which is must less likely then general bandwidth changes per plane.
1111
1112 // Calculate total pixel rate
1113 for (unsigned int k = 0; k < display_cfg->num_streams; ++k) {
1114 l->TotalPixelRate += display_cfg->stream_descriptors[k].timing.pixel_clock_khz;
1115 }
1116
1117 // Calculate per stream DET budget
1118 for (unsigned int k = 0; k < display_cfg->num_streams; ++k) {
1119 l->DETBudgetPerStream[k] = (unsigned int)((double) display_cfg->stream_descriptors[k].timing.pixel_clock_khz * MaxTotalDETInKByte / l->TotalPixelRate);
1120 l->RemainingDETBudgetPerStream[k] = l->DETBudgetPerStream[k];
1121 }
1122
1123 // Calculate the per stream total bandwidth
1124 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1125 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1126 l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index] += (unsigned int)(ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1127
1128 // Check the minimum can be satisfied by budget
1129 if (l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] >= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k])) {
1130 l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k]);
1131 } else {
1132 MinimizeReallocationSuccess = false;
1133 break;
1134 }
1135 }
1136 }
1137
1138 if (MinimizeReallocationSuccess) {
1139 // Since a fixed budget per stream is sufficient to satisfy the minimums, just re-distribute each streams
1140 // budget proportionally across its planes
1141 l->ResidualDETAfterRounding = MaxTotalDETInKByte;
1142
1143 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1144 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1145 l->IdealDETBudget = (unsigned int)(((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidthPerStream[display_cfg->plane_descriptors[k].stream_index])
1146 * l->DETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index]);
1147
1148 if (l->IdealDETBudget > DETBufferSizeInKByte[k]) {
1149 l->DeltaDETBudget = l->IdealDETBudget - DETBufferSizeInKByte[k];
1150 if (l->DeltaDETBudget > l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index])
1151 l->DeltaDETBudget = l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index];
1152
1153 /* split the additional budgeted DET among the pipes per plane */
1154 DETBufferSizeInKByte[k] += (unsigned int)((double)l->DeltaDETBudget / (ForceSingleDPP ? 1 : DPPPerSurface[k]));
1155 l->RemainingDETBudgetPerStream[display_cfg->plane_descriptors[k].stream_index] -= l->DeltaDETBudget;
1156 }
1157
1158 // Round down to segment size
1159 DETBufferSizeInKByte[k] = (DETBufferSizeInKByte[k] / ConfigReturnBufferSegmentSizeInkByte) * ConfigReturnBufferSegmentSizeInkByte;
1160
1161 l->ResidualDETAfterRounding -= DETBufferSizeInKByte[k] * (ForceSingleDPP ? 1 : DPPPerSurface[k]);
1162 }
1163 }
1164 }
1165 }
1166
1167 if (!MinimizeReallocationSuccess) {
1168 l->TotalBandwidth = 0;
1169 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1170 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1171 l->TotalBandwidth = l->TotalBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
1172 }
1173 }
1174 #ifdef __DML_VBA_DEBUG__
1175 dml2_printf("DML::%s: --- Before bandwidth adjustment ---\n", __func__);
1176 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1177 dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, DETBufferSizeInKByte[k]);
1178 }
1179 dml2_printf("DML::%s: --- DET allocation with bandwidth ---\n", __func__);
1180 #endif
1181 dml2_printf("DML::%s: TotalBandwidth = %f\n", __func__, l->TotalBandwidth);
1182 l->BandwidthOfSurfacesNotAssignedDETPiece = l->TotalBandwidth;
1183 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1184
1185 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
1186 DETPieceAssignedToThisSurfaceAlready[k] = true;
1187 } else if (display_cfg->plane_descriptors[k].overrides.det_size_override_kb > 0 || (((double)(ForceSingleDPP ? 1 : DPPPerSurface[k]) * (double)DETBufferSizeInKByte[k] / (double)MaxTotalDETInKByte) >= ((ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) / l->TotalBandwidth))) {
1188 DETPieceAssignedToThisSurfaceAlready[k] = true;
1189 l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - ReadBandwidthLuma[k] - ReadBandwidthChroma[k];
1190 } else {
1191 DETPieceAssignedToThisSurfaceAlready[k] = false;
1192 }
1193 #ifdef __DML_VBA_DEBUG__
1194 dml2_printf("DML::%s: k=%u DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, k, DETPieceAssignedToThisSurfaceAlready[k]);
1195 dml2_printf("DML::%s: k=%u BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, k, l->BandwidthOfSurfacesNotAssignedDETPiece);
1196 #endif
1197 }
1198
1199 for (unsigned int j = 0; j < NumberOfActiveSurfaces; ++j) {
1200 NextPotentialSurfaceToAssignDETPieceFound = false;
1201 l->NextSurfaceToAssignDETPiece = 0;
1202
1203 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1204 #ifdef __DML_VBA_DEBUG__
1205 dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthLuma[k] = %f\n", __func__, j, k, ReadBandwidthLuma[k]);
1206 dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthChroma[k] = %f\n", __func__, j, k, ReadBandwidthChroma[k]);
1207 dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthLuma[Next] = %f\n", __func__, j, k, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]);
1208 dml2_printf("DML::%s: j=%u k=%u, ReadBandwidthChroma[Next] = %f\n", __func__, j, k, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
1209 dml2_printf("DML::%s: j=%u k=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, k, l->NextSurfaceToAssignDETPiece);
1210 #endif
1211 if (!DETPieceAssignedToThisSurfaceAlready[k] && (!NextPotentialSurfaceToAssignDETPieceFound ||
1212 ReadBandwidthLuma[k] + ReadBandwidthChroma[k] < ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece])) {
1213 l->NextSurfaceToAssignDETPiece = k;
1214 NextPotentialSurfaceToAssignDETPieceFound = true;
1215 }
1216 #ifdef __DML_VBA_DEBUG__
1217 dml2_printf("DML::%s: j=%u k=%u, DETPieceAssignedToThisSurfaceAlready = %u\n", __func__, j, k, DETPieceAssignedToThisSurfaceAlready[k]);
1218 dml2_printf("DML::%s: j=%u k=%u, NextPotentialSurfaceToAssignDETPieceFound = %u\n", __func__, j, k, NextPotentialSurfaceToAssignDETPieceFound);
1219 #endif
1220 }
1221
1222 if (NextPotentialSurfaceToAssignDETPieceFound) {
1223 l->NextDETBufferPieceInKByte = (unsigned int)(math_min2(
1224 math_round((double)l->DETBufferSizePoolInKByte * (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]) / l->BandwidthOfSurfacesNotAssignedDETPiece /
1225 ((ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte))
1226 * (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte,
1227 math_floor2((double)l->DETBufferSizePoolInKByte, (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]) * ConfigReturnBufferSegmentSizeInkByte)));
1228
1229 #ifdef __DML_VBA_DEBUG__
1230 dml2_printf("DML::%s: j=%u, DETBufferSizePoolInKByte = %u\n", __func__, j, l->DETBufferSizePoolInKByte);
1231 dml2_printf("DML::%s: j=%u, NextSurfaceToAssignDETPiece = %u\n", __func__, j, l->NextSurfaceToAssignDETPiece);
1232 dml2_printf("DML::%s: j=%u, ReadBandwidthLuma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece]);
1233 dml2_printf("DML::%s: j=%u, ReadBandwidthChroma[%u] = %f\n", __func__, j, l->NextSurfaceToAssignDETPiece, ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
1234 dml2_printf("DML::%s: j=%u, BandwidthOfSurfacesNotAssignedDETPiece = %f\n", __func__, j, l->BandwidthOfSurfacesNotAssignedDETPiece);
1235 dml2_printf("DML::%s: j=%u, NextDETBufferPieceInKByte = %u\n", __func__, j, l->NextDETBufferPieceInKByte);
1236 dml2_printf("DML::%s: j=%u, DETBufferSizeInKByte[%u] increases from %u ", __func__, j, l->NextSurfaceToAssignDETPiece, DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]);
1237 #endif
1238
1239 DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] = DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece] + l->NextDETBufferPieceInKByte / (ForceSingleDPP ? 1 : DPPPerSurface[l->NextSurfaceToAssignDETPiece]);
1240 #ifdef __DML_VBA_DEBUG__
1241 dml2_printf("to %u\n", DETBufferSizeInKByte[l->NextSurfaceToAssignDETPiece]);
1242 #endif
1243
1244 l->DETBufferSizePoolInKByte = l->DETBufferSizePoolInKByte - l->NextDETBufferPieceInKByte;
1245 DETPieceAssignedToThisSurfaceAlready[l->NextSurfaceToAssignDETPiece] = true;
1246 l->BandwidthOfSurfacesNotAssignedDETPiece = l->BandwidthOfSurfacesNotAssignedDETPiece - (ReadBandwidthLuma[l->NextSurfaceToAssignDETPiece] + ReadBandwidthChroma[l->NextSurfaceToAssignDETPiece]);
1247 }
1248 }
1249 }
1250 *CompressedBufferSizeInkByte = MinCompressedBufferSizeInKByte;
1251 }
1252 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByte / ConfigReturnBufferSegmentSizeInkByte;
1253
1254 #ifdef __DML_VBA_DEBUG__
1255 dml2_printf("DML::%s: --- After bandwidth adjustment ---\n", __func__);
1256 dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *CompressedBufferSizeInkByte);
1257 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1258 dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u (TotalReadBandWidth=%f)\n", __func__, k, DETBufferSizeInKByte[k], ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
1259 }
1260 #endif
1261 }
1262
CalculateRequiredDispclk(enum dml2_odm_mode ODMMode,double PixelClock)1263 static double CalculateRequiredDispclk(
1264 enum dml2_odm_mode ODMMode,
1265 double PixelClock)
1266 {
1267
1268 if (ODMMode == dml2_odm_mode_combine_4to1) {
1269 return PixelClock / 4.0;
1270 } else if (ODMMode == dml2_odm_mode_combine_3to1) {
1271 return PixelClock / 3.0;
1272 } else if (ODMMode == dml2_odm_mode_combine_2to1) {
1273 return PixelClock / 2.0;
1274 } else {
1275 return PixelClock;
1276 }
1277 }
1278
TruncToValidBPP(struct dml2_core_shared_TruncToValidBPP_locals * l,double LinkBitRate,unsigned int Lanes,unsigned int HTotal,unsigned int HActive,double PixelClock,double DesiredBPP,bool DSCEnable,enum dml2_output_encoder_class Output,enum dml2_output_format_class Format,unsigned int DSCInputBitPerComponent,unsigned int DSCSlices,unsigned int AudioRate,unsigned int AudioLayout,enum dml2_odm_mode ODMModeNoDSC,enum dml2_odm_mode ODMModeDSC,unsigned int * RequiredSlots)1279 static double TruncToValidBPP(
1280 struct dml2_core_shared_TruncToValidBPP_locals *l,
1281 double LinkBitRate,
1282 unsigned int Lanes,
1283 unsigned int HTotal,
1284 unsigned int HActive,
1285 double PixelClock,
1286 double DesiredBPP,
1287 bool DSCEnable,
1288 enum dml2_output_encoder_class Output,
1289 enum dml2_output_format_class Format,
1290 unsigned int DSCInputBitPerComponent,
1291 unsigned int DSCSlices,
1292 unsigned int AudioRate,
1293 unsigned int AudioLayout,
1294 enum dml2_odm_mode ODMModeNoDSC,
1295 enum dml2_odm_mode ODMModeDSC,
1296
1297 // Output
1298 unsigned int *RequiredSlots)
1299 {
1300 double MaxLinkBPP;
1301 unsigned int MinDSCBPP;
1302 double MaxDSCBPP;
1303 unsigned int NonDSCBPP0;
1304 unsigned int NonDSCBPP1;
1305 unsigned int NonDSCBPP2;
1306 enum dml2_odm_mode ODMMode;
1307
1308 if (Format == dml2_420) {
1309 NonDSCBPP0 = 12;
1310 NonDSCBPP1 = 15;
1311 NonDSCBPP2 = 18;
1312 MinDSCBPP = 6;
1313 MaxDSCBPP = 16;
1314 } else if (Format == dml2_444) {
1315 NonDSCBPP0 = 24;
1316 NonDSCBPP1 = 30;
1317 NonDSCBPP2 = 36;
1318 MinDSCBPP = 8;
1319 MaxDSCBPP = 16;
1320 } else {
1321 if (Output == dml2_hdmi || Output == dml2_hdmifrl) {
1322 NonDSCBPP0 = 24;
1323 NonDSCBPP1 = 24;
1324 NonDSCBPP2 = 24;
1325 } else {
1326 NonDSCBPP0 = 16;
1327 NonDSCBPP1 = 20;
1328 NonDSCBPP2 = 24;
1329 }
1330 if (Format == dml2_n422 || Output == dml2_hdmifrl) {
1331 MinDSCBPP = 7;
1332 MaxDSCBPP = 16;
1333 } else {
1334 MinDSCBPP = 8;
1335 MaxDSCBPP = 16;
1336 }
1337 }
1338 if (Output == dml2_dp2p0) {
1339 MaxLinkBPP = LinkBitRate * Lanes / PixelClock * 128.0 / 132.0 * 383.0 / 384.0 * 65536.0 / 65540.0;
1340 } else if (DSCEnable && Output == dml2_dp) {
1341 MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock * (1 - 2.4 / 100);
1342 } else {
1343 MaxLinkBPP = LinkBitRate / 10.0 * 8.0 * Lanes / PixelClock;
1344 }
1345
1346 ODMMode = DSCEnable ? ODMModeDSC : ODMModeNoDSC;
1347
1348 if (ODMMode == dml2_odm_mode_split_1to2) {
1349 MaxLinkBPP = 2 * MaxLinkBPP;
1350 }
1351
1352 if (DesiredBPP == 0) {
1353 if (DSCEnable) {
1354 if (MaxLinkBPP < MinDSCBPP) {
1355 return __DML2_CALCS_DPP_INVALID__;
1356 } else if (MaxLinkBPP >= MaxDSCBPP) {
1357 return MaxDSCBPP;
1358 } else {
1359 return math_floor2(16.0 * MaxLinkBPP, 1.0) / 16.0;
1360 }
1361 } else {
1362 if (MaxLinkBPP >= NonDSCBPP2) {
1363 return NonDSCBPP2;
1364 } else if (MaxLinkBPP >= NonDSCBPP1) {
1365 return NonDSCBPP1;
1366 } else if (MaxLinkBPP >= NonDSCBPP0) {
1367 return NonDSCBPP0;
1368 } else {
1369 return __DML2_CALCS_DPP_INVALID__;
1370 }
1371 }
1372 } else {
1373 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP == NonDSCBPP0)) ||
1374 (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) {
1375 return __DML2_CALCS_DPP_INVALID__;
1376 } else {
1377 return DesiredBPP;
1378 }
1379 }
1380 }
1381
1382 // updated for dcn4
dscceComputeDelay(unsigned int bpc,double BPP,unsigned int sliceWidth,unsigned int numSlices,enum dml2_output_format_class pixelFormat,enum dml2_output_encoder_class Output)1383 static unsigned int dscceComputeDelay(
1384 unsigned int bpc,
1385 double BPP,
1386 unsigned int sliceWidth,
1387 unsigned int numSlices,
1388 enum dml2_output_format_class pixelFormat,
1389 enum dml2_output_encoder_class Output)
1390 {
1391 // valid bpc = source bits per component in the set of {8, 10, 12}
1392 // valid bpp = increments of 1/16 of a bit
1393 // min = 6/7/8 in N420/N422/444, respectively
1394 // max = such that compression is 1:1
1395 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode)
1396 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4}
1397 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420}
1398
1399 // fixed value
1400 unsigned int rcModelSize = 8192;
1401
1402 // N422/N420 operate at 2 pixels per clock
1403 unsigned int pixelsPerClock, padding_pixels, ssm_group_priming_delay, ssm_pipeline_delay, obsm_pipeline_delay, slice_padded_pixels, ixd_plus_padding, ixd_plus_padding_groups, cycles_per_group, group_delay, pipeline_delay, pixels, additional_group_delay, lines_to_reach_ixd, groups_to_reach_ixd, slice_width_groups, initial_xmit_delay, number_of_lines_to_reach_ixd, slice_width_modified;
1404
1405
1406 if (pixelFormat == dml2_420)
1407 pixelsPerClock = 2;
1408 // #all other modes operate at 1 pixel per clock
1409 else if (pixelFormat == dml2_444)
1410 pixelsPerClock = 1;
1411 else if (pixelFormat == dml2_n422 || Output == dml2_hdmifrl)
1412 pixelsPerClock = 2;
1413 else
1414 pixelsPerClock = 1;
1415
1416 //initial transmit delay as per PPS
1417 initial_xmit_delay = (unsigned int)(math_round(rcModelSize / 2.0 / BPP / pixelsPerClock));
1418
1419 //slice width as seen by dscc_bcl in pixels or pixels pairs (depending on number of pixels per pixel container based on pixel format)
1420 slice_width_modified = (pixelFormat == dml2_444 || pixelFormat == dml2_420 || Output == dml2_hdmifrl) ? sliceWidth / 2 : sliceWidth;
1421
1422 padding_pixels = ((slice_width_modified % 3) != 0) ? (3 - (slice_width_modified % 3)) * (initial_xmit_delay / slice_width_modified) : 0;
1423
1424 if ((3.0 * pixelsPerClock * BPP) >= ((double)((initial_xmit_delay + 2) / 3) * (double)(3 + (pixelFormat == dml2_n422)))) {
1425 if ((initial_xmit_delay + padding_pixels) % 3 == 1) {
1426 initial_xmit_delay++;
1427 }
1428 }
1429
1430
1431 //sub-stream multiplexer balance fifo priming delay in groups as per dsc standard
1432 if (bpc == 8)
1433 ssm_group_priming_delay = 83;
1434 else if (bpc == 10)
1435 ssm_group_priming_delay = 91;
1436 else if (bpc == 12)
1437 ssm_group_priming_delay = 115;
1438 else if (bpc == 14)
1439 ssm_group_priming_delay = 123;
1440 else
1441 ssm_group_priming_delay = 128;
1442
1443 //slice width in groups is rounded up to the nearest group as DSC adds padded pixels such that there are an integer number of groups per slice
1444 slice_width_groups = (slice_width_modified + 2) / 3;
1445
1446 //determine number of padded pixels in the last group of a slice line, computed as
1447 slice_padded_pixels = 3 * slice_width_groups - slice_width_modified;
1448
1449
1450
1451
1452 //determine integer number of complete slice lines required to reach initial transmit delay without ssm delay considered
1453 number_of_lines_to_reach_ixd = initial_xmit_delay / slice_width_modified;
1454
1455 //increase initial transmit delay by the number of padded pixels added to a slice line multipled by the integer number of complete lines to reach initial transmit delay
1456 //this step is necessary as each padded pixel added takes up a clock cycle and, therefore, adds to the overall delay
1457 ixd_plus_padding = initial_xmit_delay + slice_padded_pixels * number_of_lines_to_reach_ixd;
1458
1459 //convert the padded initial transmit delay from pixels to groups by rounding up to the nearest group as DSC processes in groups of pixels
1460 ixd_plus_padding_groups = (ixd_plus_padding + 2) / 3;
1461
1462 //number of groups required for a slice to reach initial transmit delay is the sum of the padded initial transmit delay plus the ssm group priming delay
1463 groups_to_reach_ixd = ixd_plus_padding_groups + ssm_group_priming_delay;
1464
1465
1466 //number of lines required to reach padded initial transmit delay in groups in slices to the left of the last horizontal slice
1467 //needs to be rounded up as a complete slice lines are buffered prior to initial transmit delay being reached in the last horizontal slice
1468 lines_to_reach_ixd = (groups_to_reach_ixd + slice_width_groups - 1) / slice_width_groups; //round up lines to reach ixd to next
1469
1470 //determine if there are non-zero number of pixels reached in the group where initial transmit delay is reached
1471 //an additional group time (i.e., 3 pixel times) is required before the first output if there are no additional pixels beyond initial transmit delay
1472 additional_group_delay = ((initial_xmit_delay - number_of_lines_to_reach_ixd * slice_width_modified) % 3) == 0 ? 1 : 0;
1473
1474 //number of pipeline delay cycles in the ssm block (can be determined empirically or analytically by inspecting the ssm block)
1475 ssm_pipeline_delay = 2;
1476
1477 //number of pipe delay cycles in the obsm block (can be determined empirically or analytically by inspecting the obsm block)
1478 obsm_pipeline_delay = 1;
1479
1480 //a group of pixels is worth 6 pixels in N422/N420 mode or 3 pixels in all other modes
1481 if (pixelFormat == dml2_420 || pixelFormat == dml2_444 || pixelFormat == dml2_n422 || Output == dml2_hdmifrl)
1482 cycles_per_group = 6;
1483 else
1484 cycles_per_group = 3;
1485 //delay of the bit stream contruction layer in pixels is the sum of:
1486 //1. number of pixel containers in a slice line multipled by the number of lines required to reach initial transmit delay multipled by number of slices to the left of the last horizontal slice
1487 //2. number of pixel containers required to reach initial transmit delay (specifically, in the last horizontal slice)
1488 //3. additional group of delay if initial transmit delay is reached exactly in a group
1489 //4. ssm and obsm pipeline delay (i.e., clock cycles of delay)
1490 group_delay = (lines_to_reach_ixd * slice_width_groups * (numSlices - 1)) + groups_to_reach_ixd + additional_group_delay;
1491 pipeline_delay = ssm_pipeline_delay + obsm_pipeline_delay;
1492
1493 //pixel delay is group_delay (converted to pixels) + pipeline, however, first group is a special case since it is processed as soon as it arrives (i.e., in 3 cycles regardless of pixel format)
1494 pixels = (group_delay - 1) * cycles_per_group + 3 + pipeline_delay;
1495
1496 #ifdef __DML_VBA_DEBUG__
1497 dml2_printf("DML::%s: bpc: %u\n", __func__, bpc);
1498 dml2_printf("DML::%s: BPP: %f\n", __func__, BPP);
1499 dml2_printf("DML::%s: sliceWidth: %u\n", __func__, sliceWidth);
1500 dml2_printf("DML::%s: numSlices: %u\n", __func__, numSlices);
1501 dml2_printf("DML::%s: pixelFormat: %u\n", __func__, pixelFormat);
1502 dml2_printf("DML::%s: Output: %u\n", __func__, Output);
1503 dml2_printf("DML::%s: pixels: %u\n", __func__, pixels);
1504 #endif
1505 return pixels;
1506 }
1507
1508
1509 //updated in dcn4
dscComputeDelay(enum dml2_output_format_class pixelFormat,enum dml2_output_encoder_class Output)1510 static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output)
1511 {
1512 unsigned int Delay = 0;
1513 unsigned int dispclk_per_dscclk = 3;
1514
1515 // sfr
1516 Delay = Delay + 2;
1517
1518 if (pixelFormat == dml2_420 || pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) {
1519 dispclk_per_dscclk = 3 * 2;
1520 }
1521
1522 if (pixelFormat == dml2_420) {
1523 //dscc top delay for pixel compression layer
1524 Delay = Delay + 16 * dispclk_per_dscclk;
1525
1526 // dscc - input deserializer
1527 Delay = Delay + 5;
1528
1529 // dscc - input cdc fifo
1530 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1531
1532 // dscc - output cdc fifo
1533 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1534
1535 // dscc - cdc uncertainty
1536 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1537 } else if (pixelFormat == dml2_n422 || (Output == dml2_hdmifrl && pixelFormat != dml2_444)) {
1538 //dscc top delay for pixel compression layer
1539 Delay = Delay + 16 * dispclk_per_dscclk;
1540 // dsccif
1541 Delay = Delay + 1;
1542 // dscc - input deserializer
1543 Delay = Delay + 5;
1544 // dscc - input cdc fifo
1545 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1546
1547
1548 // dscc - output cdc fifo
1549 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1550 // dscc - cdc uncertainty
1551 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1552 } else if (pixelFormat == dml2_s422) {
1553 //dscc top delay for pixel compression layer
1554 Delay = Delay + 17 * dispclk_per_dscclk;
1555
1556 // dscc - input deserializer
1557 Delay = Delay + 3;
1558 // dscc - input cdc fifo
1559 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1560 // dscc - output cdc fifo
1561 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1562 // dscc - cdc uncertainty
1563 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1564 } else {
1565 //dscc top delay for pixel compression layer
1566 Delay = Delay + 16 * dispclk_per_dscclk;
1567 // dscc - input deserializer
1568 Delay = Delay + 3;
1569 // dscc - input cdc fifo
1570 Delay = Delay + 1 + 4 * dispclk_per_dscclk;
1571 // dscc - output cdc fifo
1572 Delay = Delay + 3 + 1 * dispclk_per_dscclk;
1573
1574 // dscc - cdc uncertainty
1575 Delay = Delay + 3 + 3 * dispclk_per_dscclk;
1576 }
1577
1578 // sft
1579 Delay = Delay + 1;
1580 #ifdef __DML_VBA_DEBUG__
1581 dml2_printf("DML::%s: pixelFormat = %u\n", __func__, pixelFormat);
1582 dml2_printf("DML::%s: Delay = %u\n", __func__, Delay);
1583 #endif
1584
1585 return Delay;
1586 }
1587
// Returns the number of dynamic host VM page table levels.
//
// GPUVMEnable, HostVMEnable - both must be true, otherwise the result is 0
// HostVMMinPageSize         - minimum host VM page size; the thresholds 2048
//                             and 1048576 presumably are in KBytes -- TODO
//                             confirm against callers
// HostVMMaxNonCachedPageTableLevels - level count before the page-size
//                             reduction is applied
//
// The level count is reduced by 0, 1 or 2 for page sizes below 2048, below
// 1048576, and at or above 1048576 respectively, and never goes below zero.
//
// The clamp is done in unsigned integer arithmetic; it yields the same value
// as max(0, (double)levels - reduction) for every unsigned input without a
// round trip through floating point.
static unsigned int CalculateHostVMDynamicLevels(
	bool GPUVMEnable,
	bool HostVMEnable,
	unsigned int HostVMMinPageSize,
	unsigned int HostVMMaxNonCachedPageTableLevels)
{
	unsigned int LevelReduction;

	if (!GPUVMEnable || !HostVMEnable)
		return 0;

	if (HostVMMinPageSize < 2048)
		LevelReduction = 0;
	else if (HostVMMinPageSize < 1048576)
		LevelReduction = 1;
	else
		LevelReduction = 2;

	// saturate at zero instead of wrapping around
	if (HostVMMaxNonCachedPageTableLevels <= LevelReduction)
		return 0;

	return HostVMMaxNonCachedPageTableLevels - LevelReduction;
}
1608
CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params * p)1609 static unsigned int CalculateVMAndRowBytes(struct dml2_core_shared_calculate_vm_and_row_bytes_params *p)
1610 {
1611 unsigned int extra_dpde_bytes;
1612 unsigned int extra_mpde_bytes;
1613 unsigned int MacroTileSizeBytes;
1614 unsigned int vp_height_dpte_ub;
1615
1616 unsigned int meta_surface_bytes;
1617 unsigned int vm_bytes;
1618 unsigned int vp_height_meta_ub;
1619 unsigned int PixelPTEReqWidth_linear = 0; // VBA_DELTA. VBA doesn't calculate this
1620
1621 *p->MetaRequestHeight = 8 * p->BlockHeight256Bytes;
1622 *p->MetaRequestWidth = 8 * p->BlockWidth256Bytes;
1623 if (p->SurfaceTiling == dml2_sw_linear) {
1624 *p->meta_row_height = 32;
1625 *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth));
1626 *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0); // FIXME_DCN4SW missing in old code but no dcc for linear anyways?
1627 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1628 *p->meta_row_height = *p->MetaRequestHeight;
1629 if (p->ViewportStationary && p->NumberOfDPPs == 1) {
1630 *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->MetaRequestWidth - 1, *p->MetaRequestWidth) - math_floor2(p->ViewportXStart, *p->MetaRequestWidth));
1631 } else {
1632 *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestWidth) + *p->MetaRequestWidth);
1633 }
1634 *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestHeight * p->BytePerPixel / 256.0);
1635 } else {
1636 *p->meta_row_height = *p->MetaRequestWidth;
1637 if (p->ViewportStationary && p->NumberOfDPPs == 1) {
1638 *p->meta_row_width = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->MetaRequestHeight - 1, *p->MetaRequestHeight) - math_floor2(p->ViewportYStart, *p->MetaRequestHeight));
1639 } else {
1640 *p->meta_row_width = (unsigned int)(math_ceil2(p->SwathWidth - 1, *p->MetaRequestHeight) + *p->MetaRequestHeight);
1641 }
1642 *p->meta_row_bytes = (unsigned int)(*p->meta_row_width * *p->MetaRequestWidth * p->BytePerPixel / 256.0);
1643 }
1644
1645 if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) {
1646 vp_height_meta_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + 64 * p->BlockHeight256Bytes - 1, 64 * p->BlockHeight256Bytes) - math_floor2(p->ViewportYStart, 64 * p->BlockHeight256Bytes));
1647 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1648 vp_height_meta_ub = (unsigned int)(math_ceil2(p->ViewportHeight - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes);
1649 } else {
1650 vp_height_meta_ub = (unsigned int)(math_ceil2(p->SwathWidth - 1, 64 * p->BlockHeight256Bytes) + 64 * p->BlockHeight256Bytes);
1651 }
1652
1653 meta_surface_bytes = (unsigned int)(p->DCCMetaPitch * vp_height_meta_ub * p->BytePerPixel / 256.0);
1654 #ifdef __DML_VBA_DEBUG__
1655 dml2_printf("DML::%s: DCCMetaPitch = %u\n", __func__, p->DCCMetaPitch);
1656 dml2_printf("DML::%s: meta_surface_bytes = %u\n", __func__, meta_surface_bytes);
1657 #endif
1658 if (p->GPUVMEnable == true) {
1659 double meta_vmpg_bytes = 4.0 * 1024.0;
1660 *p->meta_pte_bytes_per_frame_ub = (unsigned int)((math_ceil2((double) (meta_surface_bytes - meta_vmpg_bytes) / (8 * meta_vmpg_bytes), 1) + 1) * 64);
1661 extra_mpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 1);
1662 } else {
1663 *p->meta_pte_bytes_per_frame_ub = 0;
1664 extra_mpde_bytes = 0;
1665 }
1666
1667 if (!p->DCCEnable || !p->mrq_present) {
1668 *p->meta_pte_bytes_per_frame_ub = 0;
1669 extra_mpde_bytes = 0;
1670 *p->meta_row_bytes = 0;
1671 }
1672
1673 if (!p->GPUVMEnable) {
1674 *p->PixelPTEBytesPerRow = 0;
1675 *p->PixelPTEBytesPerRowStorage = 0;
1676 *p->dpte_row_width_ub = 0;
1677 *p->dpte_row_height = 0;
1678 *p->dpte_row_height_linear = 0;
1679 *p->PixelPTEBytesPerRow_one_row_per_frame = 0;
1680 *p->dpte_row_width_ub_one_row_per_frame = 0;
1681 *p->dpte_row_height_one_row_per_frame = 0;
1682 *p->vmpg_width = 0;
1683 *p->vmpg_height = 0;
1684 *p->PixelPTEReqWidth = 0;
1685 *p->PixelPTEReqHeight = 0;
1686 *p->PTERequestSize = 0;
1687 *p->dpde0_bytes_per_frame_ub = 0;
1688 return 0;
1689 }
1690
1691 MacroTileSizeBytes = p->MacroTileWidth * p->BytePerPixel * p->MacroTileHeight;
1692
1693 if (p->ViewportStationary && p->is_phantom && (p->NumberOfDPPs == 1 || !dml_is_vertical_rotation(p->RotationAngle))) {
1694 vp_height_dpte_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + p->MacroTileHeight - 1, p->MacroTileHeight) - math_floor2(p->ViewportYStart, p->MacroTileHeight));
1695 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1696 vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->ViewportHeight - 1, p->MacroTileHeight) + p->MacroTileHeight);
1697 } else {
1698 vp_height_dpte_ub = (unsigned int)(math_ceil2((double)p->SwathWidth - 1, p->MacroTileHeight) + p->MacroTileHeight);
1699 }
1700
1701 if (p->GPUVMEnable == true && p->GPUVMMaxPageTableLevels > 1) {
1702 *p->dpde0_bytes_per_frame_ub = (unsigned int)(64 * (math_ceil2((double)(p->Pitch * vp_height_dpte_ub * p->BytePerPixel - MacroTileSizeBytes) / (double)(8 * 2097152), 1) + 1));
1703 extra_dpde_bytes = 128 * (p->GPUVMMaxPageTableLevels - 2);
1704 } else {
1705 *p->dpde0_bytes_per_frame_ub = 0;
1706 extra_dpde_bytes = 0;
1707 }
1708
1709 vm_bytes = *p->meta_pte_bytes_per_frame_ub + extra_mpde_bytes + *p->dpde0_bytes_per_frame_ub + extra_dpde_bytes;
1710
1711 #ifdef __DML_VBA_DEBUG__
1712 dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->DCCEnable);
1713 dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
1714 dml2_printf("DML::%s: SwModeLinear = %u\n", __func__, p->SurfaceTiling == dml2_sw_linear);
1715 dml2_printf("DML::%s: BytePerPixel = %u\n", __func__, p->BytePerPixel);
1716 dml2_printf("DML::%s: GPUVMMaxPageTableLevels = %u\n", __func__, p->GPUVMMaxPageTableLevels);
1717 dml2_printf("DML::%s: BlockHeight256Bytes = %u\n", __func__, p->BlockHeight256Bytes);
1718 dml2_printf("DML::%s: BlockWidth256Bytes = %u\n", __func__, p->BlockWidth256Bytes);
1719 dml2_printf("DML::%s: MacroTileHeight = %u\n", __func__, p->MacroTileHeight);
1720 dml2_printf("DML::%s: MacroTileWidth = %u\n", __func__, p->MacroTileWidth);
1721 dml2_printf("DML::%s: meta_pte_bytes_per_frame_ub = %u\n", __func__, *p->meta_pte_bytes_per_frame_ub);
1722 dml2_printf("DML::%s: dpde0_bytes_per_frame_ub = %u\n", __func__, *p->dpde0_bytes_per_frame_ub);
1723 dml2_printf("DML::%s: extra_mpde_bytes = %u\n", __func__, extra_mpde_bytes);
1724 dml2_printf("DML::%s: extra_dpde_bytes = %u\n", __func__, extra_dpde_bytes);
1725 dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
1726 dml2_printf("DML::%s: ViewportHeight = %u\n", __func__, p->ViewportHeight);
1727 dml2_printf("DML::%s: SwathWidth = %u\n", __func__, p->SwathWidth);
1728 dml2_printf("DML::%s: vp_height_dpte_ub = %u\n", __func__, vp_height_dpte_ub);
1729 #endif
1730
1731 if (p->SurfaceTiling == dml2_sw_linear) {
1732 *p->PixelPTEReqHeight = 1;
1733 *p->PixelPTEReqWidth = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
1734 PixelPTEReqWidth_linear = p->GPUVMMinPageSizeKBytes * 1024 * 8 / p->BytePerPixel;
1735 *p->PTERequestSize = 64;
1736
1737 *p->vmpg_height = 1;
1738 *p->vmpg_width = p->GPUVMMinPageSizeKBytes * 1024 / p->BytePerPixel;
1739 } else if (p->GPUVMMinPageSizeKBytes * 1024 >= dml_get_tile_block_size_bytes(p->SurfaceTiling)) { // 1 64B 8x1 PTE
1740 *p->PixelPTEReqHeight = p->MacroTileHeight;
1741 *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1742 *p->PTERequestSize = 64;
1743
1744 *p->vmpg_height = p->MacroTileHeight;
1745 *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1746
1747 } else if (p->GPUVMMinPageSizeKBytes == 4 && dml_get_tile_block_size_bytes(p->SurfaceTiling) == 65536) { // 2 64B PTE requests to get 16 PTEs to cover the 64K tile
1748 // one 64KB tile, is 16x16x256B req
1749 *p->PixelPTEReqHeight = 16 * p->BlockHeight256Bytes;
1750 *p->PixelPTEReqWidth = 16 * p->BlockWidth256Bytes;
1751 *p->PTERequestSize = 128;
1752
1753 *p->vmpg_height = *p->PixelPTEReqHeight;
1754 *p->vmpg_width = *p->PixelPTEReqWidth;
1755 } else {
1756 // default for rest of calculation to go through, when vm is disable, the calulated pte related values shouldnt be used anyways
1757 *p->PixelPTEReqHeight = p->MacroTileHeight;
1758 *p->PixelPTEReqWidth = 8 * 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1759 *p->PTERequestSize = 64;
1760
1761 *p->vmpg_height = p->MacroTileHeight;
1762 *p->vmpg_width = 1024 * p->GPUVMMinPageSizeKBytes / (p->MacroTileHeight * p->BytePerPixel);
1763
1764 if (p->GPUVMEnable == true) {
1765 dml2_printf("DML::%s: GPUVMMinPageSizeKBytes=%u and sw_mode=%u (tile_size=%d) not supported!\n",
1766 __func__, p->GPUVMMinPageSizeKBytes, p->SurfaceTiling, dml_get_tile_block_size_bytes(p->SurfaceTiling));
1767 DML2_ASSERT(0);
1768 }
1769 }
1770
1771 #ifdef __DML_VBA_DEBUG__
1772 dml2_printf("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
1773 dml2_printf("DML::%s: PixelPTEReqHeight = %u\n", __func__, *p->PixelPTEReqHeight);
1774 dml2_printf("DML::%s: PixelPTEReqWidth = %u\n", __func__, *p->PixelPTEReqWidth);
1775 dml2_printf("DML::%s: PixelPTEReqWidth_linear = %u\n", __func__, PixelPTEReqWidth_linear);
1776 dml2_printf("DML::%s: PTERequestSize = %u\n", __func__, *p->PTERequestSize);
1777 dml2_printf("DML::%s: Pitch = %u\n", __func__, p->Pitch);
1778 dml2_printf("DML::%s: vmpg_width = %u\n", __func__, *p->vmpg_width);
1779 dml2_printf("DML::%s: vmpg_height = %u\n", __func__, *p->vmpg_height);
1780 #endif
1781
1782 *p->dpte_row_height_one_row_per_frame = vp_height_dpte_ub;
1783 *p->dpte_row_width_ub_one_row_per_frame = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height_one_row_per_frame / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * (double)*p->PixelPTEReqWidth);
1784 *p->PixelPTEBytesPerRow_one_row_per_frame = (unsigned int)((double)*p->dpte_row_width_ub_one_row_per_frame / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
1785 *p->dpte_row_height_linear = 0;
1786
1787 if (p->SurfaceTiling == dml2_sw_linear) {
1788 *p->dpte_row_height = (unsigned int)(math_min2(128, (double)(1ULL << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * *p->PixelPTEReqWidth / p->Pitch), 2.0), 1))));
1789 *p->dpte_row_width_ub = (unsigned int)(math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height - 1), (double)*p->PixelPTEReqWidth) + *p->PixelPTEReqWidth);
1790 *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqWidth * *p->PTERequestSize);
1791
1792 // VBA_DELTA, VBA doesn't have programming value for pte row height linear.
1793 *p->dpte_row_height_linear = (unsigned int)1 << (unsigned int)math_floor2(math_log((float)(p->PTEBufferSizeInRequests * PixelPTEReqWidth_linear / p->Pitch), 2.0), 1);
1794 if (*p->dpte_row_height_linear > 128)
1795 *p->dpte_row_height_linear = 128;
1796
1797 #ifdef __DML_VBA_DEBUG__
1798 dml2_printf("DML::%s: dpte_row_width_ub = %u (linear)\n", __func__, *p->dpte_row_width_ub);
1799 #endif
1800
1801 } else if (!dml_is_vertical_rotation(p->RotationAngle)) {
1802 *p->dpte_row_height = *p->PixelPTEReqHeight;
1803
1804 if (p->GPUVMMinPageSizeKBytes > 64) {
1805 *p->dpte_row_width_ub = (unsigned int)((math_ceil2(((double)p->Pitch * (double)*p->dpte_row_height / (double)*p->PixelPTEReqHeight - 1) / (double)*p->PixelPTEReqWidth, 1) + 1) * *p->PixelPTEReqWidth);
1806 } else if (p->ViewportStationary && (p->NumberOfDPPs == 1)) {
1807 *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportXStart + p->SwathWidth + *p->PixelPTEReqWidth - 1, *p->PixelPTEReqWidth) - math_floor2(p->ViewportXStart, *p->PixelPTEReqWidth));
1808 } else {
1809 *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqWidth, 1) + 1.0) * *p->PixelPTEReqWidth);
1810 }
1811 #ifdef __DML_VBA_DEBUG__
1812 dml2_printf("DML::%s: dpte_row_width_ub = %u (tiled horz)\n", __func__, *p->dpte_row_width_ub);
1813 #endif
1814
1815 *p->PixelPTEBytesPerRow = *p->dpte_row_width_ub / *p->PixelPTEReqWidth * *p->PTERequestSize;
1816 } else {
1817 *p->dpte_row_height = (unsigned int)(math_min2(*p->PixelPTEReqWidth, p->MacroTileWidth));
1818
1819 if (p->ViewportStationary && (p->NumberOfDPPs == 1)) {
1820 *p->dpte_row_width_ub = (unsigned int)(math_floor2(p->ViewportYStart + p->ViewportHeight + *p->PixelPTEReqHeight - 1, *p->PixelPTEReqHeight) - math_floor2(p->ViewportYStart, *p->PixelPTEReqHeight));
1821 } else {
1822 *p->dpte_row_width_ub = (unsigned int)((math_ceil2((double)(p->SwathWidth - 1) / (double)*p->PixelPTEReqHeight, 1) + 1) * *p->PixelPTEReqHeight);
1823 }
1824
1825 *p->PixelPTEBytesPerRow = (unsigned int)((double)*p->dpte_row_width_ub / (double)*p->PixelPTEReqHeight * *p->PTERequestSize);
1826 #ifdef __DML_VBA_DEBUG__
1827 dml2_printf("DML::%s: dpte_row_width_ub = %u (tiled vert)\n", __func__, *p->dpte_row_width_ub);
1828 #endif
1829 }
1830
1831 if (p->GPUVMEnable != true) {
1832 *p->PixelPTEBytesPerRow = 0;
1833 *p->PixelPTEBytesPerRow_one_row_per_frame = 0;
1834 }
1835
1836 *p->PixelPTEBytesPerRowStorage = *p->PixelPTEBytesPerRow;
1837
1838 #ifdef __DML_VBA_DEBUG__
1839 dml2_printf("DML::%s: GPUVMMinPageSizeKBytes = %u\n", __func__, p->GPUVMMinPageSizeKBytes);
1840 dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
1841 dml2_printf("DML::%s: meta_row_height = %u\n", __func__, *p->meta_row_height);
1842 dml2_printf("DML::%s: dpte_row_height = %u\n", __func__, *p->dpte_row_height);
1843 dml2_printf("DML::%s: dpte_row_height_linear = %u\n", __func__, *p->dpte_row_height_linear);
1844 dml2_printf("DML::%s: dpte_row_width_ub = %u\n", __func__, *p->dpte_row_width_ub);
1845 dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, *p->PixelPTEBytesPerRow);
1846 dml2_printf("DML::%s: PixelPTEBytesPerRowStorage = %u\n", __func__, *p->PixelPTEBytesPerRowStorage);
1847 dml2_printf("DML::%s: PTEBufferSizeInRequests = %u\n", __func__, p->PTEBufferSizeInRequests);
1848 dml2_printf("DML::%s: dpte_row_height_one_row_per_frame = %u\n", __func__, *p->dpte_row_height_one_row_per_frame);
1849 dml2_printf("DML::%s: dpte_row_width_ub_one_row_per_frame = %u\n", __func__, *p->dpte_row_width_ub_one_row_per_frame);
1850 dml2_printf("DML::%s: PixelPTEBytesPerRow_one_row_per_frame = %u\n", __func__, *p->PixelPTEBytesPerRow_one_row_per_frame);
1851 #endif
1852
1853 return vm_bytes;
1854 } // CalculateVMAndRowBytes
1855
CalculatePrefetchSourceLines(double VRatio,unsigned int VTaps,bool Interlace,bool ProgressiveToInterlaceUnitInOPP,unsigned int SwathHeight,enum dml2_rotation_angle RotationAngle,bool mirrored,bool ViewportStationary,unsigned int SwathWidth,unsigned int ViewportHeight,unsigned int ViewportXStart,unsigned int ViewportYStart,unsigned int * VInitPreFill,unsigned int * MaxNumSwath)1856 static unsigned int CalculatePrefetchSourceLines(
1857 double VRatio,
1858 unsigned int VTaps,
1859 bool Interlace,
1860 bool ProgressiveToInterlaceUnitInOPP,
1861 unsigned int SwathHeight,
1862 enum dml2_rotation_angle RotationAngle,
1863 bool mirrored,
1864 bool ViewportStationary,
1865 unsigned int SwathWidth,
1866 unsigned int ViewportHeight,
1867 unsigned int ViewportXStart,
1868 unsigned int ViewportYStart,
1869
1870 // Output
1871 unsigned int *VInitPreFill,
1872 unsigned int *MaxNumSwath)
1873 {
1874
1875 unsigned int vp_start_rot = 0;
1876 unsigned int sw0_tmp = 0;
1877 unsigned int MaxPartialSwath = 0;
1878 double numLines = 0;
1879
1880 #ifdef __DML_VBA_DEBUG__
1881 dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio);
1882 dml2_printf("DML::%s: VTaps = %u\n", __func__, VTaps);
1883 dml2_printf("DML::%s: ViewportXStart = %u\n", __func__, ViewportXStart);
1884 dml2_printf("DML::%s: ViewportYStart = %u\n", __func__, ViewportYStart);
1885 dml2_printf("DML::%s: ViewportStationary = %u\n", __func__, ViewportStationary);
1886 dml2_printf("DML::%s: SwathHeight = %u\n", __func__, SwathHeight);
1887 #endif
1888 if (ProgressiveToInterlaceUnitInOPP)
1889 *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1) / 2.0, 1));
1890 else
1891 *VInitPreFill = (unsigned int)(math_floor2((VRatio + (double)VTaps + 1 + (Interlace ? 1 : 0) * 0.5 * VRatio) / 2.0, 1));
1892
1893 if (ViewportStationary) {
1894 if (RotationAngle == dml2_rotation_180) {
1895 vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + ViewportHeight - 1) % SwathHeight) + 1);
1896 } else if ((RotationAngle == dml2_rotation_270 && !mirrored) || (RotationAngle == dml2_rotation_90 && mirrored)) {
1897 vp_start_rot = ViewportXStart;
1898 } else if ((RotationAngle == dml2_rotation_90 && !mirrored) || (RotationAngle == dml2_rotation_270 && mirrored)) {
1899 vp_start_rot = SwathHeight - (((unsigned int)(ViewportYStart + SwathWidth - 1) % SwathHeight) + 1);
1900 } else {
1901 vp_start_rot = ViewportYStart;
1902 }
1903 sw0_tmp = SwathHeight - (vp_start_rot % SwathHeight);
1904 if (sw0_tmp < *VInitPreFill) {
1905 *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - sw0_tmp) / (double)SwathHeight, 1) + 1);
1906 } else {
1907 *MaxNumSwath = 1;
1908 }
1909 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(vp_start_rot + *VInitPreFill - 1) % SwathHeight));
1910 } else {
1911 *MaxNumSwath = (unsigned int)(math_ceil2((*VInitPreFill - 1.0) / (double)SwathHeight, 1) + 1);
1912 if (*VInitPreFill > 1) {
1913 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill - 2) % SwathHeight));
1914 } else {
1915 MaxPartialSwath = (unsigned int)(math_max2(1, (unsigned int)(*VInitPreFill + SwathHeight - 2) % SwathHeight));
1916 }
1917 }
1918 numLines = *MaxNumSwath * SwathHeight + MaxPartialSwath;
1919
1920 #ifdef __DML_VBA_DEBUG__
1921 dml2_printf("DML::%s: vp_start_rot = %u\n", __func__, vp_start_rot);
1922 dml2_printf("DML::%s: VInitPreFill = %u\n", __func__, *VInitPreFill);
1923 dml2_printf("DML::%s: MaxPartialSwath = %u\n", __func__, MaxPartialSwath);
1924 dml2_printf("DML::%s: MaxNumSwath = %u\n", __func__, *MaxNumSwath);
1925 dml2_printf("DML::%s: Prefetch source lines = %3.2f\n", __func__, numLines);
1926 #endif
1927 return (unsigned int)(numLines);
1928
1929 }
1930
CalculateRowBandwidth(bool GPUVMEnable,bool use_one_row_for_frame,enum dml2_source_format_class SourcePixelFormat,double VRatio,double VRatioChroma,bool DCCEnable,double LineTime,unsigned int PixelPTEBytesPerRowLuma,unsigned int PixelPTEBytesPerRowChroma,unsigned int dpte_row_height_luma,unsigned int dpte_row_height_chroma,bool mrq_present,unsigned int meta_row_bytes_per_row_ub_l,unsigned int meta_row_bytes_per_row_ub_c,unsigned int meta_row_height_luma,unsigned int meta_row_height_chroma,double * dpte_row_bw,double * meta_row_bw)1931 static void CalculateRowBandwidth(
1932 bool GPUVMEnable,
1933 bool use_one_row_for_frame,
1934 enum dml2_source_format_class SourcePixelFormat,
1935 double VRatio,
1936 double VRatioChroma,
1937 bool DCCEnable,
1938 double LineTime,
1939 unsigned int PixelPTEBytesPerRowLuma,
1940 unsigned int PixelPTEBytesPerRowChroma,
1941 unsigned int dpte_row_height_luma,
1942 unsigned int dpte_row_height_chroma,
1943
1944 bool mrq_present,
1945 unsigned int meta_row_bytes_per_row_ub_l,
1946 unsigned int meta_row_bytes_per_row_ub_c,
1947 unsigned int meta_row_height_luma,
1948 unsigned int meta_row_height_chroma,
1949
1950 // Output
1951 double *dpte_row_bw,
1952 double *meta_row_bw)
1953 {
1954 if (!DCCEnable || !mrq_present) {
1955 *meta_row_bw = 0;
1956 } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) {
1957 *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime)
1958 + VRatioChroma * meta_row_bytes_per_row_ub_c / (meta_row_height_chroma * LineTime);
1959 } else {
1960 *meta_row_bw = VRatio * meta_row_bytes_per_row_ub_l / (meta_row_height_luma * LineTime);
1961 }
1962
1963 if (GPUVMEnable != true) {
1964 *dpte_row_bw = 0;
1965 } else if (dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha) {
1966 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime)
1967 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime);
1968 } else {
1969 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime);
1970 }
1971 }
1972
CalculateMALLUseForStaticScreen(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCN,unsigned int SurfaceSizeInMALL[],bool one_row_per_frame_fits_in_buffer[],bool is_using_mall_for_ss[])1973 static void CalculateMALLUseForStaticScreen(
1974 const struct dml2_display_cfg *display_cfg,
1975 unsigned int NumberOfActiveSurfaces,
1976 unsigned int MALLAllocatedForDCN,
1977 unsigned int SurfaceSizeInMALL[],
1978 bool one_row_per_frame_fits_in_buffer[],
1979
1980 // Output
1981 bool is_using_mall_for_ss[])
1982 {
1983
1984 unsigned int SurfaceToAddToMALL;
1985 bool CanAddAnotherSurfaceToMALL;
1986 unsigned int TotalSurfaceSizeInMALL;
1987
1988 TotalSurfaceSizeInMALL = 0;
1989 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
1990 is_using_mall_for_ss[k] = (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable);
1991 if (is_using_mall_for_ss[k])
1992 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k];
1993 #ifdef __DML_VBA_DEBUG__
1994 dml2_printf("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, is_using_mall_for_ss[k]);
1995 dml2_printf("DML::%s: k=%u, TotalSurfaceSizeInMALL = %u\n", __func__, k, TotalSurfaceSizeInMALL);
1996 #endif
1997 }
1998
1999 SurfaceToAddToMALL = 0;
2000 CanAddAnotherSurfaceToMALL = true;
2001 while (CanAddAnotherSurfaceToMALL) {
2002 CanAddAnotherSurfaceToMALL = false;
2003 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
2004 if (TotalSurfaceSizeInMALL + SurfaceSizeInMALL[k] <= MALLAllocatedForDCN * 1024 * 1024 &&
2005 !is_using_mall_for_ss[k] && display_cfg->plane_descriptors[k].overrides.refresh_from_mall != dml2_refresh_from_mall_mode_override_force_disable && one_row_per_frame_fits_in_buffer[k] &&
2006 (!CanAddAnotherSurfaceToMALL || SurfaceSizeInMALL[k] < SurfaceSizeInMALL[SurfaceToAddToMALL])) {
2007 CanAddAnotherSurfaceToMALL = true;
2008 SurfaceToAddToMALL = k;
2009 dml2_printf("DML::%s: k=%u, UseMALLForStaticScreen = %u (dis, en, optimize)\n", __func__, k, display_cfg->plane_descriptors[k].overrides.refresh_from_mall);
2010 }
2011 }
2012 if (CanAddAnotherSurfaceToMALL) {
2013 is_using_mall_for_ss[SurfaceToAddToMALL] = true;
2014 TotalSurfaceSizeInMALL = TotalSurfaceSizeInMALL + SurfaceSizeInMALL[SurfaceToAddToMALL];
2015
2016 #ifdef __DML_VBA_DEBUG__
2017 dml2_printf("DML::%s: SurfaceToAddToMALL = %u\n", __func__, SurfaceToAddToMALL);
2018 dml2_printf("DML::%s: TotalSurfaceSizeInMALL = %u\n", __func__, TotalSurfaceSizeInMALL);
2019 #endif
2020 }
2021 }
2022 }
2023
CalculateDCCConfiguration(bool DCCEnabled,bool DCCProgrammingAssumesScanDirectionUnknown,enum dml2_source_format_class SourcePixelFormat,unsigned int SurfaceWidthLuma,unsigned int SurfaceWidthChroma,unsigned int SurfaceHeightLuma,unsigned int SurfaceHeightChroma,unsigned int nomDETInKByte,unsigned int RequestHeight256ByteLuma,unsigned int RequestHeight256ByteChroma,enum dml2_swizzle_mode TilingFormat,unsigned int BytePerPixelY,unsigned int BytePerPixelC,double BytePerPixelDETY,double BytePerPixelDETC,enum dml2_rotation_angle RotationAngle,enum dml2_core_internal_request_type * RequestLuma,enum dml2_core_internal_request_type * RequestChroma,unsigned int * MaxUncompressedBlockLuma,unsigned int * MaxUncompressedBlockChroma,unsigned int * MaxCompressedBlockLuma,unsigned int * MaxCompressedBlockChroma,unsigned int * IndependentBlockLuma,unsigned int * IndependentBlockChroma)2024 static void CalculateDCCConfiguration(
2025 bool DCCEnabled,
2026 bool DCCProgrammingAssumesScanDirectionUnknown,
2027 enum dml2_source_format_class SourcePixelFormat,
2028 unsigned int SurfaceWidthLuma,
2029 unsigned int SurfaceWidthChroma,
2030 unsigned int SurfaceHeightLuma,
2031 unsigned int SurfaceHeightChroma,
2032 unsigned int nomDETInKByte,
2033 unsigned int RequestHeight256ByteLuma,
2034 unsigned int RequestHeight256ByteChroma,
2035 enum dml2_swizzle_mode TilingFormat,
2036 unsigned int BytePerPixelY,
2037 unsigned int BytePerPixelC,
2038 double BytePerPixelDETY,
2039 double BytePerPixelDETC,
2040 enum dml2_rotation_angle RotationAngle,
2041
2042 // Output
2043 enum dml2_core_internal_request_type *RequestLuma,
2044 enum dml2_core_internal_request_type *RequestChroma,
2045 unsigned int *MaxUncompressedBlockLuma,
2046 unsigned int *MaxUncompressedBlockChroma,
2047 unsigned int *MaxCompressedBlockLuma,
2048 unsigned int *MaxCompressedBlockChroma,
2049 unsigned int *IndependentBlockLuma,
2050 unsigned int *IndependentBlockChroma)
2051 {
2052 unsigned int DETBufferSizeForDCC = nomDETInKByte * 1024;
2053
2054 unsigned int segment_order_horz_contiguous_luma;
2055 unsigned int segment_order_horz_contiguous_chroma;
2056 unsigned int segment_order_vert_contiguous_luma;
2057 unsigned int segment_order_vert_contiguous_chroma;
2058
2059 unsigned int req128_horz_wc_l;
2060 unsigned int req128_horz_wc_c;
2061 unsigned int req128_vert_wc_l;
2062 unsigned int req128_vert_wc_c;
2063
2064 unsigned int yuv420;
2065 unsigned int horz_div_l;
2066 unsigned int horz_div_c;
2067 unsigned int vert_div_l;
2068 unsigned int vert_div_c;
2069
2070 unsigned int swath_buf_size;
2071 double detile_buf_vp_horz_limit;
2072 double detile_buf_vp_vert_limit;
2073
2074 unsigned int MAS_vp_horz_limit;
2075 unsigned int MAS_vp_vert_limit;
2076 unsigned int max_vp_horz_width;
2077 unsigned int max_vp_vert_height;
2078 unsigned int eff_surf_width_l;
2079 unsigned int eff_surf_width_c;
2080 unsigned int eff_surf_height_l;
2081 unsigned int eff_surf_height_c;
2082
2083 unsigned int full_swath_bytes_horz_wc_l;
2084 unsigned int full_swath_bytes_horz_wc_c;
2085 unsigned int full_swath_bytes_vert_wc_l;
2086 unsigned int full_swath_bytes_vert_wc_c;
2087
2088 if (dml_is_420(SourcePixelFormat))
2089 yuv420 = 1;
2090 else
2091 yuv420 = 0;
2092
2093 horz_div_l = 1;
2094 horz_div_c = 1;
2095 vert_div_l = 1;
2096 vert_div_c = 1;
2097
2098 if (BytePerPixelY == 1)
2099 vert_div_l = 0;
2100 if (BytePerPixelC == 1)
2101 vert_div_c = 0;
2102
2103 if (BytePerPixelC == 0) {
2104 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 256;
2105 detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
2106 detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
2107 } else {
2108 swath_buf_size = DETBufferSizeForDCC / 2 - 2 * 2 * 256;
2109 detile_buf_vp_horz_limit = (double)swath_buf_size / ((double)RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) + (double)RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
2110 detile_buf_vp_vert_limit = (double)swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
2111 }
2112
2113 if (SourcePixelFormat == dml2_420_10) {
2114 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
2115 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
2116 }
2117
2118 detile_buf_vp_horz_limit = math_floor2(detile_buf_vp_horz_limit - 1, 16);
2119 detile_buf_vp_vert_limit = math_floor2(detile_buf_vp_vert_limit - 1, 16);
2120
2121 MAS_vp_horz_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : 6144;
2122 MAS_vp_vert_limit = SourcePixelFormat == dml2_rgbe_alpha ? 3840 : (BytePerPixelY == 8 ? 3072 : 6144);
2123 max_vp_horz_width = (unsigned int)(math_min2((double)MAS_vp_horz_limit, detile_buf_vp_horz_limit));
2124 max_vp_vert_height = (unsigned int)(math_min2((double)MAS_vp_vert_limit, detile_buf_vp_vert_limit));
2125 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
2126 eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
2127 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
2128 eff_surf_height_c = eff_surf_height_l / (1 + yuv420);
2129
2130 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
2131 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
2132 if (BytePerPixelC > 0) {
2133 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
2134 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
2135 } else {
2136 full_swath_bytes_horz_wc_c = 0;
2137 full_swath_bytes_vert_wc_c = 0;
2138 }
2139
2140 if (SourcePixelFormat == dml2_420_10) {
2141 full_swath_bytes_horz_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_l * 2.0 / 3.0, 256.0));
2142 full_swath_bytes_horz_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_horz_wc_c * 2.0 / 3.0, 256.0));
2143 full_swath_bytes_vert_wc_l = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_l * 2.0 / 3.0, 256.0));
2144 full_swath_bytes_vert_wc_c = (unsigned int)(math_ceil2((double)full_swath_bytes_vert_wc_c * 2.0 / 3.0, 256.0));
2145 }
2146
2147 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2148 req128_horz_wc_l = 0;
2149 req128_horz_wc_c = 0;
2150 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2151 req128_horz_wc_l = 0;
2152 req128_horz_wc_c = 1;
2153 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSizeForDCC) {
2154 req128_horz_wc_l = 1;
2155 req128_horz_wc_c = 0;
2156 } else {
2157 req128_horz_wc_l = 1;
2158 req128_horz_wc_c = 1;
2159 }
2160
2161 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2162 req128_vert_wc_l = 0;
2163 req128_vert_wc_c = 0;
2164 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2165 req128_vert_wc_l = 0;
2166 req128_vert_wc_c = 1;
2167 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSizeForDCC) {
2168 req128_vert_wc_l = 1;
2169 req128_vert_wc_c = 0;
2170 } else {
2171 req128_vert_wc_l = 1;
2172 req128_vert_wc_c = 1;
2173 }
2174
2175 if (BytePerPixelY == 2) {
2176 segment_order_horz_contiguous_luma = 0;
2177 segment_order_vert_contiguous_luma = 1;
2178 } else {
2179 segment_order_horz_contiguous_luma = 1;
2180 segment_order_vert_contiguous_luma = 0;
2181 }
2182
2183 if (BytePerPixelC == 2) {
2184 segment_order_horz_contiguous_chroma = 0;
2185 segment_order_vert_contiguous_chroma = 1;
2186 } else {
2187 segment_order_horz_contiguous_chroma = 1;
2188 segment_order_vert_contiguous_chroma = 0;
2189 }
2190 #ifdef __DML_VBA_DEBUG__
2191 dml2_printf("DML::%s: DCCEnabled = %u\n", __func__, DCCEnabled);
2192 dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, nomDETInKByte);
2193 dml2_printf("DML::%s: DETBufferSizeForDCC = %u\n", __func__, DETBufferSizeForDCC);
2194 dml2_printf("DML::%s: req128_horz_wc_l = %u\n", __func__, req128_horz_wc_l);
2195 dml2_printf("DML::%s: req128_horz_wc_c = %u\n", __func__, req128_horz_wc_c);
2196 dml2_printf("DML::%s: full_swath_bytes_horz_wc_l = %u\n", __func__, full_swath_bytes_horz_wc_l);
2197 dml2_printf("DML::%s: full_swath_bytes_vert_wc_c = %u\n", __func__, full_swath_bytes_vert_wc_c);
2198 dml2_printf("DML::%s: segment_order_horz_contiguous_luma = %u\n", __func__, segment_order_horz_contiguous_luma);
2199 dml2_printf("DML::%s: segment_order_horz_contiguous_chroma = %u\n", __func__, segment_order_horz_contiguous_chroma);
2200 #endif
2201 if (DCCProgrammingAssumesScanDirectionUnknown == true) {
2202 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
2203 *RequestLuma = dml2_core_internal_request_type_256_bytes;
2204 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
2205 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2206 } else {
2207 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
2208 }
2209 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
2210 *RequestChroma = dml2_core_internal_request_type_256_bytes;
2211 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
2212 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2213 } else {
2214 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
2215 }
2216 } else if (!dml_is_vertical_rotation(RotationAngle)) {
2217 if (req128_horz_wc_l == 0) {
2218 *RequestLuma = dml2_core_internal_request_type_256_bytes;
2219 } else if (segment_order_horz_contiguous_luma == 0) {
2220 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2221 } else {
2222 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
2223 }
2224 if (req128_horz_wc_c == 0) {
2225 *RequestChroma = dml2_core_internal_request_type_256_bytes;
2226 } else if (segment_order_horz_contiguous_chroma == 0) {
2227 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2228 } else {
2229 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
2230 }
2231 } else {
2232 if (req128_vert_wc_l == 0) {
2233 *RequestLuma = dml2_core_internal_request_type_256_bytes;
2234 } else if (segment_order_vert_contiguous_luma == 0) {
2235 *RequestLuma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2236 } else {
2237 *RequestLuma = dml2_core_internal_request_type_128_bytes_contiguous;
2238 }
2239 if (req128_vert_wc_c == 0) {
2240 *RequestChroma = dml2_core_internal_request_type_256_bytes;
2241 } else if (segment_order_vert_contiguous_chroma == 0) {
2242 *RequestChroma = dml2_core_internal_request_type_128_bytes_non_contiguous;
2243 } else {
2244 *RequestChroma = dml2_core_internal_request_type_128_bytes_contiguous;
2245 }
2246 }
2247
2248 if (*RequestLuma == dml2_core_internal_request_type_256_bytes) {
2249 *MaxUncompressedBlockLuma = 256;
2250 *MaxCompressedBlockLuma = 256;
2251 *IndependentBlockLuma = 0;
2252 } else if (*RequestLuma == dml2_core_internal_request_type_128_bytes_contiguous) {
2253 *MaxUncompressedBlockLuma = 256;
2254 *MaxCompressedBlockLuma = 128;
2255 *IndependentBlockLuma = 128;
2256 } else {
2257 *MaxUncompressedBlockLuma = 256;
2258 *MaxCompressedBlockLuma = 64;
2259 *IndependentBlockLuma = 64;
2260 }
2261
2262 if (*RequestChroma == dml2_core_internal_request_type_256_bytes) {
2263 *MaxUncompressedBlockChroma = 256;
2264 *MaxCompressedBlockChroma = 256;
2265 *IndependentBlockChroma = 0;
2266 } else if (*RequestChroma == dml2_core_internal_request_type_128_bytes_contiguous) {
2267 *MaxUncompressedBlockChroma = 256;
2268 *MaxCompressedBlockChroma = 128;
2269 *IndependentBlockChroma = 128;
2270 } else {
2271 *MaxUncompressedBlockChroma = 256;
2272 *MaxCompressedBlockChroma = 64;
2273 *IndependentBlockChroma = 64;
2274 }
2275
2276 if (DCCEnabled != true || BytePerPixelC == 0) {
2277 *MaxUncompressedBlockChroma = 0;
2278 *MaxCompressedBlockChroma = 0;
2279 *IndependentBlockChroma = 0;
2280 }
2281
2282 if (DCCEnabled != true) {
2283 *MaxUncompressedBlockLuma = 0;
2284 *MaxCompressedBlockLuma = 0;
2285 *IndependentBlockLuma = 0;
2286 }
2287
2288 #ifdef __DML_VBA_DEBUG__
2289 dml2_printf("DML::%s: MaxUncompressedBlockLuma = %u\n", __func__, *MaxUncompressedBlockLuma);
2290 dml2_printf("DML::%s: MaxCompressedBlockLuma = %u\n", __func__, *MaxCompressedBlockLuma);
2291 dml2_printf("DML::%s: IndependentBlockLuma = %u\n", __func__, *IndependentBlockLuma);
2292 dml2_printf("DML::%s: MaxUncompressedBlockChroma = %u\n", __func__, *MaxUncompressedBlockChroma);
2293 dml2_printf("DML::%s: MaxCompressedBlockChroma = %u\n", __func__, *MaxCompressedBlockChroma);
2294 dml2_printf("DML::%s: IndependentBlockChroma = %u\n", __func__, *IndependentBlockChroma);
2295 #endif
2296
2297 }
2298
calculate_mcache_row_bytes(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_calculate_mcache_row_bytes_params * p)2299 static void calculate_mcache_row_bytes(
2300 struct dml2_core_internal_scratch *scratch,
2301 struct dml2_core_calcs_calculate_mcache_row_bytes_params *p)
2302 {
2303 unsigned int vmpg_bytes = 0;
2304 unsigned int blk_bytes = 0;
2305 float meta_per_mvmpg_per_channel = 0;
2306 unsigned int est_blk_per_vmpg = 2;
2307 unsigned int mvmpg_per_row_ub = 0;
2308 unsigned int full_vp_width_mvmpg_aligned = 0;
2309 unsigned int full_vp_height_mvmpg_aligned = 0;
2310 unsigned int meta_per_mvmpg_per_channel_ub = 0;
2311 unsigned int mvmpg_per_mcache;
2312
2313 #ifdef __DML_VBA_DEBUG__
2314 dml2_printf("DML::%s: num_chans = %u\n", __func__, p->num_chans);
2315 dml2_printf("DML::%s: mem_word_bytes = %u\n", __func__, p->mem_word_bytes);
2316 dml2_printf("DML::%s: mcache_line_size_bytes = %u\n", __func__, p->mcache_line_size_bytes);
2317 dml2_printf("DML::%s: mcache_size_bytes = %u\n", __func__, p->mcache_size_bytes);
2318 dml2_printf("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable);
2319 dml2_printf("DML::%s: gpuvm_page_size_kbytes = %u\n", __func__, p->gpuvm_page_size_kbytes);
2320 dml2_printf("DML::%s: vp_stationary = %u\n", __func__, p->vp_stationary);
2321 dml2_printf("DML::%s: tiling_mode = %u\n", __func__, p->tiling_mode);
2322 dml2_printf("DML::%s: vp_start_x = %u\n", __func__, p->vp_start_x);
2323 dml2_printf("DML::%s: vp_start_y = %u\n", __func__, p->vp_start_y);
2324 dml2_printf("DML::%s: full_vp_width = %u\n", __func__, p->full_vp_width);
2325 dml2_printf("DML::%s: full_vp_height = %u\n", __func__, p->full_vp_height);
2326 dml2_printf("DML::%s: blk_width = %u\n", __func__, p->blk_width);
2327 dml2_printf("DML::%s: blk_height = %u\n", __func__, p->blk_height);
2328 dml2_printf("DML::%s: vmpg_width = %u\n", __func__, p->vmpg_width);
2329 dml2_printf("DML::%s: vmpg_height = %u\n", __func__, p->vmpg_height);
2330 dml2_printf("DML::%s: full_swath_bytes = %u\n", __func__, p->full_swath_bytes);
2331 #endif
2332 DML2_ASSERT(p->mcache_line_size_bytes != 0);
2333 DML2_ASSERT(p->mcache_size_bytes != 0);
2334
2335 *p->mvmpg_width = 0;
2336 *p->mvmpg_height = 0;
2337
2338 if (p->full_vp_height == 0 && p->full_vp_width == 0) {
2339 *p->num_mcaches = 0;
2340 *p->mcache_row_bytes = 0;
2341 } else {
2342 blk_bytes = dml_get_tile_block_size_bytes(p->tiling_mode);
2343
2344 // if gpuvm is not enable, the alignment boundary should be in terms of tiling block size
2345 vmpg_bytes = p->gpuvm_page_size_kbytes * 1024;
2346
2347 //With vmpg_bytes >= tile blk_bytes, the meta_row_width alignment equations are relative to the vmpg_width/height.
2348 // But for 4KB page with 64KB tile block, we need the meta for all pages in the tile block.
2349 // Therefore, the alignment is relative to the blk_width/height. The factor of 16 vmpg per 64KB tile block is applied at the end.
2350 *p->mvmpg_width = p->blk_width;
2351 *p->mvmpg_height = p->blk_height;
2352 if (p->gpuvm_enable) {
2353 if (vmpg_bytes >= blk_bytes) {
2354 *p->mvmpg_width = p->vmpg_width;
2355 *p->mvmpg_height = p->vmpg_height;
2356 } else if (!((blk_bytes == 65536) && (vmpg_bytes == 4096))) {
2357 dml2_printf("ERROR: DML::%s: Tiling size and vm page size combination not supported\n", __func__);
2358 DML2_ASSERT(0);
2359 }
2360 }
2361
2362 //For plane0 & 1, first calculate full_vp_width/height_l/c aligned to vmpg_width/height_l/c
2363 full_vp_width_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_x + p->full_vp_width) + *p->mvmpg_width - 1, *p->mvmpg_width) - math_floor2(p->vp_start_x, *p->mvmpg_width));
2364 full_vp_height_mvmpg_aligned = (unsigned int)(math_floor2((p->vp_start_y + p->full_vp_height) + *p->mvmpg_height - 1, *p->mvmpg_height) - math_floor2(p->vp_start_y, *p->mvmpg_height));
2365
2366 *p->full_vp_access_width_mvmpg_aligned = p->surf_vert ? full_vp_height_mvmpg_aligned : full_vp_width_mvmpg_aligned;
2367
2368 //Use the equation for the exact alignment when possible. Note that the exact alignment cannot be used for horizontal access if vmpg_bytes > blk_bytes.
2369 if (!p->surf_vert) { //horizontal access
2370 if (p->vp_stationary == 1 && vmpg_bytes <= blk_bytes)
2371 *p->meta_row_width_ub = full_vp_width_mvmpg_aligned;
2372 else
2373 *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_width - 1, *p->mvmpg_width) + *p->mvmpg_width;
2374 mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_width;
2375 } else { //vertical access
2376 if (p->vp_stationary == 1)
2377 *p->meta_row_width_ub = full_vp_height_mvmpg_aligned;
2378 else
2379 *p->meta_row_width_ub = (unsigned int)math_ceil2((double)p->full_vp_height - 1, *p->mvmpg_height) + *p->mvmpg_height;
2380 mvmpg_per_row_ub = *p->meta_row_width_ub / *p->mvmpg_height;
2381 }
2382
2383 if (p->gpuvm_enable) {
2384 meta_per_mvmpg_per_channel = (float)vmpg_bytes / (float)256 / p->num_chans;
2385
2386 //but using the est_blk_per_vmpg between 2 and 4, to be not as pessimestic
2387 if (p->surf_vert && vmpg_bytes > blk_bytes) {
2388 meta_per_mvmpg_per_channel = (float)est_blk_per_vmpg * blk_bytes / (float)256 / p->num_chans;
2389 }
2390
2391 *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel)); // dcc_dr_oh_nom
2392 } else {
2393 meta_per_mvmpg_per_channel = (float) blk_bytes / (float)256 / p->num_chans;
2394
2395 if (!p->surf_vert)
2396 *p->dcc_dram_bw_nom_overhead_factor = 1 + 1.0 / 256.0;
2397 else
2398 *p->dcc_dram_bw_nom_overhead_factor = 1 + math_max2(1.0 / 256.0, math_ceil2(meta_per_mvmpg_per_channel, p->mem_word_bytes) / (256 * meta_per_mvmpg_per_channel));
2399 }
2400
2401 meta_per_mvmpg_per_channel_ub = (unsigned int)math_ceil2((double)meta_per_mvmpg_per_channel, p->mcache_line_size_bytes);
2402
2403 //but for 4KB vmpg with 64KB tile blk
2404 if (p->gpuvm_enable && (blk_bytes == 65536) && (vmpg_bytes == 4096))
2405 meta_per_mvmpg_per_channel_ub = 16 * meta_per_mvmpg_per_channel_ub;
2406
2407 // If this mcache_row_bytes for the full viewport of the surface is less than or equal to mcache_bytes,
2408 // then one mcache can be used for this request stream. If not, it is useful to know the width of the viewport that can be supported in the mcache_bytes.
2409 if (p->gpuvm_enable || !p->surf_vert) {
2410 *p->mcache_row_bytes = mvmpg_per_row_ub * meta_per_mvmpg_per_channel_ub;
2411 } else { // horizontal and gpuvm disable
2412 *p->mcache_row_bytes = *p->meta_row_width_ub * p->blk_height * p->bytes_per_pixel / 256;
2413 *p->mcache_row_bytes = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->num_chans, p->mcache_line_size_bytes);
2414 }
2415
2416 *p->dcc_dram_bw_pref_overhead_factor = 1 + math_max2(1.0 / 256.0, *p->mcache_row_bytes / p->full_swath_bytes); // dcc_dr_oh_pref
2417 *p->num_mcaches = (unsigned int)math_ceil2((double)*p->mcache_row_bytes / p->mcache_size_bytes, 1);
2418
2419 mvmpg_per_mcache = p->mcache_size_bytes / meta_per_mvmpg_per_channel_ub;
2420 *p->mvmpg_per_mcache_lb = (unsigned int)math_floor2(mvmpg_per_mcache, 1);
2421
2422 #ifdef __DML_VBA_DEBUG__
2423 dml2_printf("DML::%s: gpuvm_enable = %u\n", __func__, p->gpuvm_enable);
2424 dml2_printf("DML::%s: vmpg_bytes = %u\n", __func__, vmpg_bytes);
2425 dml2_printf("DML::%s: blk_bytes = %u\n", __func__, blk_bytes);
2426 dml2_printf("DML::%s: meta_per_mvmpg_per_channel = %f\n", __func__, meta_per_mvmpg_per_channel);
2427 dml2_printf("DML::%s: mvmpg_per_row_ub = %u\n", __func__, mvmpg_per_row_ub);
2428 dml2_printf("DML::%s: meta_row_width_ub = %u\n", __func__, *p->meta_row_width_ub);
2429 dml2_printf("DML::%s: mvmpg_width = %u\n", __func__, *p->mvmpg_width);
2430 dml2_printf("DML::%s: mvmpg_height = %u\n", __func__, *p->mvmpg_height);
2431 dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_nom_overhead_factor);
2432 dml2_printf("DML::%s: dcc_dram_bw_pref_overhead_factor = %f\n", __func__, *p->dcc_dram_bw_pref_overhead_factor);
2433 #endif
2434 }
2435
2436 #ifdef __DML_VBA_DEBUG__
2437 dml2_printf("DML::%s: mcache_row_bytes = %u\n", __func__, *p->mcache_row_bytes);
2438 dml2_printf("DML::%s: num_mcaches = %u\n", __func__, *p->num_mcaches);
2439 #endif
2440 DML2_ASSERT(*p->num_mcaches > 0);
2441 }
2442
calculate_mcache_setting(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_calculate_mcache_setting_params * p)2443 static void calculate_mcache_setting(
2444 struct dml2_core_internal_scratch *scratch,
2445 struct dml2_core_calcs_calculate_mcache_setting_params *p)
2446 {
2447 unsigned int n;
2448
2449 struct dml2_core_shared_calculate_mcache_setting_locals *l = &scratch->calculate_mcache_setting_locals;
2450 memset(l, 0, sizeof(struct dml2_core_shared_calculate_mcache_setting_locals));
2451
2452 *p->num_mcaches_l = 0;
2453 *p->mcache_row_bytes_l = 0;
2454 *p->dcc_dram_bw_nom_overhead_factor_l = 1.0;
2455 *p->dcc_dram_bw_pref_overhead_factor_l = 1.0;
2456
2457 *p->num_mcaches_c = 0;
2458 *p->mcache_row_bytes_c = 0;
2459 *p->dcc_dram_bw_nom_overhead_factor_c = 1.0;
2460 *p->dcc_dram_bw_pref_overhead_factor_c = 1.0;
2461
2462 *p->mall_comb_mcache_l = 0;
2463 *p->mall_comb_mcache_c = 0;
2464 *p->lc_comb_mcache = 0;
2465
2466 if (!p->dcc_enable)
2467 return;
2468
2469 l->is_dual_plane = dml_is_420(p->source_format) || p->source_format == dml2_rgbe_alpha;
2470
2471 l->l_p.num_chans = p->num_chans;
2472 l->l_p.mem_word_bytes = p->mem_word_bytes;
2473 l->l_p.mcache_size_bytes = p->mcache_size_bytes;
2474 l->l_p.mcache_line_size_bytes = p->mcache_line_size_bytes;
2475 l->l_p.gpuvm_enable = p->gpuvm_enable;
2476 l->l_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes;
2477 l->l_p.surf_vert = p->surf_vert;
2478 l->l_p.vp_stationary = p->vp_stationary;
2479 l->l_p.tiling_mode = p->tiling_mode;
2480 l->l_p.vp_start_x = p->vp_start_x_l;
2481 l->l_p.vp_start_y = p->vp_start_y_l;
2482 l->l_p.full_vp_width = p->full_vp_width_l;
2483 l->l_p.full_vp_height = p->full_vp_height_l;
2484 l->l_p.blk_width = p->blk_width_l;
2485 l->l_p.blk_height = p->blk_height_l;
2486 l->l_p.vmpg_width = p->vmpg_width_l;
2487 l->l_p.vmpg_height = p->vmpg_height_l;
2488 l->l_p.full_swath_bytes = p->full_swath_bytes_l;
2489 l->l_p.bytes_per_pixel = p->bytes_per_pixel_l;
2490
2491 // output
2492 l->l_p.num_mcaches = p->num_mcaches_l;
2493 l->l_p.mcache_row_bytes = p->mcache_row_bytes_l;
2494 l->l_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_l;
2495 l->l_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_l;
2496 l->l_p.mvmpg_width = &l->mvmpg_width_l;
2497 l->l_p.mvmpg_height = &l->mvmpg_height_l;
2498 l->l_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_l;
2499 l->l_p.meta_row_width_ub = &l->meta_row_width_l;
2500 l->l_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_l;
2501
2502 calculate_mcache_row_bytes(scratch, &l->l_p);
2503 dml2_assert(*p->num_mcaches_l > 0);
2504
2505 if (l->is_dual_plane) {
2506 l->c_p.num_chans = p->num_chans;
2507 l->c_p.mem_word_bytes = p->mem_word_bytes;
2508 l->c_p.mcache_size_bytes = p->mcache_size_bytes;
2509 l->c_p.mcache_line_size_bytes = p->mcache_line_size_bytes;
2510 l->c_p.gpuvm_enable = p->gpuvm_enable;
2511 l->c_p.gpuvm_page_size_kbytes = p->gpuvm_page_size_kbytes;
2512 l->c_p.surf_vert = p->surf_vert;
2513 l->c_p.vp_stationary = p->vp_stationary;
2514 l->c_p.tiling_mode = p->tiling_mode;
2515 l->c_p.vp_start_x = p->vp_start_x_c;
2516 l->c_p.vp_start_y = p->vp_start_y_c;
2517 l->c_p.full_vp_width = p->full_vp_width_c;
2518 l->c_p.full_vp_height = p->full_vp_height_c;
2519 l->c_p.blk_width = p->blk_width_c;
2520 l->c_p.blk_height = p->blk_height_c;
2521 l->c_p.vmpg_width = p->vmpg_width_c;
2522 l->c_p.vmpg_height = p->vmpg_height_c;
2523 l->c_p.full_swath_bytes = p->full_swath_bytes_c;
2524 l->c_p.bytes_per_pixel = p->bytes_per_pixel_c;
2525
2526 // output
2527 l->c_p.num_mcaches = p->num_mcaches_c;
2528 l->c_p.mcache_row_bytes = p->mcache_row_bytes_c;
2529 l->c_p.dcc_dram_bw_nom_overhead_factor = p->dcc_dram_bw_nom_overhead_factor_c;
2530 l->c_p.dcc_dram_bw_pref_overhead_factor = p->dcc_dram_bw_pref_overhead_factor_c;
2531 l->c_p.mvmpg_width = &l->mvmpg_width_c;
2532 l->c_p.mvmpg_height = &l->mvmpg_height_c;
2533 l->c_p.full_vp_access_width_mvmpg_aligned = &l->full_vp_access_width_mvmpg_aligned_c;
2534 l->c_p.meta_row_width_ub = &l->meta_row_width_c;
2535 l->c_p.mvmpg_per_mcache_lb = &l->mvmpg_per_mcache_lb_c;
2536
2537 calculate_mcache_row_bytes(scratch, &l->c_p);
2538 dml2_assert(*p->num_mcaches_c > 0);
2539 }
2540
2541 // Sharing for iMALL access
2542 l->mcache_remainder_l = *p->mcache_row_bytes_l % p->mcache_size_bytes;
2543 l->mcache_remainder_c = *p->mcache_row_bytes_c % p->mcache_size_bytes;
2544 l->mvmpg_access_width_l = p->surf_vert ? l->mvmpg_height_l : l->mvmpg_width_l;
2545 l->mvmpg_access_width_c = p->surf_vert ? l->mvmpg_height_c : l->mvmpg_width_c;
2546
2547 if (p->imall_enable) {
2548 *p->mall_comb_mcache_l = (2 * l->mcache_remainder_l <= p->mcache_size_bytes);
2549
2550 if (l->is_dual_plane)
2551 *p->mall_comb_mcache_c = (2 * l->mcache_remainder_c <= p->mcache_size_bytes);
2552 }
2553
2554 if (!p->surf_vert) // horizonatal access
2555 l->luma_time_factor = (double)l->mvmpg_height_c / l->mvmpg_height_l * 2;
2556 else // vertical access
2557 l->luma_time_factor = (double)l->mvmpg_width_c / l->mvmpg_width_l * 2;
2558
2559 // The algorithm starts with computing a non-integer, avg_mcache_element_size_l/c:
2560 if (*p->num_mcaches_l) {
2561 l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l;
2562 }
2563
2564 if (l->is_dual_plane && *p->num_mcaches_c) {
2565 l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c;
2566
2567 if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) {
2568 l->lc_comb_last_mcache_size = (unsigned int)((l->mcache_remainder_l * (*p->mall_comb_mcache_l ? 2 : 1) * l->luma_time_factor) +
2569 (l->mcache_remainder_c * (*p->mall_comb_mcache_c ? 2 : 1)));
2570 }
2571 *p->lc_comb_mcache = (l->lc_comb_last_mcache_size <= p->mcache_size_bytes) && (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c);
2572 }
2573
2574 #ifdef __DML_VBA_DEBUG__
2575 dml2_printf("DML::%s: imall_enable = %u\n", __func__, p->imall_enable);
2576 dml2_printf("DML::%s: is_dual_plane = %u\n", __func__, l->is_dual_plane);
2577 dml2_printf("DML::%s: surf_vert = %u\n", __func__, p->surf_vert);
2578 dml2_printf("DML::%s: mvmpg_width_l = %u\n", __func__, l->mvmpg_width_l);
2579 dml2_printf("DML::%s: mvmpg_height_l = %u\n", __func__, l->mvmpg_height_l);
2580 dml2_printf("DML::%s: mcache_remainder_l = %f\n", __func__, l->mcache_remainder_l);
2581 dml2_printf("DML::%s: num_mcaches_l = %u\n", __func__, *p->num_mcaches_l);
2582 dml2_printf("DML::%s: avg_mcache_element_size_l = %u\n", __func__, l->avg_mcache_element_size_l);
2583 dml2_printf("DML::%s: mvmpg_access_width_l = %u\n", __func__, l->mvmpg_access_width_l);
2584 dml2_printf("DML::%s: mall_comb_mcache_l = %u\n", __func__, *p->mall_comb_mcache_l);
2585
2586 if (l->is_dual_plane) {
2587 dml2_printf("DML::%s: mvmpg_width_c = %u\n", __func__, l->mvmpg_width_c);
2588 dml2_printf("DML::%s: mvmpg_height_c = %u\n", __func__, l->mvmpg_height_c);
2589 dml2_printf("DML::%s: mcache_remainder_c = %f\n", __func__, l->mcache_remainder_c);
2590 dml2_printf("DML::%s: luma_time_factor = %f\n", __func__, l->luma_time_factor);
2591 dml2_printf("DML::%s: num_mcaches_c = %u\n", __func__, *p->num_mcaches_c);
2592 dml2_printf("DML::%s: avg_mcache_element_size_c = %u\n", __func__, l->avg_mcache_element_size_c);
2593 dml2_printf("DML::%s: mvmpg_access_width_c = %u\n", __func__, l->mvmpg_access_width_c);
2594 dml2_printf("DML::%s: mall_comb_mcache_c = %u\n", __func__, *p->mall_comb_mcache_c);
2595 dml2_printf("DML::%s: lc_comb_last_mcache_size = %u\n", __func__, l->lc_comb_last_mcache_size);
2596 dml2_printf("DML::%s: lc_comb_mcache = %u\n", __func__, *p->lc_comb_mcache);
2597 }
2598 #endif
2599 // calculate split_coordinate
2600 l->full_vp_access_width_l = p->surf_vert ? p->full_vp_height_l : p->full_vp_width_l;
2601 l->full_vp_access_width_c = p->surf_vert ? p->full_vp_height_c : p->full_vp_width_c;
2602
2603 for (n = 0; n < *p->num_mcaches_l - 1; n++) {
2604 p->mcache_offsets_l[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_l / l->mvmpg_access_width_l, 1)) * l->mvmpg_access_width_l;
2605 }
2606 p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
2607
2608 if (l->is_dual_plane) {
2609 for (n = 0; n < *p->num_mcaches_c - 1; n++) {
2610 p->mcache_offsets_c[n] = (unsigned int)(math_floor2((n + 1) * l->avg_mcache_element_size_c / l->mvmpg_access_width_c, 1)) * l->mvmpg_access_width_c;
2611 }
2612 p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c;
2613 }
2614 #ifdef __DML_VBA_DEBUG__
2615 for (n = 0; n < *p->num_mcaches_l; n++)
2616 dml2_printf("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]);
2617
2618 if (l->is_dual_plane) {
2619 for (n = 0; n < *p->num_mcaches_c; n++)
2620 dml2_printf("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]);
2621 }
2622 #endif
2623
2624 // Luma/Chroma combine in the last mcache
2625 // In the case of Luma/Chroma combine-mCache (with lc_comb_mcache==1), all mCaches except the last segment are filled as much as possible, when stay aligned to mvmpg boundary
2626 if (*p->lc_comb_mcache && l->is_dual_plane) {
2627 for (n = 0; n < *p->num_mcaches_l - 1; n++)
2628 p->mcache_offsets_l[n] = (n + 1) * l->mvmpg_per_mcache_lb_l * l->mvmpg_access_width_l;
2629 p->mcache_offsets_l[*p->num_mcaches_l - 1] = l->full_vp_access_width_l;
2630
2631 for (n = 0; n < *p->num_mcaches_c - 1; n++)
2632 p->mcache_offsets_c[n] = (n + 1) * l->mvmpg_per_mcache_lb_c * l->mvmpg_access_width_c;
2633 p->mcache_offsets_c[*p->num_mcaches_c - 1] = l->full_vp_access_width_c;
2634
2635 #ifdef __DML_VBA_DEBUG__
2636 for (n = 0; n < *p->num_mcaches_l; n++)
2637 dml2_printf("DML::%s: mcache_offsets_l[%u] = %u\n", __func__, n, p->mcache_offsets_l[n]);
2638
2639 for (n = 0; n < *p->num_mcaches_c; n++)
2640 dml2_printf("DML::%s: mcache_offsets_c[%u] = %u\n", __func__, n, p->mcache_offsets_c[n]);
2641 #endif
2642 }
2643
2644 *p->mcache_shift_granularity_l = l->mvmpg_access_width_l;
2645 *p->mcache_shift_granularity_c = l->mvmpg_access_width_c;
2646 }
2647
calculate_mall_bw_overhead_factor(double mall_prefetch_sdp_overhead_factor[],double mall_prefetch_dram_overhead_factor[],const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes)2648 static void calculate_mall_bw_overhead_factor(
2649 double mall_prefetch_sdp_overhead_factor[], //mall_sdp_oh_nom/pref
2650 double mall_prefetch_dram_overhead_factor[], //mall_dram_oh_nom/pref
2651
2652 // input
2653 const struct dml2_display_cfg *display_cfg,
2654 unsigned int num_active_planes)
2655 {
2656 for (unsigned int k = 0; k < num_active_planes; ++k) {
2657 mall_prefetch_sdp_overhead_factor[k] = 1.0;
2658 mall_prefetch_dram_overhead_factor[k] = 1.0;
2659
2660 // SDP - on the return side
2661 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall) // always no data return
2662 mall_prefetch_sdp_overhead_factor[k] = 1.25;
2663 else if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return)
2664 mall_prefetch_sdp_overhead_factor[k] = 0.25;
2665
2666 // DRAM
2667 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall)
2668 mall_prefetch_dram_overhead_factor[k] = 2.0;
2669
2670 #ifdef __DML_VBA_DEBUG__
2671 dml2_printf("DML::%s: k=%u, mall_prefetch_sdp_overhead_factor = %f\n", __func__, k, mall_prefetch_sdp_overhead_factor[k]);
2672 dml2_printf("DML::%s: k=%u, mall_prefetch_dram_overhead_factor = %f\n", __func__, k, mall_prefetch_dram_overhead_factor[k]);
2673 #endif
2674 }
2675 }
2676
dml_get_return_bandwidth_available(const struct dml2_soc_bb * soc,enum dml2_core_internal_soc_state_type state_type,enum dml2_core_internal_bw_type bw_type,bool is_avg_bw,bool is_hvm_en,bool is_hvm_only,double dcflk_mhz,double fclk_mhz,double dram_bw_mbps)2677 static double dml_get_return_bandwidth_available(
2678 const struct dml2_soc_bb *soc,
2679 enum dml2_core_internal_soc_state_type state_type,
2680 enum dml2_core_internal_bw_type bw_type,
2681 bool is_avg_bw,
2682 bool is_hvm_en,
2683 bool is_hvm_only,
2684 double dcflk_mhz,
2685 double fclk_mhz,
2686 double dram_bw_mbps)
2687 {
2688 double return_bw_mbps = 0.;
2689 double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcflk_mhz;
2690 double ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes;
2691 double ideal_dram_bandwidth = dram_bw_mbps; //dram_speed_mts * soc->clk_table.dram_config.channel_count * soc->clk_table.dram_config.channel_width_bytes;
2692
2693 double derate_sdp_factor;
2694 double derate_fabric_factor;
2695 double derate_dram_factor;
2696
2697 double derate_sdp_bandwidth;
2698 double derate_fabric_bandwidth;
2699 double derate_dram_bandwidth;
2700
2701 if (is_avg_bw) {
2702 if (state_type == dml2_core_internal_soc_state_svp_prefetch) {
2703 derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dcfclk_derate_percent / 100.0;
2704 derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.fclk_derate_percent / 100.0;
2705 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100.0;
2706 } else { // just assume sys_active
2707 derate_sdp_factor = soc->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100.0;
2708 derate_fabric_factor = soc->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100.0;
2709 derate_dram_factor = soc->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100.0;
2710 }
2711 } else { // urgent bw
2712 if (state_type == dml2_core_internal_soc_state_svp_prefetch) {
2713 derate_sdp_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dcfclk_derate_percent / 100.0;
2714 derate_fabric_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.fclk_derate_percent / 100.0;
2715 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0;
2716
2717 if (is_hvm_en) {
2718 if (is_hvm_only)
2719 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_vm / 100.0;
2720 else
2721 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel_and_vm / 100.0;
2722 } else {
2723 derate_dram_factor = soc->qos_parameters.derate_table.dcn_mall_prefetch_urgent.dram_derate_percent_pixel / 100.0;
2724 }
2725 } else { // just assume sys_active
2726 derate_sdp_factor = soc->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0;
2727 derate_fabric_factor = soc->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100.0;
2728
2729 if (is_hvm_en) {
2730 if (is_hvm_only)
2731 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_vm / 100.0;
2732 else
2733 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel_and_vm / 100.0;
2734 } else {
2735 derate_dram_factor = soc->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100.0;
2736 }
2737 }
2738 }
2739
2740 derate_sdp_bandwidth = ideal_sdp_bandwidth * derate_sdp_factor;
2741 derate_fabric_bandwidth = ideal_fabric_bandwidth * derate_fabric_factor;
2742 derate_dram_bandwidth = ideal_dram_bandwidth * derate_dram_factor;
2743
2744 if (bw_type == dml2_core_internal_bw_sdp)
2745 return_bw_mbps = math_min2(derate_sdp_bandwidth, derate_fabric_bandwidth);
2746 else // dml2_core_internal_bw_dram
2747 return_bw_mbps = derate_dram_bandwidth;
2748
2749 #ifdef __DML_VBA_DEBUG__
2750 dml2_printf("DML::%s: is_avg_bw = %u\n", __func__, is_avg_bw);
2751 dml2_printf("DML::%s: is_hvm_en = %u\n", __func__, is_hvm_en);
2752 dml2_printf("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only);
2753 dml2_printf("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type));
2754 dml2_printf("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type));
2755 dml2_printf("DML::%s: dcflk_mhz = %f\n", __func__, dcflk_mhz);
2756 dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
2757 dml2_printf("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth);
2758 dml2_printf("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth);
2759 dml2_printf("DML::%s: ideal_dram_bandwidth = %f\n", __func__, ideal_dram_bandwidth);
2760 dml2_printf("DML::%s: derate_sdp_bandwidth = %f (derate %f)\n", __func__, derate_sdp_bandwidth, derate_sdp_factor);
2761 dml2_printf("DML::%s: derate_fabric_bandwidth = %f (derate %f)\n", __func__, derate_fabric_bandwidth, derate_fabric_factor);
2762 dml2_printf("DML::%s: derate_dram_bandwidth = %f (derate %f)\n", __func__, derate_dram_bandwidth, derate_dram_factor);
2763 dml2_printf("DML::%s: return_bw_mbps = %f\n", __func__, return_bw_mbps);
2764 #endif
2765 return return_bw_mbps;
2766 }
2767
calculate_bandwidth_available(double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available_min[dml2_core_internal_soc_state_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max],double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max],const struct dml2_soc_bb * soc,bool HostVMEnable,double dcfclk_mhz,double fclk_mhz,double dram_bw_mbps)2768 static void calculate_bandwidth_available(
2769 double avg_bandwidth_available_min[dml2_core_internal_soc_state_max],
2770 double avg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
2771 double urg_bandwidth_available_min[dml2_core_internal_soc_state_max], // min between SDP and DRAM
2772 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
2773 double urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_max],
2774 double urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_max],
2775
2776 const struct dml2_soc_bb *soc,
2777 bool HostVMEnable,
2778 double dcfclk_mhz,
2779 double fclk_mhz,
2780 double dram_bw_mbps)
2781 {
2782 unsigned int n, m;
2783
2784 dml2_printf("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz);
2785 dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz);
2786 dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, dram_bw_mbps);
2787
2788 // Calculate all the bandwidth availabe
2789 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
2790 for (n = 0; n < dml2_core_internal_bw_max; n++) {
2791 avg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc,
2792 m, // soc_state
2793 n, // bw_type
2794 1, // avg_bw
2795 HostVMEnable,
2796 0, // hvm_only
2797 dcfclk_mhz,
2798 fclk_mhz,
2799 dram_bw_mbps);
2800
2801 urg_bandwidth_available[m][n] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
2802
2803
2804 #ifdef __DML_VBA_DEBUG__
2805 dml2_printf("DML::%s: avg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), avg_bandwidth_available[m][n]);
2806 dml2_printf("DML::%s: urg_bandwidth_available[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), urg_bandwidth_available[m][n]);
2807 #endif
2808
2809 // urg_bandwidth_available_vm_only is indexed by soc_state
2810 if (n == dml2_core_internal_bw_dram) {
2811 urg_bandwidth_available_vm_only[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 1, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
2812 urg_bandwidth_available_pixel_and_vm[m] = dml_get_return_bandwidth_available(soc, m, n, 0, HostVMEnable, 0, dcfclk_mhz, fclk_mhz, dram_bw_mbps);
2813 }
2814 }
2815
2816 avg_bandwidth_available_min[m] = math_min2(avg_bandwidth_available[m][dml2_core_internal_bw_dram], avg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
2817 urg_bandwidth_available_min[m] = math_min2(urg_bandwidth_available[m][dml2_core_internal_bw_dram], urg_bandwidth_available[m][dml2_core_internal_bw_sdp]);
2818
2819 #ifdef __DML_VBA_DEBUG__
2820 dml2_printf("DML::%s: avg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), avg_bandwidth_available_min[m]);
2821 dml2_printf("DML::%s: urg_bandwidth_available_min[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_min[m]);
2822 dml2_printf("DML::%s: urg_bandwidth_available_vm_only[%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), urg_bandwidth_available_vm_only[n]);
2823 #endif
2824 }
2825 }
2826
calculate_avg_bandwidth_required(double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes,double ReadBandwidthLuma[],double ReadBandwidthChroma[],double cursor_bw[],double dcc_dram_bw_nom_overhead_factor_p0[],double dcc_dram_bw_nom_overhead_factor_p1[],double mall_prefetch_dram_overhead_factor[],double mall_prefetch_sdp_overhead_factor[])2827 static void calculate_avg_bandwidth_required(
2828 double avg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
2829
2830 // input
2831 const struct dml2_display_cfg *display_cfg,
2832 unsigned int num_active_planes,
2833 double ReadBandwidthLuma[],
2834 double ReadBandwidthChroma[],
2835 double cursor_bw[],
2836 double dcc_dram_bw_nom_overhead_factor_p0[],
2837 double dcc_dram_bw_nom_overhead_factor_p1[],
2838 double mall_prefetch_dram_overhead_factor[],
2839 double mall_prefetch_sdp_overhead_factor[])
2840 {
2841 unsigned int n, m, k;
2842 double sdp_overhead_factor;
2843 double dram_overhead_factor_p0;
2844 double dram_overhead_factor_p1;
2845
2846 // Average BW support check
2847 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
2848 for (n = 0; n < dml2_core_internal_bw_max; n++) { // sdp, dram
2849 avg_bandwidth_required[m][n] = 0;
2850 }
2851 }
2852
2853 // SysActive and SVP Prefetch AVG bandwidth Check
2854 for (k = 0; k < num_active_planes; ++k) {
2855 #ifdef __DML_VBA_DEBUG__
2856 dml2_printf("DML::%s: plane %0d\n", __func__, k);
2857 dml2_printf("DML::%s: ReadBandwidthLuma=%f\n", __func__, ReadBandwidthLuma[k]);
2858 dml2_printf("DML::%s: ReadBandwidthChroma=%f\n", __func__, ReadBandwidthChroma[k]);
2859 dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor_p0=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p0[k]);
2860 dml2_printf("DML::%s: dcc_dram_bw_nom_overhead_factor_p1=%f\n", __func__, dcc_dram_bw_nom_overhead_factor_p1[k]);
2861 dml2_printf("DML::%s: mall_prefetch_dram_overhead_factor=%f\n", __func__, mall_prefetch_dram_overhead_factor[k]);
2862 dml2_printf("DML::%s: mall_prefetch_sdp_overhead_factor=%f\n", __func__, mall_prefetch_sdp_overhead_factor[k]);
2863 #endif
2864
2865 sdp_overhead_factor = mall_prefetch_sdp_overhead_factor[k];
2866 dram_overhead_factor_p0 = dcc_dram_bw_nom_overhead_factor_p0[k] * mall_prefetch_dram_overhead_factor[k];
2867 dram_overhead_factor_p1 = dcc_dram_bw_nom_overhead_factor_p1[k] * mall_prefetch_dram_overhead_factor[k];
2868
2869 // FIXME_DCN4, was missing cursor_bw in here, but do I actually need that and tdlut bw for average bandwidth calculation?
2870 // active avg bw not include phantom, but svp_prefetch avg bw should include phantom pipes
2871 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
2872 avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k];
2873 avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k];
2874 }
2875 avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] += sdp_overhead_factor * (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]) + cursor_bw[k];
2876 avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] += dram_overhead_factor_p0 * ReadBandwidthLuma[k] + dram_overhead_factor_p1 * ReadBandwidthChroma[k] + cursor_bw[k];
2877
2878 #ifdef __DML_VBA_DEBUG__
2879 dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]);
2880 dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_sys_active), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram]);
2881 dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_sdp), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp]);
2882 dml2_printf("DML::%s: avg_bandwidth_required[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(dml2_core_internal_soc_state_svp_prefetch), dml2_core_internal_bw_type_str(dml2_core_internal_bw_dram), avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]);
2883 #endif
2884 }
2885 }
2886
CalculateVMRowAndSwath(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculateVMRowAndSwath_params * p)2887 static void CalculateVMRowAndSwath(struct dml2_core_internal_scratch *scratch,
2888 struct dml2_core_calcs_CalculateVMRowAndSwath_params *p)
2889 {
2890 struct dml2_core_calcs_CalculateVMRowAndSwath_locals *s = &scratch->CalculateVMRowAndSwath_locals;
2891
2892 s->HostVMDynamicLevels = CalculateHostVMDynamicLevels(p->display_cfg->gpuvm_enable, p->display_cfg->hostvm_enable, p->HostVMMinPageSize, p->display_cfg->hostvm_max_non_cached_page_table_levels);
2893
2894 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
2895 if (p->display_cfg->gpuvm_enable == true) {
2896 p->vm_group_bytes[k] = 512;
2897 p->dpte_group_bytes[k] = 512;
2898 } else {
2899 p->vm_group_bytes[k] = 0;
2900 p->dpte_group_bytes[k] = 0;
2901 }
2902
2903 if (dml_is_420(p->myPipe[k].SourcePixelFormat) || p->myPipe[k].SourcePixelFormat == dml2_rgbe_alpha) {
2904 if ((p->myPipe[k].SourcePixelFormat == dml2_420_10 || p->myPipe[k].SourcePixelFormat == dml2_420_12) && !dml_is_vertical_rotation(p->myPipe[k].RotationAngle)) {
2905 s->PTEBufferSizeInRequestsForLuma[k] = (p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma) / 2;
2906 s->PTEBufferSizeInRequestsForChroma[k] = s->PTEBufferSizeInRequestsForLuma[k];
2907 } else {
2908 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma;
2909 s->PTEBufferSizeInRequestsForChroma[k] = p->PTEBufferSizeInRequestsChroma;
2910 }
2911
2912 scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary;
2913 scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable;
2914 scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface;
2915 scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesC;
2916 scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesC;
2917 scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat;
2918 scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling;
2919 scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelC;
2920 scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle;
2921 scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthC[k];
2922 scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeightC;
2923 scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStartC;
2924 scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStartC;
2925 scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable;
2926 scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels;
2927 scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
2928 scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForChroma[k];
2929 scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchC;
2930 scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthC;
2931 scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightC;
2932 scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]);
2933 scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchC;
2934 scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present;
2935
2936 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowC[k];
2937 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageC[k];
2938 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_chroma_ub[k];
2939 scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_chroma[k];
2940 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_chroma[k];
2941 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowC_one_row_per_frame[k];
2942 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_chroma_ub_one_row_per_frame[k];
2943 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_chroma_one_row_per_frame[k];
2944 scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_c[k];
2945 scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_c[k];
2946 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthC[k];
2947 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightC[k];
2948 scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeC[k];
2949 scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_c[k];
2950
2951 scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_c[k];
2952 scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_chroma[k];
2953 scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_chroma[k];
2954 scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_chroma[k];
2955 scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_chroma[k];
2956 scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_c[k];
2957
2958 s->vm_bytes_c = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params);
2959
2960 p->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines(
2961 p->myPipe[k].VRatioChroma,
2962 p->myPipe[k].VTapsChroma,
2963 p->myPipe[k].InterlaceEnable,
2964 p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
2965 p->myPipe[k].SwathHeightC,
2966 p->myPipe[k].RotationAngle,
2967 p->myPipe[k].mirrored,
2968 p->myPipe[k].ViewportStationary,
2969 p->SwathWidthC[k],
2970 p->myPipe[k].ViewportHeightC,
2971 p->myPipe[k].ViewportXStartC,
2972 p->myPipe[k].ViewportYStartC,
2973
2974 // Output
2975 &p->VInitPreFillC[k],
2976 &p->MaxNumSwathC[k]);
2977 } else {
2978 s->PTEBufferSizeInRequestsForLuma[k] = p->PTEBufferSizeInRequestsLuma + p->PTEBufferSizeInRequestsChroma;
2979 s->PTEBufferSizeInRequestsForChroma[k] = 0;
2980 s->PixelPTEBytesPerRowC[k] = 0;
2981 s->PixelPTEBytesPerRowStorageC[k] = 0;
2982 s->vm_bytes_c = 0;
2983 p->MaxNumSwathC[k] = 0;
2984 p->PrefetchSourceLinesC[k] = 0;
2985 s->dpte_row_height_chroma_one_row_per_frame[k] = 0;
2986 s->dpte_row_width_chroma_ub_one_row_per_frame[k] = 0;
2987 s->PixelPTEBytesPerRowC_one_row_per_frame[k] = 0;
2988 }
2989
2990 scratch->calculate_vm_and_row_bytes_params.ViewportStationary = p->myPipe[k].ViewportStationary;
2991 scratch->calculate_vm_and_row_bytes_params.DCCEnable = p->myPipe[k].DCCEnable;
2992 scratch->calculate_vm_and_row_bytes_params.NumberOfDPPs = p->myPipe[k].DPPPerSurface;
2993 scratch->calculate_vm_and_row_bytes_params.BlockHeight256Bytes = p->myPipe[k].BlockHeight256BytesY;
2994 scratch->calculate_vm_and_row_bytes_params.BlockWidth256Bytes = p->myPipe[k].BlockWidth256BytesY;
2995 scratch->calculate_vm_and_row_bytes_params.SourcePixelFormat = p->myPipe[k].SourcePixelFormat;
2996 scratch->calculate_vm_and_row_bytes_params.SurfaceTiling = p->myPipe[k].SurfaceTiling;
2997 scratch->calculate_vm_and_row_bytes_params.BytePerPixel = p->myPipe[k].BytePerPixelY;
2998 scratch->calculate_vm_and_row_bytes_params.RotationAngle = p->myPipe[k].RotationAngle;
2999 scratch->calculate_vm_and_row_bytes_params.SwathWidth = p->SwathWidthY[k];
3000 scratch->calculate_vm_and_row_bytes_params.ViewportHeight = p->myPipe[k].ViewportHeight;
3001 scratch->calculate_vm_and_row_bytes_params.ViewportXStart = p->myPipe[k].ViewportXStart;
3002 scratch->calculate_vm_and_row_bytes_params.ViewportYStart = p->myPipe[k].ViewportYStart;
3003 scratch->calculate_vm_and_row_bytes_params.GPUVMEnable = p->display_cfg->gpuvm_enable;
3004 scratch->calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = p->display_cfg->gpuvm_max_page_table_levels;
3005 scratch->calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
3006 scratch->calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = s->PTEBufferSizeInRequestsForLuma[k];
3007 scratch->calculate_vm_and_row_bytes_params.Pitch = p->myPipe[k].PitchY;
3008 scratch->calculate_vm_and_row_bytes_params.MacroTileWidth = p->myPipe[k].BlockWidthY;
3009 scratch->calculate_vm_and_row_bytes_params.MacroTileHeight = p->myPipe[k].BlockHeightY;
3010 scratch->calculate_vm_and_row_bytes_params.is_phantom = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]);
3011 scratch->calculate_vm_and_row_bytes_params.DCCMetaPitch = p->myPipe[k].DCCMetaPitchY;
3012 scratch->calculate_vm_and_row_bytes_params.mrq_present = p->mrq_present;
3013
3014 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &s->PixelPTEBytesPerRowY[k];
3015 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &s->PixelPTEBytesPerRowStorageY[k];
3016 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub = &p->dpte_row_width_luma_ub[k];
3017 scratch->calculate_vm_and_row_bytes_params.dpte_row_height = &p->dpte_row_height_luma[k];
3018 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_linear = &p->dpte_row_height_linear_luma[k];
3019 scratch->calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &s->PixelPTEBytesPerRowY_one_row_per_frame[k];
3020 scratch->calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &s->dpte_row_width_luma_ub_one_row_per_frame[k];
3021 scratch->calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &s->dpte_row_height_luma_one_row_per_frame[k];
3022 scratch->calculate_vm_and_row_bytes_params.vmpg_width = &p->vmpg_width_y[k];
3023 scratch->calculate_vm_and_row_bytes_params.vmpg_height = &p->vmpg_height_y[k];
3024 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &p->PixelPTEReqWidthY[k];
3025 scratch->calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &p->PixelPTEReqHeightY[k];
3026 scratch->calculate_vm_and_row_bytes_params.PTERequestSize = &p->PTERequestSizeY[k];
3027 scratch->calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &p->dpde0_bytes_per_frame_ub_l[k];
3028
3029 scratch->calculate_vm_and_row_bytes_params.meta_row_bytes = &s->meta_row_bytes_per_row_ub_l[k];
3030 scratch->calculate_vm_and_row_bytes_params.MetaRequestWidth = &p->meta_req_width_luma[k];
3031 scratch->calculate_vm_and_row_bytes_params.MetaRequestHeight = &p->meta_req_height_luma[k];
3032 scratch->calculate_vm_and_row_bytes_params.meta_row_width = &p->meta_row_width_luma[k];
3033 scratch->calculate_vm_and_row_bytes_params.meta_row_height = &p->meta_row_height_luma[k];
3034 scratch->calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &p->meta_pte_bytes_per_frame_ub_l[k];
3035
3036 s->vm_bytes_l = CalculateVMAndRowBytes(&scratch->calculate_vm_and_row_bytes_params);
3037
3038 p->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines(
3039 p->myPipe[k].VRatio,
3040 p->myPipe[k].VTaps,
3041 p->myPipe[k].InterlaceEnable,
3042 p->myPipe[k].ProgressiveToInterlaceUnitInOPP,
3043 p->myPipe[k].SwathHeightY,
3044 p->myPipe[k].RotationAngle,
3045 p->myPipe[k].mirrored,
3046 p->myPipe[k].ViewportStationary,
3047 p->SwathWidthY[k],
3048 p->myPipe[k].ViewportHeight,
3049 p->myPipe[k].ViewportXStart,
3050 p->myPipe[k].ViewportYStart,
3051
3052 // Output
3053 &p->VInitPreFillY[k],
3054 &p->MaxNumSwathY[k]);
3055
3056 #ifdef __DML_VBA_DEBUG__
3057 dml2_printf("DML::%s: k=%u, vm_bytes_l = %u (before hvm level)\n", __func__, k, s->vm_bytes_l);
3058 dml2_printf("DML::%s: k=%u, vm_bytes_c = %u (before hvm level)\n", __func__, k, s->vm_bytes_c);
3059 dml2_printf("DML::%s: k=%u, meta_row_bytes_per_row_ub_l = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_l[k]);
3060 dml2_printf("DML::%s: k=%u, meta_row_bytes_per_row_ub_c = %u\n", __func__, k, s->meta_row_bytes_per_row_ub_c[k]);
3061 #endif
3062 p->vm_bytes[k] = (s->vm_bytes_l + s->vm_bytes_c) * (1 + 8 * s->HostVMDynamicLevels);
3063 p->meta_row_bytes[k] = s->meta_row_bytes_per_row_ub_l[k] + s->meta_row_bytes_per_row_ub_c[k];
3064 p->meta_row_bytes_per_row_ub_l[k] = s->meta_row_bytes_per_row_ub_l[k];
3065 p->meta_row_bytes_per_row_ub_c[k] = s->meta_row_bytes_per_row_ub_c[k];
3066
3067 #ifdef __DML_VBA_DEBUG__
3068 dml2_printf("DML::%s: k=%u, meta_row_bytes = %u\n", __func__, k, p->meta_row_bytes[k]);
3069 dml2_printf("DML::%s: k=%u, vm_bytes = %u (after hvm level)\n", __func__, k, p->vm_bytes[k]);
3070 #endif
3071 if (s->PixelPTEBytesPerRowStorageY[k] <= 64 * s->PTEBufferSizeInRequestsForLuma[k] && s->PixelPTEBytesPerRowStorageC[k] <= 64 * s->PTEBufferSizeInRequestsForChroma[k]) {
3072 p->PTEBufferSizeNotExceeded[k] = true;
3073 } else {
3074 p->PTEBufferSizeNotExceeded[k] = false;
3075 }
3076
3077 s->one_row_per_frame_fits_in_buffer[k] = (s->PixelPTEBytesPerRowY_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForLuma[k] &&
3078 s->PixelPTEBytesPerRowC_one_row_per_frame[k] <= 64 * 2 * s->PTEBufferSizeInRequestsForChroma[k]);
3079 #ifdef __DML_VBA_DEBUG__
3080 if (p->PTEBufferSizeNotExceeded[k] == 0 || s->one_row_per_frame_fits_in_buffer[k] == 0) {
3081 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
3082 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (before hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
3083 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowStorageY = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageY[k]);
3084 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowStorageC = %u\n", __func__, k, s->PixelPTEBytesPerRowStorageC[k]);
3085 dml2_printf("DML::%s: k=%u, PTEBufferSizeInRequestsForLuma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForLuma[k]);
3086 dml2_printf("DML::%s: k=%u, PTEBufferSizeInRequestsForChroma = %u\n", __func__, k, s->PTEBufferSizeInRequestsForChroma[k]);
3087 dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded (not one_row_per_frame) = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
3088
3089 dml2_printf("DML::%s: k=%u, HostVMDynamicLevels = %u\n", __func__, k, s->HostVMDynamicLevels);
3090 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowY_one_row_per_frame[k]);
3091 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC_one_row_per_frame = %u\n", __func__, k, s->PixelPTEBytesPerRowC_one_row_per_frame[k]);
3092 dml2_printf("DML::%s: k=%u, one_row_per_frame_fits_in_buffer = %u\n", __func__, k, s->one_row_per_frame_fits_in_buffer[k]);
3093 }
3094 #endif
3095 }
3096
3097 CalculateMALLUseForStaticScreen(
3098 p->display_cfg,
3099 p->NumberOfActiveSurfaces,
3100 p->MALLAllocatedForDCN,
3101 p->SurfaceSizeInMALL,
3102 s->one_row_per_frame_fits_in_buffer,
3103 // Output
3104 p->is_using_mall_for_ss);
3105
3106 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3107 if (p->display_cfg->gpuvm_enable) {
3108 if (p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.enable == 1) {
3109 p->PTE_BUFFER_MODE[k] = p->display_cfg->plane_descriptors[k].overrides.hw.force_pte_buffer_mode.value;
3110 }
3111 p->PTE_BUFFER_MODE[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) ||
3112 dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64);
3113 p->BIGK_FRAGMENT_SIZE[k] = (unsigned int)(math_log((float)p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes * 1024, 2) - 12);
3114 } else {
3115 p->PTE_BUFFER_MODE[k] = 0;
3116 p->BIGK_FRAGMENT_SIZE[k] = 0;
3117 }
3118 }
3119
3120 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
3121 p->DCCMetaBufferSizeNotExceeded[k] = true;
3122 #ifdef __DML_VBA_DEBUG__
3123 dml2_printf("DML::%s: k=%u, SurfaceSizeInMALL = %u\n", __func__, k, p->SurfaceSizeInMALL[k]);
3124 dml2_printf("DML::%s: k=%u, is_using_mall_for_ss = %u\n", __func__, k, p->is_using_mall_for_ss[k]);
3125 #endif
3126 p->use_one_row_for_frame[k] = p->myPipe[k].FORCE_ONE_ROW_FOR_FRAME || p->is_using_mall_for_ss[k] || (p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) ||
3127 (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) || (p->display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes > 64 && dml_is_vertical_rotation(p->myPipe[k].RotationAngle));
3128
3129 p->use_one_row_for_frame_flip[k] = p->use_one_row_for_frame[k] && !(p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame);
3130
3131 if (p->use_one_row_for_frame[k]) {
3132 p->dpte_row_height_luma[k] = s->dpte_row_height_luma_one_row_per_frame[k];
3133 p->dpte_row_width_luma_ub[k] = s->dpte_row_width_luma_ub_one_row_per_frame[k];
3134 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY_one_row_per_frame[k];
3135 p->dpte_row_height_chroma[k] = s->dpte_row_height_chroma_one_row_per_frame[k];
3136 p->dpte_row_width_chroma_ub[k] = s->dpte_row_width_chroma_ub_one_row_per_frame[k];
3137 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC_one_row_per_frame[k];
3138 p->PTEBufferSizeNotExceeded[k] = s->one_row_per_frame_fits_in_buffer[k];
3139 }
3140
3141 if (p->meta_row_bytes[k] <= p->DCCMetaBufferSizeBytes) {
3142 p->DCCMetaBufferSizeNotExceeded[k] = true;
3143 } else {
3144 p->DCCMetaBufferSizeNotExceeded[k] = false;
3145
3146 #ifdef __DML_VBA_DEBUG__
3147 dml2_printf("DML::%s: k=%d, meta_row_bytes = %d\n", __func__, k, p->meta_row_bytes[k]);
3148 dml2_printf("DML::%s: k=%d, DCCMetaBufferSizeBytes = %d\n", __func__, k, p->DCCMetaBufferSizeBytes);
3149 dml2_printf("DML::%s: k=%d, DCCMetaBufferSizeNotExceeded = %d\n", __func__, k, p->DCCMetaBufferSizeNotExceeded[k]);
3150 #endif
3151 }
3152
3153 s->PixelPTEBytesPerRowY[k] = s->PixelPTEBytesPerRowY[k] * (1 + 8 * s->HostVMDynamicLevels);
3154 s->PixelPTEBytesPerRowC[k] = s->PixelPTEBytesPerRowC[k] * (1 + 8 * s->HostVMDynamicLevels);
3155 p->PixelPTEBytesPerRow[k] = s->PixelPTEBytesPerRowY[k] + s->PixelPTEBytesPerRowC[k];
3156 p->dpte_row_bytes_per_row_l[k] = s->PixelPTEBytesPerRowY[k];
3157 p->dpte_row_bytes_per_row_c[k] = s->PixelPTEBytesPerRowC[k];
3158
3159 // if one row of dPTEs is meant to span the entire frame, then for these calculations, we will pretend like that one big row is fetched in two halfs
3160 if (p->use_one_row_for_frame[k])
3161 p->PixelPTEBytesPerRow[k] = p->PixelPTEBytesPerRow[k] / 2;
3162
3163 CalculateRowBandwidth(
3164 p->display_cfg->gpuvm_enable,
3165 p->use_one_row_for_frame[k],
3166 p->myPipe[k].SourcePixelFormat,
3167 p->myPipe[k].VRatio,
3168 p->myPipe[k].VRatioChroma,
3169 p->myPipe[k].DCCEnable,
3170 p->myPipe[k].HTotal / p->myPipe[k].PixelClock,
3171 s->PixelPTEBytesPerRowY[k],
3172 s->PixelPTEBytesPerRowC[k],
3173 p->dpte_row_height_luma[k],
3174 p->dpte_row_height_chroma[k],
3175
3176 p->mrq_present,
3177 p->meta_row_bytes_per_row_ub_l[k],
3178 p->meta_row_bytes_per_row_ub_c[k],
3179 p->meta_row_height_luma[k],
3180 p->meta_row_height_chroma[k],
3181
3182 // Output
3183 &p->dpte_row_bw[k],
3184 &p->meta_row_bw[k]);
3185 #ifdef __DML_VBA_DEBUG__
3186 dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
3187 dml2_printf("DML::%s: k=%u, use_one_row_for_frame_flip = %u\n", __func__, k, p->use_one_row_for_frame_flip[k]);
3188 dml2_printf("DML::%s: k=%u, UseMALLForPStateChange = %u\n", __func__, k, p->display_cfg->plane_descriptors[k].overrides.legacy_svp_config);
3189 dml2_printf("DML::%s: k=%u, dpte_row_height_luma = %u\n", __func__, k, p->dpte_row_height_luma[k]);
3190 dml2_printf("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
3191 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowY = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowY[k]);
3192 dml2_printf("DML::%s: k=%u, dpte_row_height_chroma = %u\n", __func__, k, p->dpte_row_height_chroma[k]);
3193 dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
3194 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRowC = %u (after hvm level)\n", __func__, k, s->PixelPTEBytesPerRowC[k]);
3195 dml2_printf("DML::%s: k=%u, PixelPTEBytesPerRow = %u\n", __func__, k, p->PixelPTEBytesPerRow[k]);
3196 dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, p->PTEBufferSizeNotExceeded[k]);
3197 dml2_printf("DML::%s: k=%u, gpuvm_enable = %u\n", __func__, k, p->display_cfg->gpuvm_enable);
3198 dml2_printf("DML::%s: k=%u, PTE_BUFFER_MODE = %u\n", __func__, k, p->PTE_BUFFER_MODE[k]);
3199 dml2_printf("DML::%s: k=%u, BIGK_FRAGMENT_SIZE = %u\n", __func__, k, p->BIGK_FRAGMENT_SIZE[k]);
3200 #endif
3201 }
3202 }
3203
CalculateUrgentLatency(double UrgentLatencyPixelDataOnly,double UrgentLatencyPixelMixedWithVMData,double UrgentLatencyVMDataOnly,bool DoUrgentLatencyAdjustment,double UrgentLatencyAdjustmentFabricClockComponent,double UrgentLatencyAdjustmentFabricClockReference,double FabricClock,double uclk_freq_mhz,enum dml2_qos_param_type qos_type,unsigned int urgent_ramp_uclk_cycles,unsigned int df_qos_response_time_fclk_cycles,unsigned int max_round_trip_to_furthest_cs_fclk_cycles,unsigned int mall_overhead_fclk_cycles,double umc_urgent_ramp_latency_margin,double fabric_max_transport_latency_margin)3204 static double CalculateUrgentLatency(
3205 double UrgentLatencyPixelDataOnly,
3206 double UrgentLatencyPixelMixedWithVMData,
3207 double UrgentLatencyVMDataOnly,
3208 bool DoUrgentLatencyAdjustment,
3209 double UrgentLatencyAdjustmentFabricClockComponent,
3210 double UrgentLatencyAdjustmentFabricClockReference,
3211 double FabricClock,
3212 double uclk_freq_mhz,
3213 enum dml2_qos_param_type qos_type,
3214 unsigned int urgent_ramp_uclk_cycles,
3215 unsigned int df_qos_response_time_fclk_cycles,
3216 unsigned int max_round_trip_to_furthest_cs_fclk_cycles,
3217 unsigned int mall_overhead_fclk_cycles,
3218 double umc_urgent_ramp_latency_margin,
3219 double fabric_max_transport_latency_margin)
3220 {
3221 double urgent_latency = 0;
3222 if (qos_type == dml2_qos_param_type_dcn4x) {
3223 urgent_latency = (df_qos_response_time_fclk_cycles + mall_overhead_fclk_cycles) / FabricClock
3224 + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1 + fabric_max_transport_latency_margin / 100.0)
3225 + urgent_ramp_uclk_cycles / uclk_freq_mhz * (1 + umc_urgent_ramp_latency_margin / 100.0);
3226 } else {
3227 urgent_latency = math_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
3228 if (DoUrgentLatencyAdjustment == true) {
3229 urgent_latency = urgent_latency + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
3230 }
3231 }
3232 #ifdef __DML_VBA_DEBUG__
3233 if (qos_type == dml2_qos_param_type_dcn4x) {
3234 dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type);
3235 dml2_printf("DML::%s: urgent_ramp_uclk_cycles = %d\n", __func__, urgent_ramp_uclk_cycles);
3236 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
3237 dml2_printf("DML::%s: umc_urgent_ramp_latency_margin = %f\n", __func__, umc_urgent_ramp_latency_margin);
3238 } else {
3239 dml2_printf("DML::%s: UrgentLatencyPixelDataOnly = %f\n", __func__, UrgentLatencyPixelDataOnly);
3240 dml2_printf("DML::%s: UrgentLatencyPixelMixedWithVMData = %f\n", __func__, UrgentLatencyPixelMixedWithVMData);
3241 dml2_printf("DML::%s: UrgentLatencyVMDataOnly = %f\n", __func__, UrgentLatencyVMDataOnly);
3242 dml2_printf("DML::%s: UrgentLatencyAdjustmentFabricClockComponent = %f\n", __func__, UrgentLatencyAdjustmentFabricClockComponent);
3243 dml2_printf("DML::%s: UrgentLatencyAdjustmentFabricClockReference = %f\n", __func__, UrgentLatencyAdjustmentFabricClockReference);
3244 }
3245 dml2_printf("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3246 dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, urgent_latency);
3247 #endif
3248 return urgent_latency;
3249 }
3250
CalculateTripToMemory(double UrgLatency,double FabricClock,double uclk_freq_mhz,enum dml2_qos_param_type qos_type,unsigned int trip_to_memory_uclk_cycles,unsigned int max_round_trip_to_furthest_cs_fclk_cycles,unsigned int mall_overhead_fclk_cycles,double umc_max_latency_margin,double fabric_max_transport_latency_margin)3251 static double CalculateTripToMemory(
3252 double UrgLatency,
3253 double FabricClock,
3254 double uclk_freq_mhz,
3255 enum dml2_qos_param_type qos_type,
3256 unsigned int trip_to_memory_uclk_cycles,
3257 unsigned int max_round_trip_to_furthest_cs_fclk_cycles,
3258 unsigned int mall_overhead_fclk_cycles,
3259 double umc_max_latency_margin,
3260 double fabric_max_transport_latency_margin)
3261 {
3262 double trip_to_memory_us;
3263 if (qos_type == dml2_qos_param_type_dcn4x) {
3264 trip_to_memory_us = mall_overhead_fclk_cycles / FabricClock
3265 + max_round_trip_to_furthest_cs_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0)
3266 + trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0);
3267 } else {
3268 trip_to_memory_us = UrgLatency;
3269 }
3270
3271 #ifdef __DML_VBA_DEBUG__
3272 if (qos_type == dml2_qos_param_type_dcn4x) {
3273 dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type);
3274 dml2_printf("DML::%s: max_round_trip_to_furthest_cs_fclk_cycles = %d\n", __func__, max_round_trip_to_furthest_cs_fclk_cycles);
3275 dml2_printf("DML::%s: mall_overhead_fclk_cycles = %d\n", __func__, mall_overhead_fclk_cycles);
3276 dml2_printf("DML::%s: trip_to_memory_uclk_cycles = %d\n", __func__, trip_to_memory_uclk_cycles);
3277 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
3278 dml2_printf("DML::%s: FabricClock = %f\n", __func__, FabricClock);
3279 dml2_printf("DML::%s: fabric_max_transport_latency_margin = %f\n", __func__, fabric_max_transport_latency_margin);
3280 dml2_printf("DML::%s: umc_max_latency_margin = %f\n", __func__, umc_max_latency_margin);
3281 } else {
3282 dml2_printf("DML::%s: UrgLatency = %f\n", __func__, UrgLatency);
3283 }
3284 dml2_printf("DML::%s: trip_to_memory_us = %f\n", __func__, trip_to_memory_us);
3285 #endif
3286
3287
3288 return trip_to_memory_us;
3289 }
3290
CalculateMetaTripToMemory(double UrgLatency,double FabricClock,double uclk_freq_mhz,enum dml2_qos_param_type qos_type,unsigned int meta_trip_to_memory_uclk_cycles,unsigned int meta_trip_to_memory_fclk_cycles,double umc_max_latency_margin,double fabric_max_transport_latency_margin)3291 static double CalculateMetaTripToMemory(
3292 double UrgLatency,
3293 double FabricClock,
3294 double uclk_freq_mhz,
3295 enum dml2_qos_param_type qos_type,
3296 unsigned int meta_trip_to_memory_uclk_cycles,
3297 unsigned int meta_trip_to_memory_fclk_cycles,
3298 double umc_max_latency_margin,
3299 double fabric_max_transport_latency_margin)
3300 {
3301 double meta_trip_to_memory_us;
3302 if (qos_type == dml2_qos_param_type_dcn4x) {
3303 meta_trip_to_memory_us = meta_trip_to_memory_fclk_cycles / FabricClock * (1.0 + fabric_max_transport_latency_margin / 100.0)
3304 + meta_trip_to_memory_uclk_cycles / uclk_freq_mhz * (1.0 + umc_max_latency_margin / 100.0);
3305 } else {
3306 meta_trip_to_memory_us = UrgLatency;
3307 }
3308
3309 #ifdef __DML_VBA_DEBUG__
3310 if (qos_type == dml2_qos_param_type_dcn4x) {
3311 dml2_printf("DML::%s: qos_type = %d\n", __func__, qos_type);
3312 dml2_printf("DML::%s: meta_trip_to_memory_fclk_cycles = %d\n", __func__, meta_trip_to_memory_fclk_cycles);
3313 dml2_printf("DML::%s: meta_trip_to_memory_uclk_cycles = %d\n", __func__, meta_trip_to_memory_uclk_cycles);
3314 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, uclk_freq_mhz);
3315 } else {
3316 dml2_printf("DML::%s: UrgLatency = %f\n", __func__, UrgLatency);
3317 }
3318 dml2_printf("DML::%s: meta_trip_to_memory_us = %f\n", __func__, meta_trip_to_memory_us);
3319 #endif
3320
3321
3322 return meta_trip_to_memory_us;
3323 }
3324
// Derives the cursor request attributes from the cursor width (in pixels) and bits per pixel.
//
// Outputs:
//   cursor_lines_per_chunk - lines per cursor chunk; 16 for 2bpp, width dependent for 32/64bpp,
//                            0 when cursor_bpp is not one of 2, 32 or 64
//   cursor_bytes_per_line  - bytes per cursor line, rounded up to the request size (64/128/256)
//   cursor_bytes_per_chunk - cursor_bytes_per_line * cursor_lines_per_chunk
//   cursor_bytes           - cursor_bytes_per_line * cursor_width (a square cursor is assumed)
//
// An unsupported cursor_bpp together with a non-zero cursor_width is reported and asserted.
static void calculate_cursor_req_attributes(
	unsigned int cursor_width,
	unsigned int cursor_bpp,

	// output
	unsigned int *cursor_lines_per_chunk,
	unsigned int *cursor_bytes_per_line,
	unsigned int *cursor_bytes_per_chunk,
	unsigned int *cursor_bytes)
{
	unsigned int cursor_pitch = 0;
	unsigned int cursor_bytes_per_req = 0;
	unsigned int cursor_width_bytes = 0;
	unsigned int cursor_height = 0;

	//SW determines the cursor pitch to support the maximum cursor_width that will be used but the following restrictions apply.
	//- For 2bpp, cursor_pitch = 256 pixels due to min cursor request size of 64B
	//- For 32 or 64 bpp, cursor_pitch = 64, 128 or 256 pixels depending on the cursor width
	// The pitch is only reported in the debug log. It is left at 0 for a zero width, since the
	// logarithm of 0 is not a meaningful shift count.
	if (cursor_bpp == 2)
		cursor_pitch = 256;
	else if (cursor_width > 0)
		cursor_pitch = (unsigned int)1 << (unsigned int)math_ceil2(math_log((float)cursor_width, 2), 1);

	//The cursor requestor uses a cursor request size of 64B, 128B, or 256B depending on the cursor_width and cursor_bpp as follows.

	cursor_width_bytes = (unsigned int)math_ceil2((double)cursor_width * cursor_bpp / 8, 1);
	if (cursor_width_bytes <= 64)
		cursor_bytes_per_req = 64;
	else if (cursor_width_bytes <= 128)
		cursor_bytes_per_req = 128;
	else
		cursor_bytes_per_req = 256;

	//If cursor_width_bytes is greater than 256B, then multiple 256B requests are issued to fetch the entire cursor line.
	*cursor_bytes_per_line = (unsigned int)math_ceil2((double)cursor_width_bytes, cursor_bytes_per_req);

	//Nominally, the cursor chunk is 1KB or 2KB but it is restricted to a power of 2 number of lines with a maximum of 16 lines.
	if (cursor_bpp == 2) {
		*cursor_lines_per_chunk = 16;
	} else if (cursor_bpp == 32) {
		if (cursor_width <= 32)
			*cursor_lines_per_chunk = 16;
		else if (cursor_width <= 64)
			*cursor_lines_per_chunk = 8;
		else if (cursor_width <= 128)
			*cursor_lines_per_chunk = 4;
		else
			*cursor_lines_per_chunk = 2;
	} else if (cursor_bpp == 64) {
		if (cursor_width <= 16)
			*cursor_lines_per_chunk = 16;
		else if (cursor_width <= 32)
			*cursor_lines_per_chunk = 8;
		else if (cursor_width <= 64)
			*cursor_lines_per_chunk = 4;
		else if (cursor_width <= 128)
			*cursor_lines_per_chunk = 2;
		else
			*cursor_lines_per_chunk = 1;
	} else {
		// Always write the output: it is read below to compute cursor_bytes_per_chunk, and
		// would otherwise carry whatever value the caller's storage happened to hold.
		*cursor_lines_per_chunk = 0;
		if (cursor_width > 0) {
			dml2_printf("DML::%s: Invalid cursor_bpp = %d\n", __func__, cursor_bpp);
			dml2_assert(0);
		}
	}

	*cursor_bytes_per_chunk = *cursor_bytes_per_line * *cursor_lines_per_chunk;

	// For the cursor implementation, all requested data is stored in the return buffer. Given this fact, the cursor_bytes can be directly compared with the CursorBufferSize.
	// Only cursor_width is provided for worst case sizing so assume that the cursor is square
	cursor_height = cursor_width;
	*cursor_bytes = *cursor_bytes_per_line * cursor_height;
#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: cursor_bpp = %d\n", __func__, cursor_bpp);
	dml2_printf("DML::%s: cursor_width = %d\n", __func__, cursor_width);
	dml2_printf("DML::%s: cursor_width_bytes = %d\n", __func__, cursor_width_bytes);
	dml2_printf("DML::%s: cursor_bytes_per_req = %d\n", __func__, cursor_bytes_per_req);
	dml2_printf("DML::%s: cursor_lines_per_chunk = %d\n", __func__, *cursor_lines_per_chunk);
	dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, *cursor_bytes_per_line);
	dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, *cursor_bytes_per_chunk);
	dml2_printf("DML::%s: cursor_bytes = %d\n", __func__, *cursor_bytes);
	dml2_printf("DML::%s: cursor_pitch = %d\n", __func__, cursor_pitch);
#endif
}
3409
// Computes the cursor urgent burst factor from how much time the cursor buffer can cover.
//
// CursorBufferSize is in kbytes. When CursorWidth is 0 nothing is computed and both outputs
// are left untouched. Otherwise, if the buffered time does not exceed UrgentLatency,
// *NotEnoughUrgentLatencyHiding is set and the factor is 0; else the factor is
// buffered_time / (buffered_time - UrgentLatency).
static void calculate_cursor_urgent_burst_factor(
	unsigned int CursorBufferSize,
	unsigned int CursorWidth,
	unsigned int cursor_bytes_per_chunk,
	unsigned int cursor_lines_per_chunk,
	double LineTime,
	double UrgentLatency,

	double *UrgentBurstFactorCursor,
	bool *NotEnoughUrgentLatencyHiding)
{
	unsigned int lines_buffered;
	double buffered_time;
	double hiding_margin;

	// No cursor on this pipe: leave the outputs as they are.
	if (CursorWidth == 0)
		return;

	// Whole chunks that fit in the buffer, expressed in cursor lines.
	lines_buffered = (unsigned int)math_floor2(CursorBufferSize * 1024.0 / (double)cursor_bytes_per_chunk, 1) * cursor_lines_per_chunk;
	buffered_time = lines_buffered * LineTime;
	hiding_margin = buffered_time - UrgentLatency;

	if (hiding_margin <= 0) {
		*NotEnoughUrgentLatencyHiding = true;
		*UrgentBurstFactorCursor = 0;
	} else {
		*NotEnoughUrgentLatencyHiding = false;
		*UrgentBurstFactorCursor = buffered_time / hiding_margin;
	}

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: LinesInCursorBuffer = %u\n", __func__, lines_buffered);
	dml2_printf("DML::%s: CursorBufferSizeInTime = %f\n", __func__, buffered_time);
	dml2_printf("DML::%s: CursorBufferSize = %u (kbytes)\n", __func__, CursorBufferSize);
	dml2_printf("DML::%s: cursor_bytes_per_chunk = %u\n", __func__, cursor_bytes_per_chunk);
	dml2_printf("DML::%s: cursor_lines_per_chunk = %u\n", __func__, cursor_lines_per_chunk);
	dml2_printf("DML::%s: UrgentBurstFactorCursor = %f\n", __func__, *UrgentBurstFactorCursor);
	dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding);
#endif
}
3448
/*
 * CalculateUrgentBurstFactor - urgent burst factors for the luma and chroma
 * planes of one surface.
 *
 * All three outputs start at 0. For each plane the DET size in bytes
 * (1024 * 1024 is substituted when dml_is_phantom_pipe() is true) is converted
 * to lines, floored to a multiple of the swath height, and converted to time
 * with LineTime and the vertical ratio. If that time does not exceed
 * UrgentLatency, *NotEnoughUrgentLatencyHiding is set and the factor stays 0;
 * otherwise the factor is time / (time - UrgentLatency).
 *
 * Chroma is evaluated only when BytePerPixelInDETC > 0.
 */
static void CalculateUrgentBurstFactor(
	const struct dml2_plane_parameters *plane_cfg,
	unsigned int swath_width_luma_ub,
	unsigned int swath_width_chroma_ub,
	unsigned int SwathHeightY,
	unsigned int SwathHeightC,
	double LineTime,
	double UrgentLatency,
	double VRatio,
	double VRatioC,
	double BytePerPixelInDETY,
	double BytePerPixelInDETC,
	unsigned int DETBufferSizeY,
	unsigned int DETBufferSizeC,
	// Output
	double *UrgentBurstFactorLuma,
	double *UrgentBurstFactorChroma,
	bool *NotEnoughUrgentLatencyHiding)
{
	unsigned int det_bytes;
	double det_lines_luma;
	double det_lines_chroma;
	double det_time_luma;
	double det_time_chroma;

	*NotEnoughUrgentLatencyHiding = 0;
	*UrgentBurstFactorLuma = 0;
	*UrgentBurstFactorChroma = 0;

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio);
	dml2_printf("DML::%s: VRatioC = %f\n", __func__, VRatioC);
	dml2_printf("DML::%s: DETBufferSizeY = %d\n", __func__, DETBufferSizeY);
	dml2_printf("DML::%s: DETBufferSizeC = %d\n", __func__, DETBufferSizeC);
	dml2_printf("DML::%s: BytePerPixelInDETY = %f\n", __func__, BytePerPixelInDETY);
	dml2_printf("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub);
	dml2_printf("DML::%s: LineTime = %f\n", __func__, LineTime);
#endif
	DML2_ASSERT(VRatio > 0);

	/* Luma plane. */
	if (dml_is_phantom_pipe(plane_cfg))
		det_bytes = 1024 * 1024;
	else
		det_bytes = DETBufferSizeY;

	det_lines_luma = det_bytes / BytePerPixelInDETY / swath_width_luma_ub;
	det_time_luma = math_floor2(det_lines_luma, SwathHeightY) * LineTime / VRatio;

	if (det_time_luma - UrgentLatency <= 0) {
		*NotEnoughUrgentLatencyHiding = 1;
		*UrgentBurstFactorLuma = 0;
	} else {
		*UrgentBurstFactorLuma = det_time_luma / (det_time_luma - UrgentLatency);
	}

	/* Chroma plane, only when the format carries one. */
	if (BytePerPixelInDETC > 0) {
		if (dml_is_phantom_pipe(plane_cfg))
			det_bytes = 1024 * 1024;
		else
			det_bytes = DETBufferSizeC;

		det_lines_chroma = det_bytes / BytePerPixelInDETC / swath_width_chroma_ub;
		det_time_chroma = math_floor2(det_lines_chroma, SwathHeightC) * LineTime / VRatioC;

		if (det_time_chroma - UrgentLatency <= 0) {
			*NotEnoughUrgentLatencyHiding = 1;
			*UrgentBurstFactorChroma = 0;
		} else {
			*UrgentBurstFactorChroma = det_time_chroma / (det_time_chroma - UrgentLatency);
		}
	}

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: LinesInDETLuma = %f\n", __func__, det_lines_luma);
	dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
	dml2_printf("DML::%s: DETBufferSizeInTimeLuma = %f\n", __func__, det_time_luma);
	dml2_printf("DML::%s: UrgentBurstFactorLuma = %f\n", __func__, *UrgentBurstFactorLuma);
	dml2_printf("DML::%s: UrgentBurstFactorChroma = %f\n", __func__, *UrgentBurstFactorChroma);
	dml2_printf("DML::%s: NotEnoughUrgentLatencyHiding = %d\n", __func__, *NotEnoughUrgentLatencyHiding);
#endif
}
3520
CalculateDCFCLKDeepSleep(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelY[],unsigned int BytePerPixelC[],unsigned int SwathWidthY[],unsigned int SwathWidthC[],unsigned int DPPPerSurface[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],unsigned int ReturnBusWidth,double * DCFClkDeepSleep)3521 static void CalculateDCFCLKDeepSleep(
3522 const struct dml2_display_cfg *display_cfg,
3523 unsigned int NumberOfActiveSurfaces,
3524 unsigned int BytePerPixelY[],
3525 unsigned int BytePerPixelC[],
3526 unsigned int SwathWidthY[],
3527 unsigned int SwathWidthC[],
3528 unsigned int DPPPerSurface[],
3529 double PSCL_THROUGHPUT[],
3530 double PSCL_THROUGHPUT_CHROMA[],
3531 double Dppclk[],
3532 double ReadBandwidthLuma[],
3533 double ReadBandwidthChroma[],
3534 unsigned int ReturnBusWidth,
3535
3536 // Output
3537 double *DCFClkDeepSleep)
3538 {
3539 double DisplayPipeLineDeliveryTimeLuma;
3540 double DisplayPipeLineDeliveryTimeChroma;
3541 double DCFClkDeepSleepPerSurface[DML2_MAX_PLANES];
3542 double ReadBandwidth = 0.0;
3543
3544 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
3545 double pixel_rate_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
3546
3547 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) {
3548 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_rate_mhz;
3549 } else {
3550 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
3551 }
3552 if (BytePerPixelC[k] == 0) {
3553 DisplayPipeLineDeliveryTimeChroma = 0;
3554 } else {
3555 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) {
3556 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerSurface[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_rate_mhz;
3557 } else {
3558 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
3559 }
3560 }
3561
3562 if (BytePerPixelC[k] > 0) {
3563 DCFClkDeepSleepPerSurface[k] = math_max2(__DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma,
3564 __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma);
3565 } else {
3566 DCFClkDeepSleepPerSurface[k] = __DML2_CALCS_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma;
3567 }
3568 DCFClkDeepSleepPerSurface[k] = math_max2(DCFClkDeepSleepPerSurface[k], pixel_rate_mhz / 16);
3569
3570 #ifdef __DML_VBA_DEBUG__
3571 dml2_printf("DML::%s: k=%u, PixelClock = %f\n", __func__, k, pixel_rate_mhz);
3572 dml2_printf("DML::%s: k=%u, DCFClkDeepSleepPerSurface = %f\n", __func__, k, DCFClkDeepSleepPerSurface[k]);
3573 #endif
3574 }
3575
3576 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
3577 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k];
3578 }
3579
3580 *DCFClkDeepSleep = math_max2(8.0, __DML2_CALCS_DCFCLK_FACTOR__ * ReadBandwidth / (double)ReturnBusWidth);
3581
3582 #ifdef __DML_VBA_DEBUG__
3583 dml2_printf("DML::%s: __DML2_CALCS_DCFCLK_FACTOR__ = %f\n", __func__, __DML2_CALCS_DCFCLK_FACTOR__);
3584 dml2_printf("DML::%s: ReadBandwidth = %f\n", __func__, ReadBandwidth);
3585 dml2_printf("DML::%s: ReturnBusWidth = %u\n", __func__, ReturnBusWidth);
3586 dml2_printf("DML::%s: DCFClkDeepSleep = %f\n", __func__, *DCFClkDeepSleep);
3587 #endif
3588
3589 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
3590 *DCFClkDeepSleep = math_max2(*DCFClkDeepSleep, DCFClkDeepSleepPerSurface[k]);
3591 }
3592 dml2_printf("DML::%s: DCFClkDeepSleep = %f (final)\n", __func__, *DCFClkDeepSleep);
3593 }
3594
/*
 * CalculateWriteBackDelay - writeback delay derived from the writeback
 * scaler geometry.
 *
 * Returns 0 when the computed index of the last unclamped output line is
 * negative; otherwise
 *   last_line * line_length + (HTotal - WritebackDestinationWidth) + 80.
 *
 * WritebackPixelFormat and WritebackHRatio are accepted but not read here.
 */
static double CalculateWriteBackDelay(
	enum dml2_source_format_class WritebackPixelFormat,
	double WritebackHRatio,
	double WritebackVRatio,
	unsigned int WritebackVTaps,
	unsigned int WritebackDestinationWidth,
	unsigned int WritebackDestinationHeight,
	unsigned int WritebackSourceHeight,
	unsigned int HTotal)
{
	double v_init = (WritebackVRatio + WritebackVTaps + 1) / 2;
	double line_length = math_max2((double)WritebackDestinationWidth,
		math_ceil2((double)WritebackDestinationWidth / 6.0, 1.0) * WritebackVTaps);
	double last_line_unclamped = WritebackDestinationHeight - 1 -
		math_ceil2(((double)WritebackSourceHeight - (double)v_init) / (double)WritebackVRatio, 1.0);

	if (last_line_unclamped < 0)
		return 0;

	return last_line_unclamped * line_length + (HTotal - WritebackDestinationWidth) + 80;
}
3620
CalculateMaxVStartup(bool ptoi_supported,unsigned int vblank_nom_default_us,const struct dml2_timing_cfg * timing,double write_back_delay_us)3621 static unsigned int CalculateMaxVStartup(
3622 bool ptoi_supported,
3623 unsigned int vblank_nom_default_us,
3624 const struct dml2_timing_cfg *timing,
3625 double write_back_delay_us)
3626 {
3627 unsigned int vblank_size = 0;
3628 unsigned int max_vstartup_lines = 0;
3629
3630 double line_time_us = (double)timing->h_total / ((double)timing->pixel_clock_khz / 1000);
3631 unsigned int vblank_actual = timing->v_total - timing->v_active;
3632 unsigned int vblank_nom_default_in_line = (unsigned int)math_floor2((double)vblank_nom_default_us / line_time_us, 1.0);
3633 unsigned int vblank_nom_input = (unsigned int)math_min2(timing->vblank_nom, vblank_nom_default_in_line);
3634 unsigned int vblank_avail = (vblank_nom_input == 0) ? vblank_nom_default_in_line : vblank_nom_input;
3635
3636 vblank_size = (unsigned int)math_min2(vblank_actual, vblank_avail);
3637
3638 if (timing->interlaced && !ptoi_supported)
3639 max_vstartup_lines = (unsigned int)(math_floor2(vblank_size / 2.0, 1.0));
3640 else
3641 max_vstartup_lines = vblank_size - (unsigned int)math_max2(1.0, math_ceil2(write_back_delay_us / line_time_us, 1.0));
3642 #ifdef __DML_VBA_DEBUG__
3643 dml2_printf("DML::%s: VBlankNom = %u\n", __func__, timing->vblank_nom);
3644 dml2_printf("DML::%s: vblank_nom_default_us = %u\n", __func__, vblank_nom_default_us);
3645 dml2_printf("DML::%s: line_time_us = %f\n", __func__, line_time_us);
3646 dml2_printf("DML::%s: vblank_actual = %u\n", __func__, vblank_actual);
3647 dml2_printf("DML::%s: vblank_avail = %u\n", __func__, vblank_avail);
3648 dml2_printf("DML::%s: max_vstartup_lines = %u\n", __func__, max_vstartup_lines);
3649 #endif
3650 return max_vstartup_lines;
3651 }
3652
/*
 * CalculateSwathAndDETConfiguration - pick swath heights, request sizes and
 * the luma/chroma DET split for every active surface.
 *
 * Steps, in the order the code performs them:
 *  1. CalculateSwathWidth() fills the swath widths, the maximum swath heights
 *     and the requests-per-swath outputs.
 *  2. full_swath_bytes_l/c are computed as
 *     swath_width_ub * BytePerPixDET * MaximumSwathHeight (rounded up to a
 *     multiple of 256 for dml2_420_10).
 *  3. UnboundedRequest() decides *p->UnboundedRequestEnabled and
 *     CalculateDETBufferSize() fills p->DETBufferSizeInKByte and
 *     *p->CompressedBufferSizeInkByte.
 *  4. Per surface: choose full or halved swath heights so the swaths fit in
 *     half of the DET, set the request sizes, record viewport support and
 *     split the DET between luma and chroma.
 *  5. *p->compbuf_reserved_space_64b and *p->hw_debug5 are derived.
 *
 * @scratch: only scratch->CalculateDETBufferSize_locals is used here; it is
 *           handed to CalculateDETBufferSize().
 * @p:       parameter bundle holding both the inputs and the output pointers.
 */
static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch *scratch,
	struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *p)
{
	unsigned int MaximumSwathHeightY[DML2_MAX_PLANES] = { 0 };
	unsigned int MaximumSwathHeightC[DML2_MAX_PLANES] = { 0 };
	unsigned int RoundedUpSwathSizeBytesY[DML2_MAX_PLANES] = { 0 };
	unsigned int RoundedUpSwathSizeBytesC[DML2_MAX_PLANES] = { 0 };
	unsigned int SwathWidthSingleDPP[DML2_MAX_PLANES] = { 0 };
	unsigned int SwathWidthSingleDPPChroma[DML2_MAX_PLANES] = { 0 };

	unsigned int TotalActiveDPP = 0;
	bool NoChromaOrLinear = true;
	unsigned int SurfaceDoingUnboundedRequest = 0;
	unsigned int DETBufferSizeInKByteForSwathCalculation;

	const long TTUFIFODEPTH = 8;
	const long MAXIMUMCOMPRESSION = 4;

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: ForceSingleDPP = %u\n", __func__, p->ForceSingleDPP);
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		dml2_printf("DML::%s: DPPPerSurface[%u] = %u\n", __func__, k, p->DPPPerSurface[k]);
	}
#endif
	CalculateSwathWidth(
		p->display_cfg,
		p->ForceSingleDPP,
		p->NumberOfActiveSurfaces,
		p->ODMMode,
		p->BytePerPixY,
		p->BytePerPixC,
		p->Read256BytesBlockHeightY,
		p->Read256BytesBlockHeightC,
		p->Read256BytesBlockWidthY,
		p->Read256BytesBlockWidthC,
		p->surf_linear128_l,
		p->surf_linear128_c,
		p->DPPPerSurface,

		// Output
		p->req_per_swath_ub_l,
		p->req_per_swath_ub_c,
		SwathWidthSingleDPP,
		SwathWidthSingleDPPChroma,
		p->SwathWidth,
		p->SwathWidthChroma,
		MaximumSwathHeightY,
		MaximumSwathHeightC,
		p->swath_width_luma_ub,
		p->swath_width_chroma_ub);

	/* Size in bytes of one swath at the maximum swath height, per plane. */
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		p->full_swath_bytes_l[k] = (unsigned int)(p->swath_width_luma_ub[k] * p->BytePerPixDETY[k] * MaximumSwathHeightY[k]);
		p->full_swath_bytes_c[k] = (unsigned int)(p->swath_width_chroma_ub[k] * p->BytePerPixDETC[k] * MaximumSwathHeightC[k]);
#ifdef __DML_VBA_DEBUG__
		dml2_printf("DML::%s: k=%u DPPPerSurface = %u\n", __func__, k, p->DPPPerSurface[k]);
		dml2_printf("DML::%s: k=%u swath_width_luma_ub = %u\n", __func__, k, p->swath_width_luma_ub[k]);
		dml2_printf("DML::%s: k=%u BytePerPixDETY = %f\n", __func__, k, p->BytePerPixDETY[k]);
		dml2_printf("DML::%s: k=%u MaximumSwathHeightY = %u\n", __func__, k, MaximumSwathHeightY[k]);
		dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]);
		dml2_printf("DML::%s: k=%u swath_width_chroma_ub = %u\n", __func__, k, p->swath_width_chroma_ub[k]);
		dml2_printf("DML::%s: k=%u BytePerPixDETC = %f\n", __func__, k, p->BytePerPixDETC[k]);
		dml2_printf("DML::%s: k=%u MaximumSwathHeightC = %u\n", __func__, k, MaximumSwathHeightC[k]);
		dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]);
#endif
		/* dml2_420_10 swath sizes are rounded up to a multiple of 256 bytes. */
		if (p->display_cfg->plane_descriptors[k].pixel_format == dml2_420_10) {
			p->full_swath_bytes_l[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_l[k], 256));
			p->full_swath_bytes_c[k] = (unsigned int)(math_ceil2((double)p->full_swath_bytes_c[k], 256));
		}
	}

	/*
	 * Gather the inputs for the unbounded-request decision.
	 * SurfaceDoingUnboundedRequest ends up as the highest index k with
	 * DPPPerSurface[k] > 0.
	 */
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		TotalActiveDPP = TotalActiveDPP + (p->ForceSingleDPP ? 1 : p->DPPPerSurface[k]);
		if (p->DPPPerSurface[k] > 0)
			SurfaceDoingUnboundedRequest = k;
		if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format) || p->display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha
			|| p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
			NoChromaOrLinear = false;
		}
	}

	*p->UnboundedRequestEnabled = UnboundedRequest(p->display_cfg->overrides.hw.force_unbounded_requesting.enable, p->display_cfg->overrides.hw.force_unbounded_requesting.value, TotalActiveDPP, NoChromaOrLinear);

	CalculateDETBufferSize(
		&scratch->CalculateDETBufferSize_locals,
		p->display_cfg,
		p->ForceSingleDPP,
		p->NumberOfActiveSurfaces,
		*p->UnboundedRequestEnabled,
		p->nomDETInKByte,
		p->MaxTotalDETInKByte,
		p->ConfigReturnBufferSizeInKByte,
		p->MinCompressedBufferSizeInKByte,
		p->ConfigReturnBufferSegmentSizeInkByte,
		p->CompressedBufferSegmentSizeInkByte,
		p->ReadBandwidthLuma,
		p->ReadBandwidthChroma,
		p->full_swath_bytes_l,
		p->full_swath_bytes_c,
		p->DPPPerSurface,

		// Output
		p->DETBufferSizeInKByte, // per hubp pipe
		p->CompressedBufferSizeInkByte);

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: TotalActiveDPP = %u\n", __func__, TotalActiveDPP);
	dml2_printf("DML::%s: nomDETInKByte = %u\n", __func__, p->nomDETInKByte);
	dml2_printf("DML::%s: ConfigReturnBufferSizeInKByte = %u\n", __func__, p->ConfigReturnBufferSizeInKByte);
	dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, *p->UnboundedRequestEnabled);
	dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u\n", __func__, *p->CompressedBufferSizeInkByte);
#endif

	/* Stays true unless some surface fails the viewport check below. */
	*p->ViewportSizeSupport = true;
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {

		/* Phantom pipes are checked against a fixed 1024 KB instead of their own DET size. */
		DETBufferSizeInKByteForSwathCalculation = (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 1024 : p->DETBufferSizeInKByte[k]);
#ifdef __DML_VBA_DEBUG__
		dml2_printf("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation = %u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
#endif
		if (p->display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
			/* Linear: always the maximum swath height; request size follows the surf_linear128 flags. */
			p->SwathHeightY[k] = MaximumSwathHeightY[k];
			p->SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];

			if (p->surf_linear128_l[k])
				p->request_size_bytes_luma[k] = 128;
			else
				p->request_size_bytes_luma[k] = 256;

			if (p->surf_linear128_c[k])
				p->request_size_bytes_chroma[k] = 128;
			else
				p->request_size_bytes_chroma[k] = 256;

		} else if (p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			/* Both full-height swaths fit in half of the DET. */
			p->SwathHeightY[k] = MaximumSwathHeightY[k];
			p->SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];
			p->request_size_bytes_luma[k] = 256;
			p->request_size_bytes_chroma[k] = 256;

		} else if (p->full_swath_bytes_l[k] >= 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			/* Luma swath is at least 1.5x the chroma swath: halve luma only. */
			p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
			p->SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2;
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k];
			p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
			p->request_size_bytes_chroma[k] = 256;

		} else if (p->full_swath_bytes_l[k] < 1.5 * p->full_swath_bytes_c[k] && p->full_swath_bytes_l[k] + p->full_swath_bytes_c[k] / 2 <= DETBufferSizeInKByteForSwathCalculation * 1024 / 2) {
			/* Luma swath is below 1.5x the chroma swath: halve chroma only. */
			p->SwathHeightY[k] = MaximumSwathHeightY[k];
			p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k];
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2;
			p->request_size_bytes_luma[k] = 256;
			p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;

		} else {
			/* Nothing above fits: halve both swath heights. */
			p->SwathHeightY[k] = MaximumSwathHeightY[k] / 2;
			p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2;
			RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2;
			RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2;
			p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
			p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;
		}

		/* A zero chroma swath height means no chroma requests at all. */
		if (p->SwathHeightC[k] == 0)
			p->request_size_bytes_chroma[k] = 0;

		/*
		 * Unsupported when even the halved swaths exceed half of the DET, or
		 * when a swath width exceeds its maximum (chroma only checked when
		 * SwathHeightC is non-zero).
		 */
		if ((p->full_swath_bytes_l[k] / 2 + p->full_swath_bytes_c[k] / 2 > DETBufferSizeInKByteForSwathCalculation * 1024 / 2) ||
			p->SwathWidth[k] > p->MaximumSwathWidthLuma[k] || (p->SwathHeightC[k] > 0 && p->SwathWidthChroma[k] > p->MaximumSwathWidthChroma[k])) {
			*p->ViewportSizeSupport = false;
			dml2_printf("DML::%s: k=%u full_swath_bytes_l=%u\n", __func__, k, p->full_swath_bytes_l[k]);
			dml2_printf("DML::%s: k=%u full_swath_bytes_c=%u\n", __func__, k, p->full_swath_bytes_c[k]);
			dml2_printf("DML::%s: k=%u DETBufferSizeInKByteForSwathCalculation=%u\n", __func__, k, DETBufferSizeInKByteForSwathCalculation);
			dml2_printf("DML::%s: k=%u SwathWidth=%u\n", __func__, k, p->SwathWidth[k]);
			dml2_printf("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, p->MaximumSwathWidthLuma[k]);
			dml2_printf("DML::%s: k=%u SwathWidthChroma=%d\n", __func__, k, p->SwathWidthChroma[k]);
			dml2_printf("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, p->MaximumSwathWidthChroma[k]);
			p->ViewportSizeSupportPerSurface[k] = false;
		} else {
			p->ViewportSizeSupportPerSurface[k] = true;
		}

		/*
		 * Split the surface's real DET allocation (not the phantom
		 * substitute used above) between luma and chroma.
		 */
		if (p->SwathHeightC[k] == 0) {
#ifdef __DML_VBA_DEBUG__
			dml2_printf("DML::%s: k=%u, All DET will be used for plane0\n", __func__, k);
#endif
			p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024;
			p->DETBufferSizeC[k] = 0;
		} else if (RoundedUpSwathSizeBytesY[k] <= 1.5 * RoundedUpSwathSizeBytesC[k]) {
#ifdef __DML_VBA_DEBUG__
			dml2_printf("DML::%s: k=%u, Half DET will be used for plane0, and half for plane1\n", __func__, k);
#endif
			p->DETBufferSizeY[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
			p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 / 2;
		} else {
#ifdef __DML_VBA_DEBUG__
			dml2_printf("DML::%s: k=%u, 2/3 DET will be used for plane0, and 1/3 for plane1\n", __func__, k);
#endif
			/* Luma gets 2/3 floored to a multiple of 1024 bytes; chroma takes the remainder. */
			p->DETBufferSizeY[k] = (unsigned int)(math_floor2(p->DETBufferSizeInKByte[k] * 1024 * 2 / 3, 1024));
			p->DETBufferSizeC[k] = p->DETBufferSizeInKByte[k] * 1024 - p->DETBufferSizeY[k];
		}

#ifdef __DML_VBA_DEBUG__
		dml2_printf("DML::%s: k=%u SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
		dml2_printf("DML::%s: k=%u SwathHeightC = %u\n", __func__, k, p->SwathHeightC[k]);
		dml2_printf("DML::%s: k=%u full_swath_bytes_l = %u\n", __func__, k, p->full_swath_bytes_l[k]);
		dml2_printf("DML::%s: k=%u full_swath_bytes_c = %u\n", __func__, k, p->full_swath_bytes_c[k]);
		dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesY = %u\n", __func__, k, RoundedUpSwathSizeBytesY[k]);
		dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
		dml2_printf("DML::%s: k=%u DETBufferSizeInKByte = %u\n", __func__, k, p->DETBufferSizeInKByte[k]);
		dml2_printf("DML::%s: k=%u DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
		dml2_printf("DML::%s: k=%u DETBufferSizeC = %u\n", __func__, k, p->DETBufferSizeC[k]);
		dml2_printf("DML::%s: k=%u ViewportSizeSupportPerSurface = %u\n", __func__, k, p->ViewportSizeSupportPerSurface[k]);
#endif

	}

	/* Baseline reservation: two pixel chunks, expressed in 64-byte units. */
	*p->compbuf_reserved_space_64b = 2 * p->pixel_chunk_size_kbytes * 1024 / 64;
	if (*p->UnboundedRequestEnabled) {
		/*
		 * With unbounded requesting the reservation is raised to at least the
		 * ROB size minus TTUFIFODEPTH luma swaths of the unbounded surface
		 * (divided by MAXIMUMCOMPRESSION when mrq_present), all in 64-byte units.
		 */
		*p->compbuf_reserved_space_64b = (unsigned int)math_ceil2(math_max2(*p->compbuf_reserved_space_64b,
			(double)(p->rob_buffer_size_kbytes * 1024 / 64) - (double)(RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest] * TTUFIFODEPTH / (p->mrq_present ? MAXIMUMCOMPRESSION : 1) / 64)), 1.0);
#ifdef __DML_VBA_DEBUG__
		dml2_printf("DML::%s: RoundedUpSwathSizeBytesY[%d] = %u\n", __func__, SurfaceDoingUnboundedRequest, RoundedUpSwathSizeBytesY[SurfaceDoingUnboundedRequest]);
		dml2_printf("DML::%s: rob_buffer_size_kbytes = %u\n", __func__, p->rob_buffer_size_kbytes);
#endif
	}
#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: compbuf_reserved_space_64b = %u\n", __func__, *p->compbuf_reserved_space_64b);
#endif

	/*
	 * hw_debug5 is set when, without MRQ and without unbounded requesting, a
	 * single active DPP has a DCC-enabled surface and the ROB plus the
	 * (maximally compressed) compressed buffer exceed TTUFIFODEPTH swaths of
	 * luma + chroma.
	 */
	*p->hw_debug5 = false;
	for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
		if (!(p->mrq_present) && (!(*p->UnboundedRequestEnabled)) && (TotalActiveDPP == 1)
			&& p->display_cfg->plane_descriptors[k].surface.dcc.enable
			&& ((p->rob_buffer_size_kbytes * 1024 * (p->mrq_present ? MAXIMUMCOMPRESSION : 1)
				+ *p->CompressedBufferSizeInkByte * MAXIMUMCOMPRESSION * 1024) > TTUFIFODEPTH * (RoundedUpSwathSizeBytesY[k] + RoundedUpSwathSizeBytesC[k])))
			*p->hw_debug5 = true;
#ifdef __DML_VBA_DEBUG__
		dml2_printf("DML::%s: k=%u UnboundedRequestEnabled = %u\n", __func__, k, *p->UnboundedRequestEnabled);
		dml2_printf("DML::%s: k=%u MAXIMUMCOMPRESSION = %lu\n", __func__, k, MAXIMUMCOMPRESSION);
		dml2_printf("DML::%s: k=%u TTUFIFODEPTH = %lu\n", __func__, k, TTUFIFODEPTH);
		dml2_printf("DML::%s: k=%u CompressedBufferSizeInkByte = %u\n", __func__, k, *p->CompressedBufferSizeInkByte);
		dml2_printf("DML::%s: k=%u RoundedUpSwathSizeBytesC = %u\n", __func__, k, RoundedUpSwathSizeBytesC[k]);
		dml2_printf("DML::%s: k=%u hw_debug5 = %u\n", __func__, k, *p->hw_debug5);
#endif
	}
}
3905
DecideODMMode(unsigned int HActive,double MaxDispclk,unsigned int MaximumPixelsPerLinePerDSCUnit,enum dml2_output_format_class OutFormat,bool UseDSC,unsigned int NumberOfDSCSlices,double SurfaceRequiredDISPCLKWithoutODMCombine,double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,double SurfaceRequiredDISPCLKWithODMCombineFourToOne)3906 static enum dml2_odm_mode DecideODMMode(unsigned int HActive,
3907 double MaxDispclk,
3908 unsigned int MaximumPixelsPerLinePerDSCUnit,
3909 enum dml2_output_format_class OutFormat,
3910 bool UseDSC,
3911 unsigned int NumberOfDSCSlices,
3912 double SurfaceRequiredDISPCLKWithoutODMCombine,
3913 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
3914 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
3915 double SurfaceRequiredDISPCLKWithODMCombineFourToOne)
3916 {
3917 enum dml2_odm_mode MinimumRequiredODMModeForMaxDispClock;
3918 enum dml2_odm_mode MinimumRequiredODMModeForMaxDSCHActive;
3919 enum dml2_odm_mode MinimumRequiredODMModeForMax420HActive;
3920 enum dml2_odm_mode ODMMode = dml2_odm_mode_bypass;
3921
3922 MinimumRequiredODMModeForMaxDispClock =
3923 (SurfaceRequiredDISPCLKWithoutODMCombine <= MaxDispclk) ? dml2_odm_mode_bypass :
3924 (SurfaceRequiredDISPCLKWithODMCombineTwoToOne <= MaxDispclk) ? dml2_odm_mode_combine_2to1 :
3925 (SurfaceRequiredDISPCLKWithODMCombineThreeToOne <= MaxDispclk) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1;
3926 if (ODMMode < MinimumRequiredODMModeForMaxDispClock)
3927 ODMMode = MinimumRequiredODMModeForMaxDispClock;
3928
3929 if (UseDSC) {
3930 MinimumRequiredODMModeForMaxDSCHActive =
3931 (HActive <= 1 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_bypass :
3932 (HActive <= 2 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_combine_2to1 :
3933 (HActive <= 3 * MaximumPixelsPerLinePerDSCUnit) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1;
3934 if (ODMMode < MinimumRequiredODMModeForMaxDSCHActive)
3935 ODMMode = MinimumRequiredODMModeForMaxDSCHActive;
3936 }
3937
3938 if (OutFormat == dml2_420) {
3939 MinimumRequiredODMModeForMax420HActive =
3940 (HActive <= 1 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_bypass :
3941 (HActive <= 2 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_combine_2to1 :
3942 (HActive <= 3 * DML2_MAX_FMT_420_BUFFER_WIDTH) ? dml2_odm_mode_combine_3to1 : dml2_odm_mode_combine_4to1;
3943 if (ODMMode < MinimumRequiredODMModeForMax420HActive)
3944 ODMMode = MinimumRequiredODMModeForMax420HActive;
3945 }
3946
3947 if (UseDSC) {
3948 if (ODMMode == dml2_odm_mode_bypass && NumberOfDSCSlices > 4)
3949 ODMMode = dml2_odm_mode_combine_2to1;
3950 if (ODMMode == dml2_odm_mode_combine_2to1 && NumberOfDSCSlices > 8)
3951 ODMMode = dml2_odm_mode_combine_3to1;
3952 if (ODMMode == dml2_odm_mode_combine_3to1 && NumberOfDSCSlices != 12)
3953 ODMMode = dml2_odm_mode_combine_4to1;
3954 }
3955
3956 return ODMMode;
3957 }
3958
CalculateODMConstraints(enum dml2_odm_mode ODMUse,double SurfaceRequiredDISPCLKWithoutODMCombine,double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,double SurfaceRequiredDISPCLKWithODMCombineFourToOne,unsigned int MaximumPixelsPerLinePerDSCUnit,double * DISPCLKRequired,unsigned int * NumberOfDPPRequired,unsigned int * MaxHActiveForDSC,unsigned int * MaxDSCSlices,unsigned int * MaxHActiveFor420)3959 static void CalculateODMConstraints(
3960 enum dml2_odm_mode ODMUse,
3961 double SurfaceRequiredDISPCLKWithoutODMCombine,
3962 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
3963 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
3964 double SurfaceRequiredDISPCLKWithODMCombineFourToOne,
3965 unsigned int MaximumPixelsPerLinePerDSCUnit,
3966 /* Output */
3967 double *DISPCLKRequired,
3968 unsigned int *NumberOfDPPRequired,
3969 unsigned int *MaxHActiveForDSC,
3970 unsigned int *MaxDSCSlices,
3971 unsigned int *MaxHActiveFor420)
3972 {
3973 switch (ODMUse) {
3974 case dml2_odm_mode_combine_2to1:
3975 *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
3976 *NumberOfDPPRequired = 2;
3977 break;
3978 case dml2_odm_mode_combine_3to1:
3979 *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineThreeToOne;
3980 *NumberOfDPPRequired = 3;
3981 break;
3982 case dml2_odm_mode_combine_4to1:
3983 *DISPCLKRequired = SurfaceRequiredDISPCLKWithODMCombineFourToOne;
3984 *NumberOfDPPRequired = 4;
3985 break;
3986 case dml2_odm_mode_auto:
3987 case dml2_odm_mode_split_1to2:
3988 case dml2_odm_mode_mso_1to2:
3989 case dml2_odm_mode_mso_1to4:
3990 case dml2_odm_mode_bypass:
3991 default:
3992 *DISPCLKRequired = SurfaceRequiredDISPCLKWithoutODMCombine;
3993 *NumberOfDPPRequired = 1;
3994 break;
3995 }
3996 *MaxHActiveForDSC = *NumberOfDPPRequired * MaximumPixelsPerLinePerDSCUnit;
3997 *MaxDSCSlices = *NumberOfDPPRequired * DML_MAX_NUM_OF_SLICES_PER_DSC;
3998 *MaxHActiveFor420 = *NumberOfDPPRequired * DML2_MAX_FMT_420_BUFFER_WIDTH;
3999 }
4000
ValidateODMMode(enum dml2_odm_mode ODMMode,double MaxDispclk,unsigned int HActive,enum dml2_output_format_class OutFormat,bool UseDSC,unsigned int NumberOfDSCSlices,unsigned int TotalNumberOfActiveDPP,unsigned int MaxNumDPP,double DISPCLKRequired,unsigned int NumberOfDPPRequired,unsigned int MaxHActiveForDSC,unsigned int MaxDSCSlices,unsigned int MaxHActiveFor420)4001 static bool ValidateODMMode(enum dml2_odm_mode ODMMode,
4002 double MaxDispclk,
4003 unsigned int HActive,
4004 enum dml2_output_format_class OutFormat,
4005 bool UseDSC,
4006 unsigned int NumberOfDSCSlices,
4007 unsigned int TotalNumberOfActiveDPP,
4008 unsigned int MaxNumDPP,
4009 double DISPCLKRequired,
4010 unsigned int NumberOfDPPRequired,
4011 unsigned int MaxHActiveForDSC,
4012 unsigned int MaxDSCSlices,
4013 unsigned int MaxHActiveFor420)
4014 {
4015 bool are_odm_segments_symmetrical = (ODMMode == dml2_odm_mode_combine_3to1) ? UseDSC : true;
4016 bool is_max_dsc_slice_required = (ODMMode == dml2_odm_mode_combine_3to1);
4017 unsigned int pixels_per_clock_cycle = (OutFormat == dml2_420 || OutFormat == dml2_n422) ? 2 : 1;
4018 unsigned int h_timing_div_mode =
4019 (ODMMode == dml2_odm_mode_combine_4to1 || ODMMode == dml2_odm_mode_combine_3to1) ? 4 :
4020 (ODMMode == dml2_odm_mode_combine_2to1) ? 2 : pixels_per_clock_cycle;
4021
4022 if (DISPCLKRequired > MaxDispclk)
4023 return false;
4024 if ((TotalNumberOfActiveDPP + NumberOfDPPRequired) > MaxNumDPP)
4025 return false;
4026 if (are_odm_segments_symmetrical) {
4027 if (HActive % (NumberOfDPPRequired * pixels_per_clock_cycle))
4028 return false;
4029 }
4030 if (HActive % h_timing_div_mode)
4031 /*
4032 * TODO - OTG_H_TOTAL, OTG_H_BLANK_START/END and
4033 * OTG_H_SYNC_A_START/END all need to be visible by h timing div
4034 * mode. This logic only checks H active.
4035 */
4036 return false;
4037
4038 if (UseDSC) {
4039 if (HActive > MaxHActiveForDSC)
4040 return false;
4041 if (NumberOfDSCSlices > MaxDSCSlices)
4042 return false;
4043 if (HActive % NumberOfDSCSlices)
4044 return false;
4045 if (NumberOfDSCSlices % NumberOfDPPRequired)
4046 return false;
4047 if (is_max_dsc_slice_required) {
4048 if (NumberOfDSCSlices != MaxDSCSlices)
4049 return false;
4050 }
4051 }
4052
4053 if (OutFormat == dml2_420) {
4054 if (HActive > MaxHActiveFor420)
4055 return false;
4056 }
4057
4058 return true;
4059 }
4060
CalculateODMMode(unsigned int MaximumPixelsPerLinePerDSCUnit,unsigned int HActive,enum dml2_output_format_class OutFormat,enum dml2_output_encoder_class Output,enum dml2_odm_mode ODMUse,double MaxDispclk,bool DSCEnable,unsigned int TotalNumberOfActiveDPP,unsigned int MaxNumDPP,double PixelClock,unsigned int NumberOfDSCSlices,bool * TotalAvailablePipesSupport,unsigned int * NumberOfDPP,enum dml2_odm_mode * ODMMode,double * RequiredDISPCLKPerSurface)4061 static void CalculateODMMode(
4062 unsigned int MaximumPixelsPerLinePerDSCUnit,
4063 unsigned int HActive,
4064 enum dml2_output_format_class OutFormat,
4065 enum dml2_output_encoder_class Output,
4066 enum dml2_odm_mode ODMUse,
4067 double MaxDispclk,
4068 bool DSCEnable,
4069 unsigned int TotalNumberOfActiveDPP,
4070 unsigned int MaxNumDPP,
4071 double PixelClock,
4072 unsigned int NumberOfDSCSlices,
4073
4074 // Output
4075 bool *TotalAvailablePipesSupport,
4076 unsigned int *NumberOfDPP,
4077 enum dml2_odm_mode *ODMMode,
4078 double *RequiredDISPCLKPerSurface)
4079 {
4080 double SurfaceRequiredDISPCLKWithoutODMCombine;
4081 double SurfaceRequiredDISPCLKWithODMCombineTwoToOne;
4082 double SurfaceRequiredDISPCLKWithODMCombineThreeToOne;
4083 double SurfaceRequiredDISPCLKWithODMCombineFourToOne;
4084 double DISPCLKRequired;
4085 unsigned int NumberOfDPPRequired;
4086 unsigned int MaxHActiveForDSC;
4087 unsigned int MaxDSCSlices;
4088 unsigned int MaxHActiveFor420;
4089 bool success;
4090 bool UseDSC = DSCEnable && (NumberOfDSCSlices > 0);
4091 enum dml2_odm_mode DecidedODMMode;
4092
4093 SurfaceRequiredDISPCLKWithoutODMCombine = CalculateRequiredDispclk(dml2_odm_mode_bypass, PixelClock);
4094 SurfaceRequiredDISPCLKWithODMCombineTwoToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_2to1, PixelClock);
4095 SurfaceRequiredDISPCLKWithODMCombineThreeToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_3to1, PixelClock);
4096 SurfaceRequiredDISPCLKWithODMCombineFourToOne = CalculateRequiredDispclk(dml2_odm_mode_combine_4to1, PixelClock);
4097 #ifdef __DML_VBA_DEBUG__
4098 dml2_printf("DML::%s: ODMUse = %d\n", __func__, ODMUse);
4099 dml2_printf("DML::%s: Output = %d\n", __func__, Output);
4100 dml2_printf("DML::%s: DSCEnable = %d\n", __func__, DSCEnable);
4101 dml2_printf("DML::%s: MaxDispclk = %f\n", __func__, MaxDispclk);
4102 dml2_printf("DML::%s: MaximumPixelsPerLinePerDSCUnit = %d\n", __func__, MaximumPixelsPerLinePerDSCUnit);
4103 dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithoutODMCombine = %f\n", __func__, SurfaceRequiredDISPCLKWithoutODMCombine);
4104 dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineTwoToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineTwoToOne);
4105 dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineThreeToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineThreeToOne);
4106 dml2_printf("DML::%s: SurfaceRequiredDISPCLKWithODMCombineFourToOne = %f\n", __func__, SurfaceRequiredDISPCLKWithODMCombineFourToOne);
4107 #endif
4108 if (ODMUse == dml2_odm_mode_auto)
4109 DecidedODMMode = DecideODMMode(HActive,
4110 MaxDispclk,
4111 MaximumPixelsPerLinePerDSCUnit,
4112 OutFormat,
4113 UseDSC,
4114 NumberOfDSCSlices,
4115 SurfaceRequiredDISPCLKWithoutODMCombine,
4116 SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
4117 SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
4118 SurfaceRequiredDISPCLKWithODMCombineFourToOne);
4119 else
4120 DecidedODMMode = ODMUse;
4121 CalculateODMConstraints(DecidedODMMode,
4122 SurfaceRequiredDISPCLKWithoutODMCombine,
4123 SurfaceRequiredDISPCLKWithODMCombineTwoToOne,
4124 SurfaceRequiredDISPCLKWithODMCombineThreeToOne,
4125 SurfaceRequiredDISPCLKWithODMCombineFourToOne,
4126 MaximumPixelsPerLinePerDSCUnit,
4127 &DISPCLKRequired,
4128 &NumberOfDPPRequired,
4129 &MaxHActiveForDSC,
4130 &MaxDSCSlices,
4131 &MaxHActiveFor420);
4132 success = ValidateODMMode(DecidedODMMode,
4133 MaxDispclk,
4134 HActive,
4135 OutFormat,
4136 UseDSC,
4137 NumberOfDSCSlices,
4138 TotalNumberOfActiveDPP,
4139 MaxNumDPP,
4140 DISPCLKRequired,
4141 NumberOfDPPRequired,
4142 MaxHActiveForDSC,
4143 MaxDSCSlices,
4144 MaxHActiveFor420);
4145
4146 *ODMMode = DecidedODMMode;
4147 *TotalAvailablePipesSupport = success;
4148 *NumberOfDPP = NumberOfDPPRequired;
4149 *RequiredDISPCLKPerSurface = success ? DISPCLKRequired : 0;
4150 #ifdef __DML_VBA_DEBUG__
4151 dml2_printf("DML::%s: ODMMode = %d\n", __func__, *ODMMode);
4152 dml2_printf("DML::%s: NumberOfDPP = %d\n", __func__, *NumberOfDPP);
4153 dml2_printf("DML::%s: TotalAvailablePipesSupport = %d\n", __func__, *TotalAvailablePipesSupport);
4154 dml2_printf("DML::%s: RequiredDISPCLKPerSurface = %f\n", __func__, *RequiredDISPCLKPerSurface);
4155 #endif
4156 }
4157
/*
 * Determine the output bpp, link type/rate and DSC/FEC requirements for one
 * output, by walking the link rates of the given encoder from lowest to
 * highest.
 *
 * For each candidate rate TruncToValidBPP() is called; a later rate is only
 * tried while *OutBpp is still 0. When DSCEnable is
 * dml2_dsc_enable_if_necessary, no bpp is forced (ForcedOutputLinkBPP == 0)
 * and the uncompressed attempt returned 0, the same rate is retried with DSC
 * enabled - but only if the PHY clock is below the threshold of the next
 * rate (the highest rate of each ladder has no such upper bound).
 *
 * Outputs:
 *  *RequiresDSC, *RequiresFEC - whether DSC / FEC ended up being required
 *  *OutBpp                    - value returned by the last TruncToValidBPP()
 *                               call (0 if no call was made or none succeeded)
 *  *OutputType, *OutputRate   - type/rate of the last rate that was attempted;
 *                               these are written even if that attempt left
 *                               *OutBpp at 0, and stay "unknown" if no rate
 *                               was attempted
 *  *RequiredSlots             - only passed to TruncToValidBPP() on the
 *                               DP/eDP/DP2.0 paths; HDMI and HDMI FRL pass a
 *                               local dummy, so it is left untouched there
 *
 * NOTE(review): TruncToValidBPP() is defined elsewhere; this function treats a
 * 0 return as "nothing valid at this rate" - confirm against its definition.
 * NOTE(review): the units of PHYCLK/PHYCLKD18/PHYCLKD32 are not visible here;
 * the thresholds are compared against rate / 10, rate / 18 and rate / 32
 * respectively.
 */
static void CalculateOutputLink(
	struct dml2_core_internal_scratch *s,
	double PHYCLK,
	double PHYCLKD18,
	double PHYCLKD32,
	double Downspreading,
	enum dml2_output_encoder_class Output,
	enum dml2_output_format_class OutputFormat,
	unsigned int HTotal,
	unsigned int HActive,
	double PixelClockBackEnd,
	double ForcedOutputLinkBPP,
	unsigned int DSCInputBitPerComponent,
	unsigned int NumberOfDSCSlices,
	double AudioSampleRate,
	unsigned int AudioSampleLayout,
	enum dml2_odm_mode ODMModeNoDSC,
	enum dml2_odm_mode ODMModeDSC,
	enum dml2_dsc_enable_option DSCEnable,
	unsigned int OutputLinkDPLanes,
	enum dml2_output_link_dp_rate OutputLinkDPRate,

	// Output
	bool *RequiresDSC,
	bool *RequiresFEC,
	double *OutBpp,
	enum dml2_core_internal_output_type *OutputType,
	enum dml2_core_internal_output_type_rate *OutputRate,
	unsigned int *RequiredSlots)
{
	// Current DSC setting passed to TruncToValidBPP(); assigned on every DP / HDMI FRL path before use.
	bool LinkDSCEnable;
	// Sink for the "required slots" result on paths that do not report it.
	unsigned int dummy;
	*RequiresDSC = false;
	*RequiresFEC = false;
	*OutBpp = 0;

	*OutputType = dml2_core_internal_output_type_unknown;
	*OutputRate = dml2_core_internal_output_rate_unknown;

#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: DSCEnable = %u (dis, en, en_if_necessary)\n", __func__, DSCEnable);
	dml2_printf("DML::%s: PHYCLK = %f\n", __func__, PHYCLK);
	dml2_printf("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
	dml2_printf("DML::%s: AudioSampleRate = %f\n", __func__, AudioSampleRate);
	dml2_printf("DML::%s: HActive = %u\n", __func__, HActive);
	dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal);
	dml2_printf("DML::%s: ODMModeNoDSC = %u\n", __func__, ODMModeNoDSC);
	dml2_printf("DML::%s: ODMModeDSC = %u\n", __func__, ODMModeDSC);
	dml2_printf("DML::%s: ForcedOutputLinkBPP = %f\n", __func__, ForcedOutputLinkBPP);
	dml2_printf("DML::%s: Output (encoder) = %u\n", __func__, Output);
	dml2_printf("DML::%s: OutputLinkDPRate = %u\n", __func__, OutputLinkDPRate);
#endif
	{
		if (Output == dml2_hdmi) {
			// HDMI (non-FRL): never DSC/FEC; single attempt with 3 lanes at min(600, PHYCLK) * 10.
			*RequiresDSC = false;
			*RequiresFEC = false;
			*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, math_min2(600, PHYCLK) * 10, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, false, Output,
				OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
			//OutputTypeAndRate = "HDMI";
			*OutputType = dml2_core_internal_output_type_hdmi;
		} else if (Output == dml2_dp || Output == dml2_dp2p0 || Output == dml2_edp) {
			// Initial DSC/FEC state: DSC only when explicitly enabled. With DSC, FEC is set for
			// DP and DP2.0 but not eDP; without DSC, FEC is set for DP2.0 only.
			if (DSCEnable == dml2_dsc_enable) {
				*RequiresDSC = true;
				LinkDSCEnable = true;
				if (Output == dml2_dp || Output == dml2_dp2p0) {
					*RequiresFEC = true;
				} else {
					*RequiresFEC = false;
				}
			} else {
				*RequiresDSC = false;
				LinkDSCEnable = false;
				if (Output == dml2_dp2p0) {
					*RequiresFEC = true;
				} else {
					*RequiresFEC = false;
				}
			}
			if (Output == dml2_dp2p0) {
				// DP2.0 ladder: UHBR10 -> UHBR13.5 -> UHBR20. A rate is considered when it matches
				// OutputLinkDPRate (or the rate is "na") and PHYCLKD32 reaches rate / 32.
				*OutBpp = 0;
				if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr10) && PHYCLKD32 >= 10000.0 / 32) {
					*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
						OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					// Retry with DSC only if the PHY cannot step up to UHBR13.5.
					if (*OutBpp == 0 && PHYCLKD32 < 13500.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 10000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " UHBR10";
					*OutputType = dml2_core_internal_output_type_dp2p0;
					*OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr10;
				}
				if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr13p5) && *OutBpp == 0 && PHYCLKD32 >= 13500.0 / 32) {
					*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
						OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);

					// Retry with DSC only if the PHY cannot step up to UHBR20.
					if (*OutBpp == 0 && PHYCLKD32 < 20000.0 / 32 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 13500, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " UHBR13p5";
					*OutputType = dml2_core_internal_output_type_dp2p0;
					*OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr13p5;
				}
				if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_uhbr20) && *OutBpp == 0 && PHYCLKD32 >= 20000.0 / 32) {
					*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
						OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					// Highest rate: the DSC retry has no upper PHY clock bound.
					if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 20000, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " UHBR20";
					*OutputType = dml2_core_internal_output_type_dp2p0;
					*OutputRate = dml2_core_internal_output_rate_dp_rate_uhbr20;
				}
			} else { // output is dp or edp
				// DP/eDP ladder: HBR -> HBR2 -> HBR3, gated on PHYCLK >= 270 / 540 / 810.
				// When the DSC retry is taken, FEC is additionally required for DP (not eDP).
				*OutBpp = 0;
				if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr) && PHYCLK >= 270) {
					*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
						OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					// Retry with DSC only if the PHY cannot step up to HBR2.
					if (*OutBpp == 0 && PHYCLK < 540 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						if (Output == dml2_dp) {
							*RequiresFEC = true;
						}
						*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 2700, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " HBR";
					*OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
					*OutputRate = dml2_core_internal_output_rate_dp_rate_hbr;
				}
				if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr2) && *OutBpp == 0 && PHYCLK >= 540) {
					*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
						OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);

					// Retry with DSC only if the PHY cannot step up to HBR3.
					if (*OutBpp == 0 && PHYCLK < 810 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						if (Output == dml2_dp) {
							*RequiresFEC = true;
						}
						*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 5400, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " HBR2";
					*OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
					*OutputRate = dml2_core_internal_output_rate_dp_rate_hbr2;
				}
				if ((OutputLinkDPRate == dml2_dp_rate_na || OutputLinkDPRate == dml2_dp_rate_hbr3) && *OutBpp == 0 && PHYCLK >= 810) { // VBA_ERROR, vba code doesn't have hbr3 check
					*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
						OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);

					// Highest rate: the DSC retry has no upper PHY clock bound.
					if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
						*RequiresDSC = true;
						LinkDSCEnable = true;
						if (Output == dml2_dp) {
							*RequiresFEC = true;
						}
						*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, (1 - Downspreading / 100) * 8100, OutputLinkDPLanes, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output,
							OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, RequiredSlots);
					}
					//OutputTypeAndRate = Output & " HBR3";
					*OutputType = (Output == dml2_dp) ? dml2_core_internal_output_type_dp : dml2_core_internal_output_type_edp;
					*OutputRate = dml2_core_internal_output_rate_dp_rate_hbr3;
				}
			}
		} else if (Output == dml2_hdmifrl) {
			// HDMI FRL: DSC and FEC are tied together - both set or both cleared.
			if (DSCEnable == dml2_dsc_enable) {
				*RequiresDSC = true;
				LinkDSCEnable = true;
				*RequiresFEC = true;
			} else {
				*RequiresDSC = false;
				LinkDSCEnable = false;
				*RequiresFEC = false;
			}
			// FRL ladder: 3x3 -> 6x3 -> 6x4 -> 8x4 -> 10x4 -> 12x4, gated on PHYCLKD18 >= rate / 18.
			// Only the 10x4 and 12x4 steps have a DSC retry.
			*OutBpp = 0;
			if (PHYCLKD18 >= 3000.0 / 18) {
				*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 3000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
				//OutputTypeAndRate = Output & "3x3";
				*OutputType = dml2_core_internal_output_type_hdmifrl;
				*OutputRate = dml2_core_internal_output_rate_hdmi_rate_3x3;
			}
			if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) {
				*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 3, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
				//OutputTypeAndRate = Output & "6x3";
				*OutputType = dml2_core_internal_output_type_hdmifrl;
				*OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x3;
			}
			// Same clock threshold as 6x3, but with 4 lanes instead of 3.
			if (*OutBpp == 0 && PHYCLKD18 >= 6000.0 / 18) {
				*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 6000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
				//OutputTypeAndRate = Output & "6x4";
				*OutputType = dml2_core_internal_output_type_hdmifrl;
				*OutputRate = dml2_core_internal_output_rate_hdmi_rate_6x4;
			}
			if (*OutBpp == 0 && PHYCLKD18 >= 8000.0 / 18) {
				*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 8000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
				//OutputTypeAndRate = Output & "8x4";
				*OutputType = dml2_core_internal_output_type_hdmifrl;
				*OutputRate = dml2_core_internal_output_rate_hdmi_rate_8x4;
			}
			if (*OutBpp == 0 && PHYCLKD18 >= 10000.0 / 18) {
				*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
				// Retry with DSC only if the PHY cannot step up to 12x4.
				if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0 && PHYCLKD18 < 12000.0 / 18) {
					*RequiresDSC = true;
					LinkDSCEnable = true;
					*RequiresFEC = true;
					*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 10000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
				}
				//OutputTypeAndRate = Output & "10x4";
				*OutputType = dml2_core_internal_output_type_hdmifrl;
				*OutputRate = dml2_core_internal_output_rate_hdmi_rate_10x4;
			}
			if (*OutBpp == 0 && PHYCLKD18 >= 12000.0 / 18) {
				*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
				// Highest rate: the DSC retry has no upper PHY clock bound.
				if (*OutBpp == 0 && DSCEnable == dml2_dsc_enable_if_necessary && ForcedOutputLinkBPP == 0) {
					*RequiresDSC = true;
					LinkDSCEnable = true;
					*RequiresFEC = true;
					*OutBpp = TruncToValidBPP(&s->TruncToValidBPP_locals, 12000, 4, HTotal, HActive, PixelClockBackEnd, ForcedOutputLinkBPP, LinkDSCEnable, Output, OutputFormat, DSCInputBitPerComponent, NumberOfDSCSlices, (unsigned int)AudioSampleRate, AudioSampleLayout, ODMModeNoDSC, ODMModeDSC, &dummy);
				}
				//OutputTypeAndRate = Output & "12x4";
				*OutputType = dml2_core_internal_output_type_hdmifrl;
				*OutputRate = dml2_core_internal_output_rate_hdmi_rate_12x4;
			}
		}
	}
#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: RequiresDSC = %u\n", __func__, *RequiresDSC);
	dml2_printf("DML::%s: RequiresFEC = %u\n", __func__, *RequiresFEC);
	dml2_printf("DML::%s: OutBpp = %f\n", __func__, *OutBpp);
#endif
}
4398
/*
 * Compute the DISPCLK needed by writeback.
 *
 * Three candidate clocks are derived from the pixel clock - one from the
 * horizontal taps and ratio, one from the vertical taps and destination
 * width, and one from the line buffer size and source width - and the largest
 * of the three is returned.
 *
 * WritebackPixelFormat and WritebackVRatio are accepted but not used by the
 * calculation.
 */
static double CalculateWriteBackDISPCLK(
	enum dml2_source_format_class WritebackPixelFormat,
	double PixelClock,
	double WritebackHRatio,
	double WritebackVRatio,
	unsigned int WritebackHTaps,
	unsigned int WritebackVTaps,
	unsigned int WritebackSourceWidth,
	unsigned int WritebackDestinationWidth,
	unsigned int HTotal,
	unsigned int WritebackLineBufferSize)
{
	/* Horizontal: taps rounded up to whole groups of 8, scaled by 1 / HRatio. */
	const double h_tap_groups = math_ceil2((double)WritebackHTaps / 8.0, 1);
	const double dispclk_h = PixelClock * h_tap_groups / WritebackHRatio;

	/* Vertical: per-tap cost of ceil(dest width / 6) plus a constant 8, per HTotal. */
	const double v_cost = WritebackVTaps * math_ceil2((double)WritebackDestinationWidth / 6.0, 1) + 8.0;
	const double dispclk_v = PixelClock * v_cost / (double)HTotal;

	/* Line buffer: destination pixels across all taps, less LB size / 57. */
	const double lb_excess = WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0;
	const double dispclk_hb = PixelClock * WritebackVTaps * lb_excess / 6.0 / (double)WritebackSourceWidth;

	return math_max3(dispclk_h, dispclk_v, dispclk_hb);
}
4418
RequiredDTBCLK(bool DSCEnable,double PixelClock,enum dml2_output_format_class OutputFormat,double OutputBpp,unsigned int DSCSlices,unsigned int HTotal,unsigned int HActive,unsigned int AudioRate,unsigned int AudioLayout)4419 static double RequiredDTBCLK(
4420 bool DSCEnable,
4421 double PixelClock,
4422 enum dml2_output_format_class OutputFormat,
4423 double OutputBpp,
4424 unsigned int DSCSlices,
4425 unsigned int HTotal,
4426 unsigned int HActive,
4427 unsigned int AudioRate,
4428 unsigned int AudioLayout)
4429 {
4430 if (DSCEnable != true) {
4431 return math_max2(PixelClock / 4.0 * OutputBpp / 24.0, 25.0);
4432 } else {
4433 double PixelWordRate = PixelClock / (OutputFormat == dml2_444 ? 1 : 2);
4434 double HCActive = math_ceil2(DSCSlices * math_ceil2(OutputBpp * math_ceil2(HActive / DSCSlices, 1) / 8.0, 1) / 3.0, 1);
4435 double HCBlank = 64 + 32 * math_ceil2(AudioRate * (AudioLayout == 1 ? 1 : 0.25) * HTotal / (PixelClock * 1000), 1);
4436 double AverageTribyteRate = PixelWordRate * (HCActive + HCBlank) / HTotal;
4437 double HActiveTribyteRate = PixelWordRate * HCActive / HActive;
4438 return math_max4(PixelWordRate / 4.0, AverageTribyteRate / 4.0, HActiveTribyteRate / 4.0, 25.0) * 1.002;
4439 }
4440 }
4441
DSCDelayRequirement(bool DSCEnabled,enum dml2_odm_mode ODMMode,unsigned int DSCInputBitPerComponent,double OutputBpp,unsigned int HActive,unsigned int HTotal,unsigned int NumberOfDSCSlices,enum dml2_output_format_class OutputFormat,enum dml2_output_encoder_class Output,double PixelClock,double PixelClockBackEnd)4442 static unsigned int DSCDelayRequirement(
4443 bool DSCEnabled,
4444 enum dml2_odm_mode ODMMode,
4445 unsigned int DSCInputBitPerComponent,
4446 double OutputBpp,
4447 unsigned int HActive,
4448 unsigned int HTotal,
4449 unsigned int NumberOfDSCSlices,
4450 enum dml2_output_format_class OutputFormat,
4451 enum dml2_output_encoder_class Output,
4452 double PixelClock,
4453 double PixelClockBackEnd)
4454 {
4455 unsigned int DSCDelayRequirement_val = 0;
4456 unsigned int NumberOfDSCSlicesFactor = 1;
4457
4458 if (DSCEnabled == true && OutputBpp != 0) {
4459
4460 if (ODMMode == dml2_odm_mode_combine_4to1)
4461 NumberOfDSCSlicesFactor = 4;
4462 else if (ODMMode == dml2_odm_mode_combine_3to1)
4463 NumberOfDSCSlicesFactor = 3;
4464 else if (ODMMode == dml2_odm_mode_combine_2to1)
4465 NumberOfDSCSlicesFactor = 2;
4466
4467 DSCDelayRequirement_val = NumberOfDSCSlicesFactor * (dscceComputeDelay(DSCInputBitPerComponent, OutputBpp, (unsigned int)(math_ceil2((double)HActive / (double)NumberOfDSCSlices, 1.0)),
4468 (NumberOfDSCSlices / NumberOfDSCSlicesFactor), OutputFormat, Output) + dscComputeDelay(OutputFormat, Output));
4469
4470 DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val + (HTotal - HActive) * math_ceil2((double)DSCDelayRequirement_val / (double)HActive, 1.0));
4471 DSCDelayRequirement_val = (unsigned int)(DSCDelayRequirement_val * PixelClock / PixelClockBackEnd);
4472
4473 } else {
4474 DSCDelayRequirement_val = 0;
4475 }
4476 #ifdef __DML_VBA_DEBUG__
4477 dml2_printf("DML::%s: DSCEnabled= %u\n", __func__, DSCEnabled);
4478 dml2_printf("DML::%s: ODMMode = %u\n", __func__, ODMMode);
4479 dml2_printf("DML::%s: OutputBpp = %f\n", __func__, OutputBpp);
4480 dml2_printf("DML::%s: HActive = %u\n", __func__, HActive);
4481 dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal);
4482 dml2_printf("DML::%s: PixelClock = %f\n", __func__, PixelClock);
4483 dml2_printf("DML::%s: PixelClockBackEnd = %f\n", __func__, PixelClockBackEnd);
4484 dml2_printf("DML::%s: OutputFormat = %u\n", __func__, OutputFormat);
4485 dml2_printf("DML::%s: DSCInputBitPerComponent = %u\n", __func__, DSCInputBitPerComponent);
4486 dml2_printf("DML::%s: NumberOfDSCSlices = %u\n", __func__, NumberOfDSCSlices);
4487 dml2_printf("DML::%s: DSCDelayRequirement_val = %u\n", __func__, DSCDelayRequirement_val);
4488 #endif
4489
4490 return DSCDelayRequirement_val;
4491 }
4492
CalculateSurfaceSizeInMall(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int MALLAllocatedForDCN,unsigned int BytesPerPixelY[],unsigned int BytesPerPixelC[],unsigned int Read256BytesBlockWidthY[],unsigned int Read256BytesBlockWidthC[],unsigned int Read256BytesBlockHeightY[],unsigned int Read256BytesBlockHeightC[],unsigned int ReadBlockWidthY[],unsigned int ReadBlockWidthC[],unsigned int ReadBlockHeightY[],unsigned int ReadBlockHeightC[],unsigned int SurfaceSizeInMALL[],bool * ExceededMALLSize)4493 static void CalculateSurfaceSizeInMall(
4494 const struct dml2_display_cfg *display_cfg,
4495 unsigned int NumberOfActiveSurfaces,
4496 unsigned int MALLAllocatedForDCN,
4497 unsigned int BytesPerPixelY[],
4498 unsigned int BytesPerPixelC[],
4499 unsigned int Read256BytesBlockWidthY[],
4500 unsigned int Read256BytesBlockWidthC[],
4501 unsigned int Read256BytesBlockHeightY[],
4502 unsigned int Read256BytesBlockHeightC[],
4503 unsigned int ReadBlockWidthY[],
4504 unsigned int ReadBlockWidthC[],
4505 unsigned int ReadBlockHeightY[],
4506 unsigned int ReadBlockHeightC[],
4507
4508 // Output
4509 unsigned int SurfaceSizeInMALL[],
4510 bool *ExceededMALLSize)
4511 {
4512 unsigned int TotalSurfaceSizeInMALLForSS = 0;
4513 unsigned int TotalSurfaceSizeInMALLForSubVP = 0;
4514 unsigned int MALLAllocatedForDCNInBytes = MALLAllocatedForDCN * 1024 * 1024;
4515
4516 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4517 const struct dml2_composition_cfg *composition = &display_cfg->plane_descriptors[k].composition;
4518 const struct dml2_surface_cfg *surface = &display_cfg->plane_descriptors[k].surface;
4519
4520 if (composition->viewport.stationary) {
4521 SurfaceSizeInMALL[k] = (unsigned int)(math_min2(math_ceil2((double)surface->plane0.width, ReadBlockWidthY[k]),
4522 math_floor2(composition->viewport.plane0.x_start + composition->viewport.plane0.width + ReadBlockWidthY[k] - 1, ReadBlockWidthY[k]) -
4523 math_floor2((double)composition->viewport.plane0.x_start, ReadBlockWidthY[k])) *
4524 math_min2(math_ceil2((double)surface->plane0.height, ReadBlockHeightY[k]),
4525 math_floor2((double)composition->viewport.plane0.y_start + composition->viewport.plane0.height + ReadBlockHeightY[k] - 1, ReadBlockHeightY[k]) -
4526 math_floor2((double)composition->viewport.plane0.y_start, ReadBlockHeightY[k])) * BytesPerPixelY[k]);
4527
4528 if (ReadBlockWidthC[k] > 0) {
4529 SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
4530 math_min2(math_ceil2((double)surface->plane1.width, ReadBlockWidthC[k]),
4531 math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.width + ReadBlockWidthC[k] - 1, ReadBlockWidthC[k]) -
4532 math_floor2((double)composition->viewport.plane1.y_start, ReadBlockWidthC[k])) *
4533 math_min2(math_ceil2((double)surface->plane1.height, ReadBlockHeightC[k]),
4534 math_floor2((double)composition->viewport.plane1.y_start + composition->viewport.plane1.height + ReadBlockHeightC[k] - 1, ReadBlockHeightC[k]) -
4535 math_floor2(composition->viewport.plane1.y_start, ReadBlockHeightC[k])) * BytesPerPixelC[k]);
4536 }
4537 } else {
4538 SurfaceSizeInMALL[k] = (unsigned int)(math_ceil2(math_min2(surface->plane0.width, composition->viewport.plane0.width + ReadBlockWidthY[k] - 1), ReadBlockWidthY[k]) *
4539 math_ceil2(math_min2(surface->plane0.height, composition->viewport.plane0.height + ReadBlockHeightY[k] - 1), ReadBlockHeightY[k]) * BytesPerPixelY[k]);
4540 if (ReadBlockWidthC[k] > 0) {
4541 SurfaceSizeInMALL[k] = (unsigned int)(SurfaceSizeInMALL[k] +
4542 math_ceil2(math_min2(surface->plane1.width, composition->viewport.plane1.width + ReadBlockWidthC[k] - 1), ReadBlockWidthC[k]) *
4543 math_ceil2(math_min2(surface->plane1.height, composition->viewport.plane1.height + ReadBlockHeightC[k] - 1), ReadBlockHeightC[k]) * BytesPerPixelC[k]);
4544 }
4545 }
4546 }
4547
4548 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4549 /* SS and Subvp counted separate as they are never used at the same time */
4550 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
4551 TotalSurfaceSizeInMALLForSubVP += SurfaceSizeInMALL[k];
4552 else if (display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable)
4553 TotalSurfaceSizeInMALLForSS += SurfaceSizeInMALL[k];
4554 }
4555
4556 *ExceededMALLSize = (TotalSurfaceSizeInMALLForSS > MALLAllocatedForDCNInBytes) ||
4557 (TotalSurfaceSizeInMALLForSubVP > MALLAllocatedForDCNInBytes);
4558
4559 #ifdef __DML_VBA_DEBUG__
4560 dml2_printf("DML::%s: MALLAllocatedForDCN = %u\n", __func__, MALLAllocatedForDCN * 1024 * 1024);
4561 dml2_printf("DML::%s: TotalSurfaceSizeInMALLForSubVP = %u\n", __func__, TotalSurfaceSizeInMALLForSubVP);
4562 dml2_printf("DML::%s: TotalSurfaceSizeInMALLForSS = %u\n", __func__, TotalSurfaceSizeInMALLForSS);
4563 dml2_printf("DML::%s: ExceededMALLSize = %u\n", __func__, *ExceededMALLSize);
4564 #endif
4565 }
4566
calculate_tdlut_setting(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_calculate_tdlut_setting_params * p)4567 static void calculate_tdlut_setting(
4568 struct dml2_core_internal_scratch *scratch,
4569 struct dml2_core_calcs_calculate_tdlut_setting_params *p)
4570 {
4571 // locals
4572 unsigned int tdlut_bpe = 8;
4573 unsigned int tdlut_width;
4574 unsigned int tdlut_pitch_bytes;
4575 unsigned int tdlut_footprint_bytes;
4576 unsigned int vmpg_bytes;
4577 unsigned int tdlut_vmpg_per_frame;
4578 unsigned int tdlut_pte_req_per_frame;
4579 unsigned int tdlut_bytes_per_line;
4580 unsigned int tdlut_delivery_cycles;
4581 double tdlut_drain_rate;
4582 unsigned int tdlut_mpc_width;
4583 unsigned int tdlut_bytes_per_group_simple;
4584
4585 if (!p->setup_for_tdlut) {
4586 *p->tdlut_groups_per_2row_ub = 0;
4587 *p->tdlut_opt_time = 0;
4588 *p->tdlut_drain_time = 0;
4589 *p->tdlut_bytes_per_group = 0;
4590 *p->tdlut_pte_bytes_per_frame = 0;
4591 *p->tdlut_bytes_per_frame = 0;
4592 return;
4593 }
4594
4595 if (p->tdlut_mpc_width_flag) {
4596 tdlut_mpc_width = 33;
4597 tdlut_bytes_per_group_simple = 39*256;
4598 } else {
4599 tdlut_mpc_width = 17;
4600 tdlut_bytes_per_group_simple = 10*256;
4601 }
4602
4603 vmpg_bytes = p->gpuvm_page_size_kbytes * 1024;
4604
4605 if (p->tdlut_addressing_mode == dml2_tdlut_simple_linear) {
4606 if (p->tdlut_width_mode == dml2_tdlut_width_17_cube)
4607 tdlut_width = 4916;
4608 else
4609 tdlut_width = 35940;
4610 } else {
4611 if (p->tdlut_width_mode == dml2_tdlut_width_17_cube)
4612 tdlut_width = 17;
4613 else // dml2_tdlut_width_33_cube
4614 tdlut_width = 33;
4615 }
4616
4617 if (p->is_gfx11)
4618 tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256); //256B alignment
4619 else
4620 tdlut_pitch_bytes = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 128); //128B alignment
4621
4622 if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear)
4623 tdlut_footprint_bytes = tdlut_pitch_bytes * tdlut_width * tdlut_width;
4624 else
4625 tdlut_footprint_bytes = tdlut_pitch_bytes;
4626
4627 if (!p->gpuvm_enable) {
4628 tdlut_vmpg_per_frame = 0;
4629 tdlut_pte_req_per_frame = 0;
4630 } else {
4631 tdlut_vmpg_per_frame = (unsigned int)math_ceil2(tdlut_footprint_bytes - 1, vmpg_bytes) / vmpg_bytes + 1;
4632 tdlut_pte_req_per_frame = (unsigned int)math_ceil2(tdlut_vmpg_per_frame - 1, 8) / 8 + 1;
4633 }
4634 tdlut_bytes_per_line = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 64); //64b request
4635 *p->tdlut_pte_bytes_per_frame = tdlut_pte_req_per_frame * 64;
4636
4637 if (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) {
4638 //the tdlut_width is either 17 or 33 but the 33x33x33 is subsampled every other line/slice
4639 *p->tdlut_bytes_per_frame = tdlut_bytes_per_line * tdlut_mpc_width * tdlut_mpc_width;
4640 *p->tdlut_bytes_per_group = tdlut_bytes_per_line * tdlut_mpc_width;
4641 //the delivery cycles is DispClk cycles per line * number of lines * number of slices
4642 tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_mpc_width/2.0, 1) * tdlut_mpc_width * tdlut_mpc_width;
4643 tdlut_drain_rate = tdlut_bytes_per_line * p->dispclk_mhz / math_ceil2(tdlut_mpc_width/2.0, 1);
4644 } else {
4645 //tdlut_addressing_mode = tdlut_simple_linear, 3dlut width should be 4*1229=4916 elements
4646 *p->tdlut_bytes_per_frame = (unsigned int)math_ceil2(tdlut_width * tdlut_bpe, 256);
4647 *p->tdlut_bytes_per_group = tdlut_bytes_per_group_simple;
4648 tdlut_delivery_cycles = (unsigned int)math_ceil2(tdlut_width/2.0, 1);
4649 tdlut_drain_rate = 2 * tdlut_bpe * p->dispclk_mhz;
4650 }
4651
4652 //the tdlut is fetched during the 2 row times of prefetch.
4653 if (p->setup_for_tdlut) {
4654 *p->tdlut_groups_per_2row_ub = (unsigned int)math_ceil2((double) *p->tdlut_bytes_per_frame / *p->tdlut_bytes_per_group, 1);
4655 *p->tdlut_opt_time = (*p->tdlut_bytes_per_frame - p->cursor_buffer_size * 1024) / tdlut_drain_rate;
4656 *p->tdlut_drain_time = p->cursor_buffer_size * 1024 / tdlut_drain_rate;
4657 }
4658
4659 #ifdef __DML_VBA_DEBUG__
4660 dml2_printf("DML::%s: gpuvm_enable = %d\n", __func__, p->gpuvm_enable);
4661 dml2_printf("DML::%s: vmpg_bytes = %d\n", __func__, vmpg_bytes);
4662 dml2_printf("DML::%s: tdlut_vmpg_per_frame = %d\n", __func__, tdlut_vmpg_per_frame);
4663 dml2_printf("DML::%s: tdlut_pte_req_per_frame = %d\n", __func__, tdlut_pte_req_per_frame);
4664
4665 dml2_printf("DML::%s: dispclk_mhz = %f\n", __func__, p->dispclk_mhz);
4666 dml2_printf("DML::%s: tdlut_width = %u\n", __func__, tdlut_width);
4667 dml2_printf("DML::%s: tdlut_addressing_mode = %s\n", __func__, (p->tdlut_addressing_mode == dml2_tdlut_sw_linear) ? "sw_linear" : "simple_linear");
4668 dml2_printf("DML::%s: tdlut_pitch_bytes = %u\n", __func__, tdlut_pitch_bytes);
4669 dml2_printf("DML::%s: tdlut_footprint_bytes = %u\n", __func__, tdlut_footprint_bytes);
4670 dml2_printf("DML::%s: tdlut_bytes_per_frame = %u\n", __func__, *p->tdlut_bytes_per_frame);
4671 dml2_printf("DML::%s: tdlut_bytes_per_line = %u\n", __func__, tdlut_bytes_per_line);
4672 dml2_printf("DML::%s: tdlut_bytes_per_group = %u\n", __func__, *p->tdlut_bytes_per_group);
4673 dml2_printf("DML::%s: tdlut_drain_rate = %f\n", __func__, tdlut_drain_rate);
4674 dml2_printf("DML::%s: tdlut_delivery_cycles = %u\n", __func__, tdlut_delivery_cycles);
4675 dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, *p->tdlut_opt_time);
4676 dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, *p->tdlut_drain_time);
4677 dml2_printf("DML::%s: tdlut_groups_per_2row_ub = %d\n", __func__, *p->tdlut_groups_per_2row_ub);
4678 #endif
4679 }
4680
CalculateTarb(const struct dml2_display_cfg * display_cfg,unsigned int PixelChunkSizeInKByte,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],unsigned int tdlut_bytes_per_group[],double HostVMInefficiencyFactor,double HostVMInefficiencyFactorPrefetch,unsigned int HostVMMinPageSize,double ReturnBW,unsigned int MetaChunkSize,double * Tarb,double * Tarb_prefetch)4681 static void CalculateTarb(
4682 const struct dml2_display_cfg *display_cfg,
4683 unsigned int PixelChunkSizeInKByte,
4684 unsigned int NumberOfActiveSurfaces,
4685 unsigned int NumberOfDPP[],
4686 unsigned int dpte_group_bytes[],
4687 unsigned int tdlut_bytes_per_group[],
4688 double HostVMInefficiencyFactor,
4689 double HostVMInefficiencyFactorPrefetch,
4690 unsigned int HostVMMinPageSize,
4691 double ReturnBW,
4692 unsigned int MetaChunkSize,
4693
4694 // output
4695 double *Tarb,
4696 double *Tarb_prefetch)
4697 {
4698 double extra_bytes = 0;
4699 double extra_bytes_prefetch = 0;
4700 double HostVMDynamicLevels = CalculateHostVMDynamicLevels(display_cfg->gpuvm_enable, display_cfg->hostvm_enable, HostVMMinPageSize, display_cfg->hostvm_max_non_cached_page_table_levels);
4701
4702 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4703 extra_bytes = extra_bytes + (NumberOfDPP[k] * PixelChunkSizeInKByte * 1024);
4704
4705 if (display_cfg->plane_descriptors[k].surface.dcc.enable)
4706 extra_bytes = extra_bytes + (MetaChunkSize * 1024);
4707
4708 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
4709 extra_bytes = extra_bytes + tdlut_bytes_per_group[k];
4710 }
4711
4712 extra_bytes_prefetch = extra_bytes;
4713
4714 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4715 if (display_cfg->gpuvm_enable == true) {
4716 extra_bytes = extra_bytes + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
4717 extra_bytes_prefetch = extra_bytes_prefetch + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactorPrefetch;
4718 }
4719 }
4720 *Tarb = extra_bytes / ReturnBW;
4721 *Tarb_prefetch = extra_bytes_prefetch / ReturnBW;
4722 #ifdef __DML_VBA_DEBUG__
4723 dml2_printf("DML::%s: PixelChunkSizeInKByte = %d\n", __func__, PixelChunkSizeInKByte);
4724 dml2_printf("DML::%s: MetaChunkSize = %d\n", __func__, MetaChunkSize);
4725 dml2_printf("DML::%s: extra_bytes = %f\n", __func__, extra_bytes);
4726 dml2_printf("DML::%s: extra_bytes_prefetch = %f\n", __func__, extra_bytes_prefetch);
4727 #endif
4728 }
4729
/*
 * Returns
 *   max(reserved_vblank_time_ns / 1000, g6_temp_read_blackout_us)
 *     + max(UrgentLatency, Ttrip).
 *
 * The division by 1000 brings the reserved vblank time (named as
 * nanoseconds) to the same scale as the blackout time (named as
 * microseconds).
 */
static double CalculateTWait(
	long reserved_vblank_time_ns,
	double UrgentLatency,
	double Ttrip,
	double g6_temp_read_blackout_us)
{
	double TWait;
	double t_urg_trip = math_max2(UrgentLatency, Ttrip);

	TWait = math_max2(reserved_vblank_time_ns/1000.0, g6_temp_read_blackout_us) + t_urg_trip;

#ifdef __DML_VBA_DEBUG__
	// reserved_vblank_time_ns is a long, so it must be printed with %ld (was %d)
	dml2_printf("DML::%s: reserved_vblank_time_ns = %ld\n", __func__, reserved_vblank_time_ns);
	dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency);
	dml2_printf("DML::%s: Ttrip = %f\n", __func__, Ttrip);
	dml2_printf("DML::%s: TWait = %f\n", __func__, TWait);
#endif
	return TWait;
}
4748
4749
/*
 * Compute the VUpdate/VReady pixel offsets and the dynamic metadata timing
 * terms from the clocks and the horizontal/vertical timing.
 *
 * Outputs:
 *   VUpdateWidthPix  - ceil((14 / DCFClkDeepSleep + 12 / Dppclk + repeater delay) * PixelClock)
 *   VReadyOffsetPix  - ceil(max(150 / Dppclk,
 *                               repeater delay + 20 / DCFClkDeepSleep + 10 / Dppclk) * PixelClock)
 *   VUpdateOffsetPix - ceil(HTotal / 4)
 *   TSetup           - (VUpdateOffsetPix + VUpdateWidthPix + VReadyOffsetPix) / PixelClock
 *   Tdmbf            - DynamicMetadataTransmittedBytes / 4 / Dispclk
 *   Tdmec            - HTotal / PixelClock
 *   Tdmsks           - half of the vblank time when no "lines before active"
 *                      requirement is given, otherwise that many line times;
 *                      halved once more for interlaced timing without the
 *                      progressive-to-interlace unit in OPP
 *
 * where repeater delay = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk).
 */
static void CalculateVUpdateAndDynamicMetadataParameters(
	unsigned int MaxInterDCNTileRepeaters,
	double Dppclk,
	double Dispclk,
	double DCFClkDeepSleep,
	double PixelClock,
	unsigned int HTotal,
	unsigned int VBlank,
	unsigned int DynamicMetadataTransmittedBytes,
	unsigned int DynamicMetadataLinesBeforeActiveRequired,
	unsigned int InterlaceEnable,
	bool ProgressiveToInterlaceUnitInOPP,

	// Output
	double *TSetup,
	double *Tdmbf,
	double *Tdmec,
	double *Tdmsks,
	unsigned int *VUpdateOffsetPix,
	unsigned int *VUpdateWidthPix,
	unsigned int *VReadyOffsetPix)
{
	const double repeater_delay = MaxInterDCNTileRepeaters * (2 / Dppclk + 3 / Dispclk);
	const unsigned int vupdate_width = (unsigned int)(math_ceil2((14.0 / DCFClkDeepSleep + 12.0 / Dppclk + repeater_delay) * PixelClock, 1.0));
	const unsigned int vready_offset = (unsigned int)(math_ceil2(math_max2(150.0 / Dppclk, repeater_delay + 20.0 / DCFClkDeepSleep + 10.0 / Dppclk) * PixelClock, 1.0));
	const unsigned int vupdate_offset = (unsigned int)(math_ceil2(HTotal / 4.0, 1.0));
	double dmsks;

	*VUpdateWidthPix = vupdate_width;
	*VReadyOffsetPix = vready_offset;
	*VUpdateOffsetPix = vupdate_offset;
	*TSetup = (vupdate_offset + vupdate_width + vready_offset) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / Dispclk;
	*Tdmec = HTotal / PixelClock;

	/* An explicit line requirement wins; otherwise use half of the vblank. */
	if (DynamicMetadataLinesBeforeActiveRequired != 0)
		dmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	else
		dmsks = VBlank * HTotal / PixelClock / 2.0;

	if (InterlaceEnable == 1 && !ProgressiveToInterlaceUnitInOPP)
		dmsks = dmsks / 2;

	*Tdmsks = dmsks;
#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: DynamicMetadataLinesBeforeActiveRequired = %u\n", __func__, DynamicMetadataLinesBeforeActiveRequired);
	dml2_printf("DML::%s: VBlank = %u\n", __func__, VBlank);
	dml2_printf("DML::%s: HTotal = %u\n", __func__, HTotal);
	dml2_printf("DML::%s: PixelClock = %f\n", __func__, PixelClock);
	dml2_printf("DML::%s: Dppclk = %f\n", __func__, Dppclk);
	dml2_printf("DML::%s: DCFClkDeepSleep = %f\n", __func__, DCFClkDeepSleep);
	dml2_printf("DML::%s: MaxInterDCNTileRepeaters = %u\n", __func__, MaxInterDCNTileRepeaters);
	dml2_printf("DML::%s: TotalRepeaterDelayTime = %f\n", __func__, repeater_delay);

	dml2_printf("DML::%s: VUpdateWidthPix = %u\n", __func__, *VUpdateWidthPix);
	dml2_printf("DML::%s: VReadyOffsetPix = %u\n", __func__, *VReadyOffsetPix);
	dml2_printf("DML::%s: VUpdateOffsetPix = %u\n", __func__, *VUpdateOffsetPix);

	dml2_printf("DML::%s: Tdmsks = %f\n", __func__, *Tdmsks);
#endif
}
4806
get_urgent_bandwidth_required(struct dml2_core_shared_get_urgent_bandwidth_required_locals * l,const struct dml2_display_cfg * display_cfg,enum dml2_core_internal_soc_state_type state_type,enum dml2_core_internal_bw_type bw_type,bool inc_flip_bw,bool use_qual_row_bw,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],double dcc_dram_bw_nom_overhead_factor_p0[],double dcc_dram_bw_nom_overhead_factor_p1[],double dcc_dram_bw_pref_overhead_factor_p0[],double dcc_dram_bw_pref_overhead_factor_p1[],double mall_prefetch_sdp_overhead_factor[],double mall_prefetch_dram_overhead_factor[],double ReadBandwidthLuma[],double ReadBandwidthChroma[],double PrefetchBandwidthLuma[],double PrefetchBandwidthChroma[],double excess_vactive_fill_bw_l[],double excess_vactive_fill_bw_c[],double cursor_bw[],double dpte_row_bw[],double meta_row_bw[],double prefetch_cursor_bw[],double prefetch_vmrow_bw[],double flip_bw[],double UrgentBurstFactorLuma[],double UrgentBurstFactorChroma[],double UrgentBurstFactorCursor[],double UrgentBurstFactorLumaPre[],double UrgentBurstFactorChromaPre[],double UrgentBurstFactorCursorPre[],double surface_required_bw[],double surface_peak_required_bw[])4807 static double get_urgent_bandwidth_required(
4808 struct dml2_core_shared_get_urgent_bandwidth_required_locals *l,
4809 const struct dml2_display_cfg *display_cfg,
4810 enum dml2_core_internal_soc_state_type state_type,
4811 enum dml2_core_internal_bw_type bw_type,
4812 bool inc_flip_bw, // including flip bw
4813 bool use_qual_row_bw,
4814 unsigned int NumberOfActiveSurfaces,
4815 unsigned int NumberOfDPP[],
4816 double dcc_dram_bw_nom_overhead_factor_p0[],
4817 double dcc_dram_bw_nom_overhead_factor_p1[],
4818 double dcc_dram_bw_pref_overhead_factor_p0[],
4819 double dcc_dram_bw_pref_overhead_factor_p1[],
4820 double mall_prefetch_sdp_overhead_factor[],
4821 double mall_prefetch_dram_overhead_factor[],
4822 double ReadBandwidthLuma[],
4823 double ReadBandwidthChroma[],
4824 double PrefetchBandwidthLuma[],
4825 double PrefetchBandwidthChroma[],
4826 double excess_vactive_fill_bw_l[],
4827 double excess_vactive_fill_bw_c[],
4828 double cursor_bw[],
4829 double dpte_row_bw[],
4830 double meta_row_bw[],
4831 double prefetch_cursor_bw[],
4832 double prefetch_vmrow_bw[],
4833 double flip_bw[],
4834 double UrgentBurstFactorLuma[],
4835 double UrgentBurstFactorChroma[],
4836 double UrgentBurstFactorCursor[],
4837 double UrgentBurstFactorLumaPre[],
4838 double UrgentBurstFactorChromaPre[],
4839 double UrgentBurstFactorCursorPre[],
4840 /* outputs */
4841 double surface_required_bw[],
4842 double surface_peak_required_bw[])
4843 {
4844 // set inc_flip_bw = 0 for total_dchub_urgent_read_bw_noflip calculation, 1 for total_dchub_urgent_read_bw as described in the MAS
4845 // set use_qual_row_bw = 1 to calculate using qualified row bandwidth, used for total_flip_bw calculation
4846
4847 memset(l, 0, sizeof(struct dml2_core_shared_get_urgent_bandwidth_required_locals));
4848
4849 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
4850 l->mall_svp_prefetch_factor = (state_type == dml2_core_internal_soc_state_svp_prefetch) ? (bw_type == dml2_core_internal_bw_dram ? mall_prefetch_dram_overhead_factor[k] : mall_prefetch_sdp_overhead_factor[k]) : 1.0;
4851 l->tmp_nom_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor;
4852 l->tmp_nom_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor;
4853 l->tmp_pref_adj_factor_p0 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p0[k] : 1.0) * l->mall_svp_prefetch_factor;
4854 l->tmp_pref_adj_factor_p1 = (bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_pref_overhead_factor_p1[k] : 1.0) * l->mall_svp_prefetch_factor;
4855
4856 l->adj_factor_p0 = UrgentBurstFactorLuma[k] * l->tmp_nom_adj_factor_p0;
4857 l->adj_factor_p1 = UrgentBurstFactorChroma[k] * l->tmp_nom_adj_factor_p1;
4858 l->adj_factor_cur = UrgentBurstFactorCursor[k];
4859 l->adj_factor_p0_pre = UrgentBurstFactorLumaPre[k] * l->tmp_pref_adj_factor_p0;
4860 l->adj_factor_p1_pre = UrgentBurstFactorChromaPre[k] * l->tmp_pref_adj_factor_p1;
4861 l->adj_factor_cur_pre = UrgentBurstFactorCursorPre[k];
4862
4863 bool is_phantom = dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]);
4864 bool exclude_this_plane = 0;
4865
4866 // Exclude phantom pipe in bw calculation for non svp prefetch state
4867 if (state_type != dml2_core_internal_soc_state_svp_prefetch && is_phantom)
4868 exclude_this_plane = 1;
4869
4870 // The qualified row bandwidth, qual_row_bw, accounts for the regular non-flip row bandwidth when there is no possible immediate flip or HostVM invalidation flip.
4871 // The qual_row_bw is zero if HostVM is possible and only non-zero and equal to row_bw(i) if immediate flip is not allowed for that pipe.
4872 if (use_qual_row_bw) {
4873 if (display_cfg->hostvm_enable)
4874 l->per_plane_flip_bw[k] = 0; // qual_row_bw
4875 else if (!display_cfg->plane_descriptors[k].immediate_flip)
4876 l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]);
4877 } else {
4878 // the final_flip_bw includes the regular row_bw when immediate flip is disallowed (and no HostVM)
4879 if ((!display_cfg->plane_descriptors[k].immediate_flip && !display_cfg->hostvm_enable) || !inc_flip_bw)
4880 l->per_plane_flip_bw[k] = NumberOfDPP[k] * (dpte_row_bw[k] + meta_row_bw[k]);
4881 else
4882 l->per_plane_flip_bw[k] = NumberOfDPP[k] * flip_bw[k];
4883 }
4884
4885 if (!exclude_this_plane) {
4886 l->vm_row_bw = NumberOfDPP[k] * prefetch_vmrow_bw[k];
4887 l->flip_and_active_bw = l->per_plane_flip_bw[k] + ReadBandwidthLuma[k] * l->adj_factor_p0 + ReadBandwidthChroma[k] * l->adj_factor_p1 + cursor_bw[k] * l->adj_factor_cur;
4888 l->flip_and_prefetch_bw = l->per_plane_flip_bw[k] + NumberOfDPP[k] * (PrefetchBandwidthLuma[k] * l->adj_factor_p0_pre + PrefetchBandwidthChroma[k] * l->adj_factor_p1_pre) + prefetch_cursor_bw[k] * l->adj_factor_cur_pre;
4889 l->active_and_excess_bw = (ReadBandwidthLuma[k] + excess_vactive_fill_bw_l[k]) * l->tmp_nom_adj_factor_p0 + (ReadBandwidthChroma[k] + excess_vactive_fill_bw_c[k]) * l->tmp_nom_adj_factor_p1 + dpte_row_bw[k] + meta_row_bw[k];
4890 surface_required_bw[k] = math_max4(l->vm_row_bw, l->flip_and_active_bw, l->flip_and_prefetch_bw, l->active_and_excess_bw);
4891
4892 /* export peak required bandwidth for the surface */
4893 surface_peak_required_bw[k] = math_max2(surface_required_bw[k], surface_peak_required_bw[k]);
4894
4895 #ifdef __DML_VBA_DEBUG__
4896 dml2_printf("DML::%s: k=%d, max1: vm_row_bw=%f\n", __func__, k, l->vm_row_bw);
4897 dml2_printf("DML::%s: k=%d, max2: flip_and_active_bw=%f\n", __func__, k, l->flip_and_active_bw);
4898 dml2_printf("DML::%s: k=%d, max3: flip_and_prefetch_bw=%f\n", __func__, k, l->flip_and_prefetch_bw);
4899 dml2_printf("DML::%s: k=%d, max4: active_and_excess_bw=%f\n", __func__, k, l->active_and_excess_bw);
4900 dml2_printf("DML::%s: k=%d, surface_required_bw=%f\n", __func__, k, surface_required_bw[k]);
4901 dml2_printf("DML::%s: k=%d, surface_peak_required_bw=%f\n", __func__, k, surface_peak_required_bw[k]);
4902 #endif
4903 } else {
4904 surface_required_bw[k] = 0.0;
4905 }
4906
4907 l->required_bandwidth_mbps += surface_required_bw[k];
4908
4909 #ifdef __DML_VBA_DEBUG__
4910 dml2_printf("DML::%s: k=%d, NumberOfDPP=%d\n", __func__, k, NumberOfDPP[k]);
4911 dml2_printf("DML::%s: k=%d, use_qual_row_bw=%d\n", __func__, k, use_qual_row_bw);
4912 dml2_printf("DML::%s: k=%d, immediate_flip=%d\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip);
4913 dml2_printf("DML::%s: k=%d, mall_svp_prefetch_factor=%f\n", __func__, k, l->mall_svp_prefetch_factor);
4914 dml2_printf("DML::%s: k=%d, adj_factor_p0=%f\n", __func__, k, l->adj_factor_p0);
4915 dml2_printf("DML::%s: k=%d, adj_factor_p1=%f\n", __func__, k, l->adj_factor_p1);
4916 dml2_printf("DML::%s: k=%d, adj_factor_cur=%f\n", __func__, k, l->adj_factor_cur);
4917
4918 dml2_printf("DML::%s: k=%d, adj_factor_p0_pre=%f\n", __func__, k, l->adj_factor_p0_pre);
4919 dml2_printf("DML::%s: k=%d, adj_factor_p1_pre=%f\n", __func__, k, l->adj_factor_p1_pre);
4920 dml2_printf("DML::%s: k=%d, adj_factor_cur_pre=%f\n", __func__, k, l->adj_factor_cur_pre);
4921
4922 dml2_printf("DML::%s: k=%d, per_plane_flip_bw=%f\n", __func__, k, l->per_plane_flip_bw[k]);
4923 dml2_printf("DML::%s: k=%d, prefetch_vmrow_bw=%f\n", __func__, k, prefetch_vmrow_bw[k]);
4924 dml2_printf("DML::%s: k=%d, ReadBandwidthLuma=%f\n", __func__, k, ReadBandwidthLuma[k]);
4925 dml2_printf("DML::%s: k=%d, ReadBandwidthChroma=%f\n", __func__, k, ReadBandwidthChroma[k]);
4926 dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_l=%f\n", __func__, k, excess_vactive_fill_bw_l[k]);
4927 dml2_printf("DML::%s: k=%d, excess_vactive_fill_bw_c=%f\n", __func__, k, excess_vactive_fill_bw_c[k]);
4928 dml2_printf("DML::%s: k=%d, cursor_bw=%f\n", __func__, k, cursor_bw[k]);
4929
4930 dml2_printf("DML::%s: k=%d, meta_row_bw=%f\n", __func__, k, meta_row_bw[k]);
4931 dml2_printf("DML::%s: k=%d, dpte_row_bw=%f\n", __func__, k, dpte_row_bw[k]);
4932 dml2_printf("DML::%s: k=%d, PrefetchBandwidthLuma=%f\n", __func__, k, PrefetchBandwidthLuma[k]);
4933 dml2_printf("DML::%s: k=%d, PrefetchBandwidthChroma=%f\n", __func__, k, PrefetchBandwidthChroma[k]);
4934 dml2_printf("DML::%s: k=%d, prefetch_cursor_bw=%f\n", __func__, k, prefetch_cursor_bw[k]);
4935 dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane);
4936 dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), soc_state=%s, inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, dml2_core_internal_soc_state_type_str(state_type), inc_flip_bw, is_phantom, exclude_this_plane);
4937 dml2_printf("DML::%s: k=%d, required_bandwidth_mbps=%f (total), inc_flip_bw=%d, is_phantom=%d exclude_this_plane=%d\n", __func__, k, l->required_bandwidth_mbps, inc_flip_bw, is_phantom, exclude_this_plane);
4938 #endif
4939 }
4940
4941 return l->required_bandwidth_mbps;
4942 }
4943
CalculateExtraLatency(const struct dml2_display_cfg * display_cfg,unsigned int ROBBufferSizeInKByte,unsigned int RoundTripPingLatencyCycles,unsigned int ReorderingBytes,double DCFCLK,double FabricClock,unsigned int PixelChunkSizeInKByte,double ReturnBW,unsigned int NumberOfActiveSurfaces,unsigned int NumberOfDPP[],unsigned int dpte_group_bytes[],unsigned int tdlut_bytes_per_group[],double HostVMInefficiencyFactor,double HostVMInefficiencyFactorPrefetch,unsigned int HostVMMinPageSize,enum dml2_qos_param_type qos_type,bool max_oustanding_when_urgent_expected,unsigned int max_outstanding_requests,unsigned int request_size_bytes_luma[],unsigned int request_size_bytes_chroma[],unsigned int MetaChunkSize,unsigned int dchub_arb_to_ret_delay,double Ttrip,unsigned int hostvm_mode,double * ExtraLatency,double * ExtraLatency_sr,double * ExtraLatencyPrefetch)4944 static void CalculateExtraLatency(
4945 const struct dml2_display_cfg *display_cfg,
4946 unsigned int ROBBufferSizeInKByte,
4947 unsigned int RoundTripPingLatencyCycles,
4948 unsigned int ReorderingBytes,
4949 double DCFCLK,
4950 double FabricClock,
4951 unsigned int PixelChunkSizeInKByte,
4952 double ReturnBW,
4953 unsigned int NumberOfActiveSurfaces,
4954 unsigned int NumberOfDPP[],
4955 unsigned int dpte_group_bytes[],
4956 unsigned int tdlut_bytes_per_group[],
4957 double HostVMInefficiencyFactor,
4958 double HostVMInefficiencyFactorPrefetch,
4959 unsigned int HostVMMinPageSize,
4960 enum dml2_qos_param_type qos_type,
4961 bool max_oustanding_when_urgent_expected,
4962 unsigned int max_outstanding_requests,
4963 unsigned int request_size_bytes_luma[],
4964 unsigned int request_size_bytes_chroma[],
4965 unsigned int MetaChunkSize,
4966 unsigned int dchub_arb_to_ret_delay,
4967 double Ttrip,
4968 unsigned int hostvm_mode,
4969
4970 // output
4971 double *ExtraLatency, // Tex
4972 double *ExtraLatency_sr, // Tex_sr
4973 double *ExtraLatencyPrefetch)
4974
4975 {
4976 double Tarb;
4977 double Tarb_prefetch;
4978 double Tex_trips;
4979 unsigned int max_request_size_bytes = 0;
4980
4981 CalculateTarb(
4982 display_cfg,
4983 PixelChunkSizeInKByte,
4984 NumberOfActiveSurfaces,
4985 NumberOfDPP,
4986 dpte_group_bytes,
4987 tdlut_bytes_per_group,
4988 HostVMInefficiencyFactor,
4989 HostVMInefficiencyFactorPrefetch,
4990 HostVMMinPageSize,
4991 ReturnBW,
4992 MetaChunkSize,
4993 // output
4994 &Tarb,
4995 &Tarb_prefetch);
4996
4997 Tex_trips = (display_cfg->hostvm_enable && hostvm_mode == 1) ? (2.0 * Ttrip) : 0.0;
4998
4999 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
5000 if (request_size_bytes_luma[k] > max_request_size_bytes)
5001 max_request_size_bytes = request_size_bytes_luma[k];
5002 if (request_size_bytes_chroma[k] > max_request_size_bytes)
5003 max_request_size_bytes = request_size_bytes_chroma[k];
5004 }
5005
5006 if (qos_type == dml2_qos_param_type_dcn4x) {
5007 *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK;
5008 *ExtraLatency = *ExtraLatency_sr;
5009 if (max_oustanding_when_urgent_expected)
5010 *ExtraLatency = *ExtraLatency + (ROBBufferSizeInKByte * 1024 - max_outstanding_requests * max_request_size_bytes) / ReturnBW;
5011 } else {
5012 *ExtraLatency_sr = dchub_arb_to_ret_delay / DCFCLK + RoundTripPingLatencyCycles / FabricClock + ReorderingBytes / ReturnBW;
5013 *ExtraLatency = *ExtraLatency_sr;
5014 }
5015 *ExtraLatency = *ExtraLatency + Tex_trips;
5016 *ExtraLatencyPrefetch = *ExtraLatency + Tarb_prefetch;
5017 *ExtraLatency = *ExtraLatency + Tarb;
5018 *ExtraLatency_sr = *ExtraLatency_sr + Tarb;
5019
5020 #ifdef __DML_VBA_DEBUG__
5021 dml2_printf("DML::%s: qos_type=%u\n", __func__, qos_type);
5022 dml2_printf("DML::%s: hostvm_mode=%u\n", __func__, hostvm_mode);
5023 dml2_printf("DML::%s: Tex_trips=%u\n", __func__, Tex_trips);
5024 dml2_printf("DML::%s: max_oustanding_when_urgent_expected=%u\n", __func__, max_oustanding_when_urgent_expected);
5025 dml2_printf("DML::%s: FabricClock=%f\n", __func__, FabricClock);
5026 dml2_printf("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
5027 dml2_printf("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
5028 dml2_printf("DML::%s: RoundTripPingLatencyCycles=%u\n", __func__, RoundTripPingLatencyCycles);
5029 dml2_printf("DML::%s: ReorderingBytes=%u\n", __func__, ReorderingBytes);
5030 dml2_printf("DML::%s: Tarb=%f\n", __func__, Tarb);
5031 dml2_printf("DML::%s: ExtraLatency=%f\n", __func__, *ExtraLatency);
5032 dml2_printf("DML::%s: ExtraLatency_sr=%f\n", __func__, *ExtraLatency_sr);
5033 dml2_printf("DML::%s: ExtraLatencyPrefetch=%f\n", __func__, *ExtraLatencyPrefetch);
5034 #endif
5035 }
5036
CalculatePrefetchSchedule(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculatePrefetchSchedule_params * p)5037 static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch, struct dml2_core_calcs_CalculatePrefetchSchedule_params *p)
5038 {
5039 struct dml2_core_calcs_CalculatePrefetchSchedule_locals *s = &scratch->CalculatePrefetchSchedule_locals;
5040 bool dcc_mrq_enable;
5041
5042 unsigned int vm_bytes;
5043 unsigned int extra_tdpe_bytes;
5044 unsigned int tdlut_row_bytes;
5045 unsigned int Lo;
5046
5047 s->NoTimeToPrefetch = false;
5048 s->DPPCycles = 0;
5049 s->DISPCLKCycles = 0;
5050 s->DSTTotalPixelsAfterScaler = 0.0;
5051 s->LineTime = 0.0;
5052 s->dst_y_prefetch_equ = 0.0;
5053 s->prefetch_bw_oto = 0.0;
5054 s->Tvm_oto = 0.0;
5055 s->Tr0_oto = 0.0;
5056 s->Tvm_oto_lines = 0.0;
5057 s->Tr0_oto_lines = 0.0;
5058 s->dst_y_prefetch_oto = 0.0;
5059 s->TimeForFetchingVM = 0.0;
5060 s->TimeForFetchingRowInVBlank = 0.0;
5061 s->LinesToRequestPrefetchPixelData = 0.0;
5062 s->HostVMDynamicLevelsTrips = 0;
5063 s->trip_to_mem = 0.0;
5064 *p->Tvm_trips = 0.0;
5065 *p->Tr0_trips = 0.0;
5066 s->Tvm_no_trip_oto = 0.0;
5067 s->Tr0_no_trip_oto = 0.0;
5068 s->Tvm_trips_rounded = 0.0;
5069 s->Tr0_trips_rounded = 0.0;
5070 s->max_Tsw = 0.0;
5071 s->Lsw_oto = 0.0;
5072 s->Tpre_rounded = 0.0;
5073 s->prefetch_bw_equ = 0.0;
5074 s->Tvm_equ = 0.0;
5075 s->Tr0_equ = 0.0;
5076 s->Tdmbf = 0.0;
5077 s->Tdmec = 0.0;
5078 s->Tdmsks = 0.0;
5079 s->prefetch_sw_bytes = 0.0;
5080 s->prefetch_bw_pr = 0.0;
5081 s->bytes_pp = 0.0;
5082 s->dep_bytes = 0.0;
5083 s->min_Lsw_oto = 0.0;
5084 s->min_Lsw_equ = 0.0;
5085 s->Tsw_est1 = 0.0;
5086 s->Tsw_est2 = 0.0;
5087 s->Tsw_est3 = 0.0;
5088 s->cursor_prefetch_bytes = 0;
5089 *p->prefetch_cursor_bw = 0;
5090
5091 dcc_mrq_enable = (p->dcc_enable && p->mrq_present);
5092
5093 s->TWait_p = p->TWait - p->Ttrip; // TWait includes max(Turg, Ttrip) and Ttrip here is already max(Turg, Ttrip)
5094
5095 if (p->display_cfg->gpuvm_enable == true && p->display_cfg->hostvm_enable == true) {
5096 s->HostVMDynamicLevelsTrips = p->display_cfg->hostvm_max_non_cached_page_table_levels;
5097 } else {
5098 s->HostVMDynamicLevelsTrips = 0;
5099 }
5100 #ifdef __DML_VBA_DEBUG__
5101 dml2_printf("DML::%s: dcc_enable = %u\n", __func__, p->dcc_enable);
5102 dml2_printf("DML::%s: mrq_present = %u\n", __func__, p->mrq_present);
5103 dml2_printf("DML::%s: dcc_mrq_enable = %u\n", __func__, dcc_mrq_enable);
5104 dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, p->display_cfg->gpuvm_enable);
5105 dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels);
5106 dml2_printf("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable);
5107 dml2_printf("DML::%s: VStartup = %u\n", __func__, p->VStartup);
5108 dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, p->display_cfg->hostvm_enable);
5109 dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
5110 dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait);
5111 dml2_printf("DML::%s: TWait_p = %f\n", __func__, s->TWait_p);
5112 dml2_printf("DML::%s: Ttrip = %f\n", __func__, p->Ttrip);
5113 dml2_printf("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk);
5114 dml2_printf("DML::%s: myPipe->Dispclk = %f\n", __func__, p->myPipe->Dispclk);
5115 #endif
5116 CalculateVUpdateAndDynamicMetadataParameters(
5117 p->MaxInterDCNTileRepeaters,
5118 p->myPipe->Dppclk,
5119 p->myPipe->Dispclk,
5120 p->myPipe->DCFClkDeepSleep,
5121 p->myPipe->PixelClock,
5122 p->myPipe->HTotal,
5123 p->myPipe->VBlank,
5124 p->DynamicMetadataTransmittedBytes,
5125 p->DynamicMetadataLinesBeforeActiveRequired,
5126 p->myPipe->InterlaceEnable,
5127 p->myPipe->ProgressiveToInterlaceUnitInOPP,
5128 p->TSetup,
5129
5130 // Output
5131 &s->Tdmbf,
5132 &s->Tdmec,
5133 &s->Tdmsks,
5134 p->VUpdateOffsetPix,
5135 p->VUpdateWidthPix,
5136 p->VReadyOffsetPix);
5137
5138 s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock;
5139 s->trip_to_mem = p->Ttrip;
5140 *p->Tvm_trips = p->ExtraLatencyPrefetch + math_max2(s->trip_to_mem * (p->display_cfg->gpuvm_max_page_table_levels * (s->HostVMDynamicLevelsTrips + 1)), p->Turg);
5141 if (dcc_mrq_enable)
5142 *p->Tvm_trips_flip = *p->Tvm_trips;
5143 else
5144 *p->Tvm_trips_flip = *p->Tvm_trips - s->trip_to_mem;
5145
5146 *p->Tr0_trips_flip = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1);
5147 *p->Tr0_trips = math_max2(*p->Tr0_trips_flip, p->tdlut_opt_time / 2);
5148
5149 if (p->DynamicMetadataVMEnabled == true) {
5150 *p->Tdmdl_vm = s->TWait_p + *p->Tvm_trips;
5151 *p->Tdmdl = *p->Tdmdl_vm + p->Ttrip;
5152 } else {
5153 *p->Tdmdl_vm = 0;
5154 *p->Tdmdl = s->TWait_p + p->ExtraLatencyPrefetch + p->Ttrip; // Tex
5155 }
5156
5157 if (p->DynamicMetadataEnable == true) {
5158 if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) {
5159 *p->NotEnoughTimeForDynamicMetadata = true;
5160 dml2_printf("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
5161 dml2_printf("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
5162 dml2_printf("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
5163 dml2_printf("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
5164 dml2_printf("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
5165 } else {
5166 *p->NotEnoughTimeForDynamicMetadata = false;
5167 }
5168 } else {
5169 *p->NotEnoughTimeForDynamicMetadata = false;
5170 }
5171
5172 if (p->myPipe->ScalerEnabled)
5173 s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL);
5174 else
5175 s->DPPCycles = (unsigned int)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly);
5176
5177 s->DPPCycles = (unsigned int)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor);
5178
5179 s->DISPCLKCycles = (unsigned int)p->DISPCLKDelaySubtotal;
5180
5181 if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0)
5182 return true;
5183
5184 *p->DSTXAfterScaler = (unsigned int)math_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay);
5185 *p->DSTXAfterScaler = (unsigned int)math_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml2_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH +
5186 ((p->myPipe->ODMMode == dml2_odm_mode_split_1to2 || p->myPipe->ODMMode == dml2_odm_mode_mso_1to2) ? (double)p->myPipe->HActive / 2.0 : 0) +
5187 ((p->myPipe->ODMMode == dml2_odm_mode_mso_1to4) ? (double)p->myPipe->HActive * 3.0 / 4.0 : 0));
5188
5189 #ifdef __DML_VBA_DEBUG__
5190 dml2_printf("DML::%s: DynamicMetadataVMEnabled = %u\n", __func__, p->DynamicMetadataVMEnabled);
5191 dml2_printf("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles);
5192 dml2_printf("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock);
5193 dml2_printf("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk);
5194 dml2_printf("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles);
5195 dml2_printf("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk);
5196 dml2_printf("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay);
5197 dml2_printf("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode);
5198 dml2_printf("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH);
5199 dml2_printf("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler);
5200
5201 dml2_printf("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut);
5202 dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time);
5203 dml2_printf("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame);
5204 #endif
5205
5206 if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP))
5207 *p->DSTYAfterScaler = 1;
5208 else
5209 *p->DSTYAfterScaler = 0;
5210
5211 s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler;
5212 *p->DSTYAfterScaler = (unsigned int)(math_floor2(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1));
5213 *p->DSTXAfterScaler = (unsigned int)(s->DSTTotalPixelsAfterScaler - ((double)(*p->DSTYAfterScaler * p->myPipe->HTotal)));
5214 #ifdef __DML_VBA_DEBUG__
5215 dml2_printf("DML::%s: DSTXAfterScaler = %u (final)\n", __func__, *p->DSTXAfterScaler);
5216 dml2_printf("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler);
5217 #endif
5218
5219 #ifdef __DML_VBA_DEBUG__
5220 dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips);
5221 dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips);
5222 dml2_printf("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
5223 dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch);
5224 dml2_printf("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->display_cfg->gpuvm_max_page_table_levels);
5225 dml2_printf("DML::%s: HostVMDynamicLevelsTrips = %u\n", __func__, s->HostVMDynamicLevelsTrips);
5226 #endif
5227 if (p->display_cfg->gpuvm_enable) {
5228 s->Tvm_trips_rounded = math_ceil2(4.0 * *p->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
5229 *p->Tvm_trips_flip_rounded = math_ceil2(4.0 * *p->Tvm_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime;
5230 } else {
5231 if (p->DynamicMetadataEnable || dcc_mrq_enable || p->setup_for_tdlut)
5232 s->Tvm_trips_rounded = math_max2(s->LineTime * math_ceil2(4.0*math_max3(p->ExtraLatencyPrefetch, p->Turg, s->trip_to_mem)/s->LineTime, 1)/4, s->LineTime/4.0);
5233 else
5234 s->Tvm_trips_rounded = s->LineTime / 4.0;
5235 *p->Tvm_trips_flip_rounded = s->LineTime / 4.0;
5236 }
5237
5238 s->Tvm_trips_rounded = math_max2(s->Tvm_trips_rounded, s->LineTime / 4.0);
5239 *p->Tvm_trips_flip_rounded = math_max2(*p->Tvm_trips_flip_rounded, s->LineTime / 4.0);
5240
5241 if (p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable) {
5242 s->Tr0_trips_rounded = math_ceil2(4.0 * *p->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
5243 *p->Tr0_trips_flip_rounded = math_ceil2(4.0 * *p->Tr0_trips_flip / s->LineTime, 1.0) / 4.0 * s->LineTime;
5244 } else {
5245 s->Tr0_trips_rounded = s->LineTime / 4.0;
5246 *p->Tr0_trips_flip_rounded = s->LineTime / 4.0;
5247 }
5248 s->Tr0_trips_rounded = math_max2(s->Tr0_trips_rounded, s->LineTime / 4.0);
5249 *p->Tr0_trips_flip_rounded = math_max2(*p->Tr0_trips_flip_rounded, s->LineTime / 4.0);
5250
5251 if (p->display_cfg->gpuvm_enable == true) {
5252 if (p->display_cfg->gpuvm_max_page_table_levels >= 3) {
5253 *p->Tno_bw = p->ExtraLatencyPrefetch + s->trip_to_mem * (double)((p->display_cfg->gpuvm_max_page_table_levels - 2) * (s->HostVMDynamicLevelsTrips + 1));
5254 } else if (p->display_cfg->gpuvm_max_page_table_levels == 1 && !dcc_mrq_enable && !p->setup_for_tdlut) {
5255 *p->Tno_bw = p->ExtraLatencyPrefetch;
5256 } else {
5257 *p->Tno_bw = 0;
5258 }
5259 } else {
5260 *p->Tno_bw = 0;
5261 }
5262
5263 if (p->mrq_present || p->display_cfg->gpuvm_max_page_table_levels >= 3)
5264 *p->Tno_bw_flip = *p->Tno_bw;
5265 else
5266 *p->Tno_bw_flip = 0; //because there is no 3DLUT for iFlip
5267
5268 if (dml_is_420(p->myPipe->SourcePixelFormat)) {
5269 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4.0;
5270 } else {
5271 s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC;
5272 }
5273
5274 s->prefetch_bw_pr = s->bytes_pp * p->myPipe->PixelClock / (double)p->myPipe->DPPPerSurface;
5275 if (p->myPipe->VRatio < 1.0)
5276 s->prefetch_bw_pr = p->myPipe->VRatio * s->prefetch_bw_pr;
5277 s->max_Tsw = (math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime);
5278
5279 s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
5280 s->prefetch_bw_pr = s->prefetch_bw_pr * p->mall_prefetch_sdp_overhead_factor;
5281 s->prefetch_sw_bytes = s->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor;
5282 s->prefetch_bw_oto = math_max2(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw);
5283
5284 s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__;
5285 s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0);
5286 s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime);
5287
5288 s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__;
5289 s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0);
5290 s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime);
5291
5292 vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes;
5293 extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128);
5294
5295 if (p->setup_for_tdlut)
5296 vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0);
5297
5298 tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0);
5299 s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto,
5300 p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
5301 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
5302 s->Lsw_oto = math_ceil2(4.0 * math_max2(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0;
5303
5304 if (p->display_cfg->gpuvm_enable == true) {
5305 s->Tvm_no_trip_oto = math_max2(
5306 *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto,
5307 s->LineTime / 4.0);
5308 s->Tvm_oto = math_max2(
5309 *p->Tvm_trips,
5310 s->Tvm_no_trip_oto);
5311 #ifdef __DML_VBA_DEBUG__
5312 dml2_printf("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips);
5313 dml2_printf("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto);
5314 dml2_printf("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0);
5315 #endif
5316 } else {
5317 s->Tvm_no_trip_oto = s->Tvm_trips_rounded;
5318 s->Tvm_oto = s->Tvm_trips_rounded;
5319 }
5320
5321 if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) {
5322 s->Tr0_no_trip_oto = math_max2(
5323 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto,
5324 s->LineTime / 4.0);
5325 s->Tr0_oto = math_max2(
5326 *p->Tr0_trips,
5327 s->Tr0_no_trip_oto);
5328 #ifdef __DML_VBA_DEBUG__
5329 dml2_printf("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips);
5330 dml2_printf("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto);
5331 dml2_printf("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4);
5332 #endif
5333 } else {
5334 s->Tr0_no_trip_oto = (s->LineTime - s->Tvm_oto) / 4.0;
5335 s->Tr0_oto = s->Tr0_no_trip_oto;
5336 }
5337
5338 s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0;
5339 s->Tr0_oto_lines = math_ceil2(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0;
5340 s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
5341
5342 //To (time for delay after scaler) in line time
5343 Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal);
5344
5345 //Tpre_equ in line time
5346 if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable)
5347 s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo;
5348 else
5349 s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo;
5350 s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
5351
5352 #ifdef __DML_VBA_DEBUG__
5353 dml2_printf("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
5354 dml2_printf("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto);
5355 dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ);
5356 dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw);
5357 dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, *p->Tno_bw_flip);
5358 dml2_printf("DML::%s: ExtraLatencyPrefetch = %f\n", __func__, p->ExtraLatencyPrefetch);
5359 dml2_printf("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
5360 dml2_printf("DML::%s: mall_prefetch_sdp_overhead_factor = %f\n", __func__, p->mall_prefetch_sdp_overhead_factor);
5361 dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
5362 dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
5363 dml2_printf("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
5364 dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC);
5365 dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
5366 dml2_printf("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub);
5367 dml2_printf("DML::%s: prefetch_sw_bytes = %f\n", __func__, s->prefetch_sw_bytes);
5368 dml2_printf("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw);
5369 dml2_printf("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp);
5370 dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes);
5371 dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
5372 dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
5373 dml2_printf("DML::%s: Tvm_trips = %f\n", __func__, *p->Tvm_trips);
5374 dml2_printf("DML::%s: Tr0_trips = %f\n", __func__, *p->Tr0_trips);
5375 dml2_printf("DML::%s: Tvm_trips_flip = %f\n", __func__, *p->Tvm_trips_flip);
5376 dml2_printf("DML::%s: Tr0_trips_flip = %f\n", __func__, *p->Tr0_trips_flip);
5377 dml2_printf("DML::%s: prefetch_bw_pr = %f\n", __func__, s->prefetch_bw_pr);
5378 dml2_printf("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto);
5379 dml2_printf("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto);
5380 dml2_printf("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto);
5381 dml2_printf("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines);
5382 dml2_printf("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines);
5383 dml2_printf("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto);
5384 dml2_printf("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
5385 dml2_printf("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ);
5386 dml2_printf("DML::%s: tdlut_row_bytes = %d\n", __func__, tdlut_row_bytes);
5387 dml2_printf("DML::%s: meta_row_bytes = %d\n", __func__, p->meta_row_bytes);
5388 #endif
5389 double Tpre = s->dst_y_prefetch_equ * s->LineTime;
5390 s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0;
5391 s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime;
5392
5393 #ifdef __DML_VBA_DEBUG__
5394 dml2_printf("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ);
5395 dml2_printf("DML::%s: LineTime: %f\n", __func__, s->LineTime);
5396 dml2_printf("DML::%s: VStartup: %u\n", __func__, p->VStartup);
5397 dml2_printf("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime);
5398 dml2_printf("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup);
5399 dml2_printf("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc);
5400 dml2_printf("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait);
5401 dml2_printf("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
5402 dml2_printf("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
5403 dml2_printf("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
5404 dml2_printf("DML::%s: TWait = %f\n", __func__, p->TWait);
5405 dml2_printf("DML::%s: TWait_p = %f\n", __func__, s->TWait_p);
5406 dml2_printf("DML::%s: Ttrip = %f\n", __func__, p->Ttrip);
5407 dml2_printf("DML::%s: Tex = %f\n", __func__, p->ExtraLatencyPrefetch);
5408 dml2_printf("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm);
5409 dml2_printf("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
5410 dml2_printf("DML::%s: TWait_p: %fus\n", __func__, s->TWait_p);
5411 dml2_printf("DML::%s: Ttrip: %fus\n", __func__, p->Ttrip);
5412 dml2_printf("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler);
5413 dml2_printf("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler);
5414 dml2_printf("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor);
5415 dml2_printf("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes);
5416 dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
5417 dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, s->Tpre_rounded, (s->Tpre_rounded - Tpre));
5418 dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
5419 #endif
5420
5421 *p->dst_y_per_vm_vblank = 0;
5422 *p->dst_y_per_row_vblank = 0;
5423 *p->VRatioPrefetchY = 0;
5424 *p->VRatioPrefetchC = 0;
5425 *p->RequiredPrefetchPixelDataBWLuma = 0;
5426
5427 // Derive bandwidth by finding how much data to move within the time constraint
5428 // Tpre_rounded is Tpre rounding to 2-bit fraction
5429 // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time
5430 // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time
5431 // So that means prefetch bw calculated can be higher since the total time available for prefetch is less
5432 bool min_Lsw_equ_ok = s->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime;
5433
5434 if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok) {
5435 s->prefetch_bw1 = 0.;
5436 s->prefetch_bw2 = 0.;
5437 s->prefetch_bw3 = 0.;
5438 s->prefetch_bw4 = 0.;
5439
5440 // prefetch_bw1: VM + 2*R0 + SW
5441 if (s->Tpre_rounded - *p->Tno_bw > 0) {
5442 s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor
5443 + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)
5444 + s->prefetch_sw_bytes)
5445 / (s->Tpre_rounded - *p->Tno_bw);
5446 s->Tsw_est1 = s->prefetch_sw_bytes / s->prefetch_bw1;
5447 } else
5448 s->prefetch_bw1 = 0;
5449
5450 dml2_printf("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1);
5451 if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) {
5452 s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) /
5453 (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
5454 #ifdef __DML_VBA_DEBUG__
5455 dml2_printf("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)));
5456 dml2_printf("DML::%s: Tpre_rounded = %f\n", __func__, s->Tpre_rounded);
5457 dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw);
5458 dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ);
5459 dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5460 dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw);
5461 dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw));
5462 dml2_printf("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1);
5463 #endif
5464 }
5465
5466 // prefetch_bw2: VM + SW
5467 if (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) {
5468 s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
5469 (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded);
5470 s->Tsw_est2 = s->prefetch_sw_bytes / s->prefetch_bw2;
5471 } else
5472 s->prefetch_bw2 = 0;
5473
5474 dml2_printf("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2);
5475 if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) {
5476 s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime);
5477 dml2_printf("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2);
5478 }
5479
5480 // prefetch_bw3: 2*R0 + SW
5481 if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) {
5482 s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + s->prefetch_sw_bytes) /
5483 (s->Tpre_rounded - s->Tvm_trips_rounded);
5484 s->Tsw_est3 = s->prefetch_sw_bytes / s->prefetch_bw3;
5485 } else
5486 s->prefetch_bw3 = 0;
5487
5488 dml2_printf("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3);
5489 if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) {
5490 s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
5491 dml2_printf("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3);
5492 }
5493
5494 // prefetch_bw4: SW
5495 if (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
5496 s->prefetch_bw4 = s->prefetch_sw_bytes / (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
5497 else
5498 s->prefetch_bw4 = 0;
5499
5500 #ifdef __DML_VBA_DEBUG__
5501 dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
5502 dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, s->Tpre_rounded, (s->Tpre_rounded - Tpre));
5503 dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips));
5504 dml2_printf("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips));
5505 dml2_printf("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
5506 dml2_printf("DML::%s: Tsw_est2: %f\n", __func__, s->Tsw_est2);
5507 dml2_printf("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
5508 dml2_printf("DML::%s: prefetch_bw1: %f (final)\n", __func__, s->prefetch_bw1);
5509 dml2_printf("DML::%s: prefetch_bw2: %f (final)\n", __func__, s->prefetch_bw2);
5510 dml2_printf("DML::%s: prefetch_bw3: %f (final)\n", __func__, s->prefetch_bw3);
5511 dml2_printf("DML::%s: prefetch_bw4: %f (final)\n", __func__, s->prefetch_bw4);
5512 #endif
5513 {
5514 bool Case1OK = false;
5515 bool Case2OK = false;
5516 bool Case3OK = false;
5517
5518 // Get an "equalized" bw among all stages (vm, r0, sw): the best case is that all 3 stages are just above the latency-based requirement,
5519 // so that no particular stage is disproportionately favored; next is r0 being more aggressive, then vm being more aggressive, and the worst case is all stages being aggressive
5520 // versus the latency-based number
5521
5522 // prefetch_bw1: VM + 2*R0 + SW
5523 // so prefetch_bw1 will have enough bw to transfer the necessary data within Tpre_rounded - Tno_bw (Tpre is the worst-case latency based time to fetch the data)
5524 // here is to make sure equ bw won't be more aggressive than the latency-based requirement.
5525 // check vm time >= vm_trips
5526 // check r0 time >= r0_trips
5527
5528 double total_row_bytes = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes);
5529
5530 dml2_printf("DML::%s: Tvm_trips_rounded = %f\n", __func__, s->Tvm_trips_rounded);
5531 dml2_printf("DML::%s: Tr0_trips_rounded = %f\n", __func__, s->Tr0_trips_rounded);
5532
5533 if (s->prefetch_bw1 > 0) {
5534 double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw1;
5535 double row_transfer_time = total_row_bytes / s->prefetch_bw1;
5536 dml2_printf("DML::%s: Case1: vm_transfer_time = %f\n", __func__, vm_transfer_time);
5537 dml2_printf("DML::%s: Case1: row_transfer_time = %f\n", __func__, row_transfer_time);
5538 if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) {
5539 Case1OK = true;
5540 }
5541 }
5542
5543 // prefetch_bw2: VM + SW
5544 // prefetch_bw2 will be enough bw to transfer VM and SW data within (Tpre_rounded - 2*Tr0_trips_rounded - Tno_bw)
5545 // check vm time >= vm_trips
5546 // check r0 time < r0_trips
5547 if (s->prefetch_bw2 > 0) {
5548 double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw2;
5549 double row_transfer_time = total_row_bytes / s->prefetch_bw2;
5550 dml2_printf("DML::%s: Case2: vm_transfer_time = %f\n", __func__, vm_transfer_time);
5551 dml2_printf("DML::%s: Case2: row_transfer_time = %f\n", __func__, row_transfer_time);
5552 if (vm_transfer_time >= s->Tvm_trips_rounded && row_transfer_time < s->Tr0_trips_rounded) {
5553 Case2OK = true;
5554 }
5555 }
5556
5557 // prefetch_bw3: 2*R0 + SW
5558 // check vm time < vm_trips
5559 // check r0 time >= r0_trips
5560 if (s->prefetch_bw3 > 0) {
5561 double vm_transfer_time = *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw3;
5562 double row_transfer_time = total_row_bytes / s->prefetch_bw3;
5563 dml2_printf("DML::%s: Case3: vm_transfer_time = %f\n", __func__, vm_transfer_time);
5564 dml2_printf("DML::%s: Case3: row_transfer_time = %f\n", __func__, row_transfer_time);
5565 if (vm_transfer_time < s->Tvm_trips_rounded && row_transfer_time >= s->Tr0_trips_rounded) {
5566 Case3OK = true;
5567 }
5568 }
5569
5570 if (Case1OK) {
5571 s->prefetch_bw_equ = s->prefetch_bw1;
5572 } else if (Case2OK) {
5573 s->prefetch_bw_equ = s->prefetch_bw2;
5574 } else if (Case3OK) {
5575 s->prefetch_bw_equ = s->prefetch_bw3;
5576 } else {
5577 s->prefetch_bw_equ = s->prefetch_bw4;
5578 }
5579
5580 s->prefetch_bw_equ = math_max3(s->prefetch_bw_equ,
5581 p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw,
5582 (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime));
5583 #ifdef __DML_VBA_DEBUG__
5584 dml2_printf("DML::%s: Case1OK: %u\n", __func__, Case1OK);
5585 dml2_printf("DML::%s: Case2OK: %u\n", __func__, Case2OK);
5586 dml2_printf("DML::%s: Case3OK: %u\n", __func__, Case3OK);
5587 dml2_printf("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
5588 #endif
5589
5590 if (s->prefetch_bw_equ > 0) {
5591 if (p->display_cfg->gpuvm_enable == true) {
5592 s->Tvm_equ = math_max3(*p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, *p->Tvm_trips, s->LineTime / 4);
5593 } else {
5594 s->Tvm_equ = s->LineTime / 4;
5595 }
5596
5597 if (p->display_cfg->gpuvm_enable == true || dcc_mrq_enable || p->setup_for_tdlut) {
5598 s->Tr0_equ = math_max3((p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_equ, // PixelPTEBytesPerRow is dpte_row_bytes
5599 *p->Tr0_trips,
5600 s->LineTime / 4);
5601 } else {
5602 s->Tr0_equ = s->LineTime / 4;
5603 }
5604 } else {
5605 s->Tvm_equ = 0;
5606 s->Tr0_equ = 0;
5607 dml2_printf("DML::%s: prefetch_bw_equ equals 0!\n", __func__);
5608 }
5609 }
5610 #ifdef __DML_VBA_DEBUG__
5611 dml2_printf("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ);
5612 dml2_printf("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ);
5613 #endif
5614 // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank)
5615 s->Lsw_equ = s->dst_y_prefetch_equ - math_ceil2(4.0 * (s->Tvm_equ + 2 * s->Tr0_equ) / s->LineTime, 1.0) / 4.0;
5616
5617 // Use the more stressful prefetch schedule
5618 if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) {
5619 *p->dst_y_prefetch = s->dst_y_prefetch_oto;
5620 s->TimeForFetchingVM = s->Tvm_oto;
5621 s->TimeForFetchingRowInVBlank = s->Tr0_oto;
5622
5623 *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
5624 *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
5625 s->dst_y_per_vm_no_trip_vblank = math_ceil2(4.0 * s->Tvm_no_trip_oto / s->LineTime, 1.0) / 4.0;
5626 s->dst_y_per_row_no_trip_vblank = math_ceil2(4.0 * s->Tr0_no_trip_oto / s->LineTime, 1.0) / 4.0;
5627 #ifdef __DML_VBA_DEBUG__
5628 dml2_printf("DML::%s: Using oto scheduling for prefetch\n", __func__);
5629 #endif
5630 } else {
5631 *p->dst_y_prefetch = s->dst_y_prefetch_equ;
5632 s->TimeForFetchingVM = s->Tvm_equ;
5633 s->TimeForFetchingRowInVBlank = s->Tr0_equ;
5634
5635 *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0;
5636 *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
5637 s->dst_y_per_vm_no_trip_vblank = *p->dst_y_per_vm_vblank;
5638 s->dst_y_per_row_no_trip_vblank = *p->dst_y_per_row_vblank;
5639
5640 #ifdef __DML_VBA_DEBUG__
5641 dml2_printf("DML::%s: Using equ bw scheduling for prefetch\n", __func__);
5642 #endif
5643 }
5644
5645 /* take worst case Lsw to calculate bandwidth requirement regardless of schedule */
5646 s->LinesToRequestPrefetchPixelData = math_min2(s->Lsw_equ, s->Lsw_oto); // Lsw
5647
5648 s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line);
5649 *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime);
5650
5651 #ifdef __DML_VBA_DEBUG__
5652 dml2_printf("DML::%s: TimeForFetchingVM = %f\n", __func__, s->TimeForFetchingVM);
5653 dml2_printf("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank);
5654 dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5655 dml2_printf("DML::%s: dst_y_prefetch = %f\n", __func__, *p->dst_y_prefetch);
5656 dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank);
5657 dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
5658 dml2_printf("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData);
5659 dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
5660
5661 dml2_printf("DML::%s: cursor_bytes_per_chunk = %d\n", __func__, p->cursor_bytes_per_chunk);
5662 dml2_printf("DML::%s: cursor_bytes_per_line = %d\n", __func__, p->cursor_bytes_per_line);
5663 dml2_printf("DML::%s: cursor_prefetch_bytes = %d\n", __func__, s->cursor_prefetch_bytes);
5664 dml2_printf("DML::%s: prefetch_cursor_bw = %f\n", __func__, *p->prefetch_cursor_bw);
5665 #endif
5666 dml2_assert(*p->dst_y_prefetch < 64);
5667
5668 unsigned int min_lsw_required = (unsigned int)math_max2(2, p->tdlut_drain_time / s->LineTime);
5669 if (s->LinesToRequestPrefetchPixelData >= min_lsw_required && s->prefetch_bw_equ > 0) {
5670 *p->VRatioPrefetchY = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData;
5671 *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY, 1.0);
5672 #ifdef __DML_VBA_DEBUG__
5673 dml2_printf("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
5674 dml2_printf("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY);
5675 dml2_printf("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY);
5676 #endif
5677 if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) {
5678 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) {
5679 *p->VRatioPrefetchY = math_max2(*p->VRatioPrefetchY,
5680 (double)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0));
5681 } else {
5682 s->NoTimeToPrefetch = true;
5683 dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY);
5684 *p->VRatioPrefetchY = 0;
5685 }
5686 #ifdef __DML_VBA_DEBUG__
5687 dml2_printf("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
5688 dml2_printf("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
5689 dml2_printf("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY);
5690 #endif
5691 }
5692
5693 *p->VRatioPrefetchC = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData;
5694 *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, 1.0);
5695
5696 #ifdef __DML_VBA_DEBUG__
5697 dml2_printf("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
5698 dml2_printf("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC);
5699 dml2_printf("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC);
5700 #endif
5701 if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) {
5702 if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) {
5703 *p->VRatioPrefetchC = math_max2(*p->VRatioPrefetchC, (double)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0));
5704 } else {
5705 s->NoTimeToPrefetch = true;
5706 dml2_printf("DML::%s: No time to prefetch!. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC);
5707 *p->VRatioPrefetchC = 0;
5708 }
5709 #ifdef __DML_VBA_DEBUG__
5710 dml2_printf("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
5711 dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
5712 dml2_printf("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC);
5713 #endif
5714 }
5715
5716 *p->RequiredPrefetchPixelDataBWLuma = (double)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelY * p->swath_width_luma_ub / s->LineTime;
5717 *p->RequiredPrefetchPixelDataBWChroma = (double)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData * p->myPipe->BytePerPixelC * p->swath_width_chroma_ub / s->LineTime;
5718
5719 #ifdef __DML_VBA_DEBUG__
5720 dml2_printf("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
5721 dml2_printf("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
5722 dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5723 dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWLuma);
5724 dml2_printf("DML::%s: RequiredPrefetchPixelDataBWChroma = %f\n", __func__, *p->RequiredPrefetchPixelDataBWChroma);
5725 #endif
5726 } else {
5727 s->NoTimeToPrefetch = true;
5728 dml2_printf("DML::%s: No time to prefetch!, LinesToRequestPrefetchPixelData: %f, should be >= %d\n", __func__, s->LinesToRequestPrefetchPixelData, min_lsw_required);
5729 dml2_printf("DML::%s: No time to prefetch!, prefetch_bw_equ: %f, should be > 0\n", __func__, s->prefetch_bw_equ);
5730 *p->VRatioPrefetchY = 0;
5731 *p->VRatioPrefetchC = 0;
5732 *p->RequiredPrefetchPixelDataBWLuma = 0;
5733 *p->RequiredPrefetchPixelDataBWChroma = 0;
5734 }
5735 dml2_printf("DML: Tpre: %fus - sum of time to request 2 x data pte, swaths\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingVM);
5736 dml2_printf("DML: Tvm: %fus - time to fetch vm\n", s->TimeForFetchingVM);
5737 dml2_printf("DML: Tr0: %fus - time to fetch first row of data pagetables\n", s->TimeForFetchingRowInVBlank);
5738 dml2_printf("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (double)s->LinesToRequestPrefetchPixelData * s->LineTime);
5739 dml2_printf("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime);
5740 dml2_printf("DML: Tvstartup - TSetup - Tcalc - TWait - Tpre - To > 0\n");
5741 dml2_printf("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingVM - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((double)(*p->DSTXAfterScaler) / (double)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup);
5742 dml2_printf("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow);
5743
5744 } else {
5745 dml2_printf("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ);
5746 dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n",
5747 __func__, min_Lsw_equ_ok, s->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime);
5748 s->NoTimeToPrefetch = true;
5749 s->TimeForFetchingVM = 0;
5750 s->TimeForFetchingRowInVBlank = 0;
5751 *p->dst_y_per_vm_vblank = 0;
5752 *p->dst_y_per_row_vblank = 0;
5753 s->LinesToRequestPrefetchPixelData = 0;
5754 *p->VRatioPrefetchY = 0;
5755 *p->VRatioPrefetchC = 0;
5756 *p->RequiredPrefetchPixelDataBWLuma = 0;
5757 *p->RequiredPrefetchPixelDataBWChroma = 0;
5758 }
5759
5760 {
5761 double prefetch_vm_bw;
5762 double prefetch_row_bw;
5763
5764 if (vm_bytes == 0) {
5765 prefetch_vm_bw = 0;
5766 } else if (s->dst_y_per_vm_no_trip_vblank > 0) {
5767 #ifdef __DML_VBA_DEBUG__
5768 dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
5769 dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank);
5770 dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime);
5771 #endif
5772 prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (s->dst_y_per_vm_no_trip_vblank * s->LineTime);
5773 #ifdef __DML_VBA_DEBUG__
5774 dml2_printf("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
5775 #endif
5776 } else {
5777 prefetch_vm_bw = 0;
5778 s->NoTimeToPrefetch = true;
5779 dml2_printf("DML::%s: No time to prefetch!. dst_y_per_vm_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_vm_vblank);
5780 }
5781
5782 if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) {
5783 prefetch_row_bw = 0;
5784 } else if (s->dst_y_per_row_no_trip_vblank > 0) {
5785 prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (s->dst_y_per_row_no_trip_vblank * s->LineTime);
5786
5787 #ifdef __DML_VBA_DEBUG__
5788 dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
5789 dml2_printf("DML::%s: dst_y_per_row_vblank = %f\n", __func__, *p->dst_y_per_row_vblank);
5790 dml2_printf("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
5791 #endif
5792 } else {
5793 prefetch_row_bw = 0;
5794 s->NoTimeToPrefetch = true;
5795 dml2_printf("DML::%s: No time to prefetch!. dst_y_per_row_vblank=%f (should be > 0)\n", __func__, *p->dst_y_per_row_vblank);
5796 }
5797
5798 *p->prefetch_vmrow_bw = math_max2(prefetch_vm_bw, prefetch_row_bw);
5799 }
5800
5801 if (s->NoTimeToPrefetch) {
5802 s->TimeForFetchingVM = 0;
5803 s->TimeForFetchingRowInVBlank = 0;
5804 *p->dst_y_per_vm_vblank = 0;
5805 *p->dst_y_per_row_vblank = 0;
5806 *p->dst_y_prefetch = 0;
5807 s->LinesToRequestPrefetchPixelData = 0;
5808 *p->VRatioPrefetchY = 0;
5809 *p->VRatioPrefetchC = 0;
5810 *p->RequiredPrefetchPixelDataBWLuma = 0;
5811 *p->RequiredPrefetchPixelDataBWChroma = 0;
5812 *p->prefetch_vmrow_bw = 0;
5813 }
5814
5815 dml2_printf("DML::%s: dst_y_per_vm_vblank = %f (final)\n", __func__, *p->dst_y_per_vm_vblank);
5816 dml2_printf("DML::%s: dst_y_per_row_vblank = %f (final)\n", __func__, *p->dst_y_per_row_vblank);
5817 dml2_printf("DML::%s: prefetch_vmrow_bw = %f (final)\n", __func__, *p->prefetch_vmrow_bw);
5818 dml2_printf("DML::%s: RequiredPrefetchPixelDataBWLuma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWLuma);
5819 dml2_printf("DML::%s: RequiredPrefetchPixelDataBWChroma = %f (final)\n", __func__, *p->RequiredPrefetchPixelDataBWChroma);
5820 dml2_printf("DML::%s: NoTimeToPrefetch=%d\n", __func__, s->NoTimeToPrefetch);
5821
5822 return s->NoTimeToPrefetch;
5823 }
5824
calculate_peak_bandwidth_required(struct dml2_core_internal_scratch * s,struct dml2_core_calcs_calculate_peak_bandwidth_required_params * p)5825 static void calculate_peak_bandwidth_required(
5826 struct dml2_core_internal_scratch *s,
5827 struct dml2_core_calcs_calculate_peak_bandwidth_required_params *p)
5828 {
5829 unsigned int n;
5830 unsigned int m;
5831
5832 struct dml2_core_shared_calculate_peak_bandwidth_required_locals *l = &s->calculate_peak_bandwidth_required_locals;
5833
5834 memset(l, 0, sizeof(struct dml2_core_shared_calculate_peak_bandwidth_required_locals));
5835
5836 #ifdef __DML_VBA_DEBUG__
5837 dml2_printf("DML::%s: inc_flip_bw = %d\n", __func__, p->inc_flip_bw);
5838 dml2_printf("DML::%s: NumberOfActiveSurfaces = %d\n", __func__, p->num_active_planes);
5839 #endif
5840
5841 for (unsigned int k = 0; k < p->num_active_planes; ++k) {
5842 l->unity_array[k] = 1.0;
5843 l->zero_array[k] = 0.0;
5844 }
5845
5846 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
5847 for (n = 0; n < dml2_core_internal_bw_max; n++) {
5848 get_urgent_bandwidth_required(
5849 &s->get_urgent_bandwidth_required_locals,
5850 p->display_cfg,
5851 m,
5852 n,
5853 0, //inc_flip_bw,
5854 0, //use_qual_row_bw
5855 p->num_active_planes,
5856 p->num_of_dpp,
5857 p->dcc_dram_bw_nom_overhead_factor_p0,
5858 p->dcc_dram_bw_nom_overhead_factor_p1,
5859 p->dcc_dram_bw_pref_overhead_factor_p0,
5860 p->dcc_dram_bw_pref_overhead_factor_p1,
5861 p->mall_prefetch_sdp_overhead_factor,
5862 p->mall_prefetch_dram_overhead_factor,
5863 p->surface_read_bandwidth_l,
5864 p->surface_read_bandwidth_c,
5865 l->zero_array, //PrefetchBandwidthLuma,
5866 l->zero_array, //PrefetchBandwidthChroma,
5867 l->zero_array,
5868 l->zero_array,
5869 l->zero_array,
5870 p->dpte_row_bw,
5871 p->meta_row_bw,
5872 l->zero_array, //prefetch_cursor_bw,
5873 l->zero_array, //prefetch_vmrow_bw,
5874 l->zero_array, //flip_bw,
5875 l->zero_array,
5876 l->zero_array,
5877 l->zero_array,
5878 l->zero_array,
5879 l->zero_array,
5880 l->zero_array,
5881 p->surface_avg_vactive_required_bw[m][n],
5882 p->surface_peak_required_bw[m][n]);
5883
5884 p->urg_vactive_bandwidth_required[m][n] = get_urgent_bandwidth_required(
5885 &s->get_urgent_bandwidth_required_locals,
5886 p->display_cfg,
5887 m,
5888 n,
5889 0, //inc_flip_bw,
5890 0, //use_qual_row_bw
5891 p->num_active_planes,
5892 p->num_of_dpp,
5893 p->dcc_dram_bw_nom_overhead_factor_p0,
5894 p->dcc_dram_bw_nom_overhead_factor_p1,
5895 p->dcc_dram_bw_pref_overhead_factor_p0,
5896 p->dcc_dram_bw_pref_overhead_factor_p1,
5897 p->mall_prefetch_sdp_overhead_factor,
5898 p->mall_prefetch_dram_overhead_factor,
5899 p->surface_read_bandwidth_l,
5900 p->surface_read_bandwidth_c,
5901 l->zero_array, //PrefetchBandwidthLuma,
5902 l->zero_array, //PrefetchBandwidthChroma,
5903 p->excess_vactive_fill_bw_l,
5904 p->excess_vactive_fill_bw_c,
5905 p->cursor_bw,
5906 p->dpte_row_bw,
5907 p->meta_row_bw,
5908 l->zero_array, //prefetch_cursor_bw,
5909 l->zero_array, //prefetch_vmrow_bw,
5910 l->zero_array, //flip_bw,
5911 p->urgent_burst_factor_l,
5912 p->urgent_burst_factor_c,
5913 p->urgent_burst_factor_cursor,
5914 p->urgent_burst_factor_prefetch_l,
5915 p->urgent_burst_factor_prefetch_c,
5916 p->urgent_burst_factor_prefetch_cursor,
5917 l->surface_dummy_bw,
5918 p->surface_peak_required_bw[m][n]);
5919
5920 p->urg_bandwidth_required[m][n] = get_urgent_bandwidth_required(
5921 &s->get_urgent_bandwidth_required_locals,
5922 p->display_cfg,
5923 m,
5924 n,
5925 p->inc_flip_bw,
5926 0, //use_qual_row_bw
5927 p->num_active_planes,
5928 p->num_of_dpp,
5929 p->dcc_dram_bw_nom_overhead_factor_p0,
5930 p->dcc_dram_bw_nom_overhead_factor_p1,
5931 p->dcc_dram_bw_pref_overhead_factor_p0,
5932 p->dcc_dram_bw_pref_overhead_factor_p1,
5933 p->mall_prefetch_sdp_overhead_factor,
5934 p->mall_prefetch_dram_overhead_factor,
5935 p->surface_read_bandwidth_l,
5936 p->surface_read_bandwidth_c,
5937 p->prefetch_bandwidth_l,
5938 p->prefetch_bandwidth_c,
5939 p->excess_vactive_fill_bw_l,
5940 p->excess_vactive_fill_bw_c,
5941 p->cursor_bw,
5942 p->dpte_row_bw,
5943 p->meta_row_bw,
5944 p->prefetch_cursor_bw,
5945 p->prefetch_vmrow_bw,
5946 p->flip_bw,
5947 p->urgent_burst_factor_l,
5948 p->urgent_burst_factor_c,
5949 p->urgent_burst_factor_cursor,
5950 p->urgent_burst_factor_prefetch_l,
5951 p->urgent_burst_factor_prefetch_c,
5952 p->urgent_burst_factor_prefetch_cursor,
5953 l->surface_dummy_bw,
5954 p->surface_peak_required_bw[m][n]);
5955
5956 p->urg_bandwidth_required_qual[m][n] = get_urgent_bandwidth_required(
5957 &s->get_urgent_bandwidth_required_locals,
5958 p->display_cfg,
5959 m,
5960 n,
5961 0, //inc_flip_bw
5962 1, //use_qual_row_bw
5963 p->num_active_planes,
5964 p->num_of_dpp,
5965 p->dcc_dram_bw_nom_overhead_factor_p0,
5966 p->dcc_dram_bw_nom_overhead_factor_p1,
5967 p->dcc_dram_bw_pref_overhead_factor_p0,
5968 p->dcc_dram_bw_pref_overhead_factor_p1,
5969 p->mall_prefetch_sdp_overhead_factor,
5970 p->mall_prefetch_dram_overhead_factor,
5971 p->surface_read_bandwidth_l,
5972 p->surface_read_bandwidth_c,
5973 p->prefetch_bandwidth_l,
5974 p->prefetch_bandwidth_c,
5975 p->excess_vactive_fill_bw_l,
5976 p->excess_vactive_fill_bw_c,
5977 p->cursor_bw,
5978 p->dpte_row_bw,
5979 p->meta_row_bw,
5980 p->prefetch_cursor_bw,
5981 p->prefetch_vmrow_bw,
5982 p->flip_bw,
5983 p->urgent_burst_factor_l,
5984 p->urgent_burst_factor_c,
5985 p->urgent_burst_factor_cursor,
5986 p->urgent_burst_factor_prefetch_l,
5987 p->urgent_burst_factor_prefetch_c,
5988 p->urgent_burst_factor_prefetch_cursor,
5989 l->surface_dummy_bw,
5990 p->surface_peak_required_bw[m][n]);
5991
5992 p->non_urg_bandwidth_required[m][n] = get_urgent_bandwidth_required(
5993 &s->get_urgent_bandwidth_required_locals,
5994 p->display_cfg,
5995 m,
5996 n,
5997 p->inc_flip_bw,
5998 0, //use_qual_row_bw
5999 p->num_active_planes,
6000 p->num_of_dpp,
6001 p->dcc_dram_bw_nom_overhead_factor_p0,
6002 p->dcc_dram_bw_nom_overhead_factor_p1,
6003 p->dcc_dram_bw_pref_overhead_factor_p0,
6004 p->dcc_dram_bw_pref_overhead_factor_p1,
6005 p->mall_prefetch_sdp_overhead_factor,
6006 p->mall_prefetch_dram_overhead_factor,
6007 p->surface_read_bandwidth_l,
6008 p->surface_read_bandwidth_c,
6009 p->prefetch_bandwidth_l,
6010 p->prefetch_bandwidth_c,
6011 p->excess_vactive_fill_bw_l,
6012 p->excess_vactive_fill_bw_c,
6013 p->cursor_bw,
6014 p->dpte_row_bw,
6015 p->meta_row_bw,
6016 p->prefetch_cursor_bw,
6017 p->prefetch_vmrow_bw,
6018 p->flip_bw,
6019 l->unity_array,
6020 l->unity_array,
6021 l->unity_array,
6022 l->unity_array,
6023 l->unity_array,
6024 l->unity_array,
6025 l->surface_dummy_bw,
6026 p->surface_peak_required_bw[m][n]);
6027
6028 #ifdef __DML_VBA_DEBUG__
6029 dml2_printf("DML::%s: urg_vactive_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_vactive_bandwidth_required[m][n]);
6030 dml2_printf("DML::%s: urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]);
6031 dml2_printf("DML::%s: urg_bandwidth_required_qual[%s][%s]=%f\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->urg_bandwidth_required[m][n]);
6032 dml2_printf("DML::%s: non_urg_bandwidth_required%s[%s][%s]=%f\n", __func__, (p->inc_flip_bw ? "_flip" : ""), dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n), p->non_urg_bandwidth_required[m][n]);
6033 #endif
6034 dml2_assert(p->urg_bandwidth_required[m][n] >= p->non_urg_bandwidth_required[m][n]);
6035 }
6036 }
6037 }
6038
check_urgent_bandwidth_support(double * frac_urg_bandwidth_nom,double * frac_urg_bandwidth_mall,bool * vactive_bandwidth_support_ok,bool * bandwidth_support_ok,unsigned int mall_allocated_for_dcn_mbytes,double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])6039 static void check_urgent_bandwidth_support(
6040 double *frac_urg_bandwidth_nom,
6041 double *frac_urg_bandwidth_mall,
6042 bool *vactive_bandwidth_support_ok, // vactive ok
6043 bool *bandwidth_support_ok, // max of vm, prefetch, vactive all ok
6044
6045 unsigned int mall_allocated_for_dcn_mbytes,
6046 double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6047 double urg_vactive_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6048 double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6049 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
6050 {
6051 double frac_urg_bandwidth_nom_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
6052 double frac_urg_bandwidth_nom_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
6053 double frac_urg_bandwidth_mall_sdp;
6054 double frac_urg_bandwidth_mall_dram;
6055 if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] > 0)
6056 frac_urg_bandwidth_mall_sdp = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
6057 else
6058 frac_urg_bandwidth_mall_sdp = 0.0;
6059 if (urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] > 0)
6060 frac_urg_bandwidth_mall_dram = non_urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] / urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
6061 else
6062 frac_urg_bandwidth_mall_dram = 0.0;
6063
6064 *bandwidth_support_ok = 1;
6065 *vactive_bandwidth_support_ok = 1;
6066
6067 // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp -> FractionOfUrgentBandwidth
6068 // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram
6069 // Check urgent bandwidth required at sdp vs urgent bandwidth avail at sdp, svp_prefetch -> FractionOfUrgentBandwidthMALL
6070 // Check urgent bandwidth required at dram vs urgent bandwidth avail at dram, svp_prefetch
6071
6072 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
6073 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
6074
6075 if (mall_allocated_for_dcn_mbytes > 0) {
6076 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
6077 *bandwidth_support_ok &= urg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
6078 }
6079
6080 *frac_urg_bandwidth_nom = math_max2(frac_urg_bandwidth_nom_sdp, frac_urg_bandwidth_nom_dram);
6081 *frac_urg_bandwidth_mall = math_max2(frac_urg_bandwidth_mall_sdp, frac_urg_bandwidth_mall_dram);
6082
6083 *bandwidth_support_ok &= (*frac_urg_bandwidth_nom <= 1.0);
6084
6085 if (mall_allocated_for_dcn_mbytes > 0)
6086 *bandwidth_support_ok &= (*frac_urg_bandwidth_mall <= 1.0);
6087
6088 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp];
6089 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_dram];
6090 if (mall_allocated_for_dcn_mbytes > 0) {
6091 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_sdp];
6092 *vactive_bandwidth_support_ok &= urg_vactive_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram] <= urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram];
6093 }
6094
6095 #ifdef __DML_VBA_DEBUG__
6096 dml2_printf("DML::%s: frac_urg_bandwidth_nom_sdp = %f\n", __func__, frac_urg_bandwidth_nom_sdp);
6097 dml2_printf("DML::%s: frac_urg_bandwidth_nom_dram = %f\n", __func__, frac_urg_bandwidth_nom_dram);
6098 dml2_printf("DML::%s: frac_urg_bandwidth_nom = %f\n", __func__, *frac_urg_bandwidth_nom);
6099
6100 dml2_printf("DML::%s: frac_urg_bandwidth_mall_sdp = %f\n", __func__, frac_urg_bandwidth_mall_sdp);
6101 dml2_printf("DML::%s: frac_urg_bandwidth_mall_dram = %f\n", __func__, frac_urg_bandwidth_mall_dram);
6102 dml2_printf("DML::%s: frac_urg_bandwidth_mall = %f\n", __func__, *frac_urg_bandwidth_mall);
6103 dml2_printf("DML::%s: bandwidth_support_ok = %d\n", __func__, *bandwidth_support_ok);
6104
6105 for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) {
6106 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) {
6107 dml2_printf("DML::%s: state:%s bw_type:%s urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n",
6108 __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n),
6109 urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required[m][n]) ? "<" : ">=", urg_bandwidth_required[m][n]);
6110 }
6111 }
6112 #endif
6113
6114 }
6115
get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state,double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])6116 static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state,
6117 double urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], // no flip
6118 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
6119 {
6120 double flip_bw_available_mbps;
6121 double flip_bw_available_sdp_mbps;
6122 double flip_bw_available_dram_mbps;
6123
6124 flip_bw_available_sdp_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp];
6125 flip_bw_available_dram_mbps = urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram] - urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram];
6126 flip_bw_available_mbps = flip_bw_available_sdp_mbps < flip_bw_available_dram_mbps ? flip_bw_available_sdp_mbps : flip_bw_available_dram_mbps;
6127
6128 #ifdef __DML_VBA_DEBUG__
6129 dml2_printf("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state));
6130 dml2_printf("DML::%s: urg_bandwidth_available_sdp_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp]);
6131 dml2_printf("DML::%s: urg_bandwidth_available_dram_mbps = %f\n", __func__, urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram]);
6132 dml2_printf("DML::%s: urg_bandwidth_required_sdp_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_sdp]);
6133 dml2_printf("DML::%s: urg_bandwidth_required_dram_mbps = %f\n", __func__, urg_bandwidth_required[eval_state][dml2_core_internal_bw_dram]);
6134 dml2_printf("DML::%s: flip_bw_available_sdp_mbps = %f\n", __func__, flip_bw_available_sdp_mbps);
6135 dml2_printf("DML::%s: flip_bw_available_dram_mbps = %f\n", __func__, flip_bw_available_dram_mbps);
6136 dml2_printf("DML::%s: flip_bw_available_mbps = %f\n", __func__, flip_bw_available_mbps);
6137 #endif
6138
6139 return flip_bw_available_mbps;
6140 }
6141
calculate_immediate_flip_bandwidth_support(double * frac_urg_bandwidth_flip,bool * flip_bandwidth_support_ok,enum dml2_core_internal_soc_state_type eval_state,double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])6142 static void calculate_immediate_flip_bandwidth_support(
6143 // Output
6144 double *frac_urg_bandwidth_flip,
6145 bool *flip_bandwidth_support_ok,
6146
6147 // Input
6148 enum dml2_core_internal_soc_state_type eval_state,
6149 double urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6150 double non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max],
6151 double urg_bandwidth_available[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max])
6152 {
6153 double frac_urg_bw_flip_sdp = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_sdp] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_sdp];
6154 double frac_urg_bw_flip_dram = non_urg_bandwidth_required_flip[eval_state][dml2_core_internal_bw_dram] / urg_bandwidth_available[eval_state][dml2_core_internal_bw_dram];
6155
6156 *flip_bandwidth_support_ok = true;
6157 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram
6158 *flip_bandwidth_support_ok &= urg_bandwidth_available[eval_state][n] >= urg_bandwidth_required_flip[eval_state][n];
6159
6160 #ifdef __DML_VBA_DEBUG__
6161 dml2_printf("DML::%s: n = %s\n", __func__, dml2_core_internal_bw_type_str(n));
6162 dml2_printf("DML::%s: urg_bandwidth_available = %f\n", __func__, urg_bandwidth_available[eval_state][n]);
6163 dml2_printf("DML::%s: non_urg_bandwidth_required_flip = %f\n", __func__, non_urg_bandwidth_required_flip[eval_state][n]);
6164 dml2_printf("DML::%s: urg_bandwidth_required_flip = %f\n", __func__, urg_bandwidth_required_flip[eval_state][n]);
6165 dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok);
6166 #endif
6167 dml2_assert(urg_bandwidth_required_flip[eval_state][n] >= non_urg_bandwidth_required_flip[eval_state][n]);
6168 }
6169
6170 *frac_urg_bandwidth_flip = (frac_urg_bw_flip_sdp > frac_urg_bw_flip_dram) ? frac_urg_bw_flip_sdp : frac_urg_bw_flip_dram;
6171 *flip_bandwidth_support_ok &= (*frac_urg_bandwidth_flip <= 1.0);
6172
6173 #ifdef __DML_VBA_DEBUG__
6174 dml2_printf("DML::%s: eval_state = %s\n", __func__, dml2_core_internal_soc_state_type_str(eval_state));
6175 dml2_printf("DML::%s: frac_urg_bw_flip_sdp = %f\n", __func__, frac_urg_bw_flip_sdp);
6176 dml2_printf("DML::%s: frac_urg_bw_flip_dram = %f\n", __func__, frac_urg_bw_flip_dram);
6177 dml2_printf("DML::%s: frac_urg_bandwidth_flip = %f\n", __func__, *frac_urg_bandwidth_flip);
6178 dml2_printf("DML::%s: flip_bandwidth_support_ok = %d\n", __func__, *flip_bandwidth_support_ok);
6179
6180 for (unsigned int m = 0; m < dml2_core_internal_soc_state_max; m++) {
6181 for (unsigned int n = 0; n < dml2_core_internal_bw_max; n++) {
6182 dml2_printf("DML::%s: state:%s bw_type:%s, urg_bandwidth_available=%f %s urg_bandwidth_required=%f\n",
6183 __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n),
6184 urg_bandwidth_available[m][n], (urg_bandwidth_available[m][n] < urg_bandwidth_required_flip[m][n]) ? "<" : ">=", urg_bandwidth_required_flip[m][n]);
6185 }
6186 }
6187 #endif
6188 }
6189
CalculateFlipSchedule(struct dml2_core_internal_scratch * s,bool iflip_enable,bool use_lb_flip_bw,double HostVMInefficiencyFactor,double Tvm_trips_flip,double Tr0_trips_flip,double Tvm_trips_flip_rounded,double Tr0_trips_flip_rounded,bool GPUVMEnable,double vm_bytes,double DPTEBytesPerRow,double BandwidthAvailableForImmediateFlip,unsigned int TotImmediateFlipBytes,enum dml2_source_format_class SourcePixelFormat,double LineTime,double VRatio,double VRatioChroma,double Tno_bw_flip,unsigned int dpte_row_height,unsigned int dpte_row_height_chroma,bool use_one_row_for_frame_flip,unsigned int max_flip_time_us,unsigned int max_flip_time_lines,unsigned int per_pipe_flip_bytes,unsigned int meta_row_bytes,unsigned int meta_row_height,unsigned int meta_row_height_chroma,bool dcc_mrq_enable,double * dst_y_per_vm_flip,double * dst_y_per_row_flip,double * final_flip_bw,bool * ImmediateFlipSupportedForPipe)6190 static void CalculateFlipSchedule(
6191 struct dml2_core_internal_scratch *s,
6192 bool iflip_enable,
6193 bool use_lb_flip_bw,
6194 double HostVMInefficiencyFactor,
6195 double Tvm_trips_flip,
6196 double Tr0_trips_flip,
6197 double Tvm_trips_flip_rounded,
6198 double Tr0_trips_flip_rounded,
6199 bool GPUVMEnable,
6200 double vm_bytes, // vm_bytes
6201 double DPTEBytesPerRow, // dpte_row_bytes
6202 double BandwidthAvailableForImmediateFlip,
6203 unsigned int TotImmediateFlipBytes,
6204 enum dml2_source_format_class SourcePixelFormat,
6205 double LineTime,
6206 double VRatio,
6207 double VRatioChroma,
6208 double Tno_bw_flip,
6209 unsigned int dpte_row_height,
6210 unsigned int dpte_row_height_chroma,
6211 bool use_one_row_for_frame_flip,
6212 unsigned int max_flip_time_us,
6213 unsigned int max_flip_time_lines,
6214 unsigned int per_pipe_flip_bytes,
6215 unsigned int meta_row_bytes,
6216 unsigned int meta_row_height,
6217 unsigned int meta_row_height_chroma,
6218 bool dcc_mrq_enable,
6219
6220 // Output
6221 double *dst_y_per_vm_flip,
6222 double *dst_y_per_row_flip,
6223 double *final_flip_bw,
6224 bool *ImmediateFlipSupportedForPipe)
6225 {
6226 struct dml2_core_shared_CalculateFlipSchedule_locals *l = &s->CalculateFlipSchedule_locals;
6227
6228 l->dual_plane = dml_is_420(SourcePixelFormat) || SourcePixelFormat == dml2_rgbe_alpha;
6229 l->dpte_row_bytes = DPTEBytesPerRow;
6230
6231 #ifdef __DML_VBA_DEBUG__
6232 dml2_printf("DML::%s: GPUVMEnable = %u\n", __func__, GPUVMEnable);
6233 dml2_printf("DML::%s: ip.max_flip_time_us = %d\n", __func__, max_flip_time_us);
6234 dml2_printf("DML::%s: ip.max_flip_time_lines = %d\n", __func__, max_flip_time_lines);
6235 dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
6236 dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, TotImmediateFlipBytes);
6237 dml2_printf("DML::%s: use_lb_flip_bw = %u\n", __func__, use_lb_flip_bw);
6238 dml2_printf("DML::%s: iflip_enable = %u\n", __func__, iflip_enable);
6239 dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor);
6240 dml2_printf("DML::%s: LineTime = %f\n", __func__, LineTime);
6241 dml2_printf("DML::%s: Tno_bw_flip = %f\n", __func__, Tno_bw_flip);
6242 dml2_printf("DML::%s: Tvm_trips_flip = %f\n", __func__, Tvm_trips_flip);
6243 dml2_printf("DML::%s: Tr0_trips_flip = %f\n", __func__, Tr0_trips_flip);
6244 dml2_printf("DML::%s: Tvm_trips_flip_rounded = %f\n", __func__, Tvm_trips_flip_rounded);
6245 dml2_printf("DML::%s: Tr0_trips_flip_rounded = %f\n", __func__, Tr0_trips_flip_rounded);
6246 dml2_printf("DML::%s: vm_bytes = %f\n", __func__, vm_bytes);
6247 dml2_printf("DML::%s: DPTEBytesPerRow = %f\n", __func__, DPTEBytesPerRow);
6248 dml2_printf("DML::%s: meta_row_bytes = %d\n", __func__, meta_row_bytes);
6249 dml2_printf("DML::%s: dpte_row_bytes = %f\n", __func__, l->dpte_row_bytes);
6250 dml2_printf("DML::%s: dpte_row_height = %d\n", __func__, dpte_row_height);
6251 dml2_printf("DML::%s: meta_row_height = %d\n", __func__, meta_row_height);
6252 dml2_printf("DML::%s: VRatio = %f\n", __func__, VRatio);
6253 #endif
6254
6255 if (TotImmediateFlipBytes > 0 && (GPUVMEnable || dcc_mrq_enable)) {
6256 if (l->dual_plane) {
6257 if (dcc_mrq_enable & GPUVMEnable) {
6258 l->min_row_height = math_min2(dpte_row_height, meta_row_height);
6259 l->min_row_height_chroma = math_min2(dpte_row_height_chroma, meta_row_height_chroma);
6260 } else if (GPUVMEnable) {
6261 l->min_row_height = dpte_row_height;
6262 l->min_row_height_chroma = dpte_row_height_chroma;
6263 } else {
6264 l->min_row_height = meta_row_height;
6265 l->min_row_height_chroma = meta_row_height_chroma;
6266 }
6267 l->min_row_time = math_min2(l->min_row_height * LineTime / VRatio, l->min_row_height_chroma * LineTime / VRatioChroma);
6268 } else {
6269 if (dcc_mrq_enable & GPUVMEnable)
6270 l->min_row_height = math_min2(dpte_row_height, meta_row_height);
6271 else if (GPUVMEnable)
6272 l->min_row_height = dpte_row_height;
6273 else
6274 l->min_row_height = meta_row_height;
6275
6276 l->min_row_time = l->min_row_height * LineTime / VRatio;
6277 }
6278 #ifdef __DML_VBA_DEBUG__
6279 dml2_printf("DML::%s: min_row_time = %f\n", __func__, l->min_row_time);
6280 #endif
6281 dml2_assert(l->min_row_time > 0);
6282
6283 if (use_lb_flip_bw) {
			// For mode check, calculate the flip bw requirement with the worst-case flip time
6285 l->max_flip_time = math_min2(math_min2(l->min_row_time, (double)max_flip_time_lines * LineTime / VRatio),
6286 math_max2(Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded, (double)max_flip_time_us));
6287
6288 //The lower bound on flip bandwidth
			// Note: get_urgent_bandwidth_required already considers dpte_row_bw and meta_row_bw in its bandwidth calculation, so leave final_flip_bw = 0 if iflip is not required
6290 l->lb_flip_bw = 0;
6291
6292 if (iflip_enable) {
6293 l->hvm_scaled_vm_bytes = vm_bytes * HostVMInefficiencyFactor;
6294 l->num_rows = 2;
6295 l->hvm_scaled_row_bytes = (l->num_rows * l->dpte_row_bytes * HostVMInefficiencyFactor + l->num_rows * meta_row_bytes);
6296 l->hvm_scaled_vm_row_bytes = l->hvm_scaled_vm_bytes + l->hvm_scaled_row_bytes;
6297 l->lb_flip_bw = math_max3(
6298 l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip),
6299 l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded),
6300 l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded));
6301 #ifdef __DML_VBA_DEBUG__
6302 dml2_printf("DML::%s: max_flip_time = %f\n", __func__, l->max_flip_time);
6303 dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_bytes);
6304 dml2_printf("DML::%s: total row bytes (%d row, hvm ineff scaled) = %f\n", __func__, l->num_rows, l->hvm_scaled_row_bytes);
6305 dml2_printf("DML::%s: total vm+row bytes (hvm ineff scaled) = %f\n", __func__, l->hvm_scaled_vm_row_bytes);
6306 dml2_printf("DML::%s: lb_flip_bw for vm and row = %f\n", __func__, l->hvm_scaled_vm_row_bytes / (l->max_flip_time - Tno_bw_flip));
6307 dml2_printf("DML::%s: lb_flip_bw for vm = %f\n", __func__, l->hvm_scaled_vm_bytes / (l->max_flip_time - Tno_bw_flip - 2 * Tr0_trips_flip_rounded));
6308 dml2_printf("DML::%s: lb_flip_bw for row = %f\n", __func__, l->hvm_scaled_row_bytes / (l->max_flip_time - Tvm_trips_flip_rounded));
6309
6310 if (l->lb_flip_bw > 0) {
6311 dml2_printf("DML::%s: mode_support est Tvm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw);
6312 dml2_printf("DML::%s: mode_support est Tr0_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / l->num_rows);
6313 dml2_printf("DML::%s: mode_support est dst_y_per_vm_flip = %f (bw-based)\n", __func__, Tno_bw_flip + l->hvm_scaled_vm_bytes / l->lb_flip_bw / LineTime);
6314 dml2_printf("DML::%s: mode_support est dst_y_per_row_flip = %f (bw-based)\n", __func__, l->hvm_scaled_row_bytes / l->lb_flip_bw / LineTime / l->num_rows);
6315 dml2_printf("DML::%s: Tvm_trips_flip_rounded + 2*Tr0_trips_flip_rounded = %f\n", __func__, (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded));
6316 }
6317 #endif
6318 l->lb_flip_bw = math_max3(l->lb_flip_bw,
6319 l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip,
6320 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime));
6321
6322 #ifdef __DML_VBA_DEBUG__
6323 dml2_printf("DML::%s: lb_flip_bw for vm reg limit = %f\n", __func__, l->hvm_scaled_vm_bytes / (31 * LineTime) - Tno_bw_flip);
6324 dml2_printf("DML::%s: lb_flip_bw for row reg limit = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (15 * LineTime));
6325 #endif
6326 }
6327
6328 *final_flip_bw = l->lb_flip_bw;
6329
6330 *dst_y_per_vm_flip = 1; // not used
6331 *dst_y_per_row_flip = 1; // not used
6332 *ImmediateFlipSupportedForPipe = l->min_row_time >= (Tvm_trips_flip_rounded + 2 * Tr0_trips_flip_rounded);
6333 } else {
6334 if (iflip_enable) {
6335 l->ImmediateFlipBW = (double)per_pipe_flip_bytes * BandwidthAvailableForImmediateFlip / (double)TotImmediateFlipBytes; // flip_bw(i)
6336 double portion = (double)per_pipe_flip_bytes / (double)TotImmediateFlipBytes;
6337
6338 #ifdef __DML_VBA_DEBUG__
6339 dml2_printf("DML::%s: per_pipe_flip_bytes = %d\n", __func__, per_pipe_flip_bytes);
6340 dml2_printf("DML::%s: BandwidthAvailableForImmediateFlip = %f\n", __func__, BandwidthAvailableForImmediateFlip);
6341 dml2_printf("DML::%s: ImmediateFlipBW = %f\n", __func__, l->ImmediateFlipBW);
6342 dml2_printf("DML::%s: portion of flip bw = %f\n", __func__, portion);
6343 #endif
6344 if (l->ImmediateFlipBW == 0) {
6345 l->Tvm_flip = 0;
6346 l->Tr0_flip = 0;
6347 } else {
6348 l->Tvm_flip = math_max3(Tvm_trips_flip,
6349 Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW,
6350 LineTime / 4.0);
6351
6352 l->Tr0_flip = math_max3(Tr0_trips_flip,
6353 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW,
6354 LineTime / 4.0);
6355 }
6356 #ifdef __DML_VBA_DEBUG__
6357 dml2_printf("DML::%s: total vm bytes (hvm ineff scaled) = %f\n", __func__, vm_bytes * HostVMInefficiencyFactor);
6358 dml2_printf("DML::%s: total row bytes (hvm ineff scaled, one row) = %f\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes));
6359
6360 dml2_printf("DML::%s: Tvm_flip = %f (bw-based), Tvm_trips_flip = %f (latency-based)\n", __func__, Tno_bw_flip + vm_bytes * HostVMInefficiencyFactor / l->ImmediateFlipBW, Tvm_trips_flip);
6361 dml2_printf("DML::%s: Tr0_flip = %f (bw-based), Tr0_trips_flip = %f (latency-based)\n", __func__, (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / l->ImmediateFlipBW, Tr0_trips_flip);
6362 #endif
6363 *dst_y_per_vm_flip = math_ceil2(4.0 * (l->Tvm_flip / LineTime), 1.0) / 4.0;
6364 *dst_y_per_row_flip = math_ceil2(4.0 * (l->Tr0_flip / LineTime), 1.0) / 4.0;
6365
6366 *final_flip_bw = math_max2(vm_bytes * HostVMInefficiencyFactor / (*dst_y_per_vm_flip * LineTime),
6367 (l->dpte_row_bytes * HostVMInefficiencyFactor + meta_row_bytes) / (*dst_y_per_row_flip * LineTime));
6368
6369 if (*dst_y_per_vm_flip >= 32 || *dst_y_per_row_flip >= 16 || l->Tvm_flip + 2 * l->Tr0_flip > l->min_row_time) {
6370 *ImmediateFlipSupportedForPipe = false;
6371 } else {
6372 *ImmediateFlipSupportedForPipe = iflip_enable;
6373 }
6374 } else {
6375 l->Tvm_flip = 0;
6376 l->Tr0_flip = 0;
6377 *dst_y_per_vm_flip = 0;
6378 *dst_y_per_row_flip = 0;
6379 *final_flip_bw = 0;
6380 *ImmediateFlipSupportedForPipe = iflip_enable;
6381 }
6382 }
6383 } else {
6384 l->Tvm_flip = 0;
6385 l->Tr0_flip = 0;
6386 *dst_y_per_vm_flip = 0;
6387 *dst_y_per_row_flip = 0;
6388 *final_flip_bw = 0;
6389 *ImmediateFlipSupportedForPipe = iflip_enable;
6390 }
6391
6392 #ifdef __DML_VBA_DEBUG__
6393 if (!use_lb_flip_bw) {
6394 dml2_printf("DML::%s: dst_y_per_vm_flip = %f (should be < 32)\n", __func__, *dst_y_per_vm_flip);
6395 dml2_printf("DML::%s: dst_y_per_row_flip = %f (should be < 16)\n", __func__, *dst_y_per_row_flip);
6396 dml2_printf("DML::%s: Tvm_flip = %f (final)\n", __func__, l->Tvm_flip);
6397 dml2_printf("DML::%s: Tr0_flip = %f (final)\n", __func__, l->Tr0_flip);
6398 dml2_printf("DML::%s: Tvm_flip + 2*Tr0_flip = %f (should be <= min_row_time=%f)\n", __func__, l->Tvm_flip + 2 * l->Tr0_flip, l->min_row_time);
6399 }
6400 dml2_printf("DML::%s: final_flip_bw = %f\n", __func__, *final_flip_bw);
6401 dml2_printf("DML::%s: ImmediateFlipSupportedForPipe = %u\n", __func__, *ImmediateFlipSupportedForPipe);
6402 #endif
6403 }
6404
CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params * p)6405 static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
6406 struct dml2_core_internal_scratch *scratch,
6407 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *p)
6408 {
6409 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals *s = &scratch->CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals;
6410
6411 enum dml2_uclk_pstate_change_strategy uclk_pstate_change_strategy;
6412 double reserved_vblank_time_us;
6413 bool FoundCriticalSurface = false;
6414
6415 s->TotalActiveWriteback = 0;
6416 p->Watermark->UrgentWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency;
6417
6418 #ifdef __DML_VBA_DEBUG__
6419 dml2_printf("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
6420 #endif
6421
6422 p->Watermark->USRRetrainingWatermark = p->mmSOCParameters.UrgentLatency + p->mmSOCParameters.ExtraLatency + p->mmSOCParameters.USRRetrainingLatency + p->mmSOCParameters.SMNLatency;
6423 p->Watermark->DRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->Watermark->UrgentWatermark;
6424 p->Watermark->FCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->Watermark->UrgentWatermark;
6425 p->Watermark->StutterExitWatermark = p->mmSOCParameters.SRExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
6426 p->Watermark->StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitTime + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
6427 p->Watermark->Z8StutterExitWatermark = p->mmSOCParameters.SRExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
6428 p->Watermark->Z8StutterEnterPlusExitWatermark = p->mmSOCParameters.SREnterPlusExitZ8Time + p->mmSOCParameters.ExtraLatency_sr + 10 / p->DCFClkDeepSleep;
6429 if (p->mmSOCParameters.qos_type == dml2_qos_param_type_dcn4x) {
6430 p->Watermark->StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
6431 p->Watermark->StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
6432 p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
6433 p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us;
6434 }
6435 p->Watermark->g6_temp_read_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark;
6436
6437 #ifdef __DML_VBA_DEBUG__
6438 dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency);
6439 dml2_printf("DML::%s: ExtraLatency = %f\n", __func__, p->mmSOCParameters.ExtraLatency);
6440 dml2_printf("DML::%s: DRAMClockChangeLatency = %f\n", __func__, p->mmSOCParameters.DRAMClockChangeLatency);
6441 dml2_printf("DML::%s: SREnterPlusExitZ8Time = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitZ8Time);
6442 dml2_printf("DML::%s: SREnterPlusExitTime = %f\n", __func__, p->mmSOCParameters.SREnterPlusExitTime);
6443 dml2_printf("DML::%s: UrgentWatermark = %f\n", __func__, p->Watermark->UrgentWatermark);
6444 dml2_printf("DML::%s: USRRetrainingWatermark = %f\n", __func__, p->Watermark->USRRetrainingWatermark);
6445 dml2_printf("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, p->Watermark->DRAMClockChangeWatermark);
6446 dml2_printf("DML::%s: FCLKChangeWatermark = %f\n", __func__, p->Watermark->FCLKChangeWatermark);
6447 dml2_printf("DML::%s: StutterExitWatermark = %f\n", __func__, p->Watermark->StutterExitWatermark);
6448 dml2_printf("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark);
6449 dml2_printf("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark);
6450 dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark);
6451 dml2_printf("DML::%s: g6_temp_read_watermark_us = %f\n", __func__, p->Watermark->g6_temp_read_watermark_us);
6452 #endif
6453
6454 s->TotalActiveWriteback = 0;
6455 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6456 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
6457 s->TotalActiveWriteback = s->TotalActiveWriteback + 1;
6458 }
6459 }
6460
6461 if (s->TotalActiveWriteback <= 1) {
6462 p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency;
6463 } else {
6464 p->Watermark->WritebackUrgentWatermark = p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
6465 }
6466 if (p->USRRetrainingRequired)
6467 p->Watermark->WritebackUrgentWatermark = p->Watermark->WritebackUrgentWatermark + p->mmSOCParameters.USRRetrainingLatency;
6468
6469 if (s->TotalActiveWriteback <= 1) {
6470 p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency;
6471 p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency;
6472 } else {
6473 p->Watermark->WritebackDRAMClockChangeWatermark = p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024.0 / 32.0 / p->SOCCLK;
6474 p->Watermark->WritebackFCLKChangeWatermark = p->mmSOCParameters.FCLKChangeLatency + p->mmSOCParameters.WritebackLatency + p->WritebackChunkSize * 1024 / 32 / p->SOCCLK;
6475 }
6476
6477 if (p->USRRetrainingRequired)
6478 p->Watermark->WritebackDRAMClockChangeWatermark = p->Watermark->WritebackDRAMClockChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
6479
6480 if (p->USRRetrainingRequired)
6481 p->Watermark->WritebackFCLKChangeWatermark = p->Watermark->WritebackFCLKChangeWatermark + p->mmSOCParameters.USRRetrainingLatency;
6482
6483 #ifdef __DML_VBA_DEBUG__
6484 dml2_printf("DML::%s: WritebackDRAMClockChangeWatermark = %f\n", __func__, p->Watermark->WritebackDRAMClockChangeWatermark);
6485 dml2_printf("DML::%s: WritebackFCLKChangeWatermark = %f\n", __func__, p->Watermark->WritebackFCLKChangeWatermark);
6486 dml2_printf("DML::%s: WritebackUrgentWatermark = %f\n", __func__, p->Watermark->WritebackUrgentWatermark);
6487 dml2_printf("DML::%s: USRRetrainingRequired = %u\n", __func__, p->USRRetrainingRequired);
6488 dml2_printf("DML::%s: USRRetrainingLatency = %f\n", __func__, p->mmSOCParameters.USRRetrainingLatency);
6489 #endif
6490
6491 s->TotalPixelBW = 0.0;
6492 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6493 double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
6494 double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0;
6495 double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
6496 double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
6497 s->TotalPixelBW = s->TotalPixelBW + p->DPPPerSurface[k]
6498 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio + p->SwathWidthC[k] * p->BytePerPixelDETC[k] * v_ratio_c) / (h_total / pixel_clock_mhz);
6499 }
6500
6501 *p->global_fclk_change_supported = true;
6502 *p->global_dram_clock_change_supported = true;
6503
6504 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6505 double h_total = (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
6506 double pixel_clock_mhz = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000.0;
6507 double v_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
6508 double v_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
6509 double v_taps = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
6510 double v_taps_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
6511 double h_ratio = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio;
6512 double h_ratio_c = p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio;
6513 double LBBitPerPixel = 57;
6514
6515 s->LBLatencyHidingSourceLinesY[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthY[k] / math_max2(h_ratio, 1.0)), 1)) - (v_taps - 1));
6516 s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1));
6517
6518 #ifdef __DML_VBA_DEBUG__
6519 dml2_printf("DML::%s: k=%u, MaxLineBufferLines= %u\n", __func__, k, p->MaxLineBufferLines);
6520 dml2_printf("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize);
6521 dml2_printf("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, LBBitPerPixel);
6522 dml2_printf("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio);
6523 dml2_printf("DML::%s: k=%u, VTaps = %f\n", __func__, k, v_taps);
6524 #endif
6525
6526 s->EffectiveLBLatencyHidingY = s->LBLatencyHidingSourceLinesY[k] / v_ratio * (h_total / pixel_clock_mhz);
6527 s->EffectiveLBLatencyHidingC = s->LBLatencyHidingSourceLinesC[k] / v_ratio_c * (h_total / pixel_clock_mhz);
6528
6529 s->EffectiveDETBufferSizeY = p->DETBufferSizeY[k];
6530 if (p->UnboundedRequestEnabled) {
6531 s->EffectiveDETBufferSizeY = s->EffectiveDETBufferSizeY + p->CompressedBufferSizeInkByte * 1024 * (p->SwathWidthY[k] * p->BytePerPixelDETY[k] * v_ratio) / (h_total / pixel_clock_mhz) / s->TotalPixelBW;
6532 }
6533
6534 s->LinesInDETY[k] = (double)s->EffectiveDETBufferSizeY / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
6535 s->LinesInDETYRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETY[k], p->SwathHeightY[k]));
6536 s->FullDETBufferingTimeY = s->LinesInDETYRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio;
6537
6538 s->ActiveClockChangeLatencyHidingY = s->EffectiveLBLatencyHidingY + s->FullDETBufferingTimeY - ((double)p->DSTXAfterScaler[k] / h_total + (double)p->DSTYAfterScaler[k]) * h_total / pixel_clock_mhz;
6539
6540 if (p->NumberOfActiveSurfaces > 1) {
6541 s->ActiveClockChangeLatencyHidingY = s->ActiveClockChangeLatencyHidingY - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightY[k] * (double)h_total / pixel_clock_mhz / v_ratio;
6542 }
6543
6544 if (p->BytePerPixelDETC[k] > 0) {
6545 s->LinesInDETC[k] = p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k];
6546 s->LinesInDETCRoundedDownToSwath[k] = (unsigned int)(math_floor2(s->LinesInDETC[k], p->SwathHeightC[k]));
6547 s->FullDETBufferingTimeC = s->LinesInDETCRoundedDownToSwath[k] * (h_total / pixel_clock_mhz) / v_ratio_c;
6548 s->ActiveClockChangeLatencyHidingC = s->EffectiveLBLatencyHidingC + s->FullDETBufferingTimeC - ((double)p->DSTXAfterScaler[k] / (double)h_total + (double)p->DSTYAfterScaler[k]) * (double)h_total / pixel_clock_mhz;
6549 if (p->NumberOfActiveSurfaces > 1) {
6550 s->ActiveClockChangeLatencyHidingC = s->ActiveClockChangeLatencyHidingC - (1.0 - 1.0 / (double)p->NumberOfActiveSurfaces) * (double)p->SwathHeightC[k] * (double)h_total / pixel_clock_mhz / v_ratio_c;
6551 }
6552 s->ActiveClockChangeLatencyHiding = math_min2(s->ActiveClockChangeLatencyHidingY, s->ActiveClockChangeLatencyHidingC);
6553 } else {
6554 s->ActiveClockChangeLatencyHiding = s->ActiveClockChangeLatencyHidingY;
6555 }
6556
6557 s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->DRAMClockChangeWatermark;
6558 s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->FCLKChangeWatermark;
6559 s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark;
6560 s->g6_temp_read_latency_margin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->g6_temp_read_watermark_us;
6561
6562 if (p->VActiveLatencyHidingMargin)
6563 p->VActiveLatencyHidingMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k];
6564
6565 if (p->VActiveLatencyHidingUs)
6566 p->VActiveLatencyHidingUs[k] = s->ActiveClockChangeLatencyHiding;
6567
6568 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.enable) {
6569 s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0 / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height * (double)h_total / pixel_clock_mhz) * 4.0);
6570 if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_64) {
6571 s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2;
6572 }
6573 s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark;
6574
6575 s->WritebackFCLKChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackFCLKChangeWatermark;
6576
6577 s->ActiveDRAMClockChangeLatencyMargin[k] = math_min2(s->ActiveDRAMClockChangeLatencyMargin[k], s->WritebackDRAMClockChangeLatencyMargin);
6578 s->ActiveFCLKChangeLatencyMargin[k] = math_min2(s->ActiveFCLKChangeLatencyMargin[k], s->WritebackFCLKChangeLatencyMargin);
6579 }
6580 p->MaxActiveDRAMClockChangeLatencySupported[k] = dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k]) ? 0 : (s->ActiveDRAMClockChangeLatencyMargin[k] + p->mmSOCParameters.DRAMClockChangeLatency);
6581
6582 uclk_pstate_change_strategy = p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy;
6583 reserved_vblank_time_us = (double)p->display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns / 1000;
6584
6585 p->FCLKChangeSupport[k] = dml2_fclock_change_unsupported;
6586 if (s->ActiveFCLKChangeLatencyMargin[k] > 0)
6587 p->FCLKChangeSupport[k] = dml2_fclock_change_vactive;
6588 else if (reserved_vblank_time_us >= p->mmSOCParameters.FCLKChangeLatency)
6589 p->FCLKChangeSupport[k] = dml2_fclock_change_vblank;
6590
6591 if (p->FCLKChangeSupport[k] == dml2_fclock_change_unsupported)
6592 *p->global_fclk_change_supported = false;
6593
6594 p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_unsupported;
6595 if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) {
6596 if (p->display_cfg->overrides.all_streams_blanked ||
6597 (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency))
6598 p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank_and_vactive;
6599 else if (s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
6600 p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vactive;
6601 else if (reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
6602 p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank;
6603 } else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vactive && s->ActiveDRAMClockChangeLatencyMargin[k] > 0)
6604 p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vactive;
6605 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vblank && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)
6606 p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank;
6607 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_drr)
6608 p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_drr;
6609 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_svp)
6610 p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_mall_svp;
6611 else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
6612 p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_mall_full_frame;
6613
6614 if (p->DRAMClockChangeSupport[k] == dml2_dram_clock_change_unsupported)
6615 *p->global_dram_clock_change_supported = false;
6616
6617 s->dst_y_pstate = (unsigned int)(math_ceil2((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (h_total / pixel_clock_mhz), 1));
6618 s->src_y_pstate_l = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio, p->SwathHeightY[k]));
6619 s->src_y_ahead_l = (unsigned int)(math_floor2(p->DETBufferSizeY[k] / p->BytePerPixelDETY[k] / p->SwathWidthY[k], p->SwathHeightY[k]) + s->LBLatencyHidingSourceLinesY[k]);
6620 s->sub_vp_lines_l = s->src_y_pstate_l + s->src_y_ahead_l + p->meta_row_height_l[k];
6621
6622 #ifdef __DML_VBA_DEBUG__
6623 dml2_printf("DML::%s: k=%u, DETBufferSizeY = %u\n", __func__, k, p->DETBufferSizeY[k]);
6624 dml2_printf("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
6625 dml2_printf("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
6626 dml2_printf("DML::%s: k=%u, SwathHeightY = %u\n", __func__, k, p->SwathHeightY[k]);
6627 dml2_printf("DML::%s: k=%u, LBLatencyHidingSourceLinesY = %u\n", __func__, k, s->LBLatencyHidingSourceLinesY[k]);
6628 dml2_printf("DML::%s: k=%u, dst_y_pstate = %u\n", __func__, k, s->dst_y_pstate);
6629 dml2_printf("DML::%s: k=%u, src_y_pstate_l = %u\n", __func__, k, s->src_y_pstate_l);
6630 dml2_printf("DML::%s: k=%u, src_y_ahead_l = %u\n", __func__, k, s->src_y_ahead_l);
6631 dml2_printf("DML::%s: k=%u, meta_row_height_l = %u\n", __func__, k, p->meta_row_height_l[k]);
6632 dml2_printf("DML::%s: k=%u, sub_vp_lines_l = %u\n", __func__, k, s->sub_vp_lines_l);
6633 #endif
6634 p->SubViewportLinesNeededInMALL[k] = s->sub_vp_lines_l;
6635
6636 if (p->BytePerPixelDETC[k] > 0) {
6637 s->src_y_pstate_c = (unsigned int)(math_ceil2(s->dst_y_pstate * v_ratio_c, p->SwathHeightC[k]));
6638 s->src_y_ahead_c = (unsigned int)(math_floor2(p->DETBufferSizeC[k] / p->BytePerPixelDETC[k] / p->SwathWidthC[k], p->SwathHeightC[k]) + s->LBLatencyHidingSourceLinesC[k]);
6639 s->sub_vp_lines_c = s->src_y_pstate_c + s->src_y_ahead_c + p->meta_row_height_c[k];
6640
6641 if (dml_is_420(p->display_cfg->plane_descriptors[k].pixel_format))
6642 p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, 2 * s->sub_vp_lines_c));
6643 else
6644 p->SubViewportLinesNeededInMALL[k] = (unsigned int)(math_max2(s->sub_vp_lines_l, s->sub_vp_lines_c));
6645
6646 #ifdef __DML_VBA_DEBUG__
6647 dml2_printf("DML::%s: k=%u, meta_row_height_c = %u\n", __func__, k, p->meta_row_height_c[k]);
6648 dml2_printf("DML::%s: k=%u, src_y_pstate_c = %u\n", __func__, k, s->src_y_pstate_c);
6649 dml2_printf("DML::%s: k=%u, src_y_ahead_c = %u\n", __func__, k, s->src_y_ahead_c);
6650 dml2_printf("DML::%s: k=%u, sub_vp_lines_c = %u\n", __func__, k, s->sub_vp_lines_c);
6651 #endif
6652 }
6653 }
6654
6655 *p->g6_temp_read_support = true;
6656 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6657 if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) &&
6658 (s->g6_temp_read_latency_margin[k] < 0)) {
6659 *p->g6_temp_read_support = false;
6660 }
6661 }
6662
6663 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
6664 if ((!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) && ((!FoundCriticalSurface)
6665 || ((s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency) < *p->MaxActiveFCLKChangeLatencySupported))) {
6666 FoundCriticalSurface = true;
6667 *p->MaxActiveFCLKChangeLatencySupported = s->ActiveFCLKChangeLatencyMargin[k] + p->mmSOCParameters.FCLKChangeLatency;
6668 }
6669 }
6670
6671 #ifdef __DML_VBA_DEBUG__
6672 dml2_printf("DML::%s: DRAMClockChangeSupport = %u\n", __func__, *p->global_dram_clock_change_supported);
6673 dml2_printf("DML::%s: FCLKChangeSupport = %u\n", __func__, *p->global_fclk_change_supported);
6674 dml2_printf("DML::%s: MaxActiveFCLKChangeLatencySupported = %f\n", __func__, *p->MaxActiveFCLKChangeLatencySupported);
6675 dml2_printf("DML::%s: USRRetrainingSupport = %u\n", __func__, *p->USRRetrainingSupport);
6676 #endif
6677 }
6678
calculate_bytes_to_fetch_required_to_hide_latency(struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params * p)6679 static void calculate_bytes_to_fetch_required_to_hide_latency(
6680 struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *p)
6681 {
6682 unsigned int dst_lines_to_hide;
6683 unsigned int src_lines_to_hide_l;
6684 unsigned int src_lines_to_hide_c;
6685 unsigned int plane_index;
6686 unsigned int stream_index;
6687
6688 for (plane_index = 0; plane_index < p->num_active_planes; plane_index++) {
6689 if (dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[plane_index]))
6690 continue;
6691
6692 stream_index = p->display_cfg->plane_descriptors[plane_index].stream_index;
6693
6694 dst_lines_to_hide = (unsigned int)math_ceil(p->latency_to_hide_us /
6695 ((double)p->display_cfg->stream_descriptors[stream_index].timing.h_total /
6696 (double)p->display_cfg->stream_descriptors[stream_index].timing.pixel_clock_khz * 1000.0));
6697
6698 src_lines_to_hide_l = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio * dst_lines_to_hide,
6699 p->swath_height_l[plane_index]);
6700 p->bytes_required_l[plane_index] = src_lines_to_hide_l * p->num_of_dpp[plane_index] * p->swath_width_l[plane_index] * p->byte_per_pix_l[plane_index];
6701
6702 src_lines_to_hide_c = (unsigned int)math_ceil2(p->display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane1.v_ratio * dst_lines_to_hide,
6703 p->swath_height_c[plane_index]);
6704 p->bytes_required_c[plane_index] = src_lines_to_hide_c * p->num_of_dpp[plane_index] * p->swath_width_c[plane_index] * p->byte_per_pix_c[plane_index];
6705
6706 if (p->display_cfg->plane_descriptors[plane_index].surface.dcc.enable && p->mrq_present) {
6707 p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->meta_row_height_l[plane_index]) * p->meta_row_bytes_per_row_ub_l[plane_index];
6708 if (p->meta_row_height_c[plane_index]) {
6709 p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->meta_row_height_c[plane_index]) * p->meta_row_bytes_per_row_ub_c[plane_index];
6710 }
6711 }
6712
6713 if (p->display_cfg->gpuvm_enable == true) {
6714 p->bytes_required_l[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_l / p->dpte_row_height_l[plane_index]) * p->dpte_bytes_per_row_l[plane_index];
6715 if (p->dpte_row_height_c[plane_index]) {
6716 p->bytes_required_c[plane_index] += (unsigned int)math_ceil((double)src_lines_to_hide_c / p->dpte_row_height_c[plane_index]) * p->dpte_bytes_per_row_c[plane_index];
6717 }
6718 }
6719 }
6720 }
6721
calculate_vactive_det_fill_latency(const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes,unsigned int bytes_required_l[],unsigned int bytes_required_c[],double dcc_dram_bw_nom_overhead_factor_p0[],double dcc_dram_bw_nom_overhead_factor_p1[],double surface_read_bw_l[],double surface_read_bw_c[],double (* surface_avg_vactive_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],double (* surface_peak_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],double vactive_det_fill_delay_us[])6722 static void calculate_vactive_det_fill_latency(
6723 const struct dml2_display_cfg *display_cfg,
6724 unsigned int num_active_planes,
6725 unsigned int bytes_required_l[],
6726 unsigned int bytes_required_c[],
6727 double dcc_dram_bw_nom_overhead_factor_p0[],
6728 double dcc_dram_bw_nom_overhead_factor_p1[],
6729 double surface_read_bw_l[],
6730 double surface_read_bw_c[],
6731 double (*surface_avg_vactive_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],
6732 double (*surface_peak_required_bw)[dml2_core_internal_bw_max][DML2_MAX_PLANES],
6733 /* output */
6734 double vactive_det_fill_delay_us[])
6735 {
6736 double effective_excess_bandwidth;
6737 double effective_excess_bandwidth_l;
6738 double effective_excess_bandwidth_c;
6739 double adj_factor;
6740 unsigned int plane_index;
6741 unsigned int soc_state;
6742 unsigned int bw_type;
6743
6744 for (plane_index = 0; plane_index < num_active_planes; plane_index++) {
6745 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_index]))
6746 continue;
6747
6748 vactive_det_fill_delay_us[plane_index] = 0.0;
6749 for (soc_state = 0; soc_state < dml2_core_internal_soc_state_max; soc_state++) {
6750 for (bw_type = 0; bw_type < dml2_core_internal_bw_max; bw_type++) {
6751 effective_excess_bandwidth = (surface_peak_required_bw[soc_state][bw_type][plane_index] - surface_avg_vactive_required_bw[soc_state][bw_type][plane_index]);
6752
6753 /* luma */
6754 adj_factor = bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p0[plane_index] : 1.0;
6755
6756 effective_excess_bandwidth_l = effective_excess_bandwidth * surface_read_bw_l[plane_index] / (surface_read_bw_l[plane_index] + surface_read_bw_c[plane_index]) / adj_factor;
6757 if (effective_excess_bandwidth_l > 0.0) {
6758 vactive_det_fill_delay_us[plane_index] = math_max2(vactive_det_fill_delay_us[plane_index], bytes_required_l[plane_index] / effective_excess_bandwidth_l);
6759 }
6760
6761 /* chroma */
6762 adj_factor = bw_type == dml2_core_internal_bw_dram ? dcc_dram_bw_nom_overhead_factor_p1[plane_index] : 1.0;
6763
6764 effective_excess_bandwidth_c = effective_excess_bandwidth * surface_read_bw_c[plane_index] / (surface_read_bw_l[plane_index] + surface_read_bw_c[plane_index]) / adj_factor;
6765 if (effective_excess_bandwidth_c > 0.0) {
6766 vactive_det_fill_delay_us[plane_index] = math_max2(vactive_det_fill_delay_us[plane_index], bytes_required_c[plane_index] / effective_excess_bandwidth_c);
6767 }
6768 }
6769 }
6770 }
6771 }
6772
calculate_excess_vactive_bandwidth_required(const struct dml2_display_cfg * display_cfg,unsigned int num_active_planes,unsigned int bytes_required_l[],unsigned int bytes_required_c[],double excess_vactive_fill_bw_l[],double excess_vactive_fill_bw_c[])6773 static void calculate_excess_vactive_bandwidth_required(
6774 const struct dml2_display_cfg *display_cfg,
6775 unsigned int num_active_planes,
6776 unsigned int bytes_required_l[],
6777 unsigned int bytes_required_c[],
6778 /* outputs */
6779 double excess_vactive_fill_bw_l[],
6780 double excess_vactive_fill_bw_c[])
6781 {
6782 unsigned int plane_index;
6783
6784 for (plane_index = 0; plane_index < num_active_planes; plane_index++) {
6785 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[plane_index]))
6786 continue;
6787
6788 excess_vactive_fill_bw_l[plane_index] = 0.0;
6789 excess_vactive_fill_bw_c[plane_index] = 0.0;
6790
6791 if (display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us > 0) {
6792 excess_vactive_fill_bw_l[plane_index] = (double)bytes_required_l[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us;
6793 excess_vactive_fill_bw_c[plane_index] = (double)bytes_required_c[plane_index] / (double)display_cfg->plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us;
6794 }
6795 }
6796 }
6797
uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz,const struct dml2_dram_params * dram_config)6798 static double uclk_khz_to_dram_bw_mbps(unsigned long uclk_khz, const struct dml2_dram_params *dram_config)
6799 {
6800 double bw_mbps = 0;
6801 bw_mbps = ((double)uclk_khz * dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
6802
6803 return bw_mbps;
6804 }
6805
dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps,const struct dml2_dram_params * dram_config)6806 static double dram_bw_kbps_to_uclk_mhz(unsigned long long bw_kbps, const struct dml2_dram_params *dram_config)
6807 {
6808 double uclk_mhz = 0;
6809
6810 uclk_mhz = (double)bw_kbps / (dram_config->channel_count * dram_config->channel_width_bytes * dram_config->transactions_per_clock) / 1000.0;
6811
6812 return uclk_mhz;
6813 }
6814
get_qos_param_index(unsigned long uclk_freq_khz,const struct dml2_dcn4_uclk_dpm_dependent_qos_params * per_uclk_dpm_params)6815 static unsigned int get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params)
6816 {
6817 unsigned int i;
6818 unsigned int index = 0;
6819
6820 for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
6821 dml2_printf("DML::%s: per_uclk_dpm_params[%d].minimum_uclk_khz = %d\n", __func__, i, per_uclk_dpm_params[i].minimum_uclk_khz);
6822
6823 if (i == 0)
6824 index = 0;
6825 else
6826 index = i - 1;
6827
6828 if (uclk_freq_khz < per_uclk_dpm_params[i].minimum_uclk_khz ||
6829 per_uclk_dpm_params[i].minimum_uclk_khz == 0) {
6830 break;
6831 }
6832 }
6833 #if defined(__DML_VBA_DEBUG__)
6834 dml2_printf("DML::%s: uclk_freq_khz = %d\n", __func__, uclk_freq_khz);
6835 dml2_printf("DML::%s: index = %d\n", __func__, index);
6836 #endif
6837 return index;
6838 }
6839
get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz,const struct dml2_soc_state_table * clk_table)6840 static unsigned int get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table)
6841 {
6842 unsigned int i;
6843 bool clk_entry_found = 0;
6844
6845 for (i = 0; i < clk_table->uclk.num_clk_values; i++) {
6846 dml2_printf("DML::%s: clk_table.uclk.clk_values_khz[%d] = %d\n", __func__, i, clk_table->uclk.clk_values_khz[i]);
6847
6848 if (uclk_freq_khz == clk_table->uclk.clk_values_khz[i]) {
6849 clk_entry_found = 1;
6850 break;
6851 }
6852 }
6853
6854 dml2_assert(clk_entry_found);
6855 #if defined(__DML_VBA_DEBUG__)
6856 dml2_printf("DML::%s: uclk_freq_khz = %ld\n", __func__, uclk_freq_khz);
6857 dml2_printf("DML::%s: index = %d\n", __func__, i);
6858 #endif
6859 return i;
6860 }
6861
/*
 * Total bytes for a pipe flip: the VM bytes scaled by the HostVM inefficiency
 * factor plus two meta rows, and two DPTE rows scaled by the same factor.
 * Each of the two terms is truncated to an unsigned integer before summing.
 */
static unsigned int get_pipe_flip_bytes(
	double hostvm_inefficiency_factor,
	unsigned int vm_bytes,
	unsigned int dpte_row_bytes,
	unsigned int meta_row_bytes)
{
	const unsigned int vm_and_meta_bytes =
		(unsigned int) ((vm_bytes * hostvm_inefficiency_factor) + 2*meta_row_bytes);
	const unsigned int dpte_bytes =
		(unsigned int) (2*dpte_row_bytes * hostvm_inefficiency_factor);

	return vm_and_meta_bytes + dpte_bytes;
}
6875
/*
 * Compute the HostVM inefficiency factors.
 *
 * Both outputs are 1 unless GPUVM and HostVM are both enabled. When they are,
 * the factor is the ratio of the urgent bandwidth available for pixel + VM
 * traffic to that available for VM-only traffic. The prefetch factor equals
 * the same ratio, except that it is raised to 4 when the ratio is below 4 and
 * remote_iommu_outstanding_translations is below max_outstanding_reqs.
 */
static void calculate_hostvm_inefficiency_factor(
	double *HostVMInefficiencyFactor,
	double *HostVMInefficiencyFactorPrefetch,

	bool gpuvm_enable,
	bool hostvm_enable,
	unsigned int remote_iommu_outstanding_translations,
	unsigned int max_outstanding_reqs,
	double urg_bandwidth_avail_active_pixel_and_vm,
	double urg_bandwidth_avail_active_vm_only)
{
	*HostVMInefficiencyFactor = 1;
	*HostVMInefficiencyFactorPrefetch = 1;

	/* Without both GPUVM and HostVM the defaults above are final. */
	if (!gpuvm_enable || !hostvm_enable)
		return;

	*HostVMInefficiencyFactor = urg_bandwidth_avail_active_pixel_and_vm / urg_bandwidth_avail_active_vm_only;
	*HostVMInefficiencyFactorPrefetch = *HostVMInefficiencyFactor;

	if (remote_iommu_outstanding_translations < max_outstanding_reqs) {
		if (*HostVMInefficiencyFactorPrefetch < 4)
			*HostVMInefficiencyFactorPrefetch = 4;
	}
#ifdef __DML_VBA_DEBUG__
	dml2_printf("DML::%s: urg_bandwidth_avail_active_pixel_and_vm = %f\n", __func__, urg_bandwidth_avail_active_pixel_and_vm);
	dml2_printf("DML::%s: urg_bandwidth_avail_active_vm_only = %f\n", __func__, urg_bandwidth_avail_active_vm_only);
	dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, *HostVMInefficiencyFactor);
	dml2_printf("DML::%s: HostVMInefficiencyFactorPrefetch = %f\n", __func__, *HostVMInefficiencyFactorPrefetch);
#endif
}
6904
/*
 * Lookup table pairing a UCLK frequency with a temperature-read blackout
 * duration. get_g6_temp_read_blackout_us() walks the entries in order and
 * treats an entry with uclk_khz == 0 as the end of the populated part.
 */
struct dml2_core_internal_g6_temp_read_blackouts_table {
	struct {
		unsigned int uclk_khz; /* frequency threshold compared against the requested UCLK (kHz, per the field name) */
		unsigned int blackout_us; /* blackout returned for this entry (microseconds, per the field name) */
	} entries[DML_MAX_CLK_TABLE_SIZE];
};
6911
6912 static const struct dml2_core_internal_g6_temp_read_blackouts_table
6913 core_dcn4_g6_temp_read_blackout_table = {
6914 .entries = {
6915 {
6916 .uclk_khz = 96000,
6917 .blackout_us = 23,
6918 },
6919 {
6920 .uclk_khz = 435000,
6921 .blackout_us = 10,
6922 },
6923 {
6924 .uclk_khz = 521000,
6925 .blackout_us = 10,
6926 },
6927 {
6928 .uclk_khz = 731000,
6929 .blackout_us = 8,
6930 },
6931 {
6932 .uclk_khz = 822000,
6933 .blackout_us = 8,
6934 },
6935 {
6936 .uclk_khz = 962000,
6937 .blackout_us = 5,
6938 },
6939 {
6940 .uclk_khz = 1069000,
6941 .blackout_us = 5,
6942 },
6943 {
6944 .uclk_khz = 1187000,
6945 .blackout_us = 5,
6946 },
6947 },
6948 };
6949
get_g6_temp_read_blackout_us(struct dml2_soc_bb * soc,unsigned int uclk_freq_khz,unsigned int min_clk_index)6950 static double get_g6_temp_read_blackout_us(
6951 struct dml2_soc_bb *soc,
6952 unsigned int uclk_freq_khz,
6953 unsigned int min_clk_index)
6954 {
6955 unsigned int i;
6956 unsigned int blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us;
6957
6958 if (soc->power_management_parameters.g6_temp_read_blackout_us[0] > 0.0) {
6959 /* overrides are present in the SoC BB */
6960 return soc->power_management_parameters.g6_temp_read_blackout_us[min_clk_index];
6961 }
6962
6963 /* use internal table */
6964 blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[0].blackout_us;
6965
6966 for (i = 0; i < DML_MAX_CLK_TABLE_SIZE; i++) {
6967 if (uclk_freq_khz < core_dcn4_g6_temp_read_blackout_table.entries[i].uclk_khz ||
6968 core_dcn4_g6_temp_read_blackout_table.entries[i].uclk_khz == 0) {
6969 break;
6970 }
6971
6972 blackout_us = core_dcn4_g6_temp_read_blackout_table.entries[i].blackout_us;
6973 }
6974
6975 return (double)blackout_us;
6976 }
6977
get_max_urgent_latency_us(struct dml2_dcn4x_soc_qos_params * dcn4x,double uclk_freq_mhz,double FabricClock,unsigned int min_clk_index)6978 static double get_max_urgent_latency_us(
6979 struct dml2_dcn4x_soc_qos_params *dcn4x,
6980 double uclk_freq_mhz,
6981 double FabricClock,
6982 unsigned int min_clk_index)
6983 {
6984 double latency;
6985 latency = dcn4x->per_uclk_dpm_params[min_clk_index].maximum_latency_when_urgent_uclk_cycles / uclk_freq_mhz
6986 * (1 + dcn4x->umc_max_latency_margin / 100.0)
6987 + dcn4x->mall_overhead_fclk_cycles / FabricClock
6988 + dcn4x->max_round_trip_to_furthest_cs_fclk_cycles / FabricClock
6989 * (1 + dcn4x->fabric_max_transport_latency_margin / 100.0);
6990 return latency;
6991 }
6992
calculate_pstate_keepout_dst_lines(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_watermarks * watermarks,unsigned int pstate_keepout_dst_lines[])6993 static void calculate_pstate_keepout_dst_lines(
6994 const struct dml2_display_cfg *display_cfg,
6995 const struct dml2_core_internal_watermarks *watermarks,
6996 unsigned int pstate_keepout_dst_lines[])
6997 {
6998 const struct dml2_stream_parameters *stream_descriptor;
6999 unsigned int i;
7000
7001 for (i = 0; i < display_cfg->num_planes; i++) {
7002 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[i])) {
7003 stream_descriptor = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[i].stream_index];
7004
7005 pstate_keepout_dst_lines[i] =
7006 (unsigned int)math_ceil(watermarks->DRAMClockChangeWatermark / ((double)stream_descriptor->timing.h_total * 1000.0 / (double)stream_descriptor->timing.pixel_clock_khz));
7007
7008 if (pstate_keepout_dst_lines[i] > stream_descriptor->timing.v_total - 1) {
7009 pstate_keepout_dst_lines[i] = stream_descriptor->timing.v_total - 1;
7010 }
7011 }
7012 }
7013 }
7014
dml_core_mode_support(struct dml2_core_calcs_mode_support_ex * in_out_params)7015 static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out_params)
7016 {
7017 struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib;
7018 const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg;
7019 const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table;
7020
7021 #if defined(__DML_VBA_DEBUG__)
7022 double old_ReadBandwidthLuma;
7023 double old_ReadBandwidthChroma;
7024 #endif
7025 double outstanding_latency_us = 0;
7026 double min_return_bw_for_latency;
7027
7028 struct dml2_core_calcs_mode_support_locals *s = &mode_lib->scratch.dml_core_mode_support_locals;
7029 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
7030 struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
7031 struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
7032 struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
7033 struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
7034 struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
7035 struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params;
7036 struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params;
7037 unsigned int k, m, n;
7038
7039 memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch));
7040 memset(&mode_lib->ms, 0, sizeof(struct dml2_core_internal_mode_support));
7041
7042 mode_lib->ms.num_active_planes = display_cfg->num_planes;
7043 get_stream_output_bpp(s->OutputBpp, display_cfg);
7044
7045 mode_lib->ms.state_idx = in_out_params->min_clk_index;
7046 mode_lib->ms.SOCCLK = ((double)mode_lib->soc.clk_table.socclk.clk_values_khz[0] / 1000);
7047 mode_lib->ms.DCFCLK = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_dcfclk_khz / 1000);
7048 mode_lib->ms.FabricClock = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz / 1000);
7049 mode_lib->ms.MaxDCFCLK = (double)min_clk_table->max_clocks_khz.dcfclk / 1000;
7050 mode_lib->ms.MaxFabricClock = (double)min_clk_table->max_clocks_khz.fclk / 1000;
7051 mode_lib->ms.max_dispclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dispclk / 1000;
7052 mode_lib->ms.max_dscclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dscclk / 1000;
7053 mode_lib->ms.max_dppclk_freq_mhz = (double)min_clk_table->max_clocks_khz.dppclk / 1000;
7054 mode_lib->ms.uclk_freq_mhz = dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config);
7055 mode_lib->ms.dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps / 1000);
7056 mode_lib->ms.max_dram_bw_mbps = ((double)min_clk_table->dram_bw_table.entries[min_clk_table->dram_bw_table.num_entries - 1].pre_derate_dram_bw_kbps / 1000);
7057 mode_lib->ms.qos_param_index = get_qos_param_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
7058 mode_lib->ms.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index((unsigned int) (mode_lib->ms.uclk_freq_mhz * 1000.0), &mode_lib->soc.clk_table);
7059
7060 #if defined(__DML_VBA_DEBUG__)
7061 dml2_printf("DML::%s: --- START --- \n", __func__);
7062 dml2_printf("DML::%s: num_active_planes = %u\n", __func__, mode_lib->ms.num_active_planes);
7063 dml2_printf("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index);
7064 dml2_printf("DML::%s: qos_param_index = %0d\n", __func__, mode_lib->ms.qos_param_index);
7065 dml2_printf("DML::%s: SOCCLK = %f\n", __func__, mode_lib->ms.SOCCLK);
7066 dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->ms.dram_bw_mbps);
7067 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
7068 dml2_printf("DML::%s: DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
7069 dml2_printf("DML::%s: FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
7070 dml2_printf("DML::%s: MaxDCFCLK = %f\n", __func__, mode_lib->ms.MaxDCFCLK);
7071 dml2_printf("DML::%s: max_dispclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dispclk_freq_mhz);
7072 dml2_printf("DML::%s: max_dscclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dscclk_freq_mhz);
7073 dml2_printf("DML::%s: max_dppclk_freq_mhz = %f\n", __func__, mode_lib->ms.max_dppclk_freq_mhz);
7074 dml2_printf("DML::%s: MaxFabricClock = %f\n", __func__, mode_lib->ms.MaxFabricClock);
7075 dml2_printf("DML::%s: ip.compressed_buffer_segment_size_in_kbytes = %u\n", __func__, mode_lib->ip.compressed_buffer_segment_size_in_kbytes);
7076 dml2_printf("DML::%s: ip.dcn_mrq_present = %u\n", __func__, mode_lib->ip.dcn_mrq_present);
7077
7078 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
7079 dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
7080
7081 // dml2_printf_dml_policy(&mode_lib->ms.policy);
7082 // dml2_printf_dml_display_cfg_timing(&display_cfg->timing, mode_lib->ms.num_active_planes);
7083 // dml2_printf_dml_display_cfg_plane(&display_cfg->plane, mode_lib->ms.num_active_planes);
7084 // dml2_printf_dml_display_cfg_surface(&display_cfg->surface, mode_lib->ms.num_active_planes);
7085 // dml2_printf_dml_display_cfg_output(&display_cfg->output, mode_lib->ms.num_active_planes);
7086 #endif
7087
7088 CalculateMaxDETAndMinCompressedBufferSize(
7089 mode_lib->ip.config_return_buffer_size_in_kbytes,
7090 mode_lib->ip.config_return_buffer_segment_size_in_kbytes,
7091 mode_lib->ip.rob_buffer_size_kbytes,
7092 mode_lib->ip.max_num_dpp,
7093 display_cfg->overrides.hw.force_nom_det_size_kbytes.enable,
7094 display_cfg->overrides.hw.force_nom_det_size_kbytes.value,
7095 mode_lib->ip.dcn_mrq_present,
7096
7097 /* Output */
7098 &mode_lib->ms.MaxTotalDETInKByte,
7099 &mode_lib->ms.NomDETInKByte,
7100 &mode_lib->ms.MinCompressedBufferSizeInKByte);
7101
7102 PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd);
7103
7104 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/
7105
7106 /*Scale Ratio, taps Support Check*/
7107 mode_lib->ms.support.ScaleRatioAndTapsSupport = true;
7108 // Many core tests are still setting scaling parameters "incorrectly"
7109 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7110 if (display_cfg->plane_descriptors[k].composition.scaler_info.enabled == false
7111 && (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)
7112 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio != 1.0
7113 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps != 1.0
7114 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio != 1.0
7115 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps != 1.0)) {
7116 mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
7117 } else if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps > 8.0
7118 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps < 1.0 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 8.0
7119 || (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps > 1.0 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps % 2) == 1)
7120 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > mode_lib->ip.max_hscl_ratio
7121 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > mode_lib->ip.max_vscl_ratio
7122 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps
7123 || display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps
7124 || (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)
7125 && (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps > 8 ||
7126 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps < 1 || display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 8 ||
7127 (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps > 1 && display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps % 2 == 1) ||
7128 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > mode_lib->ip.max_hscl_ratio ||
7129 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > mode_lib->ip.max_vscl_ratio ||
7130 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps ||
7131 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio > display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps))) {
7132 mode_lib->ms.support.ScaleRatioAndTapsSupport = false;
7133 }
7134 }
7135
7136 /*Source Format, Pixel Format and Scan Support Check*/
7137 mode_lib->ms.support.SourceFormatPixelAndScanSupport = true;
7138 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7139 if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear && dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
7140 mode_lib->ms.support.SourceFormatPixelAndScanSupport = false;
7141 }
7142 }
7143
7144 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7145 CalculateBytePerPixelAndBlockSizes(
7146 display_cfg->plane_descriptors[k].pixel_format,
7147 display_cfg->plane_descriptors[k].surface.tiling,
7148 display_cfg->plane_descriptors[k].surface.plane0.pitch,
7149 display_cfg->plane_descriptors[k].surface.plane1.pitch,
7150
7151 /* Output */
7152 &mode_lib->ms.BytePerPixelY[k],
7153 &mode_lib->ms.BytePerPixelC[k],
7154 &mode_lib->ms.BytePerPixelInDETY[k],
7155 &mode_lib->ms.BytePerPixelInDETC[k],
7156 &mode_lib->ms.Read256BlockHeightY[k],
7157 &mode_lib->ms.Read256BlockHeightC[k],
7158 &mode_lib->ms.Read256BlockWidthY[k],
7159 &mode_lib->ms.Read256BlockWidthC[k],
7160 &mode_lib->ms.MacroTileHeightY[k],
7161 &mode_lib->ms.MacroTileHeightC[k],
7162 &mode_lib->ms.MacroTileWidthY[k],
7163 &mode_lib->ms.MacroTileWidthC[k],
7164 &mode_lib->ms.surf_linear128_l[k],
7165 &mode_lib->ms.surf_linear128_c[k]);
7166 }
7167
7168 /*Bandwidth Support Check*/
7169 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7170 if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle)) {
7171 mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
7172 mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
7173 } else {
7174 mode_lib->ms.SwathWidthYSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
7175 mode_lib->ms.SwathWidthCSingleDPP[k] = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
7176 }
7177 }
7178
7179 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7180 mode_lib->ms.SurfaceReadBandwidthLuma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
7181 mode_lib->ms.SurfaceReadBandwidthChroma[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
7182
7183 mode_lib->ms.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width *
7184 display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
7185
7186 #ifdef __DML_VBA_DEBUG__
7187 old_ReadBandwidthLuma = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelInDETY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
7188 old_ReadBandwidthChroma = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0;
7189 dml2_printf("DML::%s: k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, old_ReadBandwidthLuma);
7190 dml2_printf("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, old_ReadBandwidthChroma);
7191 dml2_printf("DML::%s: k=%u, ReadBandwidthLuma = %f\n", __func__, k, mode_lib->ms.SurfaceReadBandwidthLuma[k]);
7192 dml2_printf("DML::%s: k=%u, ReadBandwidthChroma = %f\n", __func__, k, mode_lib->ms.SurfaceReadBandwidthChroma[k]);
7193 #endif
7194 }
7195
7196 // Writeback bandwidth
7197 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7198 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_64) {
7199 mode_lib->ms.WriteBandwidth[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height
7200 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width
7201 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height
7202 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
7203 / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8.0;
7204 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
7205 mode_lib->ms.WriteBandwidth[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height
7206 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width
7207 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height
7208 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total
7209 / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4.0;
7210 } else {
7211 mode_lib->ms.WriteBandwidth[k] = 0.0;
7212 }
7213 }
7214
7215 /*Writeback Latency support check*/
7216 mode_lib->ms.support.WritebackLatencySupport = true;
7217 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7218 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true &&
7219 (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) {
7220 mode_lib->ms.support.WritebackLatencySupport = false;
7221 }
7222 }
7223
7224
7225 /* Writeback Scale Ratio and Taps Support Check */
7226 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true;
7227 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7228 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
7229 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio > mode_lib->ip.writeback_max_hscl_ratio
7230 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio > mode_lib->ip.writeback_max_vscl_ratio
7231 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio < mode_lib->ip.writeback_min_hscl_ratio
7232 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio < mode_lib->ip.writeback_min_vscl_ratio
7233 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps
7234 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps
7235 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps
7236 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps
7237 || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps % 2) == 1))) {
7238 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
7239 }
7240 if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) {
7241 mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false;
7242 }
7243 }
7244 }
7245
7246 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7247 CalculateSinglePipeDPPCLKAndSCLThroughput(
7248 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
7249 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
7250 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
7251 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
7252 mode_lib->ip.max_dchub_pscl_bw_pix_per_clk,
7253 mode_lib->ip.max_pscl_lb_bw_pix_per_clk,
7254 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
7255 display_cfg->plane_descriptors[k].pixel_format,
7256 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps,
7257 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps,
7258 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps,
7259 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps,
7260 /* Output */
7261 &mode_lib->ms.PSCL_FACTOR[k],
7262 &mode_lib->ms.PSCL_FACTOR_CHROMA[k],
7263 &mode_lib->ms.MinDPPCLKUsingSingleDPP[k]);
7264 }
7265
7266 // Max Viewport Size support
7267 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7268 if (display_cfg->plane_descriptors[k].surface.tiling == dml2_sw_linear) {
7269 s->MaximumSwathWidthSupportLuma = 15360;
7270 } else if (!dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // horz video
7271 s->MaximumSwathWidthSupportLuma = 7680 + 16;
7272 } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelC[k] > 0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) { // vert video
7273 s->MaximumSwathWidthSupportLuma = 4320 + 16;
7274 } else if (display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) { // rgbe + alpha
7275 s->MaximumSwathWidthSupportLuma = 5120 + 16;
7276 } else if (dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle) && mode_lib->ms.BytePerPixelY[k] == 8 && display_cfg->plane_descriptors[k].surface.dcc.enable == true) { // vert 64bpp
7277 s->MaximumSwathWidthSupportLuma = 3072 + 16;
7278 } else {
7279 s->MaximumSwathWidthSupportLuma = 6144 + 16;
7280 }
7281
7282 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format)) {
7283 s->MaximumSwathWidthSupportChroma = (unsigned int)(s->MaximumSwathWidthSupportLuma / 2.0);
7284 } else {
7285 s->MaximumSwathWidthSupportChroma = s->MaximumSwathWidthSupportLuma;
7286 }
7287
7288 unsigned lb_buffer_size_bits_luma = mode_lib->ip.line_buffer_size_bits;
7289 unsigned lb_buffer_size_bits_chroma = mode_lib->ip.line_buffer_size_bits;
7290
7291 /*
7292 #if defined(DV_BUILD)
7293 // Assume a memory config setting of 3 in 420 mode or get a new ip parameter that reflects the programming.
7294 if (mode_lib->ms.BytePerPixelC[k] != 0.0 && display_cfg->plane_descriptors[k].pixel_format != dml2_rgbe_alpha) {
7295 lb_buffer_size_bits_luma = 34620 * 57;
7296 lb_buffer_size_bits_chroma = 13560 * 57;
7297 }
7298 #endif
7299 */
7300 mode_lib->ms.MaximumSwathWidthInLineBufferLuma = lb_buffer_size_bits_luma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, 1.0) / 57 /
7301 (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, 1.0) - 2, 0.0));
7302 if (mode_lib->ms.BytePerPixelC[k] == 0.0) {
7303 mode_lib->ms.MaximumSwathWidthInLineBufferChroma = 0;
7304 } else {
7305 mode_lib->ms.MaximumSwathWidthInLineBufferChroma = lb_buffer_size_bits_chroma * math_max2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, 1.0) / 57 /
7306 (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps + math_max2(math_ceil2(display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, 1.0) - 2, 0.0));
7307 }
7308
7309 mode_lib->ms.MaximumSwathWidthLuma[k] = math_min2(s->MaximumSwathWidthSupportLuma, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
7310 mode_lib->ms.MaximumSwathWidthChroma[k] = math_min2(s->MaximumSwathWidthSupportChroma, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
7311
7312 dml2_printf("DML::%s: k=%u MaximumSwathWidthLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthLuma[k]);
7313 dml2_printf("DML::%s: k=%u MaximumSwathWidthSupportLuma=%u\n", __func__, k, s->MaximumSwathWidthSupportLuma);
7314 dml2_printf("DML::%s: k=%u MaximumSwathWidthInLineBufferLuma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferLuma);
7315
7316 dml2_printf("DML::%s: k=%u MaximumSwathWidthChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthChroma[k]);
7317 dml2_printf("DML::%s: k=%u MaximumSwathWidthSupportChroma=%u\n", __func__, k, s->MaximumSwathWidthSupportChroma);
7318 dml2_printf("DML::%s: k=%u MaximumSwathWidthInLineBufferChroma=%f\n", __func__, k, mode_lib->ms.MaximumSwathWidthInLineBufferChroma);
7319 }
7320
7321 /* Cursor Support Check */
7322 mode_lib->ms.support.CursorSupport = true;
7323 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7324 if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
7325 if (display_cfg->plane_descriptors[k].cursor.cursor_bpp == 64 && mode_lib->ip.cursor_64bpp_support == false)
7326 mode_lib->ms.support.CursorSupport = false;
7327 }
7328 }
7329
7330 /* Valid Pitch Check */
7331 mode_lib->ms.support.PitchSupport = true;
7332 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7333
7334 // data pitch
7335 unsigned int alignment_l = mode_lib->ms.MacroTileWidthY[k];
7336
7337 if (mode_lib->ms.surf_linear128_l[k])
7338 alignment_l = alignment_l / 2;
7339
7340 mode_lib->ms.support.AlignedYPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane0.pitch, display_cfg->plane_descriptors[k].surface.plane0.width), alignment_l);
7341 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
7342 unsigned int alignment_c = mode_lib->ms.MacroTileWidthC[k];
7343
7344 if (mode_lib->ms.surf_linear128_c[k])
7345 alignment_c = alignment_c / 2;
7346 mode_lib->ms.support.AlignedCPitch[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.plane1.pitch, display_cfg->plane_descriptors[k].surface.plane1.width), alignment_c);
7347 } else {
7348 mode_lib->ms.support.AlignedCPitch[k] = display_cfg->plane_descriptors[k].surface.plane1.pitch;
7349 }
7350
7351 if (mode_lib->ms.support.AlignedYPitch[k] > display_cfg->plane_descriptors[k].surface.plane0.pitch ||
7352 mode_lib->ms.support.AlignedCPitch[k] > display_cfg->plane_descriptors[k].surface.plane1.pitch) {
7353 mode_lib->ms.support.PitchSupport = false;
7354 #if defined(__DML_VBA_DEBUG__)
7355 dml2_printf("DML::%s: k=%u AlignedYPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedYPitch[k]);
7356 dml2_printf("DML::%s: k=%u PitchY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.pitch);
7357 dml2_printf("DML::%s: k=%u AlignedCPitch = %d\n", __func__, k, mode_lib->ms.support.AlignedCPitch[k]);
7358 dml2_printf("DML::%s: k=%u PitchC = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane1.pitch);
7359 dml2_printf("DML::%s: k=%u PitchSupport = %d\n", __func__, k, mode_lib->ms.support.PitchSupport);
7360 #endif
7361 }
7362
7363 // meta pitch
7364 if (mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable) {
7365 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch,
7366 display_cfg->plane_descriptors[k].surface.plane0.width), 64.0 * mode_lib->ms.Read256BlockWidthY[k]);
7367
7368 if (mode_lib->ms.support.AlignedDCCMetaPitchY[k] > display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch)
7369 mode_lib->ms.support.PitchSupport = false;
7370
7371 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
7372 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = (unsigned int)math_ceil2(math_max2(display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch,
7373 display_cfg->plane_descriptors[k].surface.plane1.width), 64.0 * mode_lib->ms.Read256BlockWidthC[k]);
7374
7375 if (mode_lib->ms.support.AlignedDCCMetaPitchC[k] > display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch)
7376 mode_lib->ms.support.PitchSupport = false;
7377 }
7378 } else {
7379 mode_lib->ms.support.AlignedDCCMetaPitchY[k] = 0;
7380 mode_lib->ms.support.AlignedDCCMetaPitchC[k] = 0;
7381 }
7382 }
7383
7384 mode_lib->ms.support.ViewportExceedsSurface = false;
7385 if (!display_cfg->overrides.hw.surface_viewport_size_check_disable) {
7386 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
7387 if (display_cfg->plane_descriptors[k].composition.viewport.plane0.width > display_cfg->plane_descriptors[k].surface.plane0.width ||
7388 display_cfg->plane_descriptors[k].composition.viewport.plane0.height > display_cfg->plane_descriptors[k].surface.plane0.height) {
7389 mode_lib->ms.support.ViewportExceedsSurface = true;
7390 #if defined(__DML_VBA_DEBUG__)
7391 dml2_printf("DML::%s: k=%u ViewportWidth = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.width);
7392 dml2_printf("DML::%s: k=%u SurfaceWidthY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.width);
7393 dml2_printf("DML::%s: k=%u ViewportHeight = %d\n", __func__, k, display_cfg->plane_descriptors[k].composition.viewport.plane0.height);
7394 dml2_printf("DML::%s: k=%u SurfaceHeightY = %d\n", __func__, k, display_cfg->plane_descriptors[k].surface.plane0.height);
7395 dml2_printf("DML::%s: k=%u ViewportExceedsSurface = %d\n", __func__, k, mode_lib->ms.support.ViewportExceedsSurface);
7396 #endif
7397 }
7398 if (dml_is_420(display_cfg->plane_descriptors[k].pixel_format) || display_cfg->plane_descriptors[k].pixel_format == dml2_rgbe_alpha) {
7399 if (display_cfg->plane_descriptors[k].composition.viewport.plane1.width > display_cfg->plane_descriptors[k].surface.plane1.width ||
7400 display_cfg->plane_descriptors[k].composition.viewport.plane1.height > display_cfg->plane_descriptors[k].surface.plane1.height) {
7401 mode_lib->ms.support.ViewportExceedsSurface = true;
7402 }
7403 }
7404 }
7405 }
7406
7407 CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
7408 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes;
7409 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = mode_lib->ms.MaxTotalDETInKByte;
7410 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = mode_lib->ms.MinCompressedBufferSizeInKByte;
7411 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
7412 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
7413 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
7414 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
7415 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = 1;
7416 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
7417 CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte;
7418 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
7419 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
7420 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.SurfaceReadBandwidthLuma;
7421 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.SurfaceReadBandwidthChroma;
7422 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma;
7423 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma;
7424 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY;
7425 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->ms.Read256BlockHeightC;
7426 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->ms.Read256BlockWidthY;
7427 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->ms.Read256BlockWidthC;
7428 CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->ms.surf_linear128_l;
7429 CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->ms.surf_linear128_c;
7430 CalculateSwathAndDETConfiguration_params->ODMMode = s->dummy_odm_mode;
7431 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->ms.BytePerPixelY;
7432 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->ms.BytePerPixelC;
7433 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->ms.BytePerPixelInDETY;
7434 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->ms.BytePerPixelInDETC;
7435 CalculateSwathAndDETConfiguration_params->DPPPerSurface = s->dummy_integer_array[2];
7436 CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present;
7437
7438 // output
7439 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0];
7440 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1];
7441 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_integer_array[3];
7442 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_integer_array[4];
7443 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_integer_array[5];
7444 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_integer_array[6];
7445 CalculateSwathAndDETConfiguration_params->SwathHeightY = s->dummy_integer_array[7];
7446 CalculateSwathAndDETConfiguration_params->SwathHeightC = s->dummy_integer_array[8];
7447 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = s->dummy_integer_array[26];
7448 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = s->dummy_integer_array[27];
7449 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = s->dummy_integer_array[9];
7450 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = s->dummy_integer_array[10];
7451 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = s->dummy_integer_array[11];
7452 CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l;
7453 CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c;
7454 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &s->dummy_boolean[0];
7455 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &s->dummy_integer[1];
7456 CalculateSwathAndDETConfiguration_params->hw_debug5 = &s->dummy_boolean[2];
7457 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &s->dummy_integer[0];
7458 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = mode_lib->ms.SingleDPPViewportSizeSupportPerSurface;
7459 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[1];
7460
7461 // This calls is just to find out if there is enough DET space to support full vp in 1 pipe.
7462 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
7463
7464 mode_lib->ms.TotalNumberOfActiveDPP = 0;
7465 mode_lib->ms.support.TotalAvailablePipesSupport = true;
7466
7467 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7468 /*Number Of DSC Slices*/
7469 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable ||
7470 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) {
7471
7472 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0)
7473 mode_lib->ms.support.NumberOfDSCSlices[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices;
7474 else {
7475 if (s->PixelClockBackEnd[k] > 4800) {
7476 mode_lib->ms.support.NumberOfDSCSlices[k] = (unsigned int)(math_ceil2(s->PixelClockBackEnd[k] / 600, 4));
7477 } else if (s->PixelClockBackEnd[k] > 2400) {
7478 mode_lib->ms.support.NumberOfDSCSlices[k] = 8;
7479 } else if (s->PixelClockBackEnd[k] > 1200) {
7480 mode_lib->ms.support.NumberOfDSCSlices[k] = 4;
7481 } else if (s->PixelClockBackEnd[k] > 340) {
7482 mode_lib->ms.support.NumberOfDSCSlices[k] = 2;
7483 } else {
7484 mode_lib->ms.support.NumberOfDSCSlices[k] = 1;
7485 }
7486 }
7487 } else {
7488 mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
7489 }
7490
7491 CalculateODMMode(
7492 mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
7493 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
7494 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
7495 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
7496 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
7497 mode_lib->ms.max_dispclk_freq_mhz,
7498 false, // DSCEnable
7499 mode_lib->ms.TotalNumberOfActiveDPP,
7500 mode_lib->ip.max_num_dpp,
7501 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
7502 mode_lib->ms.support.NumberOfDSCSlices[k],
7503
7504 /* Output */
7505 &s->TotalAvailablePipesSupportNoDSC,
7506 &s->NumberOfDPPNoDSC,
7507 &s->ODMModeNoDSC,
7508 &s->RequiredDISPCLKPerSurfaceNoDSC);
7509
7510 CalculateODMMode(
7511 mode_lib->ip.maximum_pixels_per_line_per_dsc_unit,
7512 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
7513 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
7514 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
7515 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode,
7516 mode_lib->ms.max_dispclk_freq_mhz,
7517 true, // DSCEnable
7518 mode_lib->ms.TotalNumberOfActiveDPP,
7519 mode_lib->ip.max_num_dpp,
7520 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
7521 mode_lib->ms.support.NumberOfDSCSlices[k],
7522
7523 /* Output */
7524 &s->TotalAvailablePipesSupportDSC,
7525 &s->NumberOfDPPDSC,
7526 &s->ODMModeDSC,
7527 &s->RequiredDISPCLKPerSurfaceDSC);
7528
7529 CalculateOutputLink(
7530 &mode_lib->scratch,
7531 ((double)mode_lib->soc.clk_table.phyclk.clk_values_khz[0] / 1000),
7532 ((double)mode_lib->soc.clk_table.phyclk_d18.clk_values_khz[0] / 1000),
7533 ((double)mode_lib->soc.clk_table.phyclk_d32.clk_values_khz[0] / 1000),
7534 mode_lib->soc.phy_downspread_percent,
7535 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
7536 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
7537 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
7538 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
7539 s->PixelClockBackEnd[k],
7540 s->OutputBpp[k],
7541 mode_lib->ip.maximum_dsc_bits_per_component,
7542 mode_lib->ms.support.NumberOfDSCSlices[k],
7543 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
7544 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout,
7545 s->ODMModeNoDSC,
7546 s->ODMModeDSC,
7547 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable,
7548 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count,
7549 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate,
7550
7551 /* Output */
7552 &mode_lib->ms.RequiresDSC[k],
7553 &mode_lib->ms.RequiresFEC[k],
7554 &mode_lib->ms.OutputBpp[k],
7555 &mode_lib->ms.OutputType[k],
7556 &mode_lib->ms.OutputRate[k],
7557 &mode_lib->ms.RequiredSlots[k]);
7558
7559 if (s->OutputBpp[k] == 0.0) {
7560 s->OutputBpp[k] = mode_lib->ms.OutputBpp[k];
7561 }
7562
7563 if (mode_lib->ms.RequiresDSC[k] == false) {
7564 mode_lib->ms.ODMMode[k] = s->ODMModeNoDSC;
7565 mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceNoDSC;
7566 if (!s->TotalAvailablePipesSupportNoDSC)
7567 mode_lib->ms.support.TotalAvailablePipesSupport = false;
7568 mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPNoDSC;
7569 } else {
7570 mode_lib->ms.ODMMode[k] = s->ODMModeDSC;
7571 mode_lib->ms.RequiredDISPCLKPerSurface[k] = s->RequiredDISPCLKPerSurfaceDSC;
7572 if (!s->TotalAvailablePipesSupportDSC)
7573 mode_lib->ms.support.TotalAvailablePipesSupport = false;
7574 mode_lib->ms.TotalNumberOfActiveDPP = mode_lib->ms.TotalNumberOfActiveDPP + s->NumberOfDPPDSC;
7575 }
7576 #if defined(__DML_VBA_DEBUG__)
7577 dml2_printf("DML::%s: k=%d RequiresDSC = %d\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
7578 dml2_printf("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]);
7579 #endif
7580
7581 // ensure the number dsc slices is integer multiple based on ODM mode
7582 mode_lib->ms.support.DSCSlicesODMModeSupported = true;
7583 if (mode_lib->ms.RequiresDSC[k]) {
7584 // fail a ms check if the override num_slices doesn't align with odm mode setting
7585 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices != 0) {
7586 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
7587 mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 2) == 0);
7588 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
7589 mode_lib->ms.support.DSCSlicesODMModeSupported = (mode_lib->ms.support.NumberOfDSCSlices[k] == 12);
7590 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
7591 mode_lib->ms.support.DSCSlicesODMModeSupported = ((mode_lib->ms.support.NumberOfDSCSlices[k] % 4) == 0);
7592 #if defined(__DML_VBA_DEBUG__)
7593 if (!mode_lib->ms.support.DSCSlicesODMModeSupported) {
7594 dml2_printf("DML::%s: k=%d Invalid dsc num_slices and ODM mode setting\n", __func__, k);
7595 dml2_printf("DML::%s: k=%d num_slices = %d\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.overrides.num_slices);
7596 dml2_printf("DML::%s: k=%d ODMMode = %d\n", __func__, k, mode_lib->ms.ODMMode[k]);
7597 }
7598 #endif
7599 } else {
7600 // safe guard to ensure the dml derived dsc slices and odm setting are compatible
7601 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
7602 mode_lib->ms.support.NumberOfDSCSlices[k] = 2 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 2.0, 1.0);
7603 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
7604 mode_lib->ms.support.NumberOfDSCSlices[k] = 12;
7605 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
7606 mode_lib->ms.support.NumberOfDSCSlices[k] = 4 * (unsigned int)math_ceil2(mode_lib->ms.support.NumberOfDSCSlices[k] / 4.0, 1.0);
7607 }
7608
7609 } else {
7610 mode_lib->ms.support.NumberOfDSCSlices[k] = 0;
7611 }
7612 }
7613
7614 mode_lib->ms.support.incorrect_imall_usage = 0;
7615 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7616 if (mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall)
7617 mode_lib->ms.support.incorrect_imall_usage = 1;
7618 }
7619
7620 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7621 mode_lib->ms.MPCCombine[k] = false;
7622 mode_lib->ms.NoOfDPP[k] = 1;
7623
7624 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1) {
7625 mode_lib->ms.MPCCombine[k] = false;
7626 mode_lib->ms.NoOfDPP[k] = 4;
7627 } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1) {
7628 mode_lib->ms.MPCCombine[k] = false;
7629 mode_lib->ms.NoOfDPP[k] = 3;
7630 } else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1) {
7631 mode_lib->ms.MPCCombine[k] = false;
7632 mode_lib->ms.NoOfDPP[k] = 2;
7633 } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 2) {
7634 mode_lib->ms.MPCCombine[k] = true;
7635 mode_lib->ms.NoOfDPP[k] = 2;
7636 mode_lib->ms.TotalNumberOfActiveDPP++;
7637 } else if (display_cfg->plane_descriptors[k].overrides.mpcc_combine_factor == 1) {
7638 mode_lib->ms.MPCCombine[k] = false;
7639 mode_lib->ms.NoOfDPP[k] = 1;
7640 if (!mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) {
7641 dml2_printf("WARNING: DML::%s: MPCC is override to disable but viewport is too large to be supported with single pipe!\n", __func__);
7642 }
7643 } else {
7644 if ((mode_lib->ms.MinDPPCLKUsingSingleDPP[k] > mode_lib->ms.max_dppclk_freq_mhz) || !mode_lib->ms.SingleDPPViewportSizeSupportPerSurface[k]) {
7645 mode_lib->ms.MPCCombine[k] = true;
7646 mode_lib->ms.NoOfDPP[k] = 2;
7647 mode_lib->ms.TotalNumberOfActiveDPP++;
7648 }
7649 }
7650 #if defined(__DML_VBA_DEBUG__)
7651 dml2_printf("DML::%s: k=%d, NoOfDPP = %d\n", __func__, k, mode_lib->ms.NoOfDPP[k]);
7652 #endif
7653 }
7654
7655 if (mode_lib->ms.TotalNumberOfActiveDPP > (unsigned int)mode_lib->ip.max_num_dpp)
7656 mode_lib->ms.support.TotalAvailablePipesSupport = false;
7657
7658
7659 mode_lib->ms.TotalNumberOfSingleDPPSurfaces = 0;
7660 for (k = 0; k < (unsigned int)mode_lib->ms.num_active_planes; ++k) {
7661 if (mode_lib->ms.NoOfDPP[k] == 1)
7662 mode_lib->ms.TotalNumberOfSingleDPPSurfaces = mode_lib->ms.TotalNumberOfSingleDPPSurfaces + 1;
7663 }
7664
7665 //DISPCLK/DPPCLK
7666 mode_lib->ms.WritebackRequiredDISPCLK = 0;
7667 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7668 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable) {
7669 mode_lib->ms.WritebackRequiredDISPCLK = math_max2(mode_lib->ms.WritebackRequiredDISPCLK,
7670 CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format,
7671 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
7672 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio,
7673 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio,
7674 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps,
7675 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps,
7676 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_width,
7677 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width,
7678 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
7679 mode_lib->ip.writeback_line_buffer_buffer_size));
7680 }
7681 }
7682
7683 mode_lib->ms.RequiredDISPCLK = mode_lib->ms.WritebackRequiredDISPCLK;
7684 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7685 mode_lib->ms.RequiredDISPCLK = math_max2(mode_lib->ms.RequiredDISPCLK, mode_lib->ms.RequiredDISPCLKPerSurface[k]);
7686 }
7687
7688 mode_lib->ms.GlobalDPPCLK = 0;
7689 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7690 mode_lib->ms.RequiredDPPCLK[k] = mode_lib->ms.MinDPPCLKUsingSingleDPP[k] / mode_lib->ms.NoOfDPP[k];
7691 mode_lib->ms.GlobalDPPCLK = math_max2(mode_lib->ms.GlobalDPPCLK, mode_lib->ms.RequiredDPPCLK[k]);
7692 }
7693
7694 mode_lib->ms.support.DISPCLK_DPPCLK_Support = !((mode_lib->ms.RequiredDISPCLK > mode_lib->ms.max_dispclk_freq_mhz) || (mode_lib->ms.GlobalDPPCLK > mode_lib->ms.max_dppclk_freq_mhz));
7695
7696 /* Total Available OTG, Writeback, HDMIFRL, DP Support Check */
7697 s->TotalNumberOfActiveOTG = 0;
7698 s->TotalNumberOfActiveHDMIFRL = 0;
7699 s->TotalNumberOfActiveDP2p0 = 0;
7700 s->TotalNumberOfActiveDP2p0Outputs = 0;
7701 s->TotalNumberOfActiveWriteback = 0;
7702 memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
7703
7704 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7705 if (!dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
7706 if (!s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) {
7707 s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1;
7708
7709 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true)
7710 s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1;
7711
7712 s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1;
7713 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)
7714 s->TotalNumberOfActiveHDMIFRL = s->TotalNumberOfActiveHDMIFRL + 1;
7715 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0) {
7716 s->TotalNumberOfActiveDP2p0 = s->TotalNumberOfActiveDP2p0 + 1;
7717 // FIXME_STAGE2: SW not using backend related stuff, need mapping for mst setup
7718 //if (display_cfg->output.OutputMultistreamId[k] == k || display_cfg->output.OutputMultistreamEn[k] == false) {
7719 s->TotalNumberOfActiveDP2p0Outputs = s->TotalNumberOfActiveDP2p0Outputs + 1;
7720 //}
7721 }
7722 }
7723 }
7724 }
7725
7726 /* Writeback Mode Support Check */
7727 mode_lib->ms.support.EnoughWritebackUnits = 1;
7728 if (s->TotalNumberOfActiveWriteback > (unsigned int)mode_lib->ip.max_num_wb) {
7729 mode_lib->ms.support.EnoughWritebackUnits = false;
7730 }
7731 mode_lib->ms.support.NumberOfOTGSupport = (s->TotalNumberOfActiveOTG <= (unsigned int)mode_lib->ip.max_num_otg);
7732 mode_lib->ms.support.NumberOfHDMIFRLSupport = (s->TotalNumberOfActiveHDMIFRL <= (unsigned int)mode_lib->ip.max_num_hdmi_frl_outputs);
7733 mode_lib->ms.support.NumberOfDP2p0Support = (s->TotalNumberOfActiveDP2p0 <= (unsigned int)mode_lib->ip.max_num_dp2p0_streams && s->TotalNumberOfActiveDP2p0Outputs <= (unsigned int)mode_lib->ip.max_num_dp2p0_outputs);
7734
7735
7736 mode_lib->ms.support.ExceededMultistreamSlots = false;
7737 mode_lib->ms.support.LinkCapacitySupport = true;
7738 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7739 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_disabled == false &&
7740 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
7741 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) && mode_lib->ms.OutputBpp[k] == 0) {
7742 mode_lib->ms.support.LinkCapacitySupport = false;
7743 }
7744 }
7745
7746 mode_lib->ms.support.P2IWith420 = false;
7747 mode_lib->ms.support.DSCOnlyIfNecessaryWithBPP = false;
7748 mode_lib->ms.support.DSC422NativeNotSupported = false;
7749 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = false;
7750 mode_lib->ms.support.LinkRateForMultistreamNotIndicated = false;
7751 mode_lib->ms.support.BPPForMultistreamNotIndicated = false;
7752 mode_lib->ms.support.MultistreamWithHDMIOreDP = false;
7753 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = false;
7754 mode_lib->ms.support.NotEnoughLanesForMSO = false;
7755
7756 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7757 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
7758 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
7759 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced == 1 && mode_lib->ip.ptoi_supported == true)
7760 mode_lib->ms.support.P2IWith420 = true;
7761
7762 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.dsc.enable == dml2_dsc_enable_if_necessary) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 && !mode_lib->ip.dsc422_native_support)
7763 mode_lib->ms.support.DSC422NativeNotSupported = true;
7764
7765 if (((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr2 ||
7766 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_hbr3) &&
7767 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_edp) ||
7768 ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr10 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr13p5 ||
7769 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_uhbr20) &&
7770 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp2p0))
7771 mode_lib->ms.support.LinkRateDoesNotMatchDPVersion = true;
7772
7773 // FIXME_STAGE2
7774 //if (display_cfg->output.OutputMultistreamEn[k] == 1) {
7775 // if (display_cfg->output.OutputMultistreamId[k] == k && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_link_rate == dml2_dp_rate_na)
7776 // mode_lib->ms.support.LinkRateForMultistreamNotIndicated = true;
7777 // if (display_cfg->output.OutputMultistreamId[k] == k && s->OutputBpp[k] == 0)
7778 // mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
7779 // for (n = 0; n < mode_lib->ms.num_active_planes; ++n) {
7780 // if (display_cfg->output.OutputMultistreamId[k] == n && s->OutputBpp[k] == 0)
7781 // mode_lib->ms.support.BPPForMultistreamNotIndicated = true;
7782 // }
7783 //}
7784
7785 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
7786 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmi ||
7787 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl)) {
7788 // FIXME_STAGE2
7789 //if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == k)
7790 // mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
7791 //for (n = 0; n < mode_lib->ms.num_active_planes; ++n) {
7792 // if (display_cfg->output.OutputMultistreamEn[k] == 1 && display_cfg->output.OutputMultistreamId[k] == n)
7793 // mode_lib->ms.support.MultistreamWithHDMIOreDP = true;
7794 //}
7795 }
7796 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder != dml2_dp && (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_split_1to2 ||
7797 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4))
7798 mode_lib->ms.support.MSOOrODMSplitWithNonDPLink = true;
7799
7800 if ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to2 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 2) ||
7801 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].overrides.odm_mode == dml2_odm_mode_mso_1to4 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_dp_lane_count < 4))
7802 mode_lib->ms.support.NotEnoughLanesForMSO = true;
7803 }
7804 }
7805
7806 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = false;
7807 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7808 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl &&
7809 !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])) {
7810 mode_lib->ms.RequiredDTBCLK[k] = RequiredDTBCLK(
7811 mode_lib->ms.RequiresDSC[k],
7812 s->PixelClockBackEnd[k],
7813 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
7814 mode_lib->ms.OutputBpp[k],
7815 mode_lib->ms.support.NumberOfDSCSlices[k],
7816 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
7817 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
7818 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_rate,
7819 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.audio_sample_layout);
7820
7821 if (mode_lib->ms.RequiredDTBCLK[k] > ((double)min_clk_table->max_clocks_khz.dtbclk / 1000)) {
7822 mode_lib->ms.support.DTBCLKRequiredMoreThanSupported = true;
7823 }
7824 } else {
7825 /* Phantom DTBCLK can be calculated different from main because phantom has no DSC and thus
7826 * will have a different output BPP. Ignore phantom DTBCLK requirement and only consider
7827 * non-phantom DTBCLK requirements. In map_mode_to_soc_dpm we choose the highest DTBCLK
7828 * required - by setting phantom dtbclk to 0 we ignore it.
7829 */
7830 mode_lib->ms.RequiredDTBCLK[k] = 0;
7831 }
7832 }
7833
7834 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = false;
7835 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
7836 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp ||
7837 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_dp2p0 ||
7838 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_edp ||
7839 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
7840 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_420) {
7841 s->DSCFormatFactor = 2;
7842 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_444) {
7843 s->DSCFormatFactor = 1;
7844 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format == dml2_n422 || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder == dml2_hdmifrl) {
7845 s->DSCFormatFactor = 2;
7846 } else {
7847 s->DSCFormatFactor = 1;
7848 }
7849 #ifdef __DML_VBA_DEBUG__
7850 dml2_printf("DML::%s: k=%u, RequiresDSC = %u\n", __func__, k, mode_lib->ms.RequiresDSC[k]);
7851 #endif
7852 if (mode_lib->ms.RequiresDSC[k] == true) {
7853 s->PixelClockBackEndFactor = 3.0;
7854
7855 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
7856 s->PixelClockBackEndFactor = 12.0;
7857 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
7858 s->PixelClockBackEndFactor = 9.0;
7859 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
7860 s->PixelClockBackEndFactor = 6.0;
7861
7862 mode_lib->ms.required_dscclk_freq_mhz[k] = s->PixelClockBackEnd[k] / s->PixelClockBackEndFactor / (double)s->DSCFormatFactor;
7863 if (mode_lib->ms.required_dscclk_freq_mhz[k] > mode_lib->ms.max_dscclk_freq_mhz) {
7864 mode_lib->ms.support.DSCCLKRequiredMoreThanSupported = true;
7865 }
7866
7867 #ifdef __DML_VBA_DEBUG__
7868 dml2_printf("DML::%s: k=%u, PixelClockBackEnd = %f\n", __func__, k, s->PixelClockBackEnd[k]);
7869 dml2_printf("DML::%s: k=%u, required_dscclk_freq_mhz = %f\n", __func__, k, mode_lib->ms.required_dscclk_freq_mhz[k]);
7870 dml2_printf("DML::%s: k=%u, DSCFormatFactor = %u\n", __func__, k, s->DSCFormatFactor);
7871 dml2_printf("DML::%s: k=%u, DSCCLKRequiredMoreThanSupported = %u\n", __func__, k, mode_lib->ms.support.DSCCLKRequiredMoreThanSupported);
7872 #endif
7873 }
7874 }
7875 }
7876
7877 /* Check DSC Unit and Slices Support */
7878 mode_lib->ms.support.NotEnoughDSCSlices = false;
7879 s->TotalDSCUnitsRequired = 0;
7880 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = true;
7881 memset(s->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
7882
7883 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7884 if (mode_lib->ms.RequiresDSC[k] == true && !s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) {
7885 s->NumDSCUnitRequired = 1;
7886
7887 if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_4to1)
7888 s->NumDSCUnitRequired = 4;
7889 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_3to1)
7890 s->NumDSCUnitRequired = 3;
7891 else if (mode_lib->ms.ODMMode[k] == dml2_odm_mode_combine_2to1)
7892 s->NumDSCUnitRequired = 2;
7893
7894 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active > s->NumDSCUnitRequired * (unsigned int)mode_lib->ip.maximum_pixels_per_line_per_dsc_unit)
7895 mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport = false;
7896 s->TotalDSCUnitsRequired = s->TotalDSCUnitsRequired + s->NumDSCUnitRequired;
7897
7898 if (mode_lib->ms.support.NumberOfDSCSlices[k] > 4 * s->NumDSCUnitRequired)
7899 mode_lib->ms.support.NotEnoughDSCSlices = true;
7900 }
7901 s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1;
7902 }
7903
7904 mode_lib->ms.support.NotEnoughDSCUnits = false;
7905 if (s->TotalDSCUnitsRequired > (unsigned int)mode_lib->ip.num_dsc) {
7906 mode_lib->ms.support.NotEnoughDSCUnits = true;
7907 }
7908
7909 /*DSC Delay per state*/
7910 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7911 mode_lib->ms.DSCDelay[k] = DSCDelayRequirement(mode_lib->ms.RequiresDSC[k],
7912 mode_lib->ms.ODMMode[k],
7913 mode_lib->ip.maximum_dsc_bits_per_component,
7914 s->OutputBpp[k],
7915 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
7916 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
7917 mode_lib->ms.support.NumberOfDSCSlices[k],
7918 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
7919 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
7920 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
7921 s->PixelClockBackEnd[k]);
7922 }
7923
7924 // Figure out the swath and DET configuration after the num dpp per plane is figured out
7925 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
7926 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->ms.ODMMode;
7927 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
7928
7929 // output
7930 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = s->dummy_integer_array[0];
7931 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = s->dummy_integer_array[1];
7932 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub;
7933 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub;
7934 CalculateSwathAndDETConfiguration_params->SwathWidth = mode_lib->ms.SwathWidthY;
7935 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = mode_lib->ms.SwathWidthC;
7936 CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->ms.SwathHeightY;
7937 CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->ms.SwathHeightC;
7938 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->ms.support.request_size_bytes_luma;
7939 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->ms.support.request_size_bytes_chroma;
7940 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->ms.DETBufferSizeInKByte; // FIXME: This is per pipe but the pipes in plane will use that
7941 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
7942 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
7943 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->ms.UnboundedRequestEnabled;
7944 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = s->dummy_integer_array[3];
7945 CalculateSwathAndDETConfiguration_params->hw_debug5 = s->dummy_boolean_array[1];
7946 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->ms.CompressedBufferSizeInkByte;
7947 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = s->dummy_boolean_array[0];
7948 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &mode_lib->ms.support.ViewportSizeSupport;
7949
7950 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
7951
7952 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
7953 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
7954 mode_lib->ms.SurfaceSizeInMALL[k] = 0;
7955 mode_lib->ms.support.ExceededMALLSize = 0;
7956 } else {
7957 CalculateSurfaceSizeInMall(
7958 display_cfg,
7959 mode_lib->ms.num_active_planes,
7960 mode_lib->soc.mall_allocated_for_dcn_mbytes,
7961
7962 mode_lib->ms.BytePerPixelY,
7963 mode_lib->ms.BytePerPixelC,
7964 mode_lib->ms.Read256BlockWidthY,
7965 mode_lib->ms.Read256BlockWidthC,
7966 mode_lib->ms.Read256BlockHeightY,
7967 mode_lib->ms.Read256BlockHeightC,
7968 mode_lib->ms.MacroTileWidthY,
7969 mode_lib->ms.MacroTileWidthC,
7970 mode_lib->ms.MacroTileHeightY,
7971 mode_lib->ms.MacroTileHeightC,
7972
7973 /* Output */
7974 mode_lib->ms.SurfaceSizeInMALL,
7975 &mode_lib->ms.support.ExceededMALLSize);
7976 }
7977
7978 mode_lib->ms.TotalNumberOfDCCActiveDPP = 0;
7979 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7980 if (display_cfg->plane_descriptors[k].surface.dcc.enable == true) {
7981 mode_lib->ms.TotalNumberOfDCCActiveDPP = mode_lib->ms.TotalNumberOfDCCActiveDPP + mode_lib->ms.NoOfDPP[k];
7982 }
7983 }
7984
7985 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
7986 s->SurfParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
7987 s->SurfParameters[k].DPPPerSurface = mode_lib->ms.NoOfDPP[k];
7988 s->SurfParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
7989 s->SurfParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
7990 s->SurfParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
7991 s->SurfParameters[k].BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
7992 s->SurfParameters[k].BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
7993 s->SurfParameters[k].BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
7994 s->SurfParameters[k].BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
7995 s->SurfParameters[k].BlockWidthY = mode_lib->ms.MacroTileWidthY[k];
7996 s->SurfParameters[k].BlockHeightY = mode_lib->ms.MacroTileHeightY[k];
7997 s->SurfParameters[k].BlockWidthC = mode_lib->ms.MacroTileWidthC[k];
7998 s->SurfParameters[k].BlockHeightC = mode_lib->ms.MacroTileHeightC[k];
7999 s->SurfParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
8000 s->SurfParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
8001 s->SurfParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
8002 s->SurfParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
8003 s->SurfParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
8004 s->SurfParameters[k].BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
8005 s->SurfParameters[k].BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
8006 s->SurfParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
8007 s->SurfParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
8008 s->SurfParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
8009 s->SurfParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
8010 s->SurfParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
8011 s->SurfParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
8012 s->SurfParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
8013 s->SurfParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
8014 s->SurfParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
8015 s->SurfParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
8016 s->SurfParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
8017 s->SurfParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
8018 s->SurfParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
8019 s->SurfParameters[k].SwathHeightY = mode_lib->ms.SwathHeightY[k];
8020 s->SurfParameters[k].SwathHeightC = mode_lib->ms.SwathHeightC[k];
8021
8022 s->SurfParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
8023 s->SurfParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
8024 }
8025
8026 CalculateVMRowAndSwath_params->display_cfg = display_cfg;
8027 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
8028 CalculateVMRowAndSwath_params->myPipe = s->SurfParameters;
8029 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->ms.SurfaceSizeInMALL;
8030 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
8031 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
8032 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
8033 CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->ms.SwathWidthY;
8034 CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->ms.SwathWidthC;
8035 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
8036 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
8037 CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
8038
8039 // output
8040 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = mode_lib->ms.PTEBufferSizeNotExceeded;
8041 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = s->dummy_integer_array[12];
8042 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = s->dummy_integer_array[13];
8043 CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->ms.dpte_row_height;
8044 CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->ms.dpte_row_height_chroma;
8045 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = s->dummy_integer_array[14]; // VBA_DELTA
8046 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = s->dummy_integer_array[15]; // VBA_DELTA
8047 CalculateVMRowAndSwath_params->vm_group_bytes = s->dummy_integer_array[16];
8048 CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
8049 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = s->dummy_integer_array[17];
8050 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = s->dummy_integer_array[18];
8051 CalculateVMRowAndSwath_params->PTERequestSizeY = s->dummy_integer_array[19];
8052 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = s->dummy_integer_array[20];
8053 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = s->dummy_integer_array[21];
8054 CalculateVMRowAndSwath_params->PTERequestSizeC = s->dummy_integer_array[22];
8055 CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
8056 CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
8057 CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
8058 CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
8059 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = s->dummy_integer_array[23];
8060 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = s->dummy_integer_array[24];
8061 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY;
8062 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC;
8063 CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->ms.PrefillY;
8064 CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->ms.PrefillC;
8065 CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY;
8066 CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC;
8067 CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
8068 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow;
8069 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
8070 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
8071 CalculateVMRowAndSwath_params->vm_bytes = mode_lib->ms.vm_bytes;
8072 CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->ms.use_one_row_for_frame;
8073 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->ms.use_one_row_for_frame_flip;
8074 CalculateVMRowAndSwath_params->is_using_mall_for_ss = s->dummy_boolean_array[0];
8075 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = s->dummy_boolean_array[1];
8076 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = s->dummy_integer_array[25];
8077 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = mode_lib->ms.DCCMetaBufferSizeNotExceeded;
8078 CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->ms.meta_row_bw;
8079 CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->ms.meta_row_bytes;
8080 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
8081 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
8082 CalculateVMRowAndSwath_params->meta_req_width_luma = s->dummy_integer_array[26];
8083 CalculateVMRowAndSwath_params->meta_req_height_luma = s->dummy_integer_array[27];
8084 CalculateVMRowAndSwath_params->meta_row_width_luma = s->dummy_integer_array[28];
8085 CalculateVMRowAndSwath_params->meta_row_height_luma = s->meta_row_height_luma;
8086 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = s->dummy_integer_array[29];
8087 CalculateVMRowAndSwath_params->meta_req_width_chroma = s->dummy_integer_array[30];
8088 CalculateVMRowAndSwath_params->meta_req_height_chroma = s->dummy_integer_array[31];
8089 CalculateVMRowAndSwath_params->meta_row_width_chroma = s->dummy_integer_array[32];
8090 CalculateVMRowAndSwath_params->meta_row_height_chroma = s->meta_row_height_chroma;
8091 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = s->dummy_integer_array[33];
8092
8093 CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
8094
8095 mode_lib->ms.support.PTEBufferSizeNotExceeded = true;
8096 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = true;
8097
8098 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8099 if (mode_lib->ms.PTEBufferSizeNotExceeded[k] == false)
8100 mode_lib->ms.support.PTEBufferSizeNotExceeded = false;
8101
8102 if (mode_lib->ms.DCCMetaBufferSizeNotExceeded[k] == false)
8103 mode_lib->ms.support.DCCMetaBufferSizeNotExceeded = false;
8104
8105 #ifdef __DML_VBA_DEBUG__
8106 dml2_printf("DML::%s: k=%u, PTEBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.PTEBufferSizeNotExceeded[k]);
8107 dml2_printf("DML::%s: k=%u, DCCMetaBufferSizeNotExceeded = %u\n", __func__, k, mode_lib->ms.DCCMetaBufferSizeNotExceeded[k]);
8108 #endif
8109 }
8110 #ifdef __DML_VBA_DEBUG__
8111 dml2_printf("DML::%s: PTEBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.PTEBufferSizeNotExceeded);
8112 dml2_printf("DML::%s: DCCMetaBufferSizeNotExceeded = %u\n", __func__, mode_lib->ms.support.DCCMetaBufferSizeNotExceeded);
8113 #endif
8114
8115 /* VActive bytes to fetch for UCLK P-State */
8116 calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg;
8117 calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present;
8118
8119 calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = mode_lib->ms.num_active_planes;
8120 calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->ms.NoOfDPP;
8121 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = s->meta_row_height_luma;
8122 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = s->meta_row_height_chroma;
8123 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
8124 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
8125 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->ms.dpte_row_height;
8126 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->ms.dpte_row_height_chroma;
8127 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
8128 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
8129 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->ms.BytePerPixelY;
8130 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->ms.BytePerPixelC;
8131 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->ms.SwathWidthY;
8132 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->ms.SwathWidthC;
8133 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->ms.SwathHeightY;
8134 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->ms.SwathHeightC;
8135 calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
8136
8137 /* outputs */
8138 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l;
8139 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c;
8140
8141 calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params);
8142
8143 /* Excess VActive bandwidth required to fill DET */
8144 calculate_excess_vactive_bandwidth_required(
8145 display_cfg,
8146 mode_lib->ms.num_active_planes,
8147 s->pstate_bytes_required_l,
8148 s->pstate_bytes_required_c,
8149 /* outputs */
8150 mode_lib->ms.excess_vactive_fill_bw_l,
8151 mode_lib->ms.excess_vactive_fill_bw_c);
8152
8153 mode_lib->ms.UrgLatency = CalculateUrgentLatency(
8154 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
8155 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
8156 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
8157 mode_lib->soc.do_urgent_latency_adjustment,
8158 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
8159 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
8160 mode_lib->ms.FabricClock,
8161 mode_lib->ms.uclk_freq_mhz,
8162 mode_lib->soc.qos_parameters.qos_type,
8163 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].urgent_ramp_uclk_cycles,
8164 mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
8165 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
8166 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
8167 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
8168 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
8169
8170 mode_lib->ms.TripToMemory = CalculateTripToMemory(
8171 mode_lib->ms.UrgLatency,
8172 mode_lib->ms.FabricClock,
8173 mode_lib->ms.uclk_freq_mhz,
8174 mode_lib->soc.qos_parameters.qos_type,
8175 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].trip_to_memory_uclk_cycles,
8176 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
8177 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
8178 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
8179 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
8180
8181 mode_lib->ms.TripToMemory = math_max2(mode_lib->ms.UrgLatency, mode_lib->ms.TripToMemory);
8182
8183 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8184 double line_time_us = (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
8185 bool cursor_not_enough_urgent_latency_hiding = 0;
8186
8187 if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
8188 calculate_cursor_req_attributes(
8189 display_cfg->plane_descriptors[k].cursor.cursor_width,
8190 display_cfg->plane_descriptors[k].cursor.cursor_bpp,
8191
8192 // output
8193 &s->cursor_lines_per_chunk[k],
8194 &s->cursor_bytes_per_line[k],
8195 &s->cursor_bytes_per_chunk[k],
8196 &s->cursor_bytes[k]);
8197
8198 calculate_cursor_urgent_burst_factor(
8199 mode_lib->ip.cursor_buffer_size,
8200 display_cfg->plane_descriptors[k].cursor.cursor_width,
8201 s->cursor_bytes_per_chunk[k],
8202 s->cursor_lines_per_chunk[k],
8203 line_time_us,
8204 mode_lib->ms.UrgLatency,
8205
8206 // output
8207 &mode_lib->ms.UrgentBurstFactorCursor[k],
8208 &cursor_not_enough_urgent_latency_hiding);
8209 }
8210
8211 mode_lib->ms.UrgentBurstFactorCursorPre[k] = mode_lib->ms.UrgentBurstFactorCursor[k];
8212
8213 #ifdef __DML_VBA_DEBUG__
8214 dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor\n", __func__, k);
8215 dml2_printf("DML::%s: k=%d, VRatio=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
8216 dml2_printf("DML::%s: k=%d, VRatioChroma=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
8217 #endif
8218
8219 CalculateUrgentBurstFactor(
8220 &display_cfg->plane_descriptors[k],
8221 mode_lib->ms.swath_width_luma_ub[k],
8222 mode_lib->ms.swath_width_chroma_ub[k],
8223 mode_lib->ms.SwathHeightY[k],
8224 mode_lib->ms.SwathHeightC[k],
8225 line_time_us,
8226 mode_lib->ms.UrgLatency,
8227 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
8228 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
8229 mode_lib->ms.BytePerPixelInDETY[k],
8230 mode_lib->ms.BytePerPixelInDETC[k],
8231 mode_lib->ms.DETBufferSizeY[k],
8232 mode_lib->ms.DETBufferSizeC[k],
8233
8234 // Output
8235 &mode_lib->ms.UrgentBurstFactorLuma[k],
8236 &mode_lib->ms.UrgentBurstFactorChroma[k],
8237 &mode_lib->ms.NotEnoughUrgentLatencyHiding[k]);
8238
8239 mode_lib->ms.NotEnoughUrgentLatencyHiding[k] = mode_lib->ms.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding;
8240 }
8241
8242 CalculateDCFCLKDeepSleep(
8243 display_cfg,
8244 mode_lib->ms.num_active_planes,
8245 mode_lib->ms.BytePerPixelY,
8246 mode_lib->ms.BytePerPixelC,
8247 mode_lib->ms.SwathWidthY,
8248 mode_lib->ms.SwathWidthC,
8249 mode_lib->ms.NoOfDPP,
8250 mode_lib->ms.PSCL_FACTOR,
8251 mode_lib->ms.PSCL_FACTOR_CHROMA,
8252 mode_lib->ms.RequiredDPPCLK,
8253 mode_lib->ms.SurfaceReadBandwidthLuma,
8254 mode_lib->ms.SurfaceReadBandwidthChroma,
8255 mode_lib->soc.return_bus_width_bytes,
8256
8257 /* Output */
8258 &mode_lib->ms.dcfclk_deepsleep);
8259
8260 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8261 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
8262 mode_lib->ms.WritebackDelayTime[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay(
8263 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format,
8264 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio,
8265 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio,
8266 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps,
8267 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width,
8268 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height,
8269 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height,
8270 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK;
8271 } else {
8272 mode_lib->ms.WritebackDelayTime[k] = 0.0;
8273 }
8274 }
8275
8276 // MaximumVStartup is actually Tvstartup_min in DCN4 programming guide
8277 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8278 bool isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported);
8279 s->MaximumVStartup[k] = CalculateMaxVStartup(
8280 mode_lib->ip.ptoi_supported,
8281 mode_lib->ip.vblank_nom_default_us,
8282 &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing,
8283 mode_lib->ms.WritebackDelayTime[k]);
8284 mode_lib->ms.MaxVStartupLines[k] = (isInterlaceTiming ? (2 * s->MaximumVStartup[k]) : s->MaximumVStartup[k]);
8285 }
8286
8287 #ifdef __DML_VBA_DEBUG__
8288 dml2_printf("DML::%s: k=%u, MaximumVStartup = %u\n", __func__, k, s->MaximumVStartup[k]);
8289 #endif
8290
8291 /* Immediate Flip and MALL parameters */
8292 s->ImmediateFlipRequired = false;
8293 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8294 s->ImmediateFlipRequired = s->ImmediateFlipRequired || display_cfg->plane_descriptors[k].immediate_flip;
8295 }
8296
8297 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = false;
8298 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8299 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe =
8300 mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe ||
8301 ((display_cfg->hostvm_enable == true || display_cfg->plane_descriptors[k].immediate_flip == true) &&
8302 (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame || dml_is_phantom_pipe(&display_cfg->plane_descriptors[k])));
8303 }
8304
8305 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = false;
8306 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8307 mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen ||
8308 ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_enable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))) ||
8309 ((display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_force_disable || display_cfg->plane_descriptors[k].overrides.refresh_from_mall == dml2_refresh_from_mall_mode_override_auto) && (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame));
8310 }
8311
8312 s->FullFrameMALLPStateMethod = false;
8313 s->SubViewportMALLPStateMethod = false;
8314 s->PhantomPipeMALLPStateMethod = false;
8315 s->SubViewportMALLRefreshGreaterThan120Hz = false;
8316 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8317 if (display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame)
8318 s->FullFrameMALLPStateMethod = true;
8319 if (display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) {
8320 s->SubViewportMALLPStateMethod = true;
8321 if (!display_cfg->overrides.enable_subvp_implicit_pmo) {
8322 // For dv, small frame tests will have very high refresh rate
8323 unsigned long long refresh_rate = (unsigned long long) ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz * 1000 /
8324 (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
8325 (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total);
8326 if (refresh_rate > 120)
8327 s->SubViewportMALLRefreshGreaterThan120Hz = true;
8328 }
8329 }
8330 if (dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]))
8331 s->PhantomPipeMALLPStateMethod = true;
8332 }
8333 mode_lib->ms.support.InvalidCombinationOfMALLUseForPState = (s->SubViewportMALLPStateMethod != s->PhantomPipeMALLPStateMethod) ||
8334 (s->SubViewportMALLPStateMethod && s->FullFrameMALLPStateMethod) || s->SubViewportMALLRefreshGreaterThan120Hz;
8335
8336 #ifdef __DML_VBA_DEBUG__
8337 dml2_printf("DML::%s: SubViewportMALLPStateMethod = %u\n", __func__, s->SubViewportMALLPStateMethod);
8338 dml2_printf("DML::%s: PhantomPipeMALLPStateMethod = %u\n", __func__, s->PhantomPipeMALLPStateMethod);
8339 dml2_printf("DML::%s: FullFrameMALLPStateMethod = %u\n", __func__, s->FullFrameMALLPStateMethod);
8340 dml2_printf("DML::%s: SubViewportMALLRefreshGreaterThan120Hz = %u\n", __func__, s->SubViewportMALLRefreshGreaterThan120Hz);
8341 dml2_printf("DML::%s: InvalidCombinationOfMALLUseForPState = %u\n", __func__, mode_lib->ms.support.InvalidCombinationOfMALLUseForPState);
8342 dml2_printf("DML::%s: in_out_params->min_clk_index = %u\n", __func__, in_out_params->min_clk_index);
8343 dml2_printf("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK);
8344 dml2_printf("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock);
8345 dml2_printf("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz);
8346 dml2_printf("DML::%s: urgent latency tolerance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes)));
8347 #endif
8348
8349 mode_lib->ms.support.OutstandingRequestsSupport = true;
8350 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = true;
8351
8352 mode_lib->ms.support.avg_urgent_latency_us
8353 = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
8354 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
8355 + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
8356 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
8357
8358 mode_lib->ms.support.avg_non_urgent_latency_us
8359 = (mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].average_latency_when_non_urgent_uclk_cycles / mode_lib->ms.uclk_freq_mhz
8360 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_average_latency_margin / 100.0)
8361 + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock)
8362 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0);
8363
8364 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8365
8366 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
8367 outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_luma[k]
8368 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
8369
8370 if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
8371 mode_lib->ms.support.OutstandingRequestsSupport = false;
8372 }
8373
8374 if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
8375 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
8376 }
8377
8378 #ifdef __DML_VBA_DEBUG__
8379 dml2_printf("DML::%s: avg_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_urgent_latency_us);
8380 dml2_printf("DML::%s: avg_non_urgent_latency_us = %f\n", __func__, mode_lib->ms.support.avg_non_urgent_latency_us);
8381 dml2_printf("DML::%s: k=%d, request_size_bytes_luma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_luma[k]);
8382 dml2_printf("DML::%s: k=%d, outstanding_latency_us = %f (luma)\n", __func__, k, outstanding_latency_us);
8383 #endif
8384 }
8385
8386 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x && mode_lib->ms.BytePerPixelC[k] > 0) {
8387 outstanding_latency_us = (mode_lib->soc.max_outstanding_reqs * mode_lib->ms.support.request_size_bytes_chroma[k]
8388 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes));
8389
8390 if (outstanding_latency_us < mode_lib->ms.support.avg_urgent_latency_us) {
8391 mode_lib->ms.support.OutstandingRequestsSupport = false;
8392 }
8393
8394 if (outstanding_latency_us < mode_lib->ms.support.avg_non_urgent_latency_us) {
8395 mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance = false;
8396 }
8397 #ifdef __DML_VBA_DEBUG__
8398 dml2_printf("DML::%s: k=%d, request_size_bytes_chroma = %d\n", __func__, k, mode_lib->ms.support.request_size_bytes_chroma[k]);
8399 dml2_printf("DML::%s: k=%d, outstanding_latency_us = %f (chroma)\n", __func__, k, outstanding_latency_us);
8400 #endif
8401 }
8402 }
8403
8404 memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
8405 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) {
8406 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8407 mode_lib->ms.mall_prefetch_sdp_overhead_factor[k] = 1.0;
8408 mode_lib->ms.mall_prefetch_dram_overhead_factor[k] = 1.0;
8409 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
8410 mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
8411 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
8412 mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
8413 }
8414 } else {
8415 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8416 calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
8417 calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
8418 calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
8419 calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
8420 calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
8421 calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
8422 calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
8423
8424 calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
8425 calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
8426 calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
8427 calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
8428 calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
8429
8430 calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
8431 calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
8432 calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
8433 calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
8434 calculate_mcache_setting_params->blk_width_l = mode_lib->ms.MacroTileWidthY[k];
8435 calculate_mcache_setting_params->blk_height_l = mode_lib->ms.MacroTileHeightY[k];
8436 calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
8437 calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
8438 calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
8439 calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->ms.BytePerPixelY[k];
8440
8441 calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.x_start;
8442 calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
8443 calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
8444 calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
8445 calculate_mcache_setting_params->blk_width_c = mode_lib->ms.MacroTileWidthC[k];
8446 calculate_mcache_setting_params->blk_height_c = mode_lib->ms.MacroTileHeightC[k];
8447 calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
8448 calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
8449 calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
8450 calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->ms.BytePerPixelC[k];
8451
8452 // output
8453 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0[k];
8454 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0[k];
8455 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1[k];
8456 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1[k];
8457
8458 calculate_mcache_setting_params->num_mcaches_l = &mode_lib->ms.num_mcaches_l[k];
8459 calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->ms.mcache_row_bytes_l[k];
8460 calculate_mcache_setting_params->mcache_offsets_l = mode_lib->ms.mcache_offsets_l[k];
8461 calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->ms.mcache_shift_granularity_l[k];
8462
8463 calculate_mcache_setting_params->num_mcaches_c = &mode_lib->ms.num_mcaches_c[k];
8464 calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->ms.mcache_row_bytes_c[k];
8465 calculate_mcache_setting_params->mcache_offsets_c = mode_lib->ms.mcache_offsets_c[k];
8466 calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->ms.mcache_shift_granularity_c[k];
8467
8468 calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->ms.mall_comb_mcache_l[k];
8469 calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->ms.mall_comb_mcache_c[k];
8470 calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->ms.lc_comb_mcache[k];
8471
8472 calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
8473 }
8474
8475 calculate_mall_bw_overhead_factor(
8476 mode_lib->ms.mall_prefetch_sdp_overhead_factor,
8477 mode_lib->ms.mall_prefetch_dram_overhead_factor,
8478
8479 // input
8480 display_cfg,
8481 mode_lib->ms.num_active_planes);
8482 }
8483
8484 // Calculate all the bandwidth available
8485 // Need another bw for latency evaluation
8486 calculate_bandwidth_available(
8487 mode_lib->ms.support.avg_bandwidth_available_min, // not used
8488 mode_lib->ms.support.avg_bandwidth_available, // not used
8489 mode_lib->ms.support.urg_bandwidth_available_min_latency,
8490 mode_lib->ms.support.urg_bandwidth_available, // not used
8491 mode_lib->ms.support.urg_bandwidth_available_vm_only, // not used
8492 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm, // not used
8493
8494 &mode_lib->soc,
8495 display_cfg->hostvm_enable,
8496 mode_lib->ms.DCFCLK,
8497 mode_lib->ms.FabricClock,
8498 mode_lib->ms.dram_bw_mbps);
8499
8500 calculate_bandwidth_available(
8501 mode_lib->ms.support.avg_bandwidth_available_min,
8502 mode_lib->ms.support.avg_bandwidth_available,
8503 mode_lib->ms.support.urg_bandwidth_available_min,
8504 mode_lib->ms.support.urg_bandwidth_available,
8505 mode_lib->ms.support.urg_bandwidth_available_vm_only,
8506 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm,
8507
8508 &mode_lib->soc,
8509 display_cfg->hostvm_enable,
8510 mode_lib->ms.MaxDCFCLK,
8511 mode_lib->ms.MaxFabricClock,
8512 mode_lib->ms.max_dram_bw_mbps);
8513
8514
8515 // Average BW support check
8516 calculate_avg_bandwidth_required(
8517 mode_lib->ms.support.avg_bandwidth_required,
8518 // input
8519 display_cfg,
8520 mode_lib->ms.num_active_planes,
8521 mode_lib->ms.SurfaceReadBandwidthLuma,
8522 mode_lib->ms.SurfaceReadBandwidthChroma,
8523 mode_lib->ms.cursor_bw,
8524 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
8525 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
8526 mode_lib->ms.mall_prefetch_dram_overhead_factor,
8527 mode_lib->ms.mall_prefetch_sdp_overhead_factor);
8528
8529 for (m = 0; m < dml2_core_internal_bw_max; m++) { // check sdp and dram
8530 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_idle][m] = 1;
8531 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_sys_active][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_sys_active][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_sys_active][m]);
8532 mode_lib->ms.support.avg_bandwidth_support_ok[dml2_core_internal_soc_state_svp_prefetch][m] = (mode_lib->ms.support.avg_bandwidth_required[dml2_core_internal_soc_state_svp_prefetch][m] <= mode_lib->ms.support.avg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][m]);
8533 }
8534
8535 mode_lib->ms.support.AvgBandwidthSupport = true;
8536 mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = true;
8537 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8538 if (mode_lib->ms.NotEnoughUrgentLatencyHiding[k]) {
8539 mode_lib->ms.support.EnoughUrgentLatencyHidingSupport = false;
8540 dml2_printf("DML::%s: k=%u NotEnoughUrgentLatencyHiding set\n", __func__, k);
8541
8542 }
8543 }
8544 for (m = 0; m < dml2_core_internal_soc_state_max; m++) {
8545 for (n = 0; n < dml2_core_internal_bw_max; n++) { // check sdp and dram
8546 if (!mode_lib->ms.support.avg_bandwidth_support_ok[m][n] && (m == dml2_core_internal_soc_state_sys_active || mode_lib->soc.mall_allocated_for_dcn_mbytes > 0)) {
8547 mode_lib->ms.support.AvgBandwidthSupport = false;
8548 #ifdef __DML_VBA_DEBUG__
8549 dml2_printf("DML::%s: avg_bandwidth_support_ok[%s][%s] not ok\n", __func__, dml2_core_internal_soc_state_type_str(m), dml2_core_internal_bw_type_str(n));
8550 #endif
8551 }
8552 }
8553 }
8554
8555 /* Prefetch Check */
8556 {
8557 mode_lib->ms.TimeCalc = 24 / mode_lib->ms.dcfclk_deepsleep;
8558
8559 calculate_hostvm_inefficiency_factor(
8560 &s->HostVMInefficiencyFactor,
8561 &s->HostVMInefficiencyFactorPrefetch,
8562
8563 display_cfg->gpuvm_enable,
8564 display_cfg->hostvm_enable,
8565 mode_lib->ip.remote_iommu_outstanding_translations,
8566 mode_lib->soc.max_outstanding_reqs,
8567 mode_lib->ms.support.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
8568 mode_lib->ms.support.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
8569
8570 mode_lib->ms.Total3dlutActive = 0;
8571 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8572 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
8573 mode_lib->ms.Total3dlutActive = mode_lib->ms.Total3dlutActive + 1;
8574
8575 // Calculate tdlut schedule related terms
8576 calculate_tdlut_setting_params->dispclk_mhz = mode_lib->ms.RequiredDISPCLK;
8577 calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
8578 calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
8579 calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
8580 calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
8581 calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
8582 calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
8583 calculate_tdlut_setting_params->tdlut_mpc_width_flag = display_cfg->plane_descriptors[k].tdlut.tdlut_mpc_width_flag;
8584 calculate_tdlut_setting_params->is_gfx11 = dml_get_gfx_version(display_cfg->plane_descriptors[k].surface.tiling);
8585
8586 // output
8587 calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
8588 calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
8589 calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
8590 calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
8591 calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
8592 calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
8593
8594 calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
8595 }
8596
8597 min_return_bw_for_latency = mode_lib->ms.support.urg_bandwidth_available_min_latency[dml2_core_internal_soc_state_sys_active];
8598
8599 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
8600 s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
8601 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
8602 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
8603
8604 CalculateExtraLatency(
8605 display_cfg,
8606 mode_lib->ip.rob_buffer_size_kbytes,
8607 mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
8608 s->ReorderingBytes,
8609 mode_lib->ms.DCFCLK,
8610 mode_lib->ms.FabricClock,
8611 mode_lib->ip.pixel_chunk_size_kbytes,
8612 min_return_bw_for_latency,
8613 mode_lib->ms.num_active_planes,
8614 mode_lib->ms.NoOfDPP,
8615 mode_lib->ms.dpte_group_bytes,
8616 s->tdlut_bytes_per_group,
8617 s->HostVMInefficiencyFactor,
8618 s->HostVMInefficiencyFactorPrefetch,
8619 mode_lib->soc.hostvm_min_page_size_kbytes,
8620 mode_lib->soc.qos_parameters.qos_type,
8621 !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
8622 mode_lib->soc.max_outstanding_reqs,
8623 mode_lib->ms.support.request_size_bytes_luma,
8624 mode_lib->ms.support.request_size_bytes_chroma,
8625 mode_lib->ip.meta_chunk_size_kbytes,
8626 mode_lib->ip.dchub_arb_to_ret_delay,
8627 mode_lib->ms.TripToMemory,
8628 mode_lib->ip.hostvm_mode,
8629
8630 // output
8631 &mode_lib->ms.ExtraLatency,
8632 &mode_lib->ms.ExtraLatency_sr,
8633 &mode_lib->ms.ExtraLatencyPrefetch);
8634
8635 {
8636 mode_lib->ms.support.PrefetchSupported = true;
8637 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8638 struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
8639
8640 mode_lib->ms.TWait[k] = CalculateTWait(
8641 display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
8642 mode_lib->ms.UrgLatency,
8643 mode_lib->ms.TripToMemory,
8644 !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ?
8645 get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0);
8646
8647 myPipe->Dppclk = mode_lib->ms.RequiredDPPCLK[k];
8648 myPipe->Dispclk = mode_lib->ms.RequiredDISPCLK;
8649 myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
8650 myPipe->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
8651 myPipe->DPPPerSurface = mode_lib->ms.NoOfDPP[k];
8652 myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled;
8653 myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
8654 myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
8655 myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
8656 myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
8657 myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
8658 myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored;
8659 myPipe->BlockWidth256BytesY = mode_lib->ms.Read256BlockWidthY[k];
8660 myPipe->BlockHeight256BytesY = mode_lib->ms.Read256BlockHeightY[k];
8661 myPipe->BlockWidth256BytesC = mode_lib->ms.Read256BlockWidthC[k];
8662 myPipe->BlockHeight256BytesC = mode_lib->ms.Read256BlockHeightC[k];
8663 myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
8664 myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors;
8665 myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active;
8666 myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
8667 myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active;
8668 myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
8669 myPipe->ODMMode = mode_lib->ms.ODMMode[k];
8670 myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
8671 myPipe->BytePerPixelY = mode_lib->ms.BytePerPixelY[k];
8672 myPipe->BytePerPixelC = mode_lib->ms.BytePerPixelC[k];
8673 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
8674
8675 #ifdef __DML_VBA_DEBUG__
8676 dml2_printf("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
8677 dml2_printf("DML::%s: MaximumVStartup = %u\n", __func__, s->MaximumVStartup[k]);
8678 #endif
8679 CalculatePrefetchSchedule_params->display_cfg = display_cfg;
8680 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch;
8681 CalculatePrefetchSchedule_params->myPipe = myPipe;
8682 CalculatePrefetchSchedule_params->DSCDelay = mode_lib->ms.DSCDelay[k];
8683 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter;
8684 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl;
8685 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only;
8686 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor;
8687 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal;
8688 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->ms.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
8689 CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format;
8690 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters;
8691 CalculatePrefetchSchedule_params->VStartup = s->MaximumVStartup[k];
8692 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
8693 CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable;
8694 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled;
8695 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required;
8696 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes;
8697 CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->ms.UrgLatency;
8698 CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->ms.ExtraLatencyPrefetch;
8699 CalculatePrefetchSchedule_params->TCalc = mode_lib->ms.TimeCalc;
8700 CalculatePrefetchSchedule_params->vm_bytes = mode_lib->ms.vm_bytes[k];
8701 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->ms.DPTEBytesPerRow[k];
8702 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->ms.PrefetchLinesY[k];
8703 CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->ms.PrefillY[k];
8704 CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->ms.MaxNumSwathY[k];
8705 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->ms.PrefetchLinesC[k];
8706 CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->ms.PrefillC[k];
8707 CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->ms.MaxNumSwathC[k];
8708 CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->ms.swath_width_luma_ub[k];
8709 CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->ms.swath_width_chroma_ub[k];
8710 CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->ms.SwathHeightY[k];
8711 CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->ms.SwathHeightC[k];
8712 CalculatePrefetchSchedule_params->TWait = mode_lib->ms.TWait[k];
8713 CalculatePrefetchSchedule_params->Ttrip = mode_lib->ms.TripToMemory;
8714 CalculatePrefetchSchedule_params->Turg = mode_lib->ms.UrgLatency;
8715 CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
8716 CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k];
8717 CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k];
8718 CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k];
8719 CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k];
8720 CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0);
8721 CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k];
8722 CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k];
8723 CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
8724 CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
8725 CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k];
8726 CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k];
8727
8728 // output
8729 CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k];
8730 CalculatePrefetchSchedule_params->DSTYAfterScaler = &s->DSTYAfterScaler[k];
8731 CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->ms.dst_y_prefetch[k];
8732 CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->ms.LinesForVM[k];
8733 CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->ms.LinesForDPTERow[k];
8734 CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->ms.VRatioPreY[k];
8735 CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->ms.VRatioPreC[k];
8736 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->ms.RequiredPrefetchPixelDataBWLuma[k]; // prefetch_sw_bw_l
8737 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->ms.RequiredPrefetchPixelDataBWChroma[k]; // prefetch_sw_bw_c
8738 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->ms.NoTimeForDynamicMetadata[k];
8739 CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->ms.Tno_bw[k];
8740 CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->ms.Tno_bw_flip[k];
8741 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->ms.prefetch_vmrow_bw[k];
8742 CalculatePrefetchSchedule_params->Tdmdl_vm = &s->dummy_single[0];
8743 CalculatePrefetchSchedule_params->Tdmdl = &s->dummy_single[1];
8744 CalculatePrefetchSchedule_params->TSetup = &s->dummy_single[2];
8745 CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
8746 CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
8747 CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
8748 CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
8749 CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
8750 CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
8751 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &s->dummy_integer[0];
8752 CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1];
8753 CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2];
8754 CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k];
8755
8756 mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
8757
8758 mode_lib->ms.support.PrefetchSupported &= !mode_lib->ms.NoTimeForPrefetch[k];
8759 dml2_printf("DML::%s: k=%d, dst_y_per_vm_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_vm_vblank);
8760 dml2_printf("DML::%s: k=%d, dst_y_per_row_vblank = %f\n", __func__, k, *CalculatePrefetchSchedule_params->dst_y_per_row_vblank);
8761 } // for k num_planes
8762
8763 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8764 if (mode_lib->ms.dst_y_prefetch[k] < 2.0
8765 || mode_lib->ms.LinesForVM[k] >= 32.0
8766 || mode_lib->ms.LinesForDPTERow[k] >= 16.0
8767 || mode_lib->ms.NoTimeForPrefetch[k] == true
8768 || s->DSTYAfterScaler[k] > 8) {
8769 mode_lib->ms.support.PrefetchSupported = false;
8770 dml2_printf("DML::%s: k=%d, dst_y_prefetch=%f (should not be < 2)\n", __func__, k, mode_lib->ms.dst_y_prefetch[k]);
8771 dml2_printf("DML::%s: k=%d, LinesForVM=%f (should not be >= 32)\n", __func__, k, mode_lib->ms.LinesForVM[k]);
8772 dml2_printf("DML::%s: k=%d, LinesForDPTERow=%f (should not be >= 16)\n", __func__, k, mode_lib->ms.LinesForDPTERow[k]);
8773 dml2_printf("DML::%s: k=%d, DSTYAfterScaler=%d (should be <= 8)\n", __func__, k, s->DSTYAfterScaler[k]);
8774 dml2_printf("DML::%s: k=%d, NoTimeForPrefetch=%d\n", __func__, k, mode_lib->ms.NoTimeForPrefetch[k]);
8775 }
8776 }
8777
8778 mode_lib->ms.support.DynamicMetadataSupported = true;
8779 for (k = 0; k < mode_lib->ms.num_active_planes; ++k) {
8780 if (mode_lib->ms.NoTimeForDynamicMetadata[k] == true) {
8781 mode_lib->ms.support.DynamicMetadataSupported = false;
8782 }
8783 }
8784
8785 mode_lib->ms.support.VRatioInPrefetchSupported = true;
8786 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8787 if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ ||
8788 mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) {
8789 mode_lib->ms.support.VRatioInPrefetchSupported = false;
8790 dml2_printf("DML::%s: k=%d VRatioPreY = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreY[k], __DML2_CALCS_MAX_VRATIO_PRE__);
8791 dml2_printf("DML::%s: k=%d VRatioPreC = %f (should be <= %f)\n", __func__, k, mode_lib->ms.VRatioPreC[k], __DML2_CALCS_MAX_VRATIO_PRE__);
8792 dml2_printf("DML::%s: VRatioInPrefetchSupported = %u\n", __func__, mode_lib->ms.support.VRatioInPrefetchSupported);
8793 }
8794 }
8795
8796 // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok
8797 if (mode_lib->ms.support.PrefetchSupported) {
8798 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8799 double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
8800 // Calculate Urgent burst factor for prefetch
8801 #ifdef __DML_VBA_DEBUG__
8802 dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k);
8803 dml2_printf("DML::%s: k=%d, VRatioPreY=%f\n", __func__, k, mode_lib->ms.VRatioPreY[k]);
8804 dml2_printf("DML::%s: k=%d, VRatioPreC=%f\n", __func__, k, mode_lib->ms.VRatioPreC[k]);
8805 #endif
8806 CalculateUrgentBurstFactor(
8807 &display_cfg->plane_descriptors[k],
8808 mode_lib->ms.swath_width_luma_ub[k],
8809 mode_lib->ms.swath_width_chroma_ub[k],
8810 mode_lib->ms.SwathHeightY[k],
8811 mode_lib->ms.SwathHeightC[k],
8812 line_time_us,
8813 mode_lib->ms.UrgLatency,
8814 mode_lib->ms.VRatioPreY[k],
8815 mode_lib->ms.VRatioPreC[k],
8816 mode_lib->ms.BytePerPixelInDETY[k],
8817 mode_lib->ms.BytePerPixelInDETC[k],
8818 mode_lib->ms.DETBufferSizeY[k],
8819 mode_lib->ms.DETBufferSizeC[k],
8820 /* Output */
8821 &mode_lib->ms.UrgentBurstFactorLumaPre[k],
8822 &mode_lib->ms.UrgentBurstFactorChromaPre[k],
8823 &mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
8824 }
8825
8826 // Calculate urgent bandwidth required, both urg and non urg peak bandwidth
8827 // assume flip bw is 0 at this point
8828 for (k = 0; k < mode_lib->ms.num_active_planes; k++)
8829 mode_lib->ms.final_flip_bw[k] = 0;
8830
8831 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->ms.support.urg_vactive_bandwidth_required;
8832 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required;
8833 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->ms.support.urg_bandwidth_required_qual;
8834 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required;
8835 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = mode_lib->ms.surface_avg_vactive_required_bw;
8836 calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
8837
8838 calculate_peak_bandwidth_params->display_cfg = display_cfg;
8839 calculate_peak_bandwidth_params->inc_flip_bw = 0;
8840 calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
8841 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
8842 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
8843 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
8844 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
8845 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
8846 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
8847 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
8848
8849 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.SurfaceReadBandwidthLuma;
8850 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.SurfaceReadBandwidthChroma;
8851 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
8852 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
8853 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
8854 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
8855 calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
8856 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
8857 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
8858 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
8859 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
8860 calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
8861 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
8862 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
8863 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
8864 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
8865 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
8866 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
8867
8868 calculate_peak_bandwidth_required(
8869 &mode_lib->scratch,
8870 calculate_peak_bandwidth_params);
8871
8872 // Check urg peak bandwidth against available urg bw
8873 // check at SDP and DRAM, for all soc states (SVP prefetch and Sys Active)
8874 check_urgent_bandwidth_support(
8875 &s->dummy_single[0], // double* frac_urg_bandwidth
8876 &s->dummy_single[1], // double* frac_urg_bandwidth_mall
8877 &mode_lib->ms.support.UrgVactiveBandwidthSupport,
8878 &mode_lib->ms.support.PrefetchBandwidthSupported,
8879
8880 mode_lib->soc.mall_allocated_for_dcn_mbytes,
8881 mode_lib->ms.support.non_urg_bandwidth_required,
8882 mode_lib->ms.support.urg_vactive_bandwidth_required,
8883 mode_lib->ms.support.urg_bandwidth_required,
8884 mode_lib->ms.support.urg_bandwidth_available);
8885
8886 mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.PrefetchBandwidthSupported;
8887 dml2_printf("DML::%s: PrefetchBandwidthSupported=%0d\n", __func__, mode_lib->ms.support.PrefetchBandwidthSupported);
8888
8889 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8890 if (mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]) {
8891 mode_lib->ms.support.PrefetchSupported = false;
8892 dml2_printf("DML::%s: k=%d, NotEnoughUrgentLatencyHidingPre=%d\n", __func__, k, mode_lib->ms.NotEnoughUrgentLatencyHidingPre[k]);
8893 }
8894 }
8895
8896
8897 // Both prefetch schedule and BW okay
8898 if (mode_lib->ms.support.PrefetchSupported == true && mode_lib->ms.support.VRatioInPrefetchSupported == true) {
8899 mode_lib->ms.BandwidthAvailableForImmediateFlip =
8900 get_bandwidth_available_for_immediate_flip(
8901 dml2_core_internal_soc_state_sys_active,
8902 mode_lib->ms.support.urg_bandwidth_required_qual, // no flip
8903 mode_lib->ms.support.urg_bandwidth_available);
8904
8905 mode_lib->ms.TotImmediateFlipBytes = 0;
8906 for (k = 0; k < mode_lib->ms.num_active_planes; k++) {
8907 if (display_cfg->plane_descriptors[k].immediate_flip) {
8908 s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(
8909 s->HostVMInefficiencyFactor,
8910 mode_lib->ms.vm_bytes[k],
8911 mode_lib->ms.DPTEBytesPerRow[k],
8912 mode_lib->ms.meta_row_bytes[k]);
8913 } else {
8914 s->per_pipe_flip_bytes[k] = 0;
8915 }
8916 mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k];
8917
8918 }
8919
8920 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
8921 CalculateFlipSchedule(
8922 &mode_lib->scratch,
8923 display_cfg->plane_descriptors[k].immediate_flip,
8924 1, // use_lb_flip_bw
8925 s->HostVMInefficiencyFactor,
8926 s->Tvm_trips_flip[k],
8927 s->Tr0_trips_flip[k],
8928 s->Tvm_trips_flip_rounded[k],
8929 s->Tr0_trips_flip_rounded[k],
8930 display_cfg->gpuvm_enable,
8931 mode_lib->ms.vm_bytes[k],
8932 mode_lib->ms.DPTEBytesPerRow[k],
8933 mode_lib->ms.BandwidthAvailableForImmediateFlip,
8934 mode_lib->ms.TotImmediateFlipBytes,
8935 display_cfg->plane_descriptors[k].pixel_format,
8936 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)),
8937 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
8938 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
8939 mode_lib->ms.Tno_bw_flip[k],
8940 mode_lib->ms.dpte_row_height[k],
8941 mode_lib->ms.dpte_row_height_chroma[k],
8942 mode_lib->ms.use_one_row_for_frame_flip[k],
8943 mode_lib->ip.max_flip_time_us,
8944 mode_lib->ip.max_flip_time_lines,
8945 s->per_pipe_flip_bytes[k],
8946 mode_lib->ms.meta_row_bytes[k],
8947 s->meta_row_height_luma[k],
8948 s->meta_row_height_chroma[k],
8949 mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
8950
8951 /* Output */
8952 &mode_lib->ms.dst_y_per_vm_flip[k],
8953 &mode_lib->ms.dst_y_per_row_flip[k],
8954 &mode_lib->ms.final_flip_bw[k],
8955 &mode_lib->ms.ImmediateFlipSupportedForPipe[k]);
8956 }
8957
8958 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw;
8959 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip;
8960 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw;
8961 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip;
8962 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
8963 calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw;
8964
8965 calculate_peak_bandwidth_params->display_cfg = display_cfg;
8966 calculate_peak_bandwidth_params->inc_flip_bw = 1;
8967 calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes;
8968 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP;
8969 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0;
8970 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1;
8971 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0;
8972 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1;
8973 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor;
8974 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor;
8975
8976 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.SurfaceReadBandwidthLuma;
8977 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.SurfaceReadBandwidthChroma;
8978 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma;
8979 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma;
8980 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l;
8981 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c;
8982 calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw;
8983 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw;
8984 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw;
8985 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw;
8986 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw;
8987 calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw;
8988 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma;
8989 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma;
8990 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor;
8991 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre;
8992 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre;
8993 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre;
8994
8995 calculate_peak_bandwidth_required(
8996 &mode_lib->scratch,
8997 calculate_peak_bandwidth_params);
8998
8999 calculate_immediate_flip_bandwidth_support(
9000 &s->dummy_single[0], // double* frac_urg_bandwidth_flip
9001 &mode_lib->ms.support.ImmediateFlipSupport,
9002
9003 dml2_core_internal_soc_state_sys_active,
9004 mode_lib->ms.support.urg_bandwidth_required_flip,
9005 mode_lib->ms.support.non_urg_bandwidth_required_flip,
9006 mode_lib->ms.support.urg_bandwidth_available);
9007
9008 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
9009 if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false)
9010 mode_lib->ms.support.ImmediateFlipSupport = false;
9011 }
9012
9013 } else { // if prefetch not support, assume iflip is not supported too
9014 mode_lib->ms.support.ImmediateFlipSupport = false;
9015 }
9016 } // prefetch schedule
9017 }
9018
9019 s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency;
9020 s->mSOCParameters.ExtraLatency = mode_lib->ms.ExtraLatency;
9021 s->mSOCParameters.ExtraLatency_sr = mode_lib->ms.ExtraLatency_sr;
9022 s->mSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us;
9023 s->mSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
9024 s->mSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us;
9025 s->mSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
9026 s->mSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us;
9027 s->mSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
9028 s->mSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us;
9029 s->mSOCParameters.USRRetrainingLatency = 0;
9030 s->mSOCParameters.SMNLatency = 0;
9031 s->mSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->ms.uclk_freq_mhz * 1000), in_out_params->min_clk_index);
9032 s->mSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index);
9033 s->mSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock;
9034 s->mSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type;
9035
9036 CalculateWatermarks_params->display_cfg = display_cfg;
9037 CalculateWatermarks_params->USRRetrainingRequired = false;
9038 CalculateWatermarks_params->NumberOfActiveSurfaces = mode_lib->ms.num_active_planes;
9039 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines;
9040 CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits;
9041 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes;
9042 CalculateWatermarks_params->DCFCLK = mode_lib->ms.DCFCLK;
9043 CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
9044 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change;
9045 CalculateWatermarks_params->dpte_group_bytes = mode_lib->ms.dpte_group_bytes;
9046 CalculateWatermarks_params->mmSOCParameters = s->mSOCParameters;
9047 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes;
9048 CalculateWatermarks_params->SOCCLK = mode_lib->ms.SOCCLK;
9049 CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->ms.dcfclk_deepsleep;
9050 CalculateWatermarks_params->DETBufferSizeY = mode_lib->ms.DETBufferSizeY;
9051 CalculateWatermarks_params->DETBufferSizeC = mode_lib->ms.DETBufferSizeC;
9052 CalculateWatermarks_params->SwathHeightY = mode_lib->ms.SwathHeightY;
9053 CalculateWatermarks_params->SwathHeightC = mode_lib->ms.SwathHeightC;
9054 CalculateWatermarks_params->SwathWidthY = mode_lib->ms.SwathWidthY;
9055 CalculateWatermarks_params->SwathWidthC = mode_lib->ms.SwathWidthC;
9056 CalculateWatermarks_params->DPPPerSurface = mode_lib->ms.NoOfDPP;
9057 CalculateWatermarks_params->BytePerPixelDETY = mode_lib->ms.BytePerPixelInDETY;
9058 CalculateWatermarks_params->BytePerPixelDETC = mode_lib->ms.BytePerPixelInDETC;
9059 CalculateWatermarks_params->DSTXAfterScaler = s->DSTXAfterScaler;
9060 CalculateWatermarks_params->DSTYAfterScaler = s->DSTYAfterScaler;
9061 CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->ms.UnboundedRequestEnabled;
9062 CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->ms.CompressedBufferSizeInkByte;
9063 CalculateWatermarks_params->meta_row_height_l = s->meta_row_height_luma;
9064 CalculateWatermarks_params->meta_row_height_c = s->meta_row_height_chroma;
9065
9066 // Output
9067 CalculateWatermarks_params->Watermark = &mode_lib->ms.support.watermarks; // Watermarks *Watermark
9068 CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->ms.support.DRAMClockChangeSupport;
9069 CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->ms.support.global_dram_clock_change_supported;
9070 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = &s->dummy_single_array[0]; // double *MaxActiveDRAMClockChangeLatencySupported[]
9071 CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->ms.SubViewportLinesNeededInMALL; // unsigned int SubViewportLinesNeededInMALL[]
9072 CalculateWatermarks_params->FCLKChangeSupport = mode_lib->ms.support.FCLKChangeSupport;
9073 CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->ms.support.global_fclk_change_supported;
9074 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &s->dummy_single[0]; // double *MaxActiveFCLKChangeLatencySupported
9075 CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->ms.support.USRRetrainingSupport;
9076 CalculateWatermarks_params->g6_temp_read_support = &mode_lib->ms.support.g6_temp_read_support;
9077 CalculateWatermarks_params->VActiveLatencyHidingMargin = mode_lib->ms.VActiveLatencyHidingMargin;
9078 CalculateWatermarks_params->VActiveLatencyHidingUs = mode_lib->ms.VActiveLatencyHidingUs;
9079
9080 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params);
9081
9082 calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->ms.support.watermarks, s->dummy_integer_array[0]);
9083 }
9084 dml2_printf("DML::%s: Done prefetch calculation\n", __func__);
9085 // End of Prefetch Check
9086
9087 mode_lib->ms.support.max_urgent_latency_us = s->mSOCParameters.max_urgent_latency_us;
9088
9089 //Re-ordering Buffer Support Check
9090 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
9091 if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
9092 / mode_lib->ms.support.non_urg_bandwidth_required_flip[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= s->mSOCParameters.max_urgent_latency_us) {
9093 mode_lib->ms.support.ROBSupport = true;
9094 } else {
9095 mode_lib->ms.support.ROBSupport = false;
9096 }
9097 } else {
9098 if (mode_lib->ip.rob_buffer_size_kbytes * 1024 >= mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles * mode_lib->soc.fabric_datapath_to_dcn_data_return_bytes) {
9099 mode_lib->ms.support.ROBSupport = true;
9100 } else {
9101 mode_lib->ms.support.ROBSupport = false;
9102 }
9103 }
9104
9105 /* VActive fill time calculations (informative) */
9106 calculate_vactive_det_fill_latency(
9107 display_cfg,
9108 mode_lib->ms.num_active_planes,
9109 s->pstate_bytes_required_l,
9110 s->pstate_bytes_required_c,
9111 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0,
9112 mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1,
9113 mode_lib->ms.SurfaceReadBandwidthLuma,
9114 mode_lib->ms.SurfaceReadBandwidthChroma,
9115 mode_lib->ms.surface_avg_vactive_required_bw,
9116 mode_lib->ms.surface_peak_required_bw,
9117 /* outputs */
9118 mode_lib->ms.dram_change_vactive_det_fill_delay_us);
9119
9120 #ifdef __DML_VBA_DEBUG__
9121 dml2_printf("DML::%s: max_urgent_latency_us = %f\n", __func__, s->mSOCParameters.max_urgent_latency_us);
9122 dml2_printf("DML::%s: ROBSupport = %u\n", __func__, mode_lib->ms.support.ROBSupport);
9123 #endif
9124
9125 /*Mode Support, Voltage State and SOC Configuration*/
9126 {
9127 if (mode_lib->ms.support.ScaleRatioAndTapsSupport
9128 && mode_lib->ms.support.SourceFormatPixelAndScanSupport
9129 && mode_lib->ms.support.ViewportSizeSupport
9130 && !mode_lib->ms.support.LinkRateDoesNotMatchDPVersion
9131 && !mode_lib->ms.support.LinkRateForMultistreamNotIndicated
9132 && !mode_lib->ms.support.BPPForMultistreamNotIndicated
9133 && !mode_lib->ms.support.MultistreamWithHDMIOreDP
9134 && !mode_lib->ms.support.ExceededMultistreamSlots
9135 && !mode_lib->ms.support.MSOOrODMSplitWithNonDPLink
9136 && !mode_lib->ms.support.NotEnoughLanesForMSO
9137 && !mode_lib->ms.support.P2IWith420
9138 && !mode_lib->ms.support.DSC422NativeNotSupported
9139 && mode_lib->ms.support.DSCSlicesODMModeSupported
9140 && !mode_lib->ms.support.NotEnoughDSCUnits
9141 && !mode_lib->ms.support.NotEnoughDSCSlices
9142 && !mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe
9143 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen
9144 && !mode_lib->ms.support.DSCCLKRequiredMoreThanSupported
9145 && mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport
9146 && !mode_lib->ms.support.DTBCLKRequiredMoreThanSupported
9147 && !mode_lib->ms.support.InvalidCombinationOfMALLUseForPState
9148 && mode_lib->ms.support.ROBSupport
9149 && mode_lib->ms.support.OutstandingRequestsSupport
9150 && mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance
9151 && mode_lib->ms.support.DISPCLK_DPPCLK_Support
9152 && mode_lib->ms.support.TotalAvailablePipesSupport
9153 && mode_lib->ms.support.NumberOfOTGSupport
9154 && mode_lib->ms.support.NumberOfHDMIFRLSupport
9155 && mode_lib->ms.support.NumberOfDP2p0Support
9156 && mode_lib->ms.support.EnoughWritebackUnits
9157 && mode_lib->ms.support.WritebackLatencySupport
9158 && mode_lib->ms.support.WritebackScaleRatioAndTapsSupport
9159 && mode_lib->ms.support.CursorSupport
9160 && mode_lib->ms.support.PitchSupport
9161 && !mode_lib->ms.support.ViewportExceedsSurface
9162 && mode_lib->ms.support.PrefetchSupported
9163 && mode_lib->ms.support.EnoughUrgentLatencyHidingSupport
9164 && mode_lib->ms.support.AvgBandwidthSupport
9165 && mode_lib->ms.support.DynamicMetadataSupported
9166 && mode_lib->ms.support.VRatioInPrefetchSupported
9167 && mode_lib->ms.support.PTEBufferSizeNotExceeded
9168 && mode_lib->ms.support.DCCMetaBufferSizeNotExceeded
9169 && !mode_lib->ms.support.ExceededMALLSize
9170 && mode_lib->ms.support.g6_temp_read_support
9171 && ((!display_cfg->hostvm_enable && !s->ImmediateFlipRequired) || mode_lib->ms.support.ImmediateFlipSupport)) {
9172 dml2_printf("DML::%s: mode is supported\n", __func__);
9173 mode_lib->ms.support.ModeSupport = true;
9174 } else {
9175 dml2_printf("DML::%s: mode is NOT supported\n", __func__);
9176 mode_lib->ms.support.ModeSupport = false;
9177 }
9178 }
9179
9180 // Since mode_support now works on one particular power state, there is only one state idx (index 0).
9181 dml2_printf("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport);
9182 dml2_printf("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport);
9183
9184 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
9185 mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k];
9186 mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[k];
9187 }
9188
9189 for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) {
9190 mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMMode[k];
9191 mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k];
9192 mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k];
9193 mode_lib->ms.support.OutputBpp[k] = mode_lib->ms.OutputBpp[k];
9194 mode_lib->ms.support.OutputType[k] = mode_lib->ms.OutputType[k];
9195 mode_lib->ms.support.OutputRate[k] = mode_lib->ms.OutputRate[k];
9196
9197 #if defined(__DML_VBA_DEBUG__)
9198 dml2_printf("DML::%s: k=%d, ODMMode = %u\n", __func__, k, mode_lib->ms.support.ODMMode[k]);
9199 dml2_printf("DML::%s: k=%d, DSCEnabled = %u\n", __func__, k, mode_lib->ms.support.DSCEnabled[k]);
9200 #endif
9201 }
9202
9203 #if defined(__DML_VBA_DEBUG__)
9204 if (!mode_lib->ms.support.ModeSupport)
9205 dml2_print_mode_support_info(&mode_lib->ms.support, true);
9206
9207 dml2_printf("DML::%s: --- DONE --- \n", __func__);
9208 #endif
9209
9210 return mode_lib->ms.support.ModeSupport;
9211 }
9212
dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex * in_out_params)9213 unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support_ex *in_out_params)
9214 {
9215 unsigned int result;
9216
9217 dml2_printf("DML::%s: ------------- START ----------\n", __func__);
9218 result = dml_core_mode_support(in_out_params);
9219
9220 if (result)
9221 *in_out_params->out_evaluation_info = in_out_params->mode_lib->ms.support;
9222
9223 dml2_printf("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index);
9224
9225 for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++)
9226 dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
9227
9228 dml2_printf("DML::%s: ------------- DONE ----------\n", __func__);
9229
9230 return result;
9231 }
9232
CalculatePixelDeliveryTimes(const struct dml2_display_cfg * display_cfg,const struct core_display_cfg_support_info * cfg_support_info,unsigned int NumberOfActiveSurfaces,double VRatioPrefetchY[],double VRatioPrefetchC[],unsigned int swath_width_luma_ub[],unsigned int swath_width_chroma_ub[],double PSCL_THROUGHPUT[],double PSCL_THROUGHPUT_CHROMA[],double Dppclk[],unsigned int BytePerPixelC[],unsigned int req_per_swath_ub_l[],unsigned int req_per_swath_ub_c[],double DisplayPipeLineDeliveryTimeLuma[],double DisplayPipeLineDeliveryTimeChroma[],double DisplayPipeLineDeliveryTimeLumaPrefetch[],double DisplayPipeLineDeliveryTimeChromaPrefetch[],double DisplayPipeRequestDeliveryTimeLuma[],double DisplayPipeRequestDeliveryTimeChroma[],double DisplayPipeRequestDeliveryTimeLumaPrefetch[],double DisplayPipeRequestDeliveryTimeChromaPrefetch[])9233 static void CalculatePixelDeliveryTimes(
9234 const struct dml2_display_cfg *display_cfg,
9235 const struct core_display_cfg_support_info *cfg_support_info,
9236 unsigned int NumberOfActiveSurfaces,
9237 double VRatioPrefetchY[],
9238 double VRatioPrefetchC[],
9239 unsigned int swath_width_luma_ub[],
9240 unsigned int swath_width_chroma_ub[],
9241 double PSCL_THROUGHPUT[],
9242 double PSCL_THROUGHPUT_CHROMA[],
9243 double Dppclk[],
9244 unsigned int BytePerPixelC[],
9245 unsigned int req_per_swath_ub_l[],
9246 unsigned int req_per_swath_ub_c[],
9247
9248 // Output
9249 double DisplayPipeLineDeliveryTimeLuma[],
9250 double DisplayPipeLineDeliveryTimeChroma[],
9251 double DisplayPipeLineDeliveryTimeLumaPrefetch[],
9252 double DisplayPipeLineDeliveryTimeChromaPrefetch[],
9253 double DisplayPipeRequestDeliveryTimeLuma[],
9254 double DisplayPipeRequestDeliveryTimeChroma[],
9255 double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
9256 double DisplayPipeRequestDeliveryTimeChromaPrefetch[])
9257 {
9258 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
9259 double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9260
9261 #ifdef __DML_VBA_DEBUG__
9262 dml2_printf("DML::%s: k=%u : HRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
9263 dml2_printf("DML::%s: k=%u : VRatio = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
9264 dml2_printf("DML::%s: k=%u : HRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio);
9265 dml2_printf("DML::%s: k=%u : VRatioChroma = %f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio);
9266 dml2_printf("DML::%s: k=%u : VRatioPrefetchY = %f\n", __func__, k, VRatioPrefetchY[k]);
9267 dml2_printf("DML::%s: k=%u : VRatioPrefetchC = %f\n", __func__, k, VRatioPrefetchC[k]);
9268 dml2_printf("DML::%s: k=%u : swath_width_luma_ub = %u\n", __func__, k, swath_width_luma_ub[k]);
9269 dml2_printf("DML::%s: k=%u : swath_width_chroma_ub = %u\n", __func__, k, swath_width_chroma_ub[k]);
9270 dml2_printf("DML::%s: k=%u : PSCL_THROUGHPUT = %f\n", __func__, k, PSCL_THROUGHPUT[k]);
9271 dml2_printf("DML::%s: k=%u : PSCL_THROUGHPUT_CHROMA = %f\n", __func__, k, PSCL_THROUGHPUT_CHROMA[k]);
9272 dml2_printf("DML::%s: k=%u : DPPPerSurface = %u\n", __func__, k, cfg_support_info->plane_support_info[k].dpps_used);
9273 dml2_printf("DML::%s: k=%u : pixel_clock_mhz = %f\n", __func__, k, pixel_clock_mhz);
9274 dml2_printf("DML::%s: k=%u : Dppclk = %f\n", __func__, k, Dppclk[k]);
9275 #endif
9276 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio <= 1) {
9277 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz;
9278 } else {
9279 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
9280 }
9281
9282 if (BytePerPixelC[k] == 0) {
9283 DisplayPipeLineDeliveryTimeChroma[k] = 0;
9284 } else {
9285 if (display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio <= 1) {
9286 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz;
9287 } else {
9288 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
9289 }
9290 }
9291
9292 if (VRatioPrefetchY[k] <= 1) {
9293 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio / pixel_clock_mhz;
9294 } else {
9295 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / Dppclk[k];
9296 }
9297
9298 if (BytePerPixelC[k] == 0) {
9299 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
9300 } else {
9301 if (VRatioPrefetchC[k] <= 1) {
9302 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * cfg_support_info->plane_support_info[k].dpps_used / display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio / pixel_clock_mhz;
9303 } else {
9304 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / Dppclk[k];
9305 }
9306 }
9307 #ifdef __DML_VBA_DEBUG__
9308 dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
9309 dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
9310 dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
9311 dml2_printf("DML::%s: k=%u : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
9312 #endif
9313 }
9314
9315 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
9316
9317 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub_l[k];
9318 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub_l[k];
9319 if (BytePerPixelC[k] == 0) {
9320 DisplayPipeRequestDeliveryTimeChroma[k] = 0;
9321 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
9322 } else {
9323 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub_c[k];
9324 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub_c[k];
9325 }
9326 #ifdef __DML_VBA_DEBUG__
9327 dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
9328 dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
9329 dml2_printf("DML::%s: k=%u : req_per_swath_ub_l = %d\n", __func__, k, req_per_swath_ub_l[k]);
9330 dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
9331 dml2_printf("DML::%s: k=%u : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
9332 dml2_printf("DML::%s: k=%u : req_per_swath_ub_c = %d\n", __func__, k, req_per_swath_ub_c[k]);
9333 #endif
9334 }
9335 }
9336
CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params * p)9337 static void CalculateMetaAndPTETimes(struct dml2_core_shared_CalculateMetaAndPTETimes_params *p)
9338 {
9339 unsigned int meta_chunk_width;
9340 unsigned int min_meta_chunk_width;
9341 unsigned int meta_chunk_per_row_int;
9342 unsigned int meta_row_remainder;
9343 unsigned int meta_chunk_threshold;
9344 unsigned int meta_chunks_per_row_ub;
9345 unsigned int meta_chunk_width_chroma;
9346 unsigned int min_meta_chunk_width_chroma;
9347 unsigned int meta_chunk_per_row_int_chroma;
9348 unsigned int meta_row_remainder_chroma;
9349 unsigned int meta_chunk_threshold_chroma;
9350 unsigned int meta_chunks_per_row_ub_chroma;
9351 unsigned int dpte_group_width_luma;
9352 unsigned int dpte_groups_per_row_luma_ub;
9353 unsigned int dpte_group_width_chroma;
9354 unsigned int dpte_groups_per_row_chroma_ub;
9355 double pixel_clock_mhz;
9356
9357 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9358 p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9359 if (p->BytePerPixelC[k] == 0) {
9360 p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
9361 } else {
9362 p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9363 }
9364 p->DST_Y_PER_META_ROW_NOM_L[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9365 if (p->BytePerPixelC[k] == 0) {
9366 p->DST_Y_PER_META_ROW_NOM_C[k] = 0;
9367 } else {
9368 p->DST_Y_PER_META_ROW_NOM_C[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9369 }
9370 }
9371
9372 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9373 if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true && p->mrq_present) {
9374 meta_chunk_width = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelY[k] / p->meta_row_height[k];
9375 min_meta_chunk_width = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelY[k] / p->meta_row_height[k];
9376 meta_chunk_per_row_int = p->meta_row_width[k] / meta_chunk_width;
9377 meta_row_remainder = p->meta_row_width[k] % meta_chunk_width;
9378 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9379 meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_width[k];
9380 } else {
9381 meta_chunk_threshold = 2 * min_meta_chunk_width - p->meta_req_height[k];
9382 }
9383 if (meta_row_remainder <= meta_chunk_threshold) {
9384 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1;
9385 } else {
9386 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2;
9387 }
9388 p->TimePerMetaChunkNominal[k] = p->meta_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio *
9389 p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9390 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
9391 p->TimePerMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9392 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
9393 p->TimePerMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total /
9394 (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub;
9395 if (p->BytePerPixelC[k] == 0) {
9396 p->TimePerChromaMetaChunkNominal[k] = 0;
9397 p->TimePerChromaMetaChunkVBlank[k] = 0;
9398 p->TimePerChromaMetaChunkFlip[k] = 0;
9399 } else {
9400 meta_chunk_width_chroma = p->MetaChunkSize * 1024 * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k];
9401 min_meta_chunk_width_chroma = p->MinMetaChunkSizeBytes * 256 / p->BytePerPixelC[k] / p->meta_row_height_chroma[k];
9402 meta_chunk_per_row_int_chroma = (unsigned int)((double)p->meta_row_width_chroma[k] / meta_chunk_width_chroma);
9403 meta_row_remainder_chroma = p->meta_row_width_chroma[k] % meta_chunk_width_chroma;
9404 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9405 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_width_chroma[k];
9406 } else {
9407 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - p->meta_req_height_chroma[k];
9408 }
9409 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) {
9410 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1;
9411 } else {
9412 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2;
9413 }
9414 p->TimePerChromaMetaChunkNominal[k] = p->meta_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
9415 p->TimePerChromaMetaChunkVBlank[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
9416 p->TimePerChromaMetaChunkFlip[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) / meta_chunks_per_row_ub_chroma;
9417 }
9418 } else {
9419 p->TimePerMetaChunkNominal[k] = 0;
9420 p->TimePerMetaChunkVBlank[k] = 0;
9421 p->TimePerMetaChunkFlip[k] = 0;
9422 p->TimePerChromaMetaChunkNominal[k] = 0;
9423 p->TimePerChromaMetaChunkVBlank[k] = 0;
9424 p->TimePerChromaMetaChunkFlip[k] = 0;
9425 }
9426
9427 #ifdef __DML_VBA_DEBUG__
9428 dml2_printf("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_L[k]);
9429 dml2_printf("DML::%s: k=%d, DST_Y_PER_META_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_META_ROW_NOM_C[k]);
9430 dml2_printf("DML::%s: k=%d, TimePerMetaChunkNominal = %f\n", __func__, k, p->TimePerMetaChunkNominal[k]);
9431 dml2_printf("DML::%s: k=%d, TimePerMetaChunkVBlank = %f\n", __func__, k, p->TimePerMetaChunkVBlank[k]);
9432 dml2_printf("DML::%s: k=%d, TimePerMetaChunkFlip = %f\n", __func__, k, p->TimePerMetaChunkFlip[k]);
9433 dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkNominal = %f\n", __func__, k, p->TimePerChromaMetaChunkNominal[k]);
9434 dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkVBlank = %f\n", __func__, k, p->TimePerChromaMetaChunkVBlank[k]);
9435 dml2_printf("DML::%s: k=%d, TimePerChromaMetaChunkFlip = %f\n", __func__, k, p->TimePerChromaMetaChunkFlip[k]);
9436 #endif
9437 }
9438
9439 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9440 p->DST_Y_PER_PTE_ROW_NOM_L[k] = p->dpte_row_height[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9441 if (p->BytePerPixelC[k] == 0) {
9442 p->DST_Y_PER_PTE_ROW_NOM_C[k] = 0;
9443 } else {
9444 p->DST_Y_PER_PTE_ROW_NOM_C[k] = p->dpte_row_height_chroma[k] / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
9445 }
9446 }
9447
9448 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9449 pixel_clock_mhz = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9450
9451 if (p->display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut)
9452 p->time_per_tdlut_group[k] = 2 * p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / p->tdlut_groups_per_2row_ub[k];
9453 else
9454 p->time_per_tdlut_group[k] = 0;
9455
9456 dml2_printf("DML::%s: k=%u, time_per_tdlut_group = %f\n", __func__, k, p->time_per_tdlut_group[k]);
9457
9458 if (p->display_cfg->gpuvm_enable == true) {
9459 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9460 dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqWidthY[k]);
9461 } else {
9462 dpte_group_width_luma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeY[k] * p->PixelPTEReqHeightY[k]);
9463 }
9464 if (p->use_one_row_for_frame[k]) {
9465 dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma / 2.0, 1.0));
9466 } else {
9467 dpte_groups_per_row_luma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_luma_ub[k] / (double)dpte_group_width_luma, 1.0));
9468 }
9469 if (dpte_groups_per_row_luma_ub <= 2) {
9470 dpte_groups_per_row_luma_ub = dpte_groups_per_row_luma_ub + 1;
9471 }
9472 dml2_printf("DML::%s: k=%u, use_one_row_for_frame = %u\n", __func__, k, p->use_one_row_for_frame[k]);
9473 dml2_printf("DML::%s: k=%u, dpte_group_bytes = %u\n", __func__, k, p->dpte_group_bytes[k]);
9474 dml2_printf("DML::%s: k=%u, PTERequestSizeY = %u\n", __func__, k, p->PTERequestSizeY[k]);
9475 dml2_printf("DML::%s: k=%u, PixelPTEReqWidthY = %u\n", __func__, k, p->PixelPTEReqWidthY[k]);
9476 dml2_printf("DML::%s: k=%u, PixelPTEReqHeightY = %u\n", __func__, k, p->PixelPTEReqHeightY[k]);
9477 dml2_printf("DML::%s: k=%u, dpte_row_width_luma_ub = %u\n", __func__, k, p->dpte_row_width_luma_ub[k]);
9478 dml2_printf("DML::%s: k=%u, dpte_group_width_luma = %u\n", __func__, k, dpte_group_width_luma);
9479 dml2_printf("DML::%s: k=%u, dpte_groups_per_row_luma_ub = %u\n", __func__, k, dpte_groups_per_row_luma_ub);
9480
9481 p->time_per_pte_group_nom_luma[k] = p->DST_Y_PER_PTE_ROW_NOM_L[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
9482 p->time_per_pte_group_vblank_luma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
9483 p->time_per_pte_group_flip_luma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_luma_ub;
9484 if (p->BytePerPixelC[k] == 0) {
9485 p->time_per_pte_group_nom_chroma[k] = 0;
9486 p->time_per_pte_group_vblank_chroma[k] = 0;
9487 p->time_per_pte_group_flip_chroma[k] = 0;
9488 } else {
9489 if (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) {
9490 dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqWidthC[k]);
9491 } else {
9492 dpte_group_width_chroma = (unsigned int)((double)p->dpte_group_bytes[k] / (double)p->PTERequestSizeC[k] * p->PixelPTEReqHeightC[k]);
9493 }
9494
9495 if (p->use_one_row_for_frame[k]) {
9496 dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma / 2.0, 1.0));
9497 } else {
9498 dpte_groups_per_row_chroma_ub = (unsigned int)(math_ceil2((double)p->dpte_row_width_chroma_ub[k] / (double)dpte_group_width_chroma, 1.0));
9499 }
9500 if (dpte_groups_per_row_chroma_ub <= 2) {
9501 dpte_groups_per_row_chroma_ub = dpte_groups_per_row_chroma_ub + 1;
9502 }
9503 dml2_printf("DML::%s: k=%u, dpte_row_width_chroma_ub = %u\n", __func__, k, p->dpte_row_width_chroma_ub[k]);
9504 dml2_printf("DML::%s: k=%u, dpte_group_width_chroma = %u\n", __func__, k, dpte_group_width_chroma);
9505 dml2_printf("DML::%s: k=%u, dpte_groups_per_row_chroma_ub = %u\n", __func__, k, dpte_groups_per_row_chroma_ub);
9506
9507 p->time_per_pte_group_nom_chroma[k] = p->DST_Y_PER_PTE_ROW_NOM_C[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
9508 p->time_per_pte_group_vblank_chroma[k] = p->dst_y_per_row_vblank[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
9509 p->time_per_pte_group_flip_chroma[k] = p->dst_y_per_row_flip[k] * p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz / dpte_groups_per_row_chroma_ub;
9510 }
9511 } else {
9512 p->time_per_pte_group_nom_luma[k] = 0;
9513 p->time_per_pte_group_vblank_luma[k] = 0;
9514 p->time_per_pte_group_flip_luma[k] = 0;
9515 p->time_per_pte_group_nom_chroma[k] = 0;
9516 p->time_per_pte_group_vblank_chroma[k] = 0;
9517 p->time_per_pte_group_flip_chroma[k] = 0;
9518 }
9519 #ifdef __DML_VBA_DEBUG__
9520 dml2_printf("DML::%s: k=%u, dst_y_per_row_vblank = %f\n", __func__, k, p->dst_y_per_row_vblank[k]);
9521 dml2_printf("DML::%s: k=%u, dst_y_per_row_flip = %f\n", __func__, k, p->dst_y_per_row_flip[k]);
9522
9523 dml2_printf("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_L = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_L[k]);
9524 dml2_printf("DML::%s: k=%u, DST_Y_PER_PTE_ROW_NOM_C = %f\n", __func__, k, p->DST_Y_PER_PTE_ROW_NOM_C[k]);
9525 dml2_printf("DML::%s: k=%u, time_per_pte_group_nom_luma = %f\n", __func__, k, p->time_per_pte_group_nom_luma[k]);
9526 dml2_printf("DML::%s: k=%u, time_per_pte_group_vblank_luma = %f\n", __func__, k, p->time_per_pte_group_vblank_luma[k]);
9527 dml2_printf("DML::%s: k=%u, time_per_pte_group_flip_luma = %f\n", __func__, k, p->time_per_pte_group_flip_luma[k]);
9528 dml2_printf("DML::%s: k=%u, time_per_pte_group_nom_chroma = %f\n", __func__, k, p->time_per_pte_group_nom_chroma[k]);
9529 dml2_printf("DML::%s: k=%u, time_per_pte_group_vblank_chroma = %f\n", __func__, k, p->time_per_pte_group_vblank_chroma[k]);
9530 dml2_printf("DML::%s: k=%u, time_per_pte_group_flip_chroma = %f\n", __func__, k, p->time_per_pte_group_flip_chroma[k]);
9531 #endif
9532 }
9533 } // CalculateMetaAndPTETimes
9534
CalculateVMGroupAndRequestTimes(const struct dml2_display_cfg * display_cfg,unsigned int NumberOfActiveSurfaces,unsigned int BytePerPixelC[],double dst_y_per_vm_vblank[],double dst_y_per_vm_flip[],unsigned int dpte_row_width_luma_ub[],unsigned int dpte_row_width_chroma_ub[],unsigned int vm_group_bytes[],unsigned int dpde0_bytes_per_frame_ub_l[],unsigned int dpde0_bytes_per_frame_ub_c[],unsigned int tdlut_pte_bytes_per_frame[],unsigned int meta_pte_bytes_per_frame_ub_l[],unsigned int meta_pte_bytes_per_frame_ub_c[],bool mrq_present,double TimePerVMGroupVBlank[],double TimePerVMGroupFlip[],double TimePerVMRequestVBlank[],double TimePerVMRequestFlip[])9535 static void CalculateVMGroupAndRequestTimes(
9536 const struct dml2_display_cfg *display_cfg,
9537 unsigned int NumberOfActiveSurfaces,
9538 unsigned int BytePerPixelC[],
9539 double dst_y_per_vm_vblank[],
9540 double dst_y_per_vm_flip[],
9541 unsigned int dpte_row_width_luma_ub[],
9542 unsigned int dpte_row_width_chroma_ub[],
9543 unsigned int vm_group_bytes[],
9544 unsigned int dpde0_bytes_per_frame_ub_l[],
9545 unsigned int dpde0_bytes_per_frame_ub_c[],
9546 unsigned int tdlut_pte_bytes_per_frame[],
9547 unsigned int meta_pte_bytes_per_frame_ub_l[],
9548 unsigned int meta_pte_bytes_per_frame_ub_c[],
9549 bool mrq_present,
9550
9551 // Output
9552 double TimePerVMGroupVBlank[],
9553 double TimePerVMGroupFlip[],
9554 double TimePerVMRequestVBlank[],
9555 double TimePerVMRequestFlip[])
9556 {
9557 unsigned int num_group_per_lower_vm_stage = 0;
9558 unsigned int num_req_per_lower_vm_stage = 0;
9559 unsigned int num_group_per_lower_vm_stage_flip;
9560 unsigned int num_group_per_lower_vm_stage_pref;
9561 unsigned int num_req_per_lower_vm_stage_flip;
9562 unsigned int num_req_per_lower_vm_stage_pref;
9563 double line_time;
9564
9565 #ifdef __DML_VBA_DEBUG__
9566 dml2_printf("DML::%s: NumberOfActiveSurfaces = %u\n", __func__, NumberOfActiveSurfaces);
9567 #endif
9568 for (unsigned int k = 0; k < NumberOfActiveSurfaces; ++k) {
9569 double pixel_clock_mhz = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9570 bool dcc_mrq_enable = display_cfg->plane_descriptors[k].surface.dcc.enable && mrq_present;
9571 #ifdef __DML_VBA_DEBUG__
9572 dml2_printf("DML::%s: k=%u, dcc_mrq_enable = %u\n", __func__, k, dcc_mrq_enable);
9573 dml2_printf("DML::%s: k=%u, vm_group_bytes = %u\n", __func__, k, vm_group_bytes[k]);
9574 dml2_printf("DML::%s: k=%u, dpde0_bytes_per_frame_ub_l = %u\n", __func__, k, dpde0_bytes_per_frame_ub_l[k]);
9575 dml2_printf("DML::%s: k=%u, dpde0_bytes_per_frame_ub_c = %u\n", __func__, k, dpde0_bytes_per_frame_ub_c[k]);
9576 dml2_printf("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_l = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_l[k]);
9577 dml2_printf("DML::%s: k=%d, meta_pte_bytes_per_frame_ub_c = %d\n", __func__, k, meta_pte_bytes_per_frame_ub_c[k]);
9578 #endif
9579
9580 if (display_cfg->gpuvm_enable) {
9581 if (display_cfg->gpuvm_max_page_table_levels >= 2) {
9582 num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
9583
9584 if (BytePerPixelC[k] > 0)
9585 num_group_per_lower_vm_stage += (unsigned int) math_ceil2((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1);
9586 }
9587
9588 if (dcc_mrq_enable) {
9589 if (BytePerPixelC[k] > 0) {
9590 num_group_per_lower_vm_stage += (unsigned int)(2.0 /*for each mpde0 group*/ + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) +
9591 math_ceil2((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1));
9592 } else {
9593 num_group_per_lower_vm_stage += (unsigned int)(1.0 + math_ceil2((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1));
9594 }
9595 }
9596
9597 num_group_per_lower_vm_stage_flip = num_group_per_lower_vm_stage;
9598 num_group_per_lower_vm_stage_pref = num_group_per_lower_vm_stage;
9599
9600 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) {
9601 num_group_per_lower_vm_stage_pref += (unsigned int) math_ceil2(tdlut_pte_bytes_per_frame[k] / vm_group_bytes[k], 1);
9602 if (display_cfg->gpuvm_max_page_table_levels >= 2)
9603 num_group_per_lower_vm_stage_pref += 1; // tdpe0 group
9604 }
9605
9606 if (display_cfg->gpuvm_max_page_table_levels >= 2) {
9607 num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_l[k] / 64;
9608 if (BytePerPixelC[k] > 0)
9609 num_req_per_lower_vm_stage += dpde0_bytes_per_frame_ub_c[k];
9610 }
9611
9612 if (dcc_mrq_enable) {
9613 num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_l[k] / 64;
9614 if (BytePerPixelC[k] > 0)
9615 num_req_per_lower_vm_stage += meta_pte_bytes_per_frame_ub_c[k] / 64;
9616 }
9617
9618 num_req_per_lower_vm_stage_flip = num_req_per_lower_vm_stage;
9619 num_req_per_lower_vm_stage_pref = num_req_per_lower_vm_stage;
9620
9621 if (display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut && display_cfg->gpuvm_enable) {
9622 num_req_per_lower_vm_stage_pref += tdlut_pte_bytes_per_frame[k] / 64;
9623 }
9624
9625 line_time = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / pixel_clock_mhz;
9626
9627 if (num_group_per_lower_vm_stage_pref > 0)
9628 TimePerVMGroupVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_group_per_lower_vm_stage_pref;
9629 else
9630 TimePerVMGroupVBlank[k] = 0;
9631
9632 if (num_group_per_lower_vm_stage_flip > 0)
9633 TimePerVMGroupFlip[k] = dst_y_per_vm_flip[k] * line_time / num_group_per_lower_vm_stage_flip;
9634 else
9635 TimePerVMGroupFlip[k] = 0;
9636
9637 if (num_req_per_lower_vm_stage_pref > 0)
9638 TimePerVMRequestVBlank[k] = dst_y_per_vm_vblank[k] * line_time / num_req_per_lower_vm_stage_pref;
9639 else
9640 TimePerVMRequestVBlank[k] = 0.0;
9641 if (num_req_per_lower_vm_stage_flip > 0)
9642 TimePerVMRequestFlip[k] = dst_y_per_vm_flip[k] * line_time / num_req_per_lower_vm_stage_flip;
9643 else
9644 TimePerVMRequestFlip[k] = 0.0;
9645
9646 dml2_printf("DML::%s: k=%u, dst_y_per_vm_vblank = %f\n", __func__, k, dst_y_per_vm_vblank[k]);
9647 dml2_printf("DML::%s: k=%u, dst_y_per_vm_flip = %f\n", __func__, k, dst_y_per_vm_flip[k]);
9648 dml2_printf("DML::%s: k=%u, line_time = %f\n", __func__, k, line_time);
9649 dml2_printf("DML::%s: k=%u, num_group_per_lower_vm_stage_pref = %f\n", __func__, k, num_group_per_lower_vm_stage_pref);
9650 dml2_printf("DML::%s: k=%u, num_group_per_lower_vm_stage_flip = %f\n", __func__, k, num_group_per_lower_vm_stage_flip);
9651 dml2_printf("DML::%s: k=%u, num_req_per_lower_vm_stage_pref = %f\n", __func__, k, num_req_per_lower_vm_stage_pref);
9652 dml2_printf("DML::%s: k=%u, num_req_per_lower_vm_stage_flip = %f\n", __func__, k, num_req_per_lower_vm_stage_flip);
9653
9654 if (display_cfg->gpuvm_max_page_table_levels > 2) {
9655 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2;
9656 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2;
9657 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2;
9658 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2;
9659 }
9660
9661 } else {
9662 TimePerVMGroupVBlank[k] = 0;
9663 TimePerVMGroupFlip[k] = 0;
9664 TimePerVMRequestVBlank[k] = 0;
9665 TimePerVMRequestFlip[k] = 0;
9666 }
9667
9668 #ifdef __DML_VBA_DEBUG__
9669 dml2_printf("DML::%s: k=%u, TimePerVMGroupVBlank = %f\n", __func__, k, TimePerVMGroupVBlank[k]);
9670 dml2_printf("DML::%s: k=%u, TimePerVMGroupFlip = %f\n", __func__, k, TimePerVMGroupFlip[k]);
9671 dml2_printf("DML::%s: k=%u, TimePerVMRequestVBlank = %f\n", __func__, k, TimePerVMRequestVBlank[k]);
9672 dml2_printf("DML::%s: k=%u, TimePerVMRequestFlip = %f\n", __func__, k, TimePerVMRequestFlip[k]);
9673 #endif
9674 }
9675 }
9676
CalculateStutterEfficiency(struct dml2_core_internal_scratch * scratch,struct dml2_core_calcs_CalculateStutterEfficiency_params * p)9677 static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratch,
9678 struct dml2_core_calcs_CalculateStutterEfficiency_params *p)
9679 {
9680 struct dml2_core_calcs_CalculateStutterEfficiency_locals *l = &scratch->CalculateStutterEfficiency_locals;
9681
9682 unsigned int TotalNumberOfActiveOTG = 0;
9683 double SinglePixelClock = 0;
9684 unsigned int SingleHTotal = 0;
9685 unsigned int SingleVTotal = 0;
9686 bool SameTiming = true;
9687 bool FoundCriticalSurface = false;
9688 double LastZ8StutterPeriod = 0;
9689
9690 memset(l, 0, sizeof(struct dml2_core_calcs_CalculateStutterEfficiency_locals));
9691
9692 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9693 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
9694 if (p->display_cfg->plane_descriptors[k].surface.dcc.enable == true) {
9695 if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesY[k] > p->SwathHeightY[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesY[k] > p->SwathHeightY[k]) || p->DCCYMaxUncompressedBlock[k] < 256) {
9696 l->MaximumEffectiveCompressionLuma = 2;
9697 } else {
9698 l->MaximumEffectiveCompressionLuma = 4;
9699 }
9700 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0, l->MaximumEffectiveCompressionLuma);
9701 #ifdef __DML_VBA_DEBUG__
9702 dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
9703 dml2_printf("DML::%s: k=%u, NetDCCRateLuma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane0);
9704 dml2_printf("DML::%s: k=%u, MaximumEffectiveCompressionLuma = %f\n", __func__, k, l->MaximumEffectiveCompressionLuma);
9705 #endif
9706 l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0;
9707 l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane0 / l->MaximumEffectiveCompressionLuma;
9708
9709 if (p->ReadBandwidthSurfaceChroma[k] > 0) {
9710 if ((dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockWidth256BytesC[k] > p->SwathHeightC[k]) || (!dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle) && p->BlockHeight256BytesC[k] > p->SwathHeightC[k]) || p->DCCCMaxUncompressedBlock[k] < 256) {
9711 l->MaximumEffectiveCompressionChroma = 2;
9712 } else {
9713 l->MaximumEffectiveCompressionChroma = 4;
9714 }
9715 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] / math_min2(p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1, l->MaximumEffectiveCompressionChroma);
9716 #ifdef __DML_VBA_DEBUG__
9717 dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, p->ReadBandwidthSurfaceChroma[k]);
9718 dml2_printf("DML::%s: k=%u, NetDCCRateChroma = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].surface.dcc.informative.dcc_rate_plane1);
9719 dml2_printf("DML::%s: k=%u, MaximumEffectiveCompressionChroma = %f\n", __func__, k, l->MaximumEffectiveCompressionChroma);
9720 #endif
9721 l->TotalZeroSizeRequestReadBandwidth = l->TotalZeroSizeRequestReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1;
9722 l->TotalZeroSizeCompressedReadBandwidth = l->TotalZeroSizeCompressedReadBandwidth + p->ReadBandwidthSurfaceChroma[k] * p->display_cfg->plane_descriptors[k].surface.dcc.informative.fraction_of_zero_size_request_plane1 / l->MaximumEffectiveCompressionChroma;
9723 }
9724 } else {
9725 l->TotalCompressedReadBandwidth = l->TotalCompressedReadBandwidth + p->ReadBandwidthSurfaceLuma[k] + p->ReadBandwidthSurfaceChroma[k];
9726 }
9727 l->TotalRowReadBandwidth = l->TotalRowReadBandwidth + p->DPPPerSurface[k] * (p->meta_row_bw[k] + p->dpte_row_bw[k]);
9728 }
9729 }
9730
9731 l->AverageDCCCompressionRate = p->TotalDataReadBandwidth / l->TotalCompressedReadBandwidth;
9732 l->AverageDCCZeroSizeFraction = l->TotalZeroSizeRequestReadBandwidth / p->TotalDataReadBandwidth;
9733
9734 #ifdef __DML_VBA_DEBUG__
9735 dml2_printf("DML::%s: UnboundedRequestEnabled = %u\n", __func__, p->UnboundedRequestEnabled);
9736 dml2_printf("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, l->TotalCompressedReadBandwidth);
9737 dml2_printf("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, l->TotalZeroSizeRequestReadBandwidth);
9738 dml2_printf("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, l->TotalZeroSizeCompressedReadBandwidth);
9739 dml2_printf("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, l->MaximumEffectiveCompressionLuma);
9740 dml2_printf("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, l->MaximumEffectiveCompressionChroma);
9741 dml2_printf("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
9742 dml2_printf("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, l->AverageDCCZeroSizeFraction);
9743
9744 dml2_printf("DML::%s: CompbufReservedSpace64B = %u (%f kbytes)\n", __func__, p->CompbufReservedSpace64B, p->CompbufReservedSpace64B * 64 / 1024.0);
9745 dml2_printf("DML::%s: CompbufReservedSpaceZs = %u\n", __func__, p->CompbufReservedSpaceZs);
9746 dml2_printf("DML::%s: CompressedBufferSizeInkByte = %u kbytes\n", __func__, p->CompressedBufferSizeInkByte);
9747 dml2_printf("DML::%s: ROBBufferSizeInKByte = %u kbytes\n", __func__, p->ROBBufferSizeInKByte);
9748 #endif
9749 if (l->AverageDCCZeroSizeFraction == 1) {
9750 l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth;
9751 l->EffectiveCompressedBufferSize = (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageZeroSizeCompressionRate + ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 * l->AverageZeroSizeCompressionRate;
9752
9753
9754 } else if (l->AverageDCCZeroSizeFraction > 0) {
9755 l->AverageZeroSizeCompressionRate = l->TotalZeroSizeRequestReadBandwidth / l->TotalZeroSizeCompressedReadBandwidth;
9756 l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate,
9757 (double)p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate)) +
9758 (p->rob_alloc_compressed ? math_min2(((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * l->AverageDCCCompressionRate,
9759 ((double)p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate))
9760 : ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64));
9761
9762
9763 #ifdef __DML_VBA_DEBUG__
9764 dml2_printf("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate);
9765 dml2_printf("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate + 1 / l->AverageDCCCompressionRate));
9766 dml2_printf("DML::%s: min 3 = %d\n", __func__, (p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64));
9767 dml2_printf("DML::%s: min 4 = %f\n", __func__, (p->ZeroSizeBufferEntries - p->CompbufReservedSpaceZs) * 64 / (l->AverageDCCZeroSizeFraction / l->AverageZeroSizeCompressionRate));
9768 #endif
9769 } else {
9770 l->EffectiveCompressedBufferSize = math_min2((double)p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate,
9771 (double)p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate) +
9772 ((double)p->ROBBufferSizeInKByte * 1024 - p->CompbufReservedSpace64B * 64) * (p->rob_alloc_compressed ? l->AverageDCCCompressionRate : 1.0);
9773
9774 #ifdef __DML_VBA_DEBUG__
9775 dml2_printf("DML::%s: min 1 = %f\n", __func__, p->CompressedBufferSizeInkByte * 1024 * l->AverageDCCCompressionRate);
9776 dml2_printf("DML::%s: min 2 = %f\n", __func__, p->MetaFIFOSizeInKEntries * 1024 * 64 * l->AverageDCCCompressionRate);
9777 #endif
9778 }
9779
9780 #ifdef __DML_VBA_DEBUG__
9781 dml2_printf("DML::%s: MetaFIFOSizeInKEntries = %u\n", __func__, p->MetaFIFOSizeInKEntries);
9782 dml2_printf("DML::%s: ZeroSizeBufferEntries = %u\n", __func__, p->ZeroSizeBufferEntries);
9783 dml2_printf("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, l->AverageZeroSizeCompressionRate);
9784 dml2_printf("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0);
9785 #endif
9786
9787 *p->StutterPeriod = 0;
9788
9789 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9790 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
9791 l->LinesInDETY = ((double)p->DETBufferSizeY[k] + (p->UnboundedRequestEnabled == true ? l->EffectiveCompressedBufferSize : 0) * p->ReadBandwidthSurfaceLuma[k] / p->TotalDataReadBandwidth) / p->BytePerPixelDETY[k] / p->SwathWidthY[k];
9792 l->LinesInDETYRoundedDownToSwath = math_floor2(l->LinesInDETY, p->SwathHeightY[k]);
9793 l->DETBufferingTimeY = l->LinesInDETYRoundedDownToSwath * ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) / p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
9794 #ifdef __DML_VBA_DEBUG__
9795 dml2_printf("DML::%s: k=%u, DETBufferSizeY = %u (%u kbytes)\n", __func__, k, p->DETBufferSizeY[k], p->DETBufferSizeY[k] / 1024);
9796 dml2_printf("DML::%s: k=%u, BytePerPixelDETY = %f\n", __func__, k, p->BytePerPixelDETY[k]);
9797 dml2_printf("DML::%s: k=%u, SwathWidthY = %u\n", __func__, k, p->SwathWidthY[k]);
9798 dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, p->ReadBandwidthSurfaceLuma[k]);
9799 dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, p->TotalDataReadBandwidth);
9800 dml2_printf("DML::%s: k=%u, LinesInDETY = %f\n", __func__, k, l->LinesInDETY);
9801 dml2_printf("DML::%s: k=%u, LinesInDETYRoundedDownToSwath = %f\n", __func__, k, l->LinesInDETYRoundedDownToSwath);
9802 dml2_printf("DML::%s: k=%u, VRatio = %f\n", __func__, k, p->display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
9803 dml2_printf("DML::%s: k=%u, DETBufferingTimeY = %f\n", __func__, k, l->DETBufferingTimeY);
9804 #endif
9805
9806 if (!FoundCriticalSurface || l->DETBufferingTimeY < *p->StutterPeriod) {
9807 bool isInterlaceTiming = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !p->ProgressiveToInterlaceUnitInOPP;
9808
9809 FoundCriticalSurface = true;
9810 *p->StutterPeriod = l->DETBufferingTimeY;
9811 l->FrameTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9812 l->VActiveTimeCriticalSurface = (isInterlaceTiming ? math_floor2((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active / 2.0, 1.0) : p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_active) * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9813 l->BytePerPixelYCriticalSurface = p->BytePerPixelY[k];
9814 l->SwathWidthYCriticalSurface = p->SwathWidthY[k];
9815 l->SwathHeightYCriticalSurface = p->SwathHeightY[k];
9816 l->BlockWidth256BytesYCriticalSurface = p->BlockWidth256BytesY[k];
9817 l->DETBufferSizeYCriticalSurface = p->DETBufferSizeY[k];
9818 l->MinTTUVBlankCriticalSurface = p->MinTTUVBlank[k];
9819 l->SinglePlaneCriticalSurface = (p->ReadBandwidthSurfaceChroma[k] == 0);
9820 l->SinglePipeCriticalSurface = (p->DPPPerSurface[k] == 1);
9821
9822 #ifdef __DML_VBA_DEBUG__
9823 dml2_printf("DML::%s: k=%u, FoundCriticalSurface = %u\n", __func__, k, FoundCriticalSurface);
9824 dml2_printf("DML::%s: k=%u, StutterPeriod = %f\n", __func__, k, *p->StutterPeriod);
9825 dml2_printf("DML::%s: k=%u, MinTTUVBlankCriticalSurface = %f\n", __func__, k, l->MinTTUVBlankCriticalSurface);
9826 dml2_printf("DML::%s: k=%u, FrameTimeCriticalSurface= %f\n", __func__, k, l->FrameTimeCriticalSurface);
9827 dml2_printf("DML::%s: k=%u, VActiveTimeCriticalSurface = %f\n", __func__, k, l->VActiveTimeCriticalSurface);
9828 dml2_printf("DML::%s: k=%u, BytePerPixelYCriticalSurface = %u\n", __func__, k, l->BytePerPixelYCriticalSurface);
9829 dml2_printf("DML::%s: k=%u, SwathWidthYCriticalSurface = %f\n", __func__, k, l->SwathWidthYCriticalSurface);
9830 dml2_printf("DML::%s: k=%u, SwathHeightYCriticalSurface = %f\n", __func__, k, l->SwathHeightYCriticalSurface);
9831 dml2_printf("DML::%s: k=%u, BlockWidth256BytesYCriticalSurface = %u\n", __func__, k, l->BlockWidth256BytesYCriticalSurface);
9832 dml2_printf("DML::%s: k=%u, SinglePlaneCriticalSurface = %u\n", __func__, k, l->SinglePlaneCriticalSurface);
9833 dml2_printf("DML::%s: k=%u, SinglePipeCriticalSurface = %u\n", __func__, k, l->SinglePipeCriticalSurface);
9834 #endif
9835 }
9836 }
9837 }
9838
9839 // for bounded req, the stutter period is calculated only based on DET size, but during burst there can be some return inside ROB/compressed buffer
9840 // stutter period is calculated only on the det sizing
9841 // if (cdb + rob >= det) the stutter burst will be absorbed by the cdb + rob which is before decompress
9842 // else
9843 // the cdb + rob part will be in compressed rate with urg bw (idea bw)
9844 // the det part will be return at uncompressed rate with 64B/dcfclk
9845 //
9846 // for unbounded req, the stutter period should be calculated as total of CDB+ROB+DET, so the term "PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer"
9847 // should be == EffectiveCompressedBufferSize which will returned a compressed rate, the rest of stutter period is from the DET will be returned at uncompressed rate with 64B/dcfclk
9848
9849 l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = math_min2(*p->StutterPeriod * p->TotalDataReadBandwidth, l->EffectiveCompressedBufferSize);
9850 #ifdef __DML_VBA_DEBUG__
9851 dml2_printf("DML::%s: AverageDCCCompressionRate = %f\n", __func__, l->AverageDCCCompressionRate);
9852 dml2_printf("DML::%s: StutterPeriod*TotalDataReadBandwidth = %f (%f kbytes)\n", __func__, *p->StutterPeriod * p->TotalDataReadBandwidth, (*p->StutterPeriod * p->TotalDataReadBandwidth) / 1024.0);
9853 dml2_printf("DML::%s: EffectiveCompressedBufferSize = %f (%f kbytes)\n", __func__, l->EffectiveCompressedBufferSize, l->EffectiveCompressedBufferSize / 1024.0);
9854 dml2_printf("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f (%f kbytes)\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / 1024);
9855 dml2_printf("DML::%s: ReturnBW = %f\n", __func__, p->ReturnBW);
9856 dml2_printf("DML::%s: TotalDataReadBandwidth = %f\n", __func__, p->TotalDataReadBandwidth);
9857 dml2_printf("DML::%s: TotalRowReadBandwidth = %f\n", __func__, l->TotalRowReadBandwidth);
9858 dml2_printf("DML::%s: DCFCLK = %f\n", __func__, p->DCFCLK);
9859 #endif
9860
9861 l->StutterBurstTime = l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer
9862 / (p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) +
9863 (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer)
9864 / math_min2(p->DCFCLK * 64, p->ReturnBW * (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate)) +
9865 *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW;
9866 #ifdef __DML_VBA_DEBUG__
9867 dml2_printf("DML::%s: Part 1 = %f\n", __func__, l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / p->ReturnBW / (p->hw_debug5 ? 1 : l->AverageDCCCompressionRate));
9868 dml2_printf("DML::%s: Part 2 = %f\n", __func__, (*p->StutterPeriod * p->TotalDataReadBandwidth - l->PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (p->DCFCLK * 64));
9869 dml2_printf("DML::%s: Part 3 = %f\n", __func__, *p->StutterPeriod * l->TotalRowReadBandwidth / p->ReturnBW);
9870 dml2_printf("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime);
9871 #endif
9872 l->TotalActiveWriteback = 0;
9873 memset(l->stream_visited, 0, DML2_MAX_PLANES * sizeof(bool));
9874
9875 for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) {
9876 if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) {
9877 if (!l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index]) {
9878
9879 if (p->display_cfg->stream_descriptors[k].writeback.enable)
9880 l->TotalActiveWriteback = l->TotalActiveWriteback + 1;
9881
9882 if (TotalNumberOfActiveOTG == 0) { // first otg
9883 SinglePixelClock = ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
9884 SingleHTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total;
9885 SingleVTotal = p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total;
9886 } else if (SinglePixelClock != ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) ||
9887 SingleHTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.h_total ||
9888 SingleVTotal != p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].timing.v_total) {
9889 SameTiming = false;
9890 }
9891 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1;
9892 l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index] = 1;
9893 }
9894 }
9895 }
9896
9897 if (l->TotalActiveWriteback == 0) {
9898 #ifdef __DML_VBA_DEBUG__
9899 dml2_printf("DML::%s: SRExitTime = %f\n", __func__, p->SRExitTime);
9900 dml2_printf("DML::%s: SRExitZ8Time = %f\n", __func__, p->SRExitZ8Time);
9901 dml2_printf("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
9902 #endif
9903 *p->StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitTime + l->StutterBurstTime) / *p->StutterPeriod) * 100;
9904 *p->Z8StutterEfficiencyNotIncludingVBlank = math_max2(0., 1 - (p->SRExitZ8Time + l->StutterBurstTime) / *p->StutterPeriod) * 100;
9905 *p->NumberOfStutterBurstsPerFrame = (*p->StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
9906 *p->Z8NumberOfStutterBurstsPerFrame = (*p->Z8StutterEfficiencyNotIncludingVBlank > 0 ? (unsigned int)(math_ceil2(l->VActiveTimeCriticalSurface / *p->StutterPeriod, 1)) : 0);
9907 } else {
9908 *p->StutterEfficiencyNotIncludingVBlank = 0.;
9909 *p->Z8StutterEfficiencyNotIncludingVBlank = 0.;
9910 *p->NumberOfStutterBurstsPerFrame = 0;
9911 *p->Z8NumberOfStutterBurstsPerFrame = 0;
9912 }
9913 #ifdef __DML_VBA_DEBUG__
9914 dml2_printf("DML::%s: VActiveTimeCriticalSurface = %f\n", __func__, l->VActiveTimeCriticalSurface);
9915 dml2_printf("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
9916 dml2_printf("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->Z8StutterEfficiencyNotIncludingVBlank);
9917 dml2_printf("DML::%s: NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->NumberOfStutterBurstsPerFrame);
9918 dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
9919 #endif
9920
9921 if (*p->StutterEfficiencyNotIncludingVBlank > 0) {
9922 if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) {
9923 *p->StutterEfficiency = *p->StutterEfficiencyNotIncludingVBlank;
9924 } else {
9925 *p->StutterEfficiency = (1 - (*p->NumberOfStutterBurstsPerFrame * p->SRExitTime + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100;
9926 }
9927 } else {
9928 *p->StutterEfficiency = 0;
9929 *p->NumberOfStutterBurstsPerFrame = 0;
9930 }
9931
9932 if (*p->Z8StutterEfficiencyNotIncludingVBlank > 0) {
9933 LastZ8StutterPeriod = l->VActiveTimeCriticalSurface - (*p->Z8NumberOfStutterBurstsPerFrame - 1) * *p->StutterPeriod;
9934 if (!((p->SynchronizeTimings || TotalNumberOfActiveOTG == 1) && SameTiming)) {
9935 *p->Z8StutterEfficiency = *p->Z8StutterEfficiencyNotIncludingVBlank;
9936 } else {
9937 *p->Z8StutterEfficiency = (1 - (*p->Z8NumberOfStutterBurstsPerFrame * p->SRExitZ8Time + l->StutterBurstTime * l->VActiveTimeCriticalSurface / *p->StutterPeriod) / l->FrameTimeCriticalSurface) * 100;
9938 }
9939 } else {
9940 *p->Z8StutterEfficiency = 0.;
9941 *p->Z8NumberOfStutterBurstsPerFrame = 0;
9942 }
9943
9944 #ifdef __DML_VBA_DEBUG__
9945 dml2_printf("DML::%s: TotalNumberOfActiveOTG = %u\n", __func__, TotalNumberOfActiveOTG);
9946 dml2_printf("DML::%s: SameTiming = %u\n", __func__, SameTiming);
9947 dml2_printf("DML::%s: SynchronizeTimings = %u\n", __func__, p->SynchronizeTimings);
9948 dml2_printf("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod);
9949 dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Z8StutterEnterPlusExitWatermark);
9950 dml2_printf("DML::%s: StutterBurstTime = %f\n", __func__, l->StutterBurstTime);
9951 dml2_printf("DML::%s: StutterPeriod = %f\n", __func__, *p->StutterPeriod);
9952 dml2_printf("DML::%s: StutterEfficiency = %f\n", __func__, *p->StutterEfficiency);
9953 dml2_printf("DML::%s: Z8StutterEfficiency = %f\n", __func__, *p->Z8StutterEfficiency);
9954 dml2_printf("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *p->StutterEfficiencyNotIncludingVBlank);
9955 dml2_printf("DML::%s: Z8NumberOfStutterBurstsPerFrame = %u\n", __func__, *p->Z8NumberOfStutterBurstsPerFrame);
9956 #endif
9957
9958 *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = !(!p->UnboundedRequestEnabled && (p->NumberOfActiveSurfaces == 1) && l->SinglePlaneCriticalSurface && l->SinglePipeCriticalSurface);
9959
9960 #ifdef __DML_VBA_DEBUG__
9961 dml2_printf("DML::%s: DETBufferSizeYCriticalSurface = %u\n", __func__, l->DETBufferSizeYCriticalSurface);
9962 dml2_printf("DML::%s: PixelChunkSizeInKByte = %u\n", __func__, p->PixelChunkSizeInKByte);
9963 dml2_printf("DML::%s: DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = %u\n", __func__, *p->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE);
9964 #endif
9965 }
9966
dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex * in_out_params)9967 static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex *in_out_params)
9968 {
9969 const struct dml2_display_cfg *display_cfg = in_out_params->in_display_cfg;
9970 const struct dml2_mcg_min_clock_table *min_clk_table = in_out_params->min_clk_table;
9971 const struct core_display_cfg_support_info *cfg_support_info = in_out_params->cfg_support_info;
9972 struct dml2_core_internal_display_mode_lib *mode_lib = in_out_params->mode_lib;
9973 struct dml2_display_cfg_programming *programming = in_out_params->programming;
9974
9975 struct dml2_core_calcs_mode_programming_locals *s = &mode_lib->scratch.dml_core_mode_programming_locals;
9976 struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params *CalculateWatermarks_params = &mode_lib->scratch.CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params;
9977 struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params;
9978 struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params;
9979 struct dml2_core_calcs_CalculateStutterEfficiency_params *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params;
9980 struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params;
9981 struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params;
9982 struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params;
9983 struct dml2_core_shared_CalculateMetaAndPTETimes_params *CalculateMetaAndPTETimes_params = &mode_lib->scratch.CalculateMetaAndPTETimes_params;
9984 struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params;
9985 struct dml2_core_calcs_calculate_bytes_to_fetch_required_to_hide_latency_params *calculate_bytes_to_fetch_required_to_hide_latency_params = &mode_lib->scratch.calculate_bytes_to_fetch_required_to_hide_latency_params;
9986
9987 unsigned int k;
9988 bool must_support_iflip;
9989 const long min_return_uclk_cycles = 83;
9990 const long min_return_fclk_cycles = 75;
9991 const double max_fclk_mhz = min_clk_table->max_clocks_khz.fclk / 1000.0;
9992 double hard_minimum_dcfclk_mhz = (double)min_clk_table->dram_bw_table.entries[0].min_dcfclk_khz / 1000.0;
9993 double max_uclk_mhz = 0;
9994 double min_return_latency_in_DCFCLK_cycles = 0;
9995
9996 dml2_printf("DML::%s: --- START --- \n", __func__);
9997
9998 memset(&mode_lib->scratch, 0, sizeof(struct dml2_core_internal_scratch));
9999 memset(&mode_lib->mp, 0, sizeof(struct dml2_core_internal_mode_program));
10000
10001 s->num_active_planes = display_cfg->num_planes;
10002 get_stream_output_bpp(s->OutputBpp, display_cfg);
10003
10004 mode_lib->mp.num_active_pipes = dml_get_num_active_pipes(display_cfg->num_planes, cfg_support_info);
10005 dml_calc_pipe_plane_mapping(cfg_support_info, mode_lib->mp.pipe_plane);
10006
10007 mode_lib->mp.Dcfclk = programming->min_clocks.dcn4x.active.dcfclk_khz / 1000.0;
10008 mode_lib->mp.FabricClock = programming->min_clocks.dcn4x.active.fclk_khz / 1000.0;
10009 mode_lib->mp.dram_bw_mbps = uclk_khz_to_dram_bw_mbps(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table.dram_config);
10010 mode_lib->mp.uclk_freq_mhz = programming->min_clocks.dcn4x.active.uclk_khz / 1000.0;
10011 mode_lib->mp.GlobalDPPCLK = programming->min_clocks.dcn4x.dpprefclk_khz / 1000.0;
10012 s->SOCCLK = (double)programming->min_clocks.dcn4x.socclk_khz / 1000;
10013 mode_lib->mp.qos_param_index = get_qos_param_index(programming->min_clocks.dcn4x.active.uclk_khz, mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params);
10014 mode_lib->mp.active_min_uclk_dpm_index = get_active_min_uclk_dpm_index(programming->min_clocks.dcn4x.active.uclk_khz, &mode_lib->soc.clk_table);
10015
10016 for (k = 0; k < s->num_active_planes; ++k) {
10017 unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index;
10018 dml2_assert(cfg_support_info->stream_support_info[stream_index].odms_used <= 4);
10019 dml2_assert(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4 ||
10020 cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2 ||
10021 cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);
10022
10023 if (cfg_support_info->stream_support_info[stream_index].odms_used > 1)
10024 dml2_assert(cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 1);
10025
10026 switch (cfg_support_info->stream_support_info[stream_index].odms_used) {
10027 case (4):
10028 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_4to1;
10029 break;
10030 case (3):
10031 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_3to1;
10032 break;
10033 case (2):
10034 mode_lib->mp.ODMMode[k] = dml2_odm_mode_combine_2to1;
10035 break;
10036 default:
10037 if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 4)
10038 mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to4;
10039 else if (cfg_support_info->stream_support_info[stream_index].num_odm_output_segments == 2)
10040 mode_lib->mp.ODMMode[k] = dml2_odm_mode_mso_1to2;
10041 else
10042 mode_lib->mp.ODMMode[k] = dml2_odm_mode_bypass;
10043 break;
10044 }
10045 }
10046
10047 for (k = 0; k < s->num_active_planes; ++k) {
10048 mode_lib->mp.NoOfDPP[k] = cfg_support_info->plane_support_info[k].dpps_used;
10049 mode_lib->mp.Dppclk[k] = programming->plane_programming[k].min_clocks.dcn4x.dppclk_khz / 1000.0;
10050 dml2_assert(mode_lib->mp.Dppclk[k] > 0);
10051 }
10052
10053 for (k = 0; k < s->num_active_planes; ++k) {
10054 unsigned int stream_index = display_cfg->plane_descriptors[k].stream_index;
10055 mode_lib->mp.DSCCLK[k] = programming->stream_programming[stream_index].min_clocks.dcn4x.dscclk_khz / 1000.0;
10056 dml2_printf("DML::%s: k=%d stream_index=%d, mode_lib->mp.DSCCLK = %f\n", __func__, k, stream_index, mode_lib->mp.DSCCLK[k]);
10057 }
10058
10059 mode_lib->mp.Dispclk = programming->min_clocks.dcn4x.dispclk_khz / 1000.0;
10060 mode_lib->mp.DCFCLKDeepSleep = programming->min_clocks.dcn4x.deepsleep_dcfclk_khz / 1000.0;
10061
10062 dml2_assert(mode_lib->mp.Dcfclk > 0);
10063 dml2_assert(mode_lib->mp.FabricClock > 0);
10064 dml2_assert(mode_lib->mp.dram_bw_mbps > 0);
10065 dml2_assert(mode_lib->mp.uclk_freq_mhz > 0);
10066 dml2_assert(mode_lib->mp.GlobalDPPCLK > 0);
10067 dml2_assert(mode_lib->mp.Dispclk > 0);
10068 dml2_assert(mode_lib->mp.DCFCLKDeepSleep > 0);
10069 dml2_assert(s->SOCCLK > 0);
10070
10071 #ifdef __DML_VBA_DEBUG__
10072 // dml2_printf_dml_display_cfg_timing(&display_cfg->timing, s->num_active_planes);
10073 // dml2_printf_dml_display_cfg_plane(&display_cfg->plane, s->num_active_planes);
10074 // dml2_printf_dml_display_cfg_surface(&display_cfg->surface, s->num_active_planes);
10075 // dml2_printf_dml_display_cfg_output(&display_cfg->output, s->num_active_planes);
10076 // dml2_printf_dml_display_cfg_hw_resource(&display_cfg->hw, s->num_active_planes);
10077
10078 dml2_printf("DML::%s: num_active_planes = %u\n", __func__, s->num_active_planes);
10079 dml2_printf("DML::%s: num_active_pipes = %u\n", __func__, mode_lib->mp.num_active_pipes);
10080 dml2_printf("DML::%s: Dcfclk = %f\n", __func__, mode_lib->mp.Dcfclk);
10081 dml2_printf("DML::%s: FabricClock = %f\n", __func__, mode_lib->mp.FabricClock);
10082 dml2_printf("DML::%s: dram_bw_mbps = %f\n", __func__, mode_lib->mp.dram_bw_mbps);
10083 dml2_printf("DML::%s: uclk_freq_mhz = %f\n", __func__, mode_lib->mp.uclk_freq_mhz);
10084 dml2_printf("DML::%s: Dispclk = %f\n", __func__, mode_lib->mp.Dispclk);
10085 for (k = 0; k < s->num_active_planes; ++k) {
10086 dml2_printf("DML::%s: Dppclk[%0d] = %f\n", __func__, k, mode_lib->mp.Dppclk[k]);
10087 }
10088 dml2_printf("DML::%s: GlobalDPPCLK = %f\n", __func__, mode_lib->mp.GlobalDPPCLK);
10089 dml2_printf("DML::%s: DCFCLKDeepSleep = %f\n", __func__, mode_lib->mp.DCFCLKDeepSleep);
10090 dml2_printf("DML::%s: SOCCLK = %f\n", __func__, s->SOCCLK);
10091 dml2_printf("DML::%s: min_clk_index = %0d\n", __func__, in_out_params->min_clk_index);
10092 dml2_printf("DML::%s: min_clk_table min_fclk_khz = %d\n", __func__, min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].min_fclk_khz);
10093 dml2_printf("DML::%s: min_clk_table uclk_mhz = %f\n", __func__, dram_bw_kbps_to_uclk_mhz(min_clk_table->dram_bw_table.entries[in_out_params->min_clk_index].pre_derate_dram_bw_kbps, &mode_lib->soc.clk_table.dram_config));
10094 for (k = 0; k < mode_lib->mp.num_active_pipes; ++k) {
10095 dml2_printf("DML::%s: pipe=%d is in plane=%d\n", __func__, k, mode_lib->mp.pipe_plane[k]);
10096 dml2_printf("DML::%s: Per-plane DPPPerSurface[%0d] = %d\n", __func__, k, mode_lib->mp.NoOfDPP[k]);
10097 }
10098
10099 for (k = 0; k < s->num_active_planes; k++)
10100 dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns);
10101 #endif
10102
10103 CalculateMaxDETAndMinCompressedBufferSize(
10104 mode_lib->ip.config_return_buffer_size_in_kbytes,
10105 mode_lib->ip.config_return_buffer_segment_size_in_kbytes,
10106 mode_lib->ip.rob_buffer_size_kbytes,
10107 mode_lib->ip.max_num_dpp,
10108 display_cfg->overrides.hw.force_nom_det_size_kbytes.enable,
10109 display_cfg->overrides.hw.force_nom_det_size_kbytes.value,
10110 mode_lib->ip.dcn_mrq_present,
10111
10112 /* Output */
10113 &s->MaxTotalDETInKByte,
10114 &s->NomDETInKByte,
10115 &s->MinCompressedBufferSizeInKByte);
10116
10117
10118 PixelClockAdjustmentForProgressiveToInterlaceUnit(display_cfg, mode_lib->ip.ptoi_supported, s->PixelClockBackEnd);
10119
10120 for (k = 0; k < s->num_active_planes; ++k) {
10121 CalculateSinglePipeDPPCLKAndSCLThroughput(
10122 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio,
10123 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio,
10124 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
10125 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
10126 mode_lib->ip.max_dchub_pscl_bw_pix_per_clk,
10127 mode_lib->ip.max_pscl_lb_bw_pix_per_clk,
10128 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
10129 display_cfg->plane_descriptors[k].pixel_format,
10130 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_taps,
10131 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps,
10132 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps,
10133 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_taps,
10134
10135 /* Output */
10136 &mode_lib->mp.PSCL_THROUGHPUT[k],
10137 &mode_lib->mp.PSCL_THROUGHPUT_CHROMA[k],
10138 &mode_lib->mp.DPPCLKUsingSingleDPP[k]);
10139 }
10140
10141 for (k = 0; k < s->num_active_planes; ++k) {
10142 CalculateBytePerPixelAndBlockSizes(
10143 display_cfg->plane_descriptors[k].pixel_format,
10144 display_cfg->plane_descriptors[k].surface.tiling,
10145 display_cfg->plane_descriptors[k].surface.plane0.pitch,
10146 display_cfg->plane_descriptors[k].surface.plane1.pitch,
10147
10148 // Output
10149 &mode_lib->mp.BytePerPixelY[k],
10150 &mode_lib->mp.BytePerPixelC[k],
10151 &mode_lib->mp.BytePerPixelInDETY[k],
10152 &mode_lib->mp.BytePerPixelInDETC[k],
10153 &mode_lib->mp.Read256BlockHeightY[k],
10154 &mode_lib->mp.Read256BlockHeightC[k],
10155 &mode_lib->mp.Read256BlockWidthY[k],
10156 &mode_lib->mp.Read256BlockWidthC[k],
10157 &mode_lib->mp.MacroTileHeightY[k],
10158 &mode_lib->mp.MacroTileHeightC[k],
10159 &mode_lib->mp.MacroTileWidthY[k],
10160 &mode_lib->mp.MacroTileWidthC[k],
10161 &mode_lib->mp.surf_linear128_l[k],
10162 &mode_lib->mp.surf_linear128_c[k]);
10163 }
10164
10165 CalculateSwathWidth(
10166 display_cfg,
10167 false, // ForceSingleDPP
10168 s->num_active_planes,
10169 mode_lib->mp.ODMMode,
10170 mode_lib->mp.BytePerPixelY,
10171 mode_lib->mp.BytePerPixelC,
10172 mode_lib->mp.Read256BlockHeightY,
10173 mode_lib->mp.Read256BlockHeightC,
10174 mode_lib->mp.Read256BlockWidthY,
10175 mode_lib->mp.Read256BlockWidthC,
10176 mode_lib->mp.surf_linear128_l,
10177 mode_lib->mp.surf_linear128_c,
10178 mode_lib->mp.NoOfDPP,
10179
10180 /* Output */
10181 mode_lib->mp.req_per_swath_ub_l,
10182 mode_lib->mp.req_per_swath_ub_c,
10183 mode_lib->mp.SwathWidthSingleDPPY,
10184 mode_lib->mp.SwathWidthSingleDPPC,
10185 mode_lib->mp.SwathWidthY,
10186 mode_lib->mp.SwathWidthC,
10187 s->dummy_integer_array[0], // unsigned int MaximumSwathHeightY[]
10188 s->dummy_integer_array[1], // unsigned int MaximumSwathHeightC[]
10189 mode_lib->mp.swath_width_luma_ub,
10190 mode_lib->mp.swath_width_chroma_ub);
10191
10192 for (k = 0; k < s->num_active_planes; ++k) {
10193 mode_lib->mp.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 /
10194 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000));
10195 mode_lib->mp.SurfaceReadBandwidthLuma[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
10196 mode_lib->mp.SurfaceReadBandwidthChroma[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
10197 dml2_printf("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]);
10198 dml2_printf("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]);
10199 }
10200
10201 CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg;
10202 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSizeInKByte = mode_lib->ip.config_return_buffer_size_in_kbytes;
10203 CalculateSwathAndDETConfiguration_params->MaxTotalDETInKByte = s->MaxTotalDETInKByte;
10204 CalculateSwathAndDETConfiguration_params->MinCompressedBufferSizeInKByte = s->MinCompressedBufferSizeInKByte;
10205 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
10206 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
10207 CalculateSwathAndDETConfiguration_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes;
10208 CalculateSwathAndDETConfiguration_params->pixel_chunk_size_kbytes = mode_lib->ip.pixel_chunk_size_kbytes;
10209 CalculateSwathAndDETConfiguration_params->ForceSingleDPP = false;
10210 CalculateSwathAndDETConfiguration_params->NumberOfActiveSurfaces = s->num_active_planes;
10211 CalculateSwathAndDETConfiguration_params->nomDETInKByte = s->NomDETInKByte;
10212 CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
10213 CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
10214 CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.SurfaceReadBandwidthLuma;
10215 CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.SurfaceReadBandwidthChroma;
10216 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0];
10217 CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1];
10218 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->mp.Read256BlockHeightY;
10219 CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightC = mode_lib->mp.Read256BlockHeightC;
10220 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthY = mode_lib->mp.Read256BlockWidthY;
10221 CalculateSwathAndDETConfiguration_params->Read256BytesBlockWidthC = mode_lib->mp.Read256BlockWidthC;
10222 CalculateSwathAndDETConfiguration_params->surf_linear128_l = mode_lib->mp.surf_linear128_l;
10223 CalculateSwathAndDETConfiguration_params->surf_linear128_c = mode_lib->mp.surf_linear128_c;
10224 CalculateSwathAndDETConfiguration_params->ODMMode = mode_lib->mp.ODMMode;
10225 CalculateSwathAndDETConfiguration_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
10226 CalculateSwathAndDETConfiguration_params->BytePerPixY = mode_lib->mp.BytePerPixelY;
10227 CalculateSwathAndDETConfiguration_params->BytePerPixC = mode_lib->mp.BytePerPixelC;
10228 CalculateSwathAndDETConfiguration_params->BytePerPixDETY = mode_lib->mp.BytePerPixelInDETY;
10229 CalculateSwathAndDETConfiguration_params->BytePerPixDETC = mode_lib->mp.BytePerPixelInDETC;
10230 CalculateSwathAndDETConfiguration_params->mrq_present = mode_lib->ip.dcn_mrq_present;
10231
10232 // output
10233 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_l = mode_lib->mp.req_per_swath_ub_l;
10234 CalculateSwathAndDETConfiguration_params->req_per_swath_ub_c = mode_lib->mp.req_per_swath_ub_c;
10235 CalculateSwathAndDETConfiguration_params->swath_width_luma_ub = s->dummy_long_array[0];
10236 CalculateSwathAndDETConfiguration_params->swath_width_chroma_ub = s->dummy_long_array[1];
10237 CalculateSwathAndDETConfiguration_params->SwathWidth = s->dummy_long_array[2];
10238 CalculateSwathAndDETConfiguration_params->SwathWidthChroma = s->dummy_long_array[3];
10239 CalculateSwathAndDETConfiguration_params->SwathHeightY = mode_lib->mp.SwathHeightY;
10240 CalculateSwathAndDETConfiguration_params->SwathHeightC = mode_lib->mp.SwathHeightC;
10241 CalculateSwathAndDETConfiguration_params->request_size_bytes_luma = mode_lib->mp.request_size_bytes_luma;
10242 CalculateSwathAndDETConfiguration_params->request_size_bytes_chroma = mode_lib->mp.request_size_bytes_chroma;
10243 CalculateSwathAndDETConfiguration_params->DETBufferSizeInKByte = mode_lib->mp.DETBufferSizeInKByte;
10244 CalculateSwathAndDETConfiguration_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
10245 CalculateSwathAndDETConfiguration_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC;
10246 CalculateSwathAndDETConfiguration_params->full_swath_bytes_l = s->full_swath_bytes_l;
10247 CalculateSwathAndDETConfiguration_params->full_swath_bytes_c = s->full_swath_bytes_c;
10248 CalculateSwathAndDETConfiguration_params->UnboundedRequestEnabled = &mode_lib->mp.UnboundedRequestEnabled;
10249 CalculateSwathAndDETConfiguration_params->compbuf_reserved_space_64b = &mode_lib->mp.compbuf_reserved_space_64b;
10250 CalculateSwathAndDETConfiguration_params->hw_debug5 = &mode_lib->mp.hw_debug5;
10251 CalculateSwathAndDETConfiguration_params->CompressedBufferSizeInkByte = &mode_lib->mp.CompressedBufferSizeInkByte;
10252 CalculateSwathAndDETConfiguration_params->ViewportSizeSupportPerSurface = &s->dummy_boolean_array[0][0];
10253 CalculateSwathAndDETConfiguration_params->ViewportSizeSupport = &s->dummy_boolean[0];
10254
10255 // Calculate DET size, swath height here.
10256 CalculateSwathAndDETConfiguration(&mode_lib->scratch, CalculateSwathAndDETConfiguration_params);
10257
10258 // DSC Delay
10259 for (k = 0; k < s->num_active_planes; ++k) {
10260 mode_lib->mp.DSCDelay[k] = DSCDelayRequirement(cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].dsc_enable,
10261 mode_lib->mp.ODMMode[k],
10262 mode_lib->ip.maximum_dsc_bits_per_component,
10263 s->OutputBpp[k],
10264 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active,
10265 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total,
10266 cfg_support_info->stream_support_info[display_cfg->plane_descriptors[k].stream_index].num_dsc_slices,
10267 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format,
10268 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_encoder,
10269 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
10270 s->PixelClockBackEnd[k]);
10271 }
10272
10273 // Prefetch
10274 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0) {
10275 for (k = 0; k < s->num_active_planes; ++k)
10276 mode_lib->mp.SurfaceSizeInTheMALL[k] = 0;
10277 } else {
10278 CalculateSurfaceSizeInMall(
10279 display_cfg,
10280 s->num_active_planes,
10281 mode_lib->soc.mall_allocated_for_dcn_mbytes,
10282 mode_lib->mp.BytePerPixelY,
10283 mode_lib->mp.BytePerPixelC,
10284 mode_lib->mp.Read256BlockWidthY,
10285 mode_lib->mp.Read256BlockWidthC,
10286 mode_lib->mp.Read256BlockHeightY,
10287 mode_lib->mp.Read256BlockHeightC,
10288 mode_lib->mp.MacroTileWidthY,
10289 mode_lib->mp.MacroTileWidthC,
10290 mode_lib->mp.MacroTileHeightY,
10291 mode_lib->mp.MacroTileHeightC,
10292
10293 /* Output */
10294 mode_lib->mp.SurfaceSizeInTheMALL,
10295 &s->dummy_boolean[0]); /* bool *ExceededMALLSize */
10296 }
10297
10298 for (k = 0; k < s->num_active_planes; ++k) {
10299 s->SurfaceParameters[k].PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10300 s->SurfaceParameters[k].DPPPerSurface = mode_lib->mp.NoOfDPP[k];
10301 s->SurfaceParameters[k].RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
10302 s->SurfaceParameters[k].ViewportHeight = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
10303 s->SurfaceParameters[k].ViewportHeightC = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
10304 s->SurfaceParameters[k].BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k];
10305 s->SurfaceParameters[k].BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k];
10306 s->SurfaceParameters[k].BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k];
10307 s->SurfaceParameters[k].BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k];
10308 s->SurfaceParameters[k].BlockWidthY = mode_lib->mp.MacroTileWidthY[k];
10309 s->SurfaceParameters[k].BlockHeightY = mode_lib->mp.MacroTileHeightY[k];
10310 s->SurfaceParameters[k].BlockWidthC = mode_lib->mp.MacroTileWidthC[k];
10311 s->SurfaceParameters[k].BlockHeightC = mode_lib->mp.MacroTileHeightC[k];
10312 s->SurfaceParameters[k].InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
10313 s->SurfaceParameters[k].HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
10314 s->SurfaceParameters[k].DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
10315 s->SurfaceParameters[k].SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
10316 s->SurfaceParameters[k].SurfaceTiling = display_cfg->plane_descriptors[k].surface.tiling;
10317 s->SurfaceParameters[k].BytePerPixelY = mode_lib->mp.BytePerPixelY[k];
10318 s->SurfaceParameters[k].BytePerPixelC = mode_lib->mp.BytePerPixelC[k];
10319 s->SurfaceParameters[k].ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
10320 s->SurfaceParameters[k].VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
10321 s->SurfaceParameters[k].VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
10322 s->SurfaceParameters[k].VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
10323 s->SurfaceParameters[k].VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
10324 s->SurfaceParameters[k].PitchY = display_cfg->plane_descriptors[k].surface.plane0.pitch;
10325 s->SurfaceParameters[k].PitchC = display_cfg->plane_descriptors[k].surface.plane1.pitch;
10326 s->SurfaceParameters[k].ViewportStationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
10327 s->SurfaceParameters[k].ViewportXStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
10328 s->SurfaceParameters[k].ViewportYStart = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
10329 s->SurfaceParameters[k].ViewportXStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10330 s->SurfaceParameters[k].ViewportYStartC = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10331 s->SurfaceParameters[k].FORCE_ONE_ROW_FOR_FRAME = display_cfg->plane_descriptors[k].overrides.hw.force_one_row_for_frame;
10332 s->SurfaceParameters[k].SwathHeightY = mode_lib->mp.SwathHeightY[k];
10333 s->SurfaceParameters[k].SwathHeightC = mode_lib->mp.SwathHeightC[k];
10334 s->SurfaceParameters[k].DCCMetaPitchY = display_cfg->plane_descriptors[k].surface.dcc.plane0.pitch;
10335 s->SurfaceParameters[k].DCCMetaPitchC = display_cfg->plane_descriptors[k].surface.dcc.plane1.pitch;
10336 }
10337
10338 CalculateVMRowAndSwath_params->display_cfg = display_cfg;
10339 CalculateVMRowAndSwath_params->NumberOfActiveSurfaces = s->num_active_planes;
10340 CalculateVMRowAndSwath_params->myPipe = s->SurfaceParameters;
10341 CalculateVMRowAndSwath_params->SurfaceSizeInMALL = mode_lib->mp.SurfaceSizeInTheMALL;
10342 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsLuma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
10343 CalculateVMRowAndSwath_params->PTEBufferSizeInRequestsChroma = mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma;
10344 CalculateVMRowAndSwath_params->MALLAllocatedForDCN = mode_lib->soc.mall_allocated_for_dcn_mbytes;
10345 CalculateVMRowAndSwath_params->SwathWidthY = mode_lib->mp.SwathWidthY;
10346 CalculateVMRowAndSwath_params->SwathWidthC = mode_lib->mp.SwathWidthC;
10347 CalculateVMRowAndSwath_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
10348 CalculateVMRowAndSwath_params->DCCMetaBufferSizeBytes = mode_lib->ip.dcc_meta_buffer_size_bytes;
10349 CalculateVMRowAndSwath_params->mrq_present = mode_lib->ip.dcn_mrq_present;
10350
10351 // output
10352 CalculateVMRowAndSwath_params->PTEBufferSizeNotExceeded = s->dummy_boolean_array[0];
10353 CalculateVMRowAndSwath_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub;
10354 CalculateVMRowAndSwath_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub;
10355 CalculateVMRowAndSwath_params->dpte_row_height_luma = mode_lib->mp.dpte_row_height;
10356 CalculateVMRowAndSwath_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma;
10357 CalculateVMRowAndSwath_params->dpte_row_height_linear_luma = mode_lib->mp.dpte_row_height_linear;
10358 CalculateVMRowAndSwath_params->dpte_row_height_linear_chroma = mode_lib->mp.dpte_row_height_linear_chroma;
10359 CalculateVMRowAndSwath_params->vm_group_bytes = mode_lib->mp.vm_group_bytes;
10360 CalculateVMRowAndSwath_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
10361 CalculateVMRowAndSwath_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY;
10362 CalculateVMRowAndSwath_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY;
10363 CalculateVMRowAndSwath_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY;
10364 CalculateVMRowAndSwath_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC;
10365 CalculateVMRowAndSwath_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC;
10366 CalculateVMRowAndSwath_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC;
10367 CalculateVMRowAndSwath_params->vmpg_width_y = s->vmpg_width_y;
10368 CalculateVMRowAndSwath_params->vmpg_height_y = s->vmpg_height_y;
10369 CalculateVMRowAndSwath_params->vmpg_width_c = s->vmpg_width_c;
10370 CalculateVMRowAndSwath_params->vmpg_height_c = s->vmpg_height_c;
10371 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_l = mode_lib->mp.dpde0_bytes_per_frame_ub_l;
10372 CalculateVMRowAndSwath_params->dpde0_bytes_per_frame_ub_c = mode_lib->mp.dpde0_bytes_per_frame_ub_c;
10373 CalculateVMRowAndSwath_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY;
10374 CalculateVMRowAndSwath_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC;
10375 CalculateVMRowAndSwath_params->VInitPreFillY = mode_lib->mp.VInitPreFillY;
10376 CalculateVMRowAndSwath_params->VInitPreFillC = mode_lib->mp.VInitPreFillC;
10377 CalculateVMRowAndSwath_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY;
10378 CalculateVMRowAndSwath_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC;
10379 CalculateVMRowAndSwath_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
10380 CalculateVMRowAndSwath_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow;
10381 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
10382 CalculateVMRowAndSwath_params->dpte_row_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
10383 CalculateVMRowAndSwath_params->vm_bytes = mode_lib->mp.vm_bytes;
10384 CalculateVMRowAndSwath_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame;
10385 CalculateVMRowAndSwath_params->use_one_row_for_frame_flip = mode_lib->mp.use_one_row_for_frame_flip;
10386 CalculateVMRowAndSwath_params->is_using_mall_for_ss = mode_lib->mp.is_using_mall_for_ss;
10387 CalculateVMRowAndSwath_params->PTE_BUFFER_MODE = mode_lib->mp.PTE_BUFFER_MODE;
10388 CalculateVMRowAndSwath_params->BIGK_FRAGMENT_SIZE = mode_lib->mp.BIGK_FRAGMENT_SIZE;
10389 CalculateVMRowAndSwath_params->DCCMetaBufferSizeNotExceeded = s->dummy_boolean_array[1];
10390 CalculateVMRowAndSwath_params->meta_row_bw = mode_lib->mp.meta_row_bw;
10391 CalculateVMRowAndSwath_params->meta_row_bytes = mode_lib->mp.meta_row_bytes;
10392 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
10393 CalculateVMRowAndSwath_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
10394 CalculateVMRowAndSwath_params->meta_req_width_luma = mode_lib->mp.meta_req_width;
10395 CalculateVMRowAndSwath_params->meta_req_height_luma = mode_lib->mp.meta_req_height;
10396 CalculateVMRowAndSwath_params->meta_row_width_luma = mode_lib->mp.meta_row_width;
10397 CalculateVMRowAndSwath_params->meta_row_height_luma = mode_lib->mp.meta_row_height;
10398 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_l = mode_lib->mp.meta_pte_bytes_per_frame_ub_l;
10399 CalculateVMRowAndSwath_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma;
10400 CalculateVMRowAndSwath_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma;
10401 CalculateVMRowAndSwath_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma;
10402 CalculateVMRowAndSwath_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma;
10403 CalculateVMRowAndSwath_params->meta_pte_bytes_per_frame_ub_c = mode_lib->mp.meta_pte_bytes_per_frame_ub_c;
10404
10405 CalculateVMRowAndSwath(&mode_lib->scratch, CalculateVMRowAndSwath_params);
10406
10407 memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params));
10408 if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) {
10409 for (k = 0; k < s->num_active_planes; k++) {
10410 mode_lib->mp.mall_prefetch_sdp_overhead_factor[k] = 1.0;
10411 mode_lib->mp.mall_prefetch_dram_overhead_factor[k] = 1.0;
10412 mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k] = 1.0;
10413 mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k] = 1.0;
10414 mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k] = 1.0;
10415 mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k] = 1.0;
10416 }
10417 } else {
10418 for (k = 0; k < s->num_active_planes; k++) {
10419 calculate_mcache_setting_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
10420 calculate_mcache_setting_params->num_chans = mode_lib->soc.clk_table.dram_config.channel_count;
10421 calculate_mcache_setting_params->mem_word_bytes = mode_lib->soc.mem_word_bytes;
10422 calculate_mcache_setting_params->mcache_size_bytes = mode_lib->soc.mcache_size_bytes;
10423 calculate_mcache_setting_params->mcache_line_size_bytes = mode_lib->soc.mcache_line_size_bytes;
10424 calculate_mcache_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
10425 calculate_mcache_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
10426
10427 calculate_mcache_setting_params->source_format = display_cfg->plane_descriptors[k].pixel_format;
10428 calculate_mcache_setting_params->surf_vert = dml_is_vertical_rotation(display_cfg->plane_descriptors[k].composition.rotation_angle);
10429 calculate_mcache_setting_params->vp_stationary = display_cfg->plane_descriptors[k].composition.viewport.stationary;
10430 calculate_mcache_setting_params->tiling_mode = display_cfg->plane_descriptors[k].surface.tiling;
10431 calculate_mcache_setting_params->imall_enable = mode_lib->ip.imall_supported && display_cfg->plane_descriptors[k].overrides.legacy_svp_config == dml2_svp_mode_override_imall;
10432
10433 calculate_mcache_setting_params->vp_start_x_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.x_start;
10434 calculate_mcache_setting_params->vp_start_y_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.y_start;
10435 calculate_mcache_setting_params->full_vp_width_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.width;
10436 calculate_mcache_setting_params->full_vp_height_l = display_cfg->plane_descriptors[k].composition.viewport.plane0.height;
10437 calculate_mcache_setting_params->blk_width_l = mode_lib->mp.MacroTileWidthY[k];
10438 calculate_mcache_setting_params->blk_height_l = mode_lib->mp.MacroTileHeightY[k];
10439 calculate_mcache_setting_params->vmpg_width_l = s->vmpg_width_y[k];
10440 calculate_mcache_setting_params->vmpg_height_l = s->vmpg_height_y[k];
10441 calculate_mcache_setting_params->full_swath_bytes_l = s->full_swath_bytes_l[k];
10442 calculate_mcache_setting_params->bytes_per_pixel_l = mode_lib->mp.BytePerPixelY[k];
10443
10444 calculate_mcache_setting_params->vp_start_x_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10445 calculate_mcache_setting_params->vp_start_y_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.y_start;
10446 calculate_mcache_setting_params->full_vp_width_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.width;
10447 calculate_mcache_setting_params->full_vp_height_c = display_cfg->plane_descriptors[k].composition.viewport.plane1.height;
10448 calculate_mcache_setting_params->blk_width_c = mode_lib->mp.MacroTileWidthC[k];
10449 calculate_mcache_setting_params->blk_height_c = mode_lib->mp.MacroTileHeightC[k];
10450 calculate_mcache_setting_params->vmpg_width_c = s->vmpg_width_c[k];
10451 calculate_mcache_setting_params->vmpg_height_c = s->vmpg_height_c[k];
10452 calculate_mcache_setting_params->full_swath_bytes_c = s->full_swath_bytes_c[k];
10453 calculate_mcache_setting_params->bytes_per_pixel_c = mode_lib->mp.BytePerPixelC[k];
10454
10455 // output
10456 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0[k];
10457 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_l = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0[k];
10458 calculate_mcache_setting_params->dcc_dram_bw_nom_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1[k];
10459 calculate_mcache_setting_params->dcc_dram_bw_pref_overhead_factor_c = &mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1[k];
10460
10461 calculate_mcache_setting_params->num_mcaches_l = &mode_lib->mp.num_mcaches_l[k];
10462 calculate_mcache_setting_params->mcache_row_bytes_l = &mode_lib->mp.mcache_row_bytes_l[k];
10463 calculate_mcache_setting_params->mcache_offsets_l = mode_lib->mp.mcache_offsets_l[k];
10464 calculate_mcache_setting_params->mcache_shift_granularity_l = &mode_lib->mp.mcache_shift_granularity_l[k];
10465
10466 calculate_mcache_setting_params->num_mcaches_c = &mode_lib->mp.num_mcaches_c[k];
10467 calculate_mcache_setting_params->mcache_row_bytes_c = &mode_lib->mp.mcache_row_bytes_c[k];
10468 calculate_mcache_setting_params->mcache_offsets_c = mode_lib->mp.mcache_offsets_c[k];
10469 calculate_mcache_setting_params->mcache_shift_granularity_c = &mode_lib->mp.mcache_shift_granularity_c[k];
10470
10471 calculate_mcache_setting_params->mall_comb_mcache_l = &mode_lib->mp.mall_comb_mcache_l[k];
10472 calculate_mcache_setting_params->mall_comb_mcache_c = &mode_lib->mp.mall_comb_mcache_c[k];
10473 calculate_mcache_setting_params->lc_comb_mcache = &mode_lib->mp.lc_comb_mcache[k];
10474 calculate_mcache_setting(&mode_lib->scratch, calculate_mcache_setting_params);
10475 }
10476
10477 calculate_mall_bw_overhead_factor(
10478 mode_lib->mp.mall_prefetch_sdp_overhead_factor,
10479 mode_lib->mp.mall_prefetch_dram_overhead_factor,
10480
10481 // input
10482 display_cfg,
10483 s->num_active_planes);
10484 }
10485
	// Calculate all the bandwidth available
10487 calculate_bandwidth_available(
10488 mode_lib->mp.avg_bandwidth_available_min,
10489 mode_lib->mp.avg_bandwidth_available,
10490 mode_lib->mp.urg_bandwidth_available_min,
10491 mode_lib->mp.urg_bandwidth_available,
10492 mode_lib->mp.urg_bandwidth_available_vm_only,
10493 mode_lib->mp.urg_bandwidth_available_pixel_and_vm,
10494
10495 &mode_lib->soc,
10496 display_cfg->hostvm_enable,
10497 mode_lib->mp.Dcfclk,
10498 mode_lib->mp.FabricClock,
10499 mode_lib->mp.dram_bw_mbps);
10500
10501
10502 calculate_hostvm_inefficiency_factor(
10503 &s->HostVMInefficiencyFactor,
10504 &s->HostVMInefficiencyFactorPrefetch,
10505
10506 display_cfg->gpuvm_enable,
10507 display_cfg->hostvm_enable,
10508 mode_lib->ip.remote_iommu_outstanding_translations,
10509 mode_lib->soc.max_outstanding_reqs,
10510 mode_lib->mp.urg_bandwidth_available_pixel_and_vm[dml2_core_internal_soc_state_sys_active],
10511 mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_sys_active]);
10512
10513 s->TotalDCCActiveDPP = 0;
10514 s->TotalActiveDPP = 0;
10515 for (k = 0; k < s->num_active_planes; ++k) {
10516 s->TotalActiveDPP = s->TotalActiveDPP + mode_lib->mp.NoOfDPP[k];
10517 if (display_cfg->plane_descriptors[k].surface.dcc.enable)
10518 s->TotalDCCActiveDPP = s->TotalDCCActiveDPP + mode_lib->mp.NoOfDPP[k];
10519 }
10520 // Calculate tdlut schedule related terms
10521 for (k = 0; k <= s->num_active_planes - 1; k++) {
10522 calculate_tdlut_setting_params->dispclk_mhz = mode_lib->mp.Dispclk;
10523 calculate_tdlut_setting_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
10524 calculate_tdlut_setting_params->tdlut_width_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_width_mode;
10525 calculate_tdlut_setting_params->tdlut_addressing_mode = display_cfg->plane_descriptors[k].tdlut.tdlut_addressing_mode;
10526 calculate_tdlut_setting_params->cursor_buffer_size = mode_lib->ip.cursor_buffer_size;
10527 calculate_tdlut_setting_params->gpuvm_enable = display_cfg->gpuvm_enable;
10528 calculate_tdlut_setting_params->gpuvm_page_size_kbytes = display_cfg->plane_descriptors[k].overrides.gpuvm_min_page_size_kbytes;
10529
10530 // output
10531 calculate_tdlut_setting_params->tdlut_pte_bytes_per_frame = &s->tdlut_pte_bytes_per_frame[k];
10532 calculate_tdlut_setting_params->tdlut_bytes_per_frame = &s->tdlut_bytes_per_frame[k];
10533 calculate_tdlut_setting_params->tdlut_groups_per_2row_ub = &s->tdlut_groups_per_2row_ub[k];
10534 calculate_tdlut_setting_params->tdlut_opt_time = &s->tdlut_opt_time[k];
10535 calculate_tdlut_setting_params->tdlut_drain_time = &s->tdlut_drain_time[k];
10536 calculate_tdlut_setting_params->tdlut_bytes_per_group = &s->tdlut_bytes_per_group[k];
10537
10538 calculate_tdlut_setting(&mode_lib->scratch, calculate_tdlut_setting_params);
10539 }
10540
10541 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn3)
10542 s->ReorderingBytes = (unsigned int)(mode_lib->soc.clk_table.dram_config.channel_count * math_max3(mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_only_bytes,
10543 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_pixel_and_vm_bytes,
10544 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_out_of_order_return_per_channel_vm_only_bytes));
10545
10546 CalculateExtraLatency(
10547 display_cfg,
10548 mode_lib->ip.rob_buffer_size_kbytes,
10549 mode_lib->soc.qos_parameters.qos_params.dcn32x.loaded_round_trip_latency_fclk_cycles,
10550 s->ReorderingBytes,
10551 mode_lib->mp.Dcfclk,
10552 mode_lib->mp.FabricClock,
10553 mode_lib->ip.pixel_chunk_size_kbytes,
10554 mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active],
10555 s->num_active_planes,
10556 mode_lib->mp.NoOfDPP,
10557 mode_lib->mp.dpte_group_bytes,
10558 s->tdlut_bytes_per_group,
10559 s->HostVMInefficiencyFactor,
10560 s->HostVMInefficiencyFactorPrefetch,
10561 mode_lib->soc.hostvm_min_page_size_kbytes,
10562 mode_lib->soc.qos_parameters.qos_type,
10563 !(display_cfg->overrides.max_outstanding_when_urgent_expected_disable),
10564 mode_lib->soc.max_outstanding_reqs,
10565 mode_lib->mp.request_size_bytes_luma,
10566 mode_lib->mp.request_size_bytes_chroma,
10567 mode_lib->ip.meta_chunk_size_kbytes,
10568 mode_lib->ip.dchub_arb_to_ret_delay,
10569 mode_lib->mp.TripToMemory,
10570 mode_lib->ip.hostvm_mode,
10571
10572 // output
10573 &mode_lib->mp.ExtraLatency,
10574 &mode_lib->mp.ExtraLatency_sr,
10575 &mode_lib->mp.ExtraLatencyPrefetch);
10576
10577 mode_lib->mp.TCalc = 24.0 / mode_lib->mp.DCFCLKDeepSleep;
10578
10579 for (k = 0; k < s->num_active_planes; ++k) {
10580 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
10581 mode_lib->mp.WritebackDelay[k] =
10582 mode_lib->soc.qos_parameters.writeback.base_latency_us
10583 + CalculateWriteBackDelay(
10584 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format,
10585 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio,
10586 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio,
10587 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps,
10588 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width,
10589 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height,
10590 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height,
10591 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk;
10592 } else
10593 mode_lib->mp.WritebackDelay[k] = 0;
10594 }
10595
10596 /* VActive bytes to fetch for UCLK P-State */
10597 calculate_bytes_to_fetch_required_to_hide_latency_params->display_cfg = display_cfg;
10598 calculate_bytes_to_fetch_required_to_hide_latency_params->mrq_present = mode_lib->ip.dcn_mrq_present;
10599
10600 calculate_bytes_to_fetch_required_to_hide_latency_params->num_active_planes = s->num_active_planes;
10601 calculate_bytes_to_fetch_required_to_hide_latency_params->num_of_dpp = mode_lib->mp.NoOfDPP;
10602 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_l = mode_lib->mp.meta_row_height;
10603 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma;
10604 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_l = s->meta_row_bytes_per_row_ub_l;
10605 calculate_bytes_to_fetch_required_to_hide_latency_params->meta_row_bytes_per_row_ub_c = s->meta_row_bytes_per_row_ub_c;
10606 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_l = mode_lib->mp.dpte_row_height;
10607 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_row_height_c = mode_lib->mp.dpte_row_height_chroma;
10608 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_l = s->dpte_row_bytes_per_row_l;
10609 calculate_bytes_to_fetch_required_to_hide_latency_params->dpte_bytes_per_row_c = s->dpte_row_bytes_per_row_c;
10610 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_l = mode_lib->mp.BytePerPixelY;
10611 calculate_bytes_to_fetch_required_to_hide_latency_params->byte_per_pix_c = mode_lib->mp.BytePerPixelC;
10612 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_l = mode_lib->mp.SwathWidthY;
10613 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_width_c = mode_lib->mp.SwathWidthC;
10614 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_l = mode_lib->mp.SwathHeightY;
10615 calculate_bytes_to_fetch_required_to_hide_latency_params->swath_height_c = mode_lib->mp.SwathHeightC;
10616 calculate_bytes_to_fetch_required_to_hide_latency_params->latency_to_hide_us = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
10617
10618 /* outputs */
10619 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_l = s->pstate_bytes_required_l;
10620 calculate_bytes_to_fetch_required_to_hide_latency_params->bytes_required_c = s->pstate_bytes_required_c;
10621
10622 calculate_bytes_to_fetch_required_to_hide_latency(calculate_bytes_to_fetch_required_to_hide_latency_params);
10623
10624 /* Excess VActive bandwidth required to fill DET */
10625 calculate_excess_vactive_bandwidth_required(
10626 display_cfg,
10627 s->num_active_planes,
10628 s->pstate_bytes_required_l,
10629 s->pstate_bytes_required_c,
10630 /* outputs */
10631 mode_lib->mp.excess_vactive_fill_bw_l,
10632 mode_lib->mp.excess_vactive_fill_bw_c);
10633
10634 mode_lib->mp.UrgentLatency = CalculateUrgentLatency(
10635 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_us,
10636 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_pixel_vm_us,
10637 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.base_latency_vm_us,
10638 mode_lib->soc.do_urgent_latency_adjustment,
10639 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_fclk_us,
10640 mode_lib->soc.qos_parameters.qos_params.dcn32x.urgent_latency_us.scaling_factor_mhz,
10641 mode_lib->mp.FabricClock,
10642 mode_lib->mp.uclk_freq_mhz,
10643 mode_lib->soc.qos_parameters.qos_type,
10644 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].urgent_ramp_uclk_cycles,
10645 mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles,
10646 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
10647 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
10648 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_urgent_ramp_latency_margin,
10649 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
10650
10651 mode_lib->mp.TripToMemory = CalculateTripToMemory(
10652 mode_lib->mp.UrgentLatency,
10653 mode_lib->mp.FabricClock,
10654 mode_lib->mp.uclk_freq_mhz,
10655 mode_lib->soc.qos_parameters.qos_type,
10656 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].trip_to_memory_uclk_cycles,
10657 mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles,
10658 mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles,
10659 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
10660 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
10661
10662 mode_lib->mp.TripToMemory = math_max2(mode_lib->mp.UrgentLatency, mode_lib->mp.TripToMemory);
10663
10664 mode_lib->mp.MetaTripToMemory = CalculateMetaTripToMemory(
10665 mode_lib->mp.UrgentLatency,
10666 mode_lib->mp.FabricClock,
10667 mode_lib->mp.uclk_freq_mhz,
10668 mode_lib->soc.qos_parameters.qos_type,
10669 mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].meta_trip_to_memory_uclk_cycles,
10670 mode_lib->soc.qos_parameters.qos_params.dcn4x.meta_trip_adder_fclk_cycles,
10671 mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin,
10672 mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin);
10673
10674 for (k = 0; k < s->num_active_planes; ++k) {
10675 bool cursor_not_enough_urgent_latency_hiding = 0;
10676 double line_time_us = 0.0;
10677
10678 line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
10679 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10680 if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) {
10681 calculate_cursor_req_attributes(
10682 display_cfg->plane_descriptors[k].cursor.cursor_width,
10683 display_cfg->plane_descriptors[k].cursor.cursor_bpp,
10684
10685 // output
10686 &s->cursor_lines_per_chunk[k],
10687 &s->cursor_bytes_per_line[k],
10688 &s->cursor_bytes_per_chunk[k],
10689 &s->cursor_bytes[k]);
10690
10691 calculate_cursor_urgent_burst_factor(
10692 mode_lib->ip.cursor_buffer_size,
10693 display_cfg->plane_descriptors[k].cursor.cursor_width,
10694 s->cursor_bytes_per_chunk[k],
10695 s->cursor_lines_per_chunk[k],
10696 line_time_us,
10697 mode_lib->mp.UrgentLatency,
10698
10699 // output
10700 &mode_lib->mp.UrgentBurstFactorCursor[k],
10701 &cursor_not_enough_urgent_latency_hiding);
10702 }
10703 mode_lib->mp.UrgentBurstFactorCursorPre[k] = mode_lib->mp.UrgentBurstFactorCursor[k];
10704
10705 CalculateUrgentBurstFactor(
10706 &display_cfg->plane_descriptors[k],
10707 mode_lib->mp.swath_width_luma_ub[k],
10708 mode_lib->mp.swath_width_chroma_ub[k],
10709 mode_lib->mp.SwathHeightY[k],
10710 mode_lib->mp.SwathHeightC[k],
10711 line_time_us,
10712 mode_lib->mp.UrgentLatency,
10713 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
10714 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
10715 mode_lib->mp.BytePerPixelInDETY[k],
10716 mode_lib->mp.BytePerPixelInDETC[k],
10717 mode_lib->mp.DETBufferSizeY[k],
10718 mode_lib->mp.DETBufferSizeC[k],
10719
10720 /* output */
10721 &mode_lib->mp.UrgentBurstFactorLuma[k],
10722 &mode_lib->mp.UrgentBurstFactorChroma[k],
10723 &mode_lib->mp.NotEnoughUrgentLatencyHiding[k]);
10724
10725 mode_lib->mp.NotEnoughUrgentLatencyHiding[k] = mode_lib->mp.NotEnoughUrgentLatencyHiding[k] || cursor_not_enough_urgent_latency_hiding;
10726 }
10727
10728 for (k = 0; k < s->num_active_planes; ++k) {
10729 s->MaxVStartupLines[k] = CalculateMaxVStartup(
10730 mode_lib->ip.ptoi_supported,
10731 mode_lib->ip.vblank_nom_default_us,
10732 &display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing,
10733 mode_lib->mp.WritebackDelay[k]);
10734
10735 #ifdef __DML_VBA_DEBUG__
10736 dml2_printf("DML::%s: k=%u MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
10737 dml2_printf("DML::%s: k=%u WritebackDelay = %f\n", __func__, k, mode_lib->mp.WritebackDelay[k]);
10738 #endif
10739 }
10740
10741 s->immediate_flip_required = false;
10742 for (k = 0; k < s->num_active_planes; ++k) {
10743 s->immediate_flip_required = s->immediate_flip_required || display_cfg->plane_descriptors[k].immediate_flip;
10744 }
10745 #ifdef __DML_VBA_DEBUG__
10746 dml2_printf("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required);
10747 #endif
10748
10749 {
10750 s->DestinationLineTimesForPrefetchLessThan2 = false;
10751 s->VRatioPrefetchMoreThanMax = false;
10752
10753 dml2_printf("DML::%s: Start one iteration of prefetch schedule evaluation\n", __func__);
10754
10755 for (k = 0; k < s->num_active_planes; ++k) {
10756 struct dml2_core_internal_DmlPipe *myPipe = &s->myPipe;
10757
10758 dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
10759 mode_lib->mp.TWait[k] = CalculateTWait(
10760 display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns,
10761 mode_lib->mp.UrgentLatency,
10762 mode_lib->mp.TripToMemory,
10763 !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ?
10764 get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0);
10765
10766 myPipe->Dppclk = mode_lib->mp.Dppclk[k];
10767 myPipe->Dispclk = mode_lib->mp.Dispclk;
10768 myPipe->PixelClock = ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10769 myPipe->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep;
10770 myPipe->DPPPerSurface = mode_lib->mp.NoOfDPP[k];
10771 myPipe->ScalerEnabled = display_cfg->plane_descriptors[k].composition.scaler_info.enabled;
10772 myPipe->VRatio = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio;
10773 myPipe->VRatioChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio;
10774 myPipe->VTaps = display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_taps;
10775 myPipe->VTapsChroma = display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_taps;
10776 myPipe->RotationAngle = display_cfg->plane_descriptors[k].composition.rotation_angle;
10777 myPipe->mirrored = display_cfg->plane_descriptors[k].composition.mirrored;
10778 myPipe->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY[k];
10779 myPipe->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY[k];
10780 myPipe->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC[k];
10781 myPipe->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC[k];
10782 myPipe->InterlaceEnable = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced;
10783 myPipe->NumberOfCursors = display_cfg->plane_descriptors[k].cursor.num_cursors;
10784 myPipe->VBlank = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active;
10785 myPipe->HTotal = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total;
10786 myPipe->HActive = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_active;
10787 myPipe->DCCEnable = display_cfg->plane_descriptors[k].surface.dcc.enable;
10788 myPipe->ODMMode = mode_lib->mp.ODMMode[k];
10789 myPipe->SourcePixelFormat = display_cfg->plane_descriptors[k].pixel_format;
10790 myPipe->BytePerPixelY = mode_lib->mp.BytePerPixelY[k];
10791 myPipe->BytePerPixelC = mode_lib->mp.BytePerPixelC[k];
10792 myPipe->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
10793
10794 #ifdef __DML_VBA_DEBUG__
10795 dml2_printf("DML::%s: Calling CalculatePrefetchSchedule for k=%u\n", __func__, k);
10796 #endif
10797 CalculatePrefetchSchedule_params->display_cfg = display_cfg;
10798 CalculatePrefetchSchedule_params->HostVMInefficiencyFactor = s->HostVMInefficiencyFactorPrefetch;
10799 CalculatePrefetchSchedule_params->myPipe = myPipe;
10800 CalculatePrefetchSchedule_params->DSCDelay = mode_lib->mp.DSCDelay[k];
10801 CalculatePrefetchSchedule_params->DPPCLKDelaySubtotalPlusCNVCFormater = mode_lib->ip.dppclk_delay_subtotal + mode_lib->ip.dppclk_delay_cnvc_formatter;
10802 CalculatePrefetchSchedule_params->DPPCLKDelaySCL = mode_lib->ip.dppclk_delay_scl;
10803 CalculatePrefetchSchedule_params->DPPCLKDelaySCLLBOnly = mode_lib->ip.dppclk_delay_scl_lb_only;
10804 CalculatePrefetchSchedule_params->DPPCLKDelayCNVCCursor = mode_lib->ip.dppclk_delay_cnvc_cursor;
10805 CalculatePrefetchSchedule_params->DISPCLKDelaySubtotal = mode_lib->ip.dispclk_delay_subtotal;
10806 CalculatePrefetchSchedule_params->DPP_RECOUT_WIDTH = (unsigned int)(mode_lib->mp.SwathWidthY[k] / display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio);
10807 CalculatePrefetchSchedule_params->OutputFormat = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].output.output_format;
10808 CalculatePrefetchSchedule_params->MaxInterDCNTileRepeaters = mode_lib->ip.max_inter_dcn_tile_repeaters;
10809 CalculatePrefetchSchedule_params->VStartup = s->MaxVStartupLines[k];
10810 CalculatePrefetchSchedule_params->HostVMMinPageSize = mode_lib->soc.hostvm_min_page_size_kbytes;
10811 CalculatePrefetchSchedule_params->DynamicMetadataEnable = display_cfg->plane_descriptors[k].dynamic_meta_data.enable;
10812 CalculatePrefetchSchedule_params->DynamicMetadataVMEnabled = mode_lib->ip.dynamic_metadata_vm_enabled;
10813 CalculatePrefetchSchedule_params->DynamicMetadataLinesBeforeActiveRequired = display_cfg->plane_descriptors[k].dynamic_meta_data.lines_before_active_required;
10814 CalculatePrefetchSchedule_params->DynamicMetadataTransmittedBytes = display_cfg->plane_descriptors[k].dynamic_meta_data.transmitted_bytes;
10815 CalculatePrefetchSchedule_params->UrgentLatency = mode_lib->mp.UrgentLatency;
10816 CalculatePrefetchSchedule_params->ExtraLatencyPrefetch = mode_lib->mp.ExtraLatencyPrefetch;
10817 CalculatePrefetchSchedule_params->TCalc = mode_lib->mp.TCalc;
10818 CalculatePrefetchSchedule_params->vm_bytes = mode_lib->mp.vm_bytes[k];
10819 CalculatePrefetchSchedule_params->PixelPTEBytesPerRow = mode_lib->mp.PixelPTEBytesPerRow[k];
10820 CalculatePrefetchSchedule_params->PrefetchSourceLinesY = mode_lib->mp.PrefetchSourceLinesY[k];
10821 CalculatePrefetchSchedule_params->VInitPreFillY = mode_lib->mp.VInitPreFillY[k];
10822 CalculatePrefetchSchedule_params->MaxNumSwathY = mode_lib->mp.MaxNumSwathY[k];
10823 CalculatePrefetchSchedule_params->PrefetchSourceLinesC = mode_lib->mp.PrefetchSourceLinesC[k];
10824 CalculatePrefetchSchedule_params->VInitPreFillC = mode_lib->mp.VInitPreFillC[k];
10825 CalculatePrefetchSchedule_params->MaxNumSwathC = mode_lib->mp.MaxNumSwathC[k];
10826 CalculatePrefetchSchedule_params->swath_width_luma_ub = mode_lib->mp.swath_width_luma_ub[k];
10827 CalculatePrefetchSchedule_params->swath_width_chroma_ub = mode_lib->mp.swath_width_chroma_ub[k];
10828 CalculatePrefetchSchedule_params->SwathHeightY = mode_lib->mp.SwathHeightY[k];
10829 CalculatePrefetchSchedule_params->SwathHeightC = mode_lib->mp.SwathHeightC[k];
10830 CalculatePrefetchSchedule_params->TWait = mode_lib->mp.TWait[k];
10831 CalculatePrefetchSchedule_params->Ttrip = mode_lib->mp.TripToMemory;
10832 CalculatePrefetchSchedule_params->Turg = mode_lib->mp.UrgentLatency;
10833 CalculatePrefetchSchedule_params->setup_for_tdlut = display_cfg->plane_descriptors[k].tdlut.setup_for_tdlut;
10834 CalculatePrefetchSchedule_params->tdlut_pte_bytes_per_frame = s->tdlut_pte_bytes_per_frame[k];
10835 CalculatePrefetchSchedule_params->tdlut_bytes_per_frame = s->tdlut_bytes_per_frame[k];
10836 CalculatePrefetchSchedule_params->tdlut_opt_time = s->tdlut_opt_time[k];
10837 CalculatePrefetchSchedule_params->tdlut_drain_time = s->tdlut_drain_time[k];
10838 CalculatePrefetchSchedule_params->num_cursors = (display_cfg->plane_descriptors[k].cursor.cursor_width > 0);
10839 CalculatePrefetchSchedule_params->cursor_bytes_per_chunk = s->cursor_bytes_per_chunk[k];
10840 CalculatePrefetchSchedule_params->cursor_bytes_per_line = s->cursor_bytes_per_line[k];
10841 CalculatePrefetchSchedule_params->dcc_enable = display_cfg->plane_descriptors[k].surface.dcc.enable;
10842 CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present;
10843 CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->mp.meta_row_bytes[k];
10844 CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor[k];
10845
10846 // output
10847 CalculatePrefetchSchedule_params->DSTXAfterScaler = &mode_lib->mp.DSTXAfterScaler[k];
10848 CalculatePrefetchSchedule_params->DSTYAfterScaler = &mode_lib->mp.DSTYAfterScaler[k];
10849 CalculatePrefetchSchedule_params->dst_y_prefetch = &mode_lib->mp.dst_y_prefetch[k];
10850 CalculatePrefetchSchedule_params->dst_y_per_vm_vblank = &mode_lib->mp.dst_y_per_vm_vblank[k];
10851 CalculatePrefetchSchedule_params->dst_y_per_row_vblank = &mode_lib->mp.dst_y_per_row_vblank[k];
10852 CalculatePrefetchSchedule_params->VRatioPrefetchY = &mode_lib->mp.VRatioPrefetchY[k];
10853 CalculatePrefetchSchedule_params->VRatioPrefetchC = &mode_lib->mp.VRatioPrefetchC[k];
10854 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWLuma = &mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k];
10855 CalculatePrefetchSchedule_params->RequiredPrefetchPixelDataBWChroma = &mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k];
10856 CalculatePrefetchSchedule_params->NotEnoughTimeForDynamicMetadata = &mode_lib->mp.NotEnoughTimeForDynamicMetadata[k];
10857 CalculatePrefetchSchedule_params->Tno_bw = &mode_lib->mp.Tno_bw[k];
10858 CalculatePrefetchSchedule_params->Tno_bw_flip = &mode_lib->mp.Tno_bw_flip[k];
10859 CalculatePrefetchSchedule_params->prefetch_vmrow_bw = &mode_lib->mp.prefetch_vmrow_bw[k];
10860 CalculatePrefetchSchedule_params->Tdmdl_vm = &mode_lib->mp.Tdmdl_vm[k];
10861 CalculatePrefetchSchedule_params->Tdmdl = &mode_lib->mp.Tdmdl[k];
10862 CalculatePrefetchSchedule_params->TSetup = &mode_lib->mp.TSetup[k];
10863 CalculatePrefetchSchedule_params->Tvm_trips = &s->Tvm_trips[k];
10864 CalculatePrefetchSchedule_params->Tr0_trips = &s->Tr0_trips[k];
10865 CalculatePrefetchSchedule_params->Tvm_trips_flip = &s->Tvm_trips_flip[k];
10866 CalculatePrefetchSchedule_params->Tr0_trips_flip = &s->Tr0_trips_flip[k];
10867 CalculatePrefetchSchedule_params->Tvm_trips_flip_rounded = &s->Tvm_trips_flip_rounded[k];
10868 CalculatePrefetchSchedule_params->Tr0_trips_flip_rounded = &s->Tr0_trips_flip_rounded[k];
10869 CalculatePrefetchSchedule_params->VUpdateOffsetPix = &mode_lib->mp.VUpdateOffsetPix[k];
10870 CalculatePrefetchSchedule_params->VUpdateWidthPix = &mode_lib->mp.VUpdateWidthPix[k];
10871 CalculatePrefetchSchedule_params->VReadyOffsetPix = &mode_lib->mp.VReadyOffsetPix[k];
10872 CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->mp.prefetch_cursor_bw[k];
10873
10874 mode_lib->mp.NoTimeToPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params);
10875
10876 #ifdef __DML_VBA_DEBUG__
10877 dml2_printf("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
10878 #endif
10879 mode_lib->mp.VStartupMin[k] = s->MaxVStartupLines[k];
10880 } // for k
10881
10882 mode_lib->mp.PrefetchModeSupported = true;
10883 for (k = 0; k < s->num_active_planes; ++k) {
10884 if (mode_lib->mp.NoTimeToPrefetch[k] == true ||
10885 mode_lib->mp.NotEnoughTimeForDynamicMetadata[k] ||
10886 mode_lib->mp.DSTYAfterScaler[k] > 8) {
10887 dml2_printf("DML::%s: k=%u, NoTimeToPrefetch = %0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]);
10888 dml2_printf("DML::%s: k=%u, NotEnoughTimeForDynamicMetadata=%u\n", __func__, k, mode_lib->mp.NotEnoughTimeForDynamicMetadata[k]);
10889 dml2_printf("DML::%s: k=%u, DSTYAfterScaler=%u (should be <= 0)\n", __func__, k, mode_lib->mp.DSTYAfterScaler[k]);
10890 mode_lib->mp.PrefetchModeSupported = false;
10891 }
10892 if (mode_lib->mp.dst_y_prefetch[k] < 2)
10893 s->DestinationLineTimesForPrefetchLessThan2 = true;
10894
10895 if (mode_lib->mp.VRatioPrefetchY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ ||
10896 mode_lib->mp.VRatioPrefetchC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) {
10897 s->VRatioPrefetchMoreThanMax = true;
10898 dml2_printf("DML::%s: k=%d, VRatioPrefetchY=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k], __DML2_CALCS_MAX_VRATIO_PRE__);
10899 dml2_printf("DML::%s: k=%d, VRatioPrefetchC=%f (should not be < %f)\n", __func__, k, mode_lib->mp.VRatioPrefetchC[k], __DML2_CALCS_MAX_VRATIO_PRE__);
10900 dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
10901 }
10902
10903 if (mode_lib->mp.NotEnoughUrgentLatencyHiding[k]) {
10904 dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHiding = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHiding[k]);
10905 mode_lib->mp.PrefetchModeSupported = false;
10906 }
10907 }
10908
10909 if (s->VRatioPrefetchMoreThanMax == true || s->DestinationLineTimesForPrefetchLessThan2 == true) {
10910 dml2_printf("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax);
10911 dml2_printf("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2);
10912 mode_lib->mp.PrefetchModeSupported = false;
10913 }
10914
10915 dml2_printf("DML::%s: Prefetch schedule is %sOK at vstartup = %u\n", __func__,
10916 mode_lib->mp.PrefetchModeSupported ? "" : "NOT ", CalculatePrefetchSchedule_params->VStartup);
10917
10918 // Prefetch schedule OK, now check prefetch bw
10919 if (mode_lib->mp.PrefetchModeSupported == true) {
10920 for (k = 0; k < s->num_active_planes; ++k) {
10921 double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
10922 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
10923 CalculateUrgentBurstFactor(
10924 &display_cfg->plane_descriptors[k],
10925 mode_lib->mp.swath_width_luma_ub[k],
10926 mode_lib->mp.swath_width_chroma_ub[k],
10927 mode_lib->mp.SwathHeightY[k],
10928 mode_lib->mp.SwathHeightC[k],
10929 line_time_us,
10930 mode_lib->mp.UrgentLatency,
10931 mode_lib->mp.VRatioPrefetchY[k],
10932 mode_lib->mp.VRatioPrefetchC[k],
10933 mode_lib->mp.BytePerPixelInDETY[k],
10934 mode_lib->mp.BytePerPixelInDETC[k],
10935 mode_lib->mp.DETBufferSizeY[k],
10936 mode_lib->mp.DETBufferSizeC[k],
10937 /* Output */
10938 &mode_lib->mp.UrgentBurstFactorLumaPre[k],
10939 &mode_lib->mp.UrgentBurstFactorChromaPre[k],
10940 &mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]);
10941
10942 #ifdef __DML_VBA_DEBUG__
10943 dml2_printf("DML::%s: k=%0u DPPPerSurface=%u\n", __func__, k, mode_lib->mp.NoOfDPP[k]);
10944 dml2_printf("DML::%s: k=%0u UrgentBurstFactorLuma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLuma[k]);
10945 dml2_printf("DML::%s: k=%0u UrgentBurstFactorChroma=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChroma[k]);
10946 dml2_printf("DML::%s: k=%0u UrgentBurstFactorLumaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorLumaPre[k]);
10947 dml2_printf("DML::%s: k=%0u UrgentBurstFactorChromaPre=%f\n", __func__, k, mode_lib->mp.UrgentBurstFactorChromaPre[k]);
10948
10949 dml2_printf("DML::%s: k=%0u VRatioPrefetchY=%f\n", __func__, k, mode_lib->mp.VRatioPrefetchY[k]);
10950 dml2_printf("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio);
10951
10952 dml2_printf("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]);
10953 dml2_printf("DML::%s: k=%0u ReadBandwidthSurfaceLuma=%f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]);
10954 dml2_printf("DML::%s: k=%0u ReadBandwidthSurfaceChroma=%f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]);
10955 dml2_printf("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]);
10956 dml2_printf("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]);
10957 dml2_printf("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]);
10958 dml2_printf("DML::%s: k=%0u RequiredPrefetchPixelDataBWLuma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWLuma[k]);
10959 dml2_printf("DML::%s: k=%0u RequiredPrefetchPixelDataBWChroma=%f\n", __func__, k, mode_lib->mp.RequiredPrefetchPixelDataBWChroma[k]);
10960 dml2_printf("DML::%s: k=%0u prefetch_cursor_bw=%f\n", __func__, k, mode_lib->mp.prefetch_cursor_bw[k]);
10961 #endif
10962 }
10963
10964 for (k = 0; k <= s->num_active_planes - 1; k++)
10965 mode_lib->mp.final_flip_bw[k] = 0;
10966
10967 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = mode_lib->mp.urg_vactive_bandwidth_required;
10968 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required;
10969 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = mode_lib->mp.urg_bandwidth_required_qual;
10970 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required;
10971 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
10972 calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0;
10973
10974 calculate_peak_bandwidth_params->display_cfg = display_cfg;
10975 calculate_peak_bandwidth_params->inc_flip_bw = 0;
10976 calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes;
10977 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP;
10978 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0;
10979 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1;
10980 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0;
10981 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1;
10982 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor;
10983 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor;
10984
10985 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.SurfaceReadBandwidthLuma;
10986 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.SurfaceReadBandwidthChroma;
10987 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma;
10988 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma;
10989 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l;
10990 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c;
10991 calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw;
10992 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
10993 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw;
10994 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw;
10995 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw;
10996 calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw;
10997 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma;
10998 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma;
10999 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor;
11000 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre;
11001 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre;
11002 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre;
11003
11004 calculate_peak_bandwidth_required(
11005 &mode_lib->scratch,
11006 calculate_peak_bandwidth_params);
11007
11008 // Check urg peak bandwidth against available urg bw
11009 // check at SDP and DRAM, for all soc states (SVP prefetch and Sys Active)
11010 check_urgent_bandwidth_support(
11011 &mode_lib->mp.FractionOfUrgentBandwidth, // double* frac_urg_bandwidth
11012 &mode_lib->mp.FractionOfUrgentBandwidthMALL, // double* frac_urg_bandwidth_mall
11013 &s->dummy_boolean[1], // vactive bw ok
11014 &mode_lib->mp.PrefetchModeSupported, // prefetch bw ok
11015
11016 mode_lib->soc.mall_allocated_for_dcn_mbytes,
11017 mode_lib->mp.non_urg_bandwidth_required,
11018 mode_lib->mp.urg_vactive_bandwidth_required,
11019 mode_lib->mp.urg_bandwidth_required,
11020 mode_lib->mp.urg_bandwidth_available);
11021
11022 if (!mode_lib->mp.PrefetchModeSupported)
11023 dml2_printf("DML::%s: Bandwidth not sufficient for prefetch!\n", __func__);
11024
11025 for (k = 0; k < s->num_active_planes; ++k) {
11026 if (mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]) {
11027 dml2_printf("DML::%s: k=%u, NotEnoughUrgentLatencyHidingPre = %u\n", __func__, k, mode_lib->mp.NotEnoughUrgentLatencyHidingPre[k]);
11028 mode_lib->mp.PrefetchModeSupported = false;
11029 }
11030 }
11031 } // prefetch schedule ok
11032
11033 // Prefetch schedule and prefetch bw ok, now check flip bw
11034 if (mode_lib->mp.PrefetchModeSupported == true) { // prefetch schedule and prefetch bw ok, now check flip bw
11035
11036 mode_lib->mp.BandwidthAvailableForImmediateFlip =
11037 get_bandwidth_available_for_immediate_flip(
11038 dml2_core_internal_soc_state_sys_active,
11039 mode_lib->mp.urg_bandwidth_required_qual, // no flip
11040 mode_lib->mp.urg_bandwidth_available);
11041 mode_lib->mp.TotImmediateFlipBytes = 0;
11042 for (k = 0; k < s->num_active_planes; ++k) {
11043 if (display_cfg->plane_descriptors[k].immediate_flip) {
11044 s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes(s->HostVMInefficiencyFactor,
11045 mode_lib->mp.vm_bytes[k],
11046 mode_lib->mp.PixelPTEBytesPerRow[k],
11047 mode_lib->mp.meta_row_bytes[k]);
11048 } else {
11049 s->per_pipe_flip_bytes[k] = 0;
11050 }
11051 mode_lib->mp.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->mp.NoOfDPP[k];
11052 #ifdef __DML_VBA_DEBUG__
11053 dml2_printf("DML::%s: k = %u\n", __func__, k);
11054 dml2_printf("DML::%s: DPPPerSurface = %u\n", __func__, mode_lib->mp.NoOfDPP[k]);
11055 dml2_printf("DML::%s: vm_bytes = %u\n", __func__, mode_lib->mp.vm_bytes[k]);
11056 dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, mode_lib->mp.PixelPTEBytesPerRow[k]);
11057 dml2_printf("DML::%s: meta_row_bytes = %u\n", __func__, mode_lib->mp.meta_row_bytes[k]);
11058 dml2_printf("DML::%s: TotImmediateFlipBytes = %u\n", __func__, mode_lib->mp.TotImmediateFlipBytes);
11059 #endif
11060 }
11061 for (k = 0; k < s->num_active_planes; ++k) {
11062 CalculateFlipSchedule(
11063 &mode_lib->scratch,
11064 display_cfg->plane_descriptors[k].immediate_flip,
11065 0, // use_lb_flip_bw
11066 s->HostVMInefficiencyFactor,
11067 s->Tvm_trips_flip[k],
11068 s->Tr0_trips_flip[k],
11069 s->Tvm_trips_flip_rounded[k],
11070 s->Tr0_trips_flip_rounded[k],
11071 display_cfg->gpuvm_enable,
11072 mode_lib->mp.vm_bytes[k],
11073 mode_lib->mp.PixelPTEBytesPerRow[k],
11074 mode_lib->mp.BandwidthAvailableForImmediateFlip,
11075 mode_lib->mp.TotImmediateFlipBytes,
11076 display_cfg->plane_descriptors[k].pixel_format,
11077 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000),
11078 display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio,
11079 display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio,
11080 mode_lib->mp.Tno_bw[k],
11081 mode_lib->mp.dpte_row_height[k],
11082 mode_lib->mp.dpte_row_height_chroma[k],
11083 mode_lib->mp.use_one_row_for_frame_flip[k],
11084 mode_lib->ip.max_flip_time_us,
11085 mode_lib->ip.max_flip_time_lines,
11086 s->per_pipe_flip_bytes[k],
11087 mode_lib->mp.meta_row_bytes[k],
11088 mode_lib->mp.meta_row_height[k],
11089 mode_lib->mp.meta_row_height_chroma[k],
11090 mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable,
11091
11092 // Output
11093 &mode_lib->mp.dst_y_per_vm_flip[k],
11094 &mode_lib->mp.dst_y_per_row_flip[k],
11095 &mode_lib->mp.final_flip_bw[k],
11096 &mode_lib->mp.ImmediateFlipSupportedForPipe[k]);
11097 }
11098
11099 calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw;
11100 calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->mp.urg_bandwidth_required_flip;
11101 calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw;
11102 calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->mp.non_urg_bandwidth_required_flip;
11103 calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw;
11104 calculate_peak_bandwidth_params->surface_peak_required_bw = s->surface_dummy_bw0;
11105
11106 calculate_peak_bandwidth_params->display_cfg = display_cfg;
11107 calculate_peak_bandwidth_params->inc_flip_bw = 1;
11108 calculate_peak_bandwidth_params->num_active_planes = s->num_active_planes;
11109 calculate_peak_bandwidth_params->num_of_dpp = mode_lib->mp.NoOfDPP;
11110 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p0;
11111 calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_nom_overhead_factor_p1;
11112 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p0;
11113 calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->mp.dcc_dram_bw_pref_overhead_factor_p1;
11114 calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor;
11115 calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor;
11116
11117 calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.SurfaceReadBandwidthLuma;
11118 calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.SurfaceReadBandwidthChroma;
11119 calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma;
11120 calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma;
11121 calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l;
11122 calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->mp.excess_vactive_fill_bw_c;
11123 calculate_peak_bandwidth_params->cursor_bw = mode_lib->mp.cursor_bw;
11124 calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
11125 calculate_peak_bandwidth_params->meta_row_bw = mode_lib->mp.meta_row_bw;
11126 calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->mp.prefetch_cursor_bw;
11127 calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->mp.prefetch_vmrow_bw;
11128 calculate_peak_bandwidth_params->flip_bw = mode_lib->mp.final_flip_bw;
11129 calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->mp.UrgentBurstFactorLuma;
11130 calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->mp.UrgentBurstFactorChroma;
11131 calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->mp.UrgentBurstFactorCursor;
11132 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->mp.UrgentBurstFactorLumaPre;
11133 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->mp.UrgentBurstFactorChromaPre;
11134 calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->mp.UrgentBurstFactorCursorPre;
11135
11136 calculate_peak_bandwidth_required(
11137 &mode_lib->scratch,
11138 calculate_peak_bandwidth_params);
11139
11140 calculate_immediate_flip_bandwidth_support(
11141 &mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip, // double* frac_urg_bandwidth_flip
11142 &mode_lib->mp.ImmediateFlipSupported, // bool* flip_bandwidth_support_ok
11143
11144 dml2_core_internal_soc_state_sys_active,
11145 mode_lib->mp.urg_bandwidth_required_flip,
11146 mode_lib->mp.non_urg_bandwidth_required_flip,
11147 mode_lib->mp.urg_bandwidth_available);
11148
11149 if (!mode_lib->mp.ImmediateFlipSupported)
11150 dml2_printf("DML::%s: Bandwidth not sufficient for flip!", __func__);
11151
11152 for (k = 0; k < s->num_active_planes; ++k) {
11153 if (display_cfg->plane_descriptors[k].immediate_flip && mode_lib->mp.ImmediateFlipSupportedForPipe[k] == false) {
11154 mode_lib->mp.ImmediateFlipSupported = false;
11155 #ifdef __DML_VBA_DEBUG__
11156 dml2_printf("DML::%s: Pipe %0d not supporting iflip!\n", __func__, k);
11157 #endif
11158 }
11159 }
11160 } else { // flip or prefetch not support
11161 mode_lib->mp.ImmediateFlipSupported = false;
11162 }
11163
11164 // consider flip support okay if the flip bw is ok, or when the user doesn't require an iflip and there is no host vm
11165 must_support_iflip = display_cfg->hostvm_enable || s->immediate_flip_required;
11166 mode_lib->mp.PrefetchAndImmediateFlipSupported = (mode_lib->mp.PrefetchModeSupported == true && (!must_support_iflip || mode_lib->mp.ImmediateFlipSupported));
11167
11168 #ifdef __DML_VBA_DEBUG__
11169 dml2_printf("DML::%s: PrefetchModeSupported = %u\n", __func__, mode_lib->mp.PrefetchModeSupported);
11170 for (k = 0; k < s->num_active_planes; ++k)
11171 dml2_printf("DML::%s: immediate_flip_required[%u] = %u\n", __func__, k, display_cfg->plane_descriptors[k].immediate_flip);
11172 dml2_printf("DML::%s: HostVMEnable = %u\n", __func__, display_cfg->hostvm_enable);
11173 dml2_printf("DML::%s: ImmediateFlipSupported = %u\n", __func__, mode_lib->mp.ImmediateFlipSupported);
11174 dml2_printf("DML::%s: PrefetchAndImmediateFlipSupported = %u\n", __func__, mode_lib->mp.PrefetchAndImmediateFlipSupported);
11175 #endif
11176 dml2_printf("DML::%s: Done one iteration: k=%d, MaxVStartupLines=%u\n", __func__, k, s->MaxVStartupLines[k]);
11177 }
11178
11179 for (k = 0; k < s->num_active_planes; ++k)
11180 dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11181
11182 if (!mode_lib->mp.PrefetchAndImmediateFlipSupported) {
11183 dml2_printf("DML::%s: Bad, Prefetch and flip scheduling solution NOT found!\n", __func__);
11184 } else {
11185 dml2_printf("DML::%s: Good, Prefetch and flip scheduling solution found\n", __func__);
11186
11187 // DCC Configuration
11188 for (k = 0; k < s->num_active_planes; ++k) {
11189 #ifdef __DML_VBA_DEBUG__
11190 dml2_printf("DML::%s: Calculate DCC configuration for surface k=%u\n", __func__, k);
11191 #endif
11192 CalculateDCCConfiguration(
11193 display_cfg->plane_descriptors[k].surface.dcc.enable,
11194 display_cfg->overrides.dcc_programming_assumes_scan_direction_unknown,
11195 display_cfg->plane_descriptors[k].pixel_format,
11196 display_cfg->plane_descriptors[k].surface.plane0.width,
11197 display_cfg->plane_descriptors[k].surface.plane1.width,
11198 display_cfg->plane_descriptors[k].surface.plane0.height,
11199 display_cfg->plane_descriptors[k].surface.plane1.height,
11200 s->NomDETInKByte,
11201 mode_lib->mp.Read256BlockHeightY[k],
11202 mode_lib->mp.Read256BlockHeightC[k],
11203 display_cfg->plane_descriptors[k].surface.tiling,
11204 mode_lib->mp.BytePerPixelY[k],
11205 mode_lib->mp.BytePerPixelC[k],
11206 mode_lib->mp.BytePerPixelInDETY[k],
11207 mode_lib->mp.BytePerPixelInDETC[k],
11208 display_cfg->plane_descriptors[k].composition.rotation_angle,
11209
11210 /* Output */
11211 &mode_lib->mp.RequestLuma[k],
11212 &mode_lib->mp.RequestChroma[k],
11213 &mode_lib->mp.DCCYMaxUncompressedBlock[k],
11214 &mode_lib->mp.DCCCMaxUncompressedBlock[k],
11215 &mode_lib->mp.DCCYMaxCompressedBlock[k],
11216 &mode_lib->mp.DCCCMaxCompressedBlock[k],
11217 &mode_lib->mp.DCCYIndependentBlock[k],
11218 &mode_lib->mp.DCCCIndependentBlock[k]);
11219 }
11220
11221 //Watermarks and NB P-State/DRAM Clock Change Support
11222 s->mmSOCParameters.UrgentLatency = mode_lib->mp.UrgentLatency;
11223 s->mmSOCParameters.ExtraLatency = mode_lib->mp.ExtraLatency;
11224 s->mmSOCParameters.ExtraLatency_sr = mode_lib->mp.ExtraLatency_sr;
11225 s->mmSOCParameters.WritebackLatency = mode_lib->soc.qos_parameters.writeback.base_latency_us;
11226 s->mmSOCParameters.DRAMClockChangeLatency = mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us;
11227 s->mmSOCParameters.FCLKChangeLatency = mode_lib->soc.power_management_parameters.fclk_change_blackout_us;
11228 s->mmSOCParameters.SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
11229 s->mmSOCParameters.SREnterPlusExitTime = mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us;
11230 s->mmSOCParameters.SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
11231 s->mmSOCParameters.SREnterPlusExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_enter_plus_exit_latency_us;
11232 s->mmSOCParameters.USRRetrainingLatency = 0;
11233 s->mmSOCParameters.SMNLatency = 0;
11234 s->mmSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index);
11235 s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index);
11236 s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock;
11237 s->mmSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type;
11238
11239 CalculateWatermarks_params->display_cfg = display_cfg;
11240 CalculateWatermarks_params->USRRetrainingRequired = false;
11241 CalculateWatermarks_params->NumberOfActiveSurfaces = s->num_active_planes;
11242 CalculateWatermarks_params->MaxLineBufferLines = mode_lib->ip.max_line_buffer_lines;
11243 CalculateWatermarks_params->LineBufferSize = mode_lib->ip.line_buffer_size_bits;
11244 CalculateWatermarks_params->WritebackInterfaceBufferSize = mode_lib->ip.writeback_interface_buffer_size_kbytes;
11245 CalculateWatermarks_params->DCFCLK = mode_lib->mp.Dcfclk;
11246 CalculateWatermarks_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
11247 CalculateWatermarks_params->SynchronizeDRRDisplaysForUCLKPStateChange = display_cfg->overrides.synchronize_ddr_displays_for_uclk_pstate_change;
11248 CalculateWatermarks_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
11249 CalculateWatermarks_params->mmSOCParameters = s->mmSOCParameters;
11250 CalculateWatermarks_params->WritebackChunkSize = mode_lib->ip.writeback_chunk_size_kbytes;
11251 CalculateWatermarks_params->SOCCLK = s->SOCCLK;
11252 CalculateWatermarks_params->DCFClkDeepSleep = mode_lib->mp.DCFCLKDeepSleep;
11253 CalculateWatermarks_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
11254 CalculateWatermarks_params->DETBufferSizeC = mode_lib->mp.DETBufferSizeC;
11255 CalculateWatermarks_params->SwathHeightY = mode_lib->mp.SwathHeightY;
11256 CalculateWatermarks_params->SwathHeightC = mode_lib->mp.SwathHeightC;
11257 CalculateWatermarks_params->SwathWidthY = mode_lib->mp.SwathWidthY;
11258 CalculateWatermarks_params->SwathWidthC = mode_lib->mp.SwathWidthC;
11259 CalculateWatermarks_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY;
11260 CalculateWatermarks_params->BytePerPixelDETC = mode_lib->mp.BytePerPixelInDETC;
11261 CalculateWatermarks_params->DSTXAfterScaler = mode_lib->mp.DSTXAfterScaler;
11262 CalculateWatermarks_params->DSTYAfterScaler = mode_lib->mp.DSTYAfterScaler;
11263 CalculateWatermarks_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled;
11264 CalculateWatermarks_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte;
11265 CalculateWatermarks_params->meta_row_height_l = mode_lib->mp.meta_row_height;
11266 CalculateWatermarks_params->meta_row_height_c = mode_lib->mp.meta_row_height_chroma;
11267 CalculateWatermarks_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
11268
11269 // Output
11270 CalculateWatermarks_params->Watermark = &mode_lib->mp.Watermark;
11271 CalculateWatermarks_params->DRAMClockChangeSupport = mode_lib->mp.DRAMClockChangeSupport;
11272 CalculateWatermarks_params->global_dram_clock_change_supported = &mode_lib->mp.global_dram_clock_change_supported;
11273 CalculateWatermarks_params->MaxActiveDRAMClockChangeLatencySupported = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported;
11274 CalculateWatermarks_params->SubViewportLinesNeededInMALL = mode_lib->mp.SubViewportLinesNeededInMALL;
11275 CalculateWatermarks_params->FCLKChangeSupport = mode_lib->mp.FCLKChangeSupport;
11276 CalculateWatermarks_params->global_fclk_change_supported = &mode_lib->mp.global_fclk_change_supported;
11277 CalculateWatermarks_params->MaxActiveFCLKChangeLatencySupported = &mode_lib->mp.MaxActiveFCLKChangeLatencySupported;
11278 CalculateWatermarks_params->USRRetrainingSupport = &mode_lib->mp.USRRetrainingSupport;
11279 CalculateWatermarks_params->g6_temp_read_support = &mode_lib->mp.g6_temp_read_support;
11280 CalculateWatermarks_params->VActiveLatencyHidingMargin = 0;
11281 CalculateWatermarks_params->VActiveLatencyHidingUs = 0;
11282
11283 CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params);
11284
11285 for (k = 0; k < s->num_active_planes; ++k) {
11286 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
11287 mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11288 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark);
11289 mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total /
11290 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackFCLKChangeWatermark);
11291 } else {
11292 mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = 0;
11293 mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = 0;
11294 }
11295 }
11296
11297 calculate_pstate_keepout_dst_lines(display_cfg, &mode_lib->mp.Watermark, mode_lib->mp.pstate_keepout_dst_lines);
11298
11299 dml2_printf("DML::%s: DEBUG stream_index = %0d\n", __func__, display_cfg->plane_descriptors[0].stream_index);
11300 dml2_printf("DML::%s: DEBUG PixelClock = %d kHz\n", __func__, (display_cfg->stream_descriptors[display_cfg->plane_descriptors[0].stream_index].timing.pixel_clock_khz));
11301
11302 //Display Pipeline Delivery Time in Prefetch, Groups
11303 CalculatePixelDeliveryTimes(
11304 display_cfg,
11305 cfg_support_info,
11306 s->num_active_planes,
11307 mode_lib->mp.VRatioPrefetchY,
11308 mode_lib->mp.VRatioPrefetchC,
11309 mode_lib->mp.swath_width_luma_ub,
11310 mode_lib->mp.swath_width_chroma_ub,
11311 mode_lib->mp.PSCL_THROUGHPUT,
11312 mode_lib->mp.PSCL_THROUGHPUT_CHROMA,
11313 mode_lib->mp.Dppclk,
11314 mode_lib->mp.BytePerPixelC,
11315 mode_lib->mp.req_per_swath_ub_l,
11316 mode_lib->mp.req_per_swath_ub_c,
11317
11318 /* Output */
11319 mode_lib->mp.DisplayPipeLineDeliveryTimeLuma,
11320 mode_lib->mp.DisplayPipeLineDeliveryTimeChroma,
11321 mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch,
11322 mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch,
11323 mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma,
11324 mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma,
11325 mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch,
11326 mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch);
11327
11328 CalculateMetaAndPTETimes_params->scratch = &mode_lib->scratch;
11329 CalculateMetaAndPTETimes_params->display_cfg = display_cfg;
11330 CalculateMetaAndPTETimes_params->NumberOfActiveSurfaces = s->num_active_planes;
11331 CalculateMetaAndPTETimes_params->use_one_row_for_frame = mode_lib->mp.use_one_row_for_frame;
11332 CalculateMetaAndPTETimes_params->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank;
11333 CalculateMetaAndPTETimes_params->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip;
11334 CalculateMetaAndPTETimes_params->BytePerPixelY = mode_lib->mp.BytePerPixelY;
11335 CalculateMetaAndPTETimes_params->BytePerPixelC = mode_lib->mp.BytePerPixelC;
11336 CalculateMetaAndPTETimes_params->dpte_row_height = mode_lib->mp.dpte_row_height;
11337 CalculateMetaAndPTETimes_params->dpte_row_height_chroma = mode_lib->mp.dpte_row_height_chroma;
11338 CalculateMetaAndPTETimes_params->dpte_group_bytes = mode_lib->mp.dpte_group_bytes;
11339 CalculateMetaAndPTETimes_params->PTERequestSizeY = mode_lib->mp.PTERequestSizeY;
11340 CalculateMetaAndPTETimes_params->PTERequestSizeC = mode_lib->mp.PTERequestSizeC;
11341 CalculateMetaAndPTETimes_params->PixelPTEReqWidthY = mode_lib->mp.PixelPTEReqWidthY;
11342 CalculateMetaAndPTETimes_params->PixelPTEReqHeightY = mode_lib->mp.PixelPTEReqHeightY;
11343 CalculateMetaAndPTETimes_params->PixelPTEReqWidthC = mode_lib->mp.PixelPTEReqWidthC;
11344 CalculateMetaAndPTETimes_params->PixelPTEReqHeightC = mode_lib->mp.PixelPTEReqHeightC;
11345 CalculateMetaAndPTETimes_params->dpte_row_width_luma_ub = mode_lib->mp.dpte_row_width_luma_ub;
11346 CalculateMetaAndPTETimes_params->dpte_row_width_chroma_ub = mode_lib->mp.dpte_row_width_chroma_ub;
11347 CalculateMetaAndPTETimes_params->tdlut_groups_per_2row_ub = s->tdlut_groups_per_2row_ub;
11348 CalculateMetaAndPTETimes_params->mrq_present = mode_lib->ip.dcn_mrq_present;
11349
11350 CalculateMetaAndPTETimes_params->MetaChunkSize = mode_lib->ip.meta_chunk_size_kbytes;
11351 CalculateMetaAndPTETimes_params->MinMetaChunkSizeBytes = mode_lib->ip.min_meta_chunk_size_bytes;
11352 CalculateMetaAndPTETimes_params->meta_row_width = mode_lib->mp.meta_row_width;
11353 CalculateMetaAndPTETimes_params->meta_row_width_chroma = mode_lib->mp.meta_row_width_chroma;
11354 CalculateMetaAndPTETimes_params->meta_row_height = mode_lib->mp.meta_row_height;
11355 CalculateMetaAndPTETimes_params->meta_row_height_chroma = mode_lib->mp.meta_row_height_chroma;
11356 CalculateMetaAndPTETimes_params->meta_req_width = mode_lib->mp.meta_req_width;
11357 CalculateMetaAndPTETimes_params->meta_req_width_chroma = mode_lib->mp.meta_req_width_chroma;
11358 CalculateMetaAndPTETimes_params->meta_req_height = mode_lib->mp.meta_req_height;
11359 CalculateMetaAndPTETimes_params->meta_req_height_chroma = mode_lib->mp.meta_req_height_chroma;
11360
11361 CalculateMetaAndPTETimes_params->time_per_tdlut_group = mode_lib->mp.time_per_tdlut_group;
11362 CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_L = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L;
11363 CalculateMetaAndPTETimes_params->DST_Y_PER_PTE_ROW_NOM_C = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C;
11364 CalculateMetaAndPTETimes_params->time_per_pte_group_nom_luma = mode_lib->mp.time_per_pte_group_nom_luma;
11365 CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_luma = mode_lib->mp.time_per_pte_group_vblank_luma;
11366 CalculateMetaAndPTETimes_params->time_per_pte_group_flip_luma = mode_lib->mp.time_per_pte_group_flip_luma;
11367 CalculateMetaAndPTETimes_params->time_per_pte_group_nom_chroma = mode_lib->mp.time_per_pte_group_nom_chroma;
11368 CalculateMetaAndPTETimes_params->time_per_pte_group_vblank_chroma = mode_lib->mp.time_per_pte_group_vblank_chroma;
11369 CalculateMetaAndPTETimes_params->time_per_pte_group_flip_chroma = mode_lib->mp.time_per_pte_group_flip_chroma;
11370 CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_L = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L;
11371 CalculateMetaAndPTETimes_params->DST_Y_PER_META_ROW_NOM_C = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C;
11372 CalculateMetaAndPTETimes_params->TimePerMetaChunkNominal = mode_lib->mp.TimePerMetaChunkNominal;
11373 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkNominal = mode_lib->mp.TimePerChromaMetaChunkNominal;
11374 CalculateMetaAndPTETimes_params->TimePerMetaChunkVBlank = mode_lib->mp.TimePerMetaChunkVBlank;
11375 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkVBlank = mode_lib->mp.TimePerChromaMetaChunkVBlank;
11376 CalculateMetaAndPTETimes_params->TimePerMetaChunkFlip = mode_lib->mp.TimePerMetaChunkFlip;
11377 CalculateMetaAndPTETimes_params->TimePerChromaMetaChunkFlip = mode_lib->mp.TimePerChromaMetaChunkFlip;
11378
11379 CalculateMetaAndPTETimes(CalculateMetaAndPTETimes_params);
11380
11381 CalculateVMGroupAndRequestTimes(
11382 display_cfg,
11383 s->num_active_planes,
11384 mode_lib->mp.BytePerPixelC,
11385 mode_lib->mp.dst_y_per_vm_vblank,
11386 mode_lib->mp.dst_y_per_vm_flip,
11387 mode_lib->mp.dpte_row_width_luma_ub,
11388 mode_lib->mp.dpte_row_width_chroma_ub,
11389 mode_lib->mp.vm_group_bytes,
11390 mode_lib->mp.dpde0_bytes_per_frame_ub_l,
11391 mode_lib->mp.dpde0_bytes_per_frame_ub_c,
11392 s->tdlut_pte_bytes_per_frame,
11393 mode_lib->mp.meta_pte_bytes_per_frame_ub_l,
11394 mode_lib->mp.meta_pte_bytes_per_frame_ub_c,
11395 mode_lib->ip.dcn_mrq_present,
11396
11397 /* Output */
11398 mode_lib->mp.TimePerVMGroupVBlank,
11399 mode_lib->mp.TimePerVMGroupFlip,
11400 mode_lib->mp.TimePerVMRequestVBlank,
11401 mode_lib->mp.TimePerVMRequestFlip);
11402
11403 // VStartup Adjustment
11404 for (k = 0; k < s->num_active_planes; ++k) {
11405 bool isInterlaceTiming;
11406
11407 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TWait[k] + mode_lib->mp.ExtraLatency;
11408 if (!display_cfg->plane_descriptors[k].dynamic_meta_data.enable)
11409 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.TCalc + mode_lib->mp.MinTTUVBlank[k];
11410
11411 #ifdef __DML_VBA_DEBUG__
11412 dml2_printf("DML::%s: k=%u, MinTTUVBlank = %f (before vstartup margin)\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]);
11413 #endif
11414 s->Tvstartup_margin = (s->MaxVStartupLines[k] - mode_lib->mp.VStartupMin[k]) * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000);
11415 mode_lib->mp.MinTTUVBlank[k] = mode_lib->mp.MinTTUVBlank[k] + s->Tvstartup_margin;
11416
11417 #ifdef __DML_VBA_DEBUG__
11418 dml2_printf("DML::%s: k=%u, Tvstartup_margin = %f\n", __func__, k, s->Tvstartup_margin);
11419 dml2_printf("DML::%s: k=%u, MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]);
11420 dml2_printf("DML::%s: k=%u, MinTTUVBlank = %f\n", __func__, k, mode_lib->mp.MinTTUVBlank[k]);
11421 #endif
11422
11423 mode_lib->mp.Tdmdl[k] = mode_lib->mp.Tdmdl[k] + s->Tvstartup_margin;
11424 if (display_cfg->plane_descriptors[k].dynamic_meta_data.enable && mode_lib->ip.dynamic_metadata_vm_enabled) {
11425 mode_lib->mp.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k] + s->Tvstartup_margin;
11426 }
11427
11428 isInterlaceTiming = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.interlaced && !mode_lib->ip.ptoi_supported);
11429
11430 // The actual positioning of the vstartup
11431 mode_lib->mp.VStartup[k] = (isInterlaceTiming ? (2 * s->MaxVStartupLines[k]) : s->MaxVStartupLines[k]);
11432
11433 s->dlg_vblank_start = ((isInterlaceTiming ? math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch) / 2.0, 1.0) :
11434 display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total) - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch);
11435 s->LSetup = math_floor2(4.0 * mode_lib->mp.TSetup[k] / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), 1.0) / 4.0;
11436 s->blank_lines_remaining = (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active) - mode_lib->mp.VStartup[k];
11437
11438 if (s->blank_lines_remaining < 0) {
11439 dml2_printf("ERROR: Vstartup is larger than vblank!?\n");
11440 s->blank_lines_remaining = 0;
11441 DML2_ASSERT(0);
11442 }
11443 mode_lib->mp.MIN_DST_Y_NEXT_START[k] = s->dlg_vblank_start + s->blank_lines_remaining + s->LSetup;
11444
11445 // debug only
11446 if (((mode_lib->mp.VUpdateOffsetPix[k] + mode_lib->mp.VUpdateWidthPix[k] + mode_lib->mp.VReadyOffsetPix[k]) / (double) display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) <=
11447 (isInterlaceTiming ?
11448 math_floor2((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]) / 2.0, 1.0) :
11449 (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch - mode_lib->mp.VStartup[k]))) {
11450 mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = true;
11451 } else {
11452 mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k] = false;
11453 }
11454 #ifdef __DML_VBA_DEBUG__
11455 dml2_printf("DML::%s: k=%u, VStartup = %u (max)\n", __func__, k, mode_lib->mp.VStartup[k]);
11456 dml2_printf("DML::%s: k=%u, VStartupMin = %u (max)\n", __func__, k, mode_lib->mp.VStartupMin[k]);
11457 dml2_printf("DML::%s: k=%u, VUpdateOffsetPix = %u\n", __func__, k, mode_lib->mp.VUpdateOffsetPix[k]);
11458 dml2_printf("DML::%s: k=%u, VUpdateWidthPix = %u\n", __func__, k, mode_lib->mp.VUpdateWidthPix[k]);
11459 dml2_printf("DML::%s: k=%u, VReadyOffsetPix = %u\n", __func__, k, mode_lib->mp.VReadyOffsetPix[k]);
11460 dml2_printf("DML::%s: k=%u, HTotal = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total);
11461 dml2_printf("DML::%s: k=%u, VTotal = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_total);
11462 dml2_printf("DML::%s: k=%u, VActive = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_active);
11463 dml2_printf("DML::%s: k=%u, VFrontPorch = %u\n", __func__, k, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.v_front_porch);
11464 dml2_printf("DML::%s: k=%u, TSetup = %f\n", __func__, k, mode_lib->mp.TSetup[k]);
11465 dml2_printf("DML::%s: k=%u, MIN_DST_Y_NEXT_START = %f\n", __func__, k, mode_lib->mp.MIN_DST_Y_NEXT_START[k]);
11466 dml2_printf("DML::%s: k=%u, VREADY_AT_OR_AFTER_VSYNC = %u\n", __func__, k, mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k]);
11467 #endif
11468 }
11469
11470 //Maximum Bandwidth Used
11471 s->TotalWRBandwidth = 0;
11472 s->WRBandwidth = 0;
11473 for (k = 0; k < s->num_active_planes; ++k) {
11474 if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_32) {
11475 s->WRBandwidth = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width /
11476 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4;
11477 } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) {
11478 s->WRBandwidth = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width /
11479 (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8;
11480 }
11481 s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth;
11482 }
11483
11484 mode_lib->mp.TotalDataReadBandwidth = 0;
11485 for (k = 0; k < s->num_active_planes; ++k) {
11486 mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.SurfaceReadBandwidthLuma[k] + mode_lib->mp.SurfaceReadBandwidthChroma[k];
11487 #ifdef __DML_VBA_DEBUG__
11488 dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth);
11489 dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]);
11490 dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]);
11491 #endif
11492 }
11493
11494 CalculateStutterEfficiency_params->display_cfg = display_cfg;
11495 CalculateStutterEfficiency_params->CompressedBufferSizeInkByte = mode_lib->mp.CompressedBufferSizeInkByte;
11496 CalculateStutterEfficiency_params->UnboundedRequestEnabled = mode_lib->mp.UnboundedRequestEnabled;
11497 CalculateStutterEfficiency_params->MetaFIFOSizeInKEntries = mode_lib->ip.meta_fifo_size_in_kentries;
11498 CalculateStutterEfficiency_params->ZeroSizeBufferEntries = mode_lib->ip.zero_size_buffer_entries;
11499 CalculateStutterEfficiency_params->PixelChunkSizeInKByte = mode_lib->ip.pixel_chunk_size_kbytes;
11500 CalculateStutterEfficiency_params->NumberOfActiveSurfaces = s->num_active_planes;
11501 CalculateStutterEfficiency_params->ROBBufferSizeInKByte = mode_lib->ip.rob_buffer_size_kbytes;
11502 CalculateStutterEfficiency_params->TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth;
11503 CalculateStutterEfficiency_params->DCFCLK = mode_lib->mp.Dcfclk;
11504 CalculateStutterEfficiency_params->ReturnBW = mode_lib->mp.urg_bandwidth_available_min[dml2_core_internal_soc_state_sys_active];
11505 CalculateStutterEfficiency_params->CompbufReservedSpace64B = mode_lib->mp.compbuf_reserved_space_64b;
11506 CalculateStutterEfficiency_params->CompbufReservedSpaceZs = mode_lib->ip.compbuf_reserved_space_zs;
11507 CalculateStutterEfficiency_params->SRExitTime = mode_lib->soc.power_management_parameters.stutter_exit_latency_us;
11508 CalculateStutterEfficiency_params->SRExitZ8Time = mode_lib->soc.power_management_parameters.z8_stutter_exit_latency_us;
11509 CalculateStutterEfficiency_params->SynchronizeTimings = display_cfg->overrides.synchronize_timings;
11510 CalculateStutterEfficiency_params->StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.StutterEnterPlusExitWatermark;
11511 CalculateStutterEfficiency_params->Z8StutterEnterPlusExitWatermark = mode_lib->mp.Watermark.Z8StutterEnterPlusExitWatermark;
11512 CalculateStutterEfficiency_params->ProgressiveToInterlaceUnitInOPP = mode_lib->ip.ptoi_supported;
11513 CalculateStutterEfficiency_params->MinTTUVBlank = mode_lib->mp.MinTTUVBlank;
11514 CalculateStutterEfficiency_params->DPPPerSurface = mode_lib->mp.NoOfDPP;
11515 CalculateStutterEfficiency_params->DETBufferSizeY = mode_lib->mp.DETBufferSizeY;
11516 CalculateStutterEfficiency_params->BytePerPixelY = mode_lib->mp.BytePerPixelY;
11517 CalculateStutterEfficiency_params->BytePerPixelDETY = mode_lib->mp.BytePerPixelInDETY;
11518 CalculateStutterEfficiency_params->SwathWidthY = mode_lib->mp.SwathWidthY;
11519 CalculateStutterEfficiency_params->SwathHeightY = mode_lib->mp.SwathHeightY;
11520 CalculateStutterEfficiency_params->SwathHeightC = mode_lib->mp.SwathHeightC;
11521 CalculateStutterEfficiency_params->BlockHeight256BytesY = mode_lib->mp.Read256BlockHeightY;
11522 CalculateStutterEfficiency_params->BlockWidth256BytesY = mode_lib->mp.Read256BlockWidthY;
11523 CalculateStutterEfficiency_params->BlockHeight256BytesC = mode_lib->mp.Read256BlockHeightC;
11524 CalculateStutterEfficiency_params->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC;
11525 CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = mode_lib->mp.DCCYMaxUncompressedBlock;
11526 CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = mode_lib->mp.DCCCMaxUncompressedBlock;
11527 CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.SurfaceReadBandwidthLuma;
11528 CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.SurfaceReadBandwidthChroma;
11529 CalculateStutterEfficiency_params->dpte_row_bw = mode_lib->mp.dpte_row_bw;
11530 CalculateStutterEfficiency_params->meta_row_bw = mode_lib->mp.meta_row_bw;
11531 CalculateStutterEfficiency_params->rob_alloc_compressed = mode_lib->ip.dcn_mrq_present;
11532
11533 // output
11534 CalculateStutterEfficiency_params->StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.StutterEfficiencyNotIncludingVBlank;
11535 CalculateStutterEfficiency_params->StutterEfficiency = &mode_lib->mp.StutterEfficiency;
11536 CalculateStutterEfficiency_params->NumberOfStutterBurstsPerFrame = &mode_lib->mp.NumberOfStutterBurstsPerFrame;
11537 CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank;
11538 CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiency;
11539 CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrame;
11540 CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriod;
11541 CalculateStutterEfficiency_params->DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE = &mode_lib->mp.DCHUBBUB_ARB_CSTATE_MAX_CAP_MODE;
11542
11543 // Stutter Efficiency
11544 CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params);
11545
11546 #ifdef __DML_VBA_ALLOW_DELTA__
11547 // Calculate z8 stutter eff assuming 0 reserved space
11548 CalculateStutterEfficiency_params->CompbufReservedSpace64B = 0;
11549 CalculateStutterEfficiency_params->CompbufReservedSpaceZs = 0;
11550
11551 CalculateStutterEfficiency_params->Z8StutterEfficiencyNotIncludingVBlank = &mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase;
11552 CalculateStutterEfficiency_params->Z8StutterEfficiency = &mode_lib->mp.Z8StutterEfficiencyBestCase;
11553 CalculateStutterEfficiency_params->Z8NumberOfStutterBurstsPerFrame = &mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase;
11554 CalculateStutterEfficiency_params->StutterPeriod = &mode_lib->mp.StutterPeriodBestCase;
11555
11556 // Stutter Efficiency
11557 CalculateStutterEfficiency(&mode_lib->scratch, CalculateStutterEfficiency_params);
11558 #else
11559 mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlankBestCase = mode_lib->mp.Z8StutterEfficiencyNotIncludingVBlank;
11560 mode_lib->mp.Z8StutterEfficiencyBestCase = mode_lib->mp.Z8StutterEfficiency;
11561 mode_lib->mp.Z8NumberOfStutterBurstsPerFrameBestCase = mode_lib->mp.Z8NumberOfStutterBurstsPerFrame;
11562 mode_lib->mp.StutterPeriodBestCase = mode_lib->mp.StutterPeriod;
11563 #endif
11564 } // PrefetchAndImmediateFlipSupported
11565
11566 max_uclk_mhz = mode_lib->soc.clk_table.uclk.clk_values_khz[mode_lib->soc.clk_table.uclk.num_clk_values - 1] / 1000.0;
11567 min_return_latency_in_DCFCLK_cycles = (min_return_uclk_cycles / max_uclk_mhz + min_return_fclk_cycles / max_fclk_mhz) * hard_minimum_dcfclk_mhz;
11568 mode_lib->mp.min_return_latency_in_dcfclk = (unsigned int)min_return_latency_in_DCFCLK_cycles;
11569 mode_lib->mp.dcfclk_deep_sleep_hysteresis = (unsigned int)math_max2(32, (double)mode_lib->ip.pixel_chunk_size_kbytes * 1024 * 3 / 4 / 64 - min_return_latency_in_DCFCLK_cycles);
11570 DML2_ASSERT(mode_lib->mp.dcfclk_deep_sleep_hysteresis < 256);
11571
11572 #ifdef __DML_VBA_DEBUG__
11573 dml2_printf("DML::%s: max_fclk_mhz = %f\n", __func__, max_fclk_mhz);
11574 dml2_printf("DML::%s: max_uclk_mhz = %f\n", __func__, max_uclk_mhz);
11575 dml2_printf("DML::%s: hard_minimum_dcfclk_mhz = %f\n", __func__, hard_minimum_dcfclk_mhz);
11576 dml2_printf("DML::%s: min_return_uclk_cycles = %d\n", __func__, min_return_uclk_cycles);
11577 dml2_printf("DML::%s: min_return_fclk_cycles = %d\n", __func__, min_return_fclk_cycles);
11578 dml2_printf("DML::%s: min_return_latency_in_DCFCLK_cycles = %f\n", __func__, min_return_latency_in_DCFCLK_cycles);
11579 dml2_printf("DML::%s: dcfclk_deep_sleep_hysteresis = %d \n", __func__, mode_lib->mp.dcfclk_deep_sleep_hysteresis);
11580 dml2_printf("DML::%s: --- END --- \n", __func__);
11581 #endif
11582 return (in_out_params->mode_lib->mp.PrefetchAndImmediateFlipSupported);
11583 }
11584
/*
 * Logging wrapper around dml_core_mode_programming().
 *
 * Emits a START line, runs dml_core_mode_programming() on in_out_params,
 * logs the boolean it produced, emits a DONE line and hands that same
 * boolean back to the caller.
 */
bool dml2_core_calcs_mode_programming_ex(struct dml2_core_calcs_mode_programming_ex *in_out_params)
{
	bool result;

	dml2_printf("DML::%s: ------------- START ----------\n", __func__);

	result = dml_core_mode_programming(in_out_params);

	dml2_printf("DML::%s: result = %0d\n", __func__, result);
	dml2_printf("DML::%s: ------------- DONE ----------\n", __func__);

	return result;
}
11594
dml2_core_calcs_get_dpte_row_height(unsigned int * dpte_row_height,struct dml2_core_internal_display_mode_lib * mode_lib,bool is_plane1,enum dml2_source_format_class SourcePixelFormat,enum dml2_swizzle_mode SurfaceTiling,enum dml2_rotation_angle ScanDirection,unsigned int pitch,unsigned int GPUVMMinPageSizeKBytes)11595 void dml2_core_calcs_get_dpte_row_height(
11596 unsigned int *dpte_row_height,
11597 struct dml2_core_internal_display_mode_lib *mode_lib,
11598 bool is_plane1,
11599 enum dml2_source_format_class SourcePixelFormat,
11600 enum dml2_swizzle_mode SurfaceTiling,
11601 enum dml2_rotation_angle ScanDirection,
11602 unsigned int pitch,
11603 unsigned int GPUVMMinPageSizeKBytes)
11604 {
11605 unsigned int BytePerPixelY;
11606 unsigned int BytePerPixelC;
11607 double BytePerPixelInDETY;
11608 double BytePerPixelInDETC;
11609 unsigned int BlockHeight256BytesY;
11610 unsigned int BlockHeight256BytesC;
11611 unsigned int BlockWidth256BytesY;
11612 unsigned int BlockWidth256BytesC;
11613 unsigned int MacroTileWidthY;
11614 unsigned int MacroTileWidthC;
11615 unsigned int MacroTileHeightY;
11616 unsigned int MacroTileHeightC;
11617 bool surf_linear_128_l = false;
11618 bool surf_linear_128_c = false;
11619
11620 CalculateBytePerPixelAndBlockSizes(
11621 SourcePixelFormat,
11622 SurfaceTiling,
11623 pitch,
11624 pitch,
11625
11626 /* Output */
11627 &BytePerPixelY,
11628 &BytePerPixelC,
11629 &BytePerPixelInDETY,
11630 &BytePerPixelInDETC,
11631 &BlockHeight256BytesY,
11632 &BlockHeight256BytesC,
11633 &BlockWidth256BytesY,
11634 &BlockWidth256BytesC,
11635 &MacroTileHeightY,
11636 &MacroTileHeightC,
11637 &MacroTileWidthY,
11638 &MacroTileWidthC,
11639 &surf_linear_128_l,
11640 &surf_linear_128_c);
11641
11642 unsigned int BytePerPixel = is_plane1 ? BytePerPixelC : BytePerPixelY;
11643 unsigned int BlockHeight256Bytes = is_plane1 ? BlockHeight256BytesC : BlockHeight256BytesY;
11644 unsigned int BlockWidth256Bytes = is_plane1 ? BlockWidth256BytesC : BlockWidth256BytesY;
11645 unsigned int MacroTileWidth = is_plane1 ? MacroTileWidthC : MacroTileWidthY;
11646 unsigned int MacroTileHeight = is_plane1 ? MacroTileHeightC : MacroTileHeightY;
11647 unsigned int PTEBufferSizeInRequests = is_plane1 ? mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma : mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma;
11648 #ifdef __DML_VBA_DEBUG__
11649 dml2_printf("DML: %s: is_plane1 = %u\n", __func__, is_plane1);
11650 dml2_printf("DML: %s: BytePerPixel = %u\n", __func__, BytePerPixel);
11651 dml2_printf("DML: %s: BlockHeight256Bytes = %u\n", __func__, BlockHeight256Bytes);
11652 dml2_printf("DML: %s: BlockWidth256Bytes = %u\n", __func__, BlockWidth256Bytes);
11653 dml2_printf("DML: %s: MacroTileWidth = %u\n", __func__, MacroTileWidth);
11654 dml2_printf("DML: %s: MacroTileHeight = %u\n", __func__, MacroTileHeight);
11655 dml2_printf("DML: %s: PTEBufferSizeInRequests = %u\n", __func__, PTEBufferSizeInRequests);
11656 dml2_printf("DML: %s: dpte_buffer_size_in_pte_reqs_luma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_luma);
11657 dml2_printf("DML: %s: dpte_buffer_size_in_pte_reqs_chroma = %u\n", __func__, mode_lib->ip.dpte_buffer_size_in_pte_reqs_chroma);
11658 dml2_printf("DML: %s: GPUVMMinPageSizeKBytes = %u\n", __func__, GPUVMMinPageSizeKBytes);
11659 #endif
11660 unsigned int dummy_integer[21];
11661
11662 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportStationary = 0;
11663 mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCEnable = 0;
11664 mode_lib->scratch.calculate_vm_and_row_bytes_params.NumberOfDPPs = 1;
11665 mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockHeight256Bytes = BlockHeight256Bytes;
11666 mode_lib->scratch.calculate_vm_and_row_bytes_params.BlockWidth256Bytes = BlockWidth256Bytes;
11667 mode_lib->scratch.calculate_vm_and_row_bytes_params.SourcePixelFormat = SourcePixelFormat;
11668 mode_lib->scratch.calculate_vm_and_row_bytes_params.SurfaceTiling = SurfaceTiling;
11669 mode_lib->scratch.calculate_vm_and_row_bytes_params.BytePerPixel = BytePerPixel;
11670 mode_lib->scratch.calculate_vm_and_row_bytes_params.RotationAngle = ScanDirection;
11671 mode_lib->scratch.calculate_vm_and_row_bytes_params.SwathWidth = 0;
11672 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportHeight = 0;
11673 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportXStart = 0;
11674 mode_lib->scratch.calculate_vm_and_row_bytes_params.ViewportYStart = 0;
11675 mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMEnable = 1;
11676 mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMaxPageTableLevels = 4;
11677 mode_lib->scratch.calculate_vm_and_row_bytes_params.GPUVMMinPageSizeKBytes = GPUVMMinPageSizeKBytes;
11678 mode_lib->scratch.calculate_vm_and_row_bytes_params.PTEBufferSizeInRequests = PTEBufferSizeInRequests;
11679 mode_lib->scratch.calculate_vm_and_row_bytes_params.Pitch = pitch;
11680 mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileWidth = MacroTileWidth;
11681 mode_lib->scratch.calculate_vm_and_row_bytes_params.MacroTileHeight = MacroTileHeight;
11682 mode_lib->scratch.calculate_vm_and_row_bytes_params.is_phantom = 0;
11683 mode_lib->scratch.calculate_vm_and_row_bytes_params.DCCMetaPitch = 0;
11684 mode_lib->scratch.calculate_vm_and_row_bytes_params.mrq_present = 0;
11685
11686 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow = &dummy_integer[1];
11687 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRowStorage = &dummy_integer[2];
11688 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub = &dummy_integer[3];
11689 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height = dpte_row_height;
11690 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_linear = &dummy_integer[4];
11691 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEBytesPerRow_one_row_per_frame = &dummy_integer[5];
11692 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_width_ub_one_row_per_frame = &dummy_integer[6];
11693 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpte_row_height_one_row_per_frame = &dummy_integer[7];
11694 mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_width = &dummy_integer[8];
11695 mode_lib->scratch.calculate_vm_and_row_bytes_params.vmpg_height = &dummy_integer[9];
11696 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqWidth = &dummy_integer[11];
11697 mode_lib->scratch.calculate_vm_and_row_bytes_params.PixelPTEReqHeight = &dummy_integer[12];
11698 mode_lib->scratch.calculate_vm_and_row_bytes_params.PTERequestSize = &dummy_integer[13];
11699 mode_lib->scratch.calculate_vm_and_row_bytes_params.dpde0_bytes_per_frame_ub = &dummy_integer[14];
11700
11701 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_bytes = &dummy_integer[15];
11702 mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestWidth = &dummy_integer[16];
11703 mode_lib->scratch.calculate_vm_and_row_bytes_params.MetaRequestHeight = &dummy_integer[17];
11704 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_width = &dummy_integer[18];
11705 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_row_height = &dummy_integer[19];
11706 mode_lib->scratch.calculate_vm_and_row_bytes_params.meta_pte_bytes_per_frame_ub = &dummy_integer[20];
11707
11708 // just supply with enough parameters to calculate dpte
11709 CalculateVMAndRowBytes(&mode_lib->scratch.calculate_vm_and_row_bytes_params);
11710
11711 #ifdef __DML_VBA_DEBUG__
11712 dml2_printf("DML: %s: dpte_row_height = %u\n", __func__, *dpte_row_height);
11713 #endif
11714 }
11715
is_dual_plane(enum dml2_source_format_class source_format)11716 static bool is_dual_plane(enum dml2_source_format_class source_format)
11717 {
11718 bool ret_val = 0;
11719
11720 if ((source_format == dml2_420_12) || (source_format == dml2_420_8) || (source_format == dml2_420_10) || (source_format == dml2_rgbe_alpha))
11721 ret_val = 1;
11722
11723 return ret_val;
11724 }
11725
dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib * mode_lib,unsigned int pipe_idx)11726 static unsigned int dml_get_plane_idx(const struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int pipe_idx)
11727 {
11728 unsigned int plane_idx = mode_lib->mp.pipe_plane[pipe_idx];
11729 return plane_idx;
11730 }
11731
rq_dlg_get_wm_regs(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_dchub_watermark_regs * wm_regs)11732 static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_dchub_watermark_regs *wm_regs)
11733 {
11734 double refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
11735
11736 wm_regs->fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz);
11737 wm_regs->sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz);
11738 wm_regs->sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz);
11739 wm_regs->temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.g6_temp_read_watermark_us * refclk_freq_in_mhz);
11740 wm_regs->uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz);
11741 wm_regs->urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz);
11742 wm_regs->usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz);
11743 wm_regs->refcyc_per_trip_to_mem = (unsigned int)(mode_lib->mp.UrgentLatency * refclk_freq_in_mhz);
11744 wm_regs->refcyc_per_meta_trip_to_mem = (unsigned int)(mode_lib->mp.MetaTripToMemory * refclk_freq_in_mhz);
11745 wm_regs->frac_urg_bw_flip = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip * 1000);
11746 wm_regs->frac_urg_bw_nom = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidth * 1000);
11747 wm_regs->frac_urg_bw_mall = (unsigned int)(mode_lib->mp.FractionOfUrgentBandwidthMALL * 1000);
11748 }
11749
/*
 * Return math_log2_approx(a) - subtrahend, or 0 when a is 0.
 *
 * NOTE(review): no check that the log2 value is at least subtrahend; with an
 * unsigned result a smaller log2 would presumably wrap to a large value -
 * confirm callers never hit that case.
 */
static unsigned int log_and_substract_if_non_zero(unsigned int a, unsigned int subtrahend)
{
	unsigned int result = 0;

	if (a != 0)
		result = (math_log2_approx(a) - subtrahend);

	return result;
}
11757
dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs * cursor_dlg_regs,const struct dml2_get_cursor_dlg_reg * p)11758 void dml2_core_calcs_cursor_dlg_reg(struct dml2_cursor_dlg_regs *cursor_dlg_regs, const struct dml2_get_cursor_dlg_reg *p)
11759 {
11760 int dst_x_offset = (int) ((p->cursor_x_position + (p->cursor_stereo_en == 0 ? 0 : math_max2(p->cursor_primary_offset, p->cursor_secondary_offset)) -
11761 (p->cursor_hotspot_x * (p->cursor_2x_magnify == 0 ? 1 : 2))) * p->dlg_refclk_mhz / p->pixel_rate_mhz / p->hratio);
11762 cursor_dlg_regs->dst_x_offset = (unsigned int) ((dst_x_offset > 0) ? dst_x_offset : 0);
11763
11764 #ifdef __DML_VBA_DEBUG__
11765 dml2_printf("DML_DLG::%s: cursor_x_position=%d\n", __func__, p->cursor_x_position);
11766 dml2_printf("DML_DLG::%s: dlg_refclk_mhz=%f\n", __func__, p->dlg_refclk_mhz);
11767 dml2_printf("DML_DLG::%s: pixel_rate_mhz=%f\n", __func__, p->pixel_rate_mhz);
11768 dml2_printf("DML_DLG::%s: dst_x_offset=%d\n", __func__, dst_x_offset);
11769 dml2_printf("DML_DLG::%s: dst_x_offset=%d (reg)\n", __func__, cursor_dlg_regs->dst_x_offset);
11770 #endif
11771
11772 cursor_dlg_regs->chunk_hdl_adjust = 3;
11773 cursor_dlg_regs->dst_y_offset = 0;
11774
11775 cursor_dlg_regs->qos_level_fixed = 8;
11776 cursor_dlg_regs->qos_ramp_disable = 0;
11777 }
11778
rq_dlg_get_rq_reg(struct dml2_display_rq_regs * rq_regs,const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,unsigned int pipe_idx)11779 static void rq_dlg_get_rq_reg(struct dml2_display_rq_regs *rq_regs,
11780 const struct dml2_display_cfg *display_cfg,
11781 const struct dml2_core_internal_display_mode_lib *mode_lib,
11782 unsigned int pipe_idx)
11783 {
11784 unsigned int plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
11785 enum dml2_source_format_class source_format = display_cfg->plane_descriptors[plane_idx].pixel_format;
11786 enum dml2_swizzle_mode sw_mode = display_cfg->plane_descriptors[plane_idx].surface.tiling;
11787 bool dual_plane = is_dual_plane((enum dml2_source_format_class)(source_format));
11788
11789 unsigned int pixel_chunk_bytes = 0;
11790 unsigned int min_pixel_chunk_bytes = 0;
11791 unsigned int dpte_group_bytes = 0;
11792 unsigned int mpte_group_bytes = 0;
11793
11794 unsigned int p1_pixel_chunk_bytes = 0;
11795 unsigned int p1_min_pixel_chunk_bytes = 0;
11796 unsigned int p1_dpte_group_bytes = 0;
11797 unsigned int p1_mpte_group_bytes = 0;
11798
11799 unsigned int detile_buf_plane1_addr = 0;
11800 unsigned int detile_buf_size_in_bytes;
11801 double stored_swath_l_bytes;
11802 double stored_swath_c_bytes;
11803 bool is_phantom_pipe;
11804
11805 dml2_printf("DML_DLG::%s: Calculation for pipe[%d] start\n", __func__, pipe_idx);
11806
11807 pixel_chunk_bytes = (unsigned int)(mode_lib->ip.pixel_chunk_size_kbytes * 1024);
11808 min_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.min_pixel_chunk_size_bytes);
11809
11810 if (pixel_chunk_bytes == 64 * 1024)
11811 min_pixel_chunk_bytes = 0;
11812
11813 dpte_group_bytes = (unsigned int)(dml_get_dpte_group_size_in_bytes(mode_lib, pipe_idx));
11814 mpte_group_bytes = (unsigned int)(dml_get_vm_group_size_in_bytes(mode_lib, pipe_idx));
11815
11816 p1_pixel_chunk_bytes = pixel_chunk_bytes;
11817 p1_min_pixel_chunk_bytes = min_pixel_chunk_bytes;
11818 p1_dpte_group_bytes = dpte_group_bytes;
11819 p1_mpte_group_bytes = mpte_group_bytes;
11820
11821 if (source_format == dml2_rgbe_alpha)
11822 p1_pixel_chunk_bytes = (unsigned int)(mode_lib->ip.alpha_pixel_chunk_size_kbytes * 1024);
11823
11824 rq_regs->unbounded_request_enabled = dml_get_unbounded_request_enabled(mode_lib);
11825 rq_regs->rq_regs_l.chunk_size = log_and_substract_if_non_zero(pixel_chunk_bytes, 10);
11826 rq_regs->rq_regs_c.chunk_size = log_and_substract_if_non_zero(p1_pixel_chunk_bytes, 10);
11827
11828 if (min_pixel_chunk_bytes == 0)
11829 rq_regs->rq_regs_l.min_chunk_size = 0;
11830 else
11831 rq_regs->rq_regs_l.min_chunk_size = log_and_substract_if_non_zero(min_pixel_chunk_bytes, 8 - 1);
11832
11833 if (p1_min_pixel_chunk_bytes == 0)
11834 rq_regs->rq_regs_c.min_chunk_size = 0;
11835 else
11836 rq_regs->rq_regs_c.min_chunk_size = log_and_substract_if_non_zero(p1_min_pixel_chunk_bytes, 8 - 1);
11837
11838 rq_regs->rq_regs_l.dpte_group_size = log_and_substract_if_non_zero(dpte_group_bytes, 6);
11839 rq_regs->rq_regs_l.mpte_group_size = log_and_substract_if_non_zero(mpte_group_bytes, 6);
11840 rq_regs->rq_regs_c.dpte_group_size = log_and_substract_if_non_zero(p1_dpte_group_bytes, 6);
11841 rq_regs->rq_regs_c.mpte_group_size = log_and_substract_if_non_zero(p1_mpte_group_bytes, 6);
11842
11843 detile_buf_size_in_bytes = (unsigned int)(dml_get_det_buffer_size_kbytes(mode_lib, pipe_idx) * 1024);
11844
11845 if (sw_mode == dml2_sw_linear && display_cfg->gpuvm_enable) {
11846 unsigned int p0_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_l(mode_lib, pipe_idx));
11847 #ifdef __DML_VBA_DEBUG__
11848 dml2_printf("DML_DLG: %s: p0_pte_row_height_linear = %u\n", __func__, p0_pte_row_height_linear);
11849 #endif
11850 DML2_ASSERT(p0_pte_row_height_linear >= 8);
11851
11852 rq_regs->rq_regs_l.pte_row_height_linear = math_log2_approx(p0_pte_row_height_linear) - 3;
11853 if (dual_plane) {
11854 unsigned int p1_pte_row_height_linear = (unsigned int)(dml_get_dpte_row_height_linear_c(mode_lib, pipe_idx));
11855
11856 #ifdef __DML_VBA_DEBUG__
11857 dml2_printf("DML_DLG: %s: p1_pte_row_height_linear = %u\n", __func__, p1_pte_row_height_linear);
11858 #endif
11859 if (sw_mode == dml2_sw_linear) {
11860 DML2_ASSERT(p1_pte_row_height_linear >= 8);
11861 }
11862 rq_regs->rq_regs_c.pte_row_height_linear = math_log2_approx(p1_pte_row_height_linear) - 3;
11863 }
11864 } else {
11865 rq_regs->rq_regs_l.pte_row_height_linear = 0;
11866 rq_regs->rq_regs_c.pte_row_height_linear = 0;
11867 }
11868
11869 rq_regs->rq_regs_l.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_l(mode_lib, pipe_idx), 0);
11870 rq_regs->rq_regs_c.swath_height = log_and_substract_if_non_zero(dml_get_swath_height_c(mode_lib, pipe_idx), 0);
11871
11872 // FIXME_DCN4, programming guide has dGPU condition
11873 if (pixel_chunk_bytes >= 32 * 1024 || (dual_plane && p1_pixel_chunk_bytes >= 32 * 1024)) { //32kb
11874 rq_regs->drq_expansion_mode = 0;
11875 } else {
11876 rq_regs->drq_expansion_mode = 2;
11877 }
11878 rq_regs->prq_expansion_mode = 1;
11879 rq_regs->crq_expansion_mode = 1;
11880 rq_regs->mrq_expansion_mode = 1;
11881
11882 stored_swath_l_bytes = dml_get_det_stored_buffer_size_l_bytes(mode_lib, pipe_idx);
11883 stored_swath_c_bytes = dml_get_det_stored_buffer_size_c_bytes(mode_lib, pipe_idx);
11884 is_phantom_pipe = dml_get_is_phantom_pipe(display_cfg, mode_lib, pipe_idx);
11885
11886 // Note: detile_buf_plane1_addr is in unit of 1KB
11887 if (dual_plane) {
11888 if (is_phantom_pipe) {
11889 detile_buf_plane1_addr = (unsigned int)((1024.0 * 1024.0) / 2.0 / 1024.0); // half to chroma
11890 } else {
11891 if (stored_swath_l_bytes / stored_swath_c_bytes <= 1.5) {
11892 detile_buf_plane1_addr = (unsigned int)(detile_buf_size_in_bytes / 2.0 / 1024.0); // half to chroma
11893 #ifdef __DML_VBA_DEBUG__
11894 dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d (1/2 to chroma)\n", __func__, detile_buf_plane1_addr);
11895 #endif
11896 } else {
11897 detile_buf_plane1_addr = (unsigned int)(dml_round_to_multiple((unsigned int)((2.0 * detile_buf_size_in_bytes) / 3.0), 1024, 0) / 1024.0); // 2/3 to luma
11898 #ifdef __DML_VBA_DEBUG__
11899 dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d (1/3 chroma)\n", __func__, detile_buf_plane1_addr);
11900 #endif
11901 }
11902 }
11903 }
11904 rq_regs->plane1_base_address = detile_buf_plane1_addr;
11905
11906 #ifdef __DML_VBA_DEBUG__
11907 dml2_printf("DML_DLG: %s: is_phantom_pipe = %d\n", __func__, is_phantom_pipe);
11908 dml2_printf("DML_DLG: %s: stored_swath_l_bytes = %f\n", __func__, stored_swath_l_bytes);
11909 dml2_printf("DML_DLG: %s: stored_swath_c_bytes = %f\n", __func__, stored_swath_c_bytes);
11910 dml2_printf("DML_DLG: %s: detile_buf_size_in_bytes = %d\n", __func__, detile_buf_size_in_bytes);
11911 dml2_printf("DML_DLG: %s: detile_buf_plane1_addr = %d\n", __func__, detile_buf_plane1_addr);
11912 dml2_printf("DML_DLG: %s: plane1_base_address = %d\n", __func__, rq_regs->plane1_base_address);
11913 #endif
11914 //dml2_printf_rq_regs_st(rq_regs);
11915 dml2_printf("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
11916 }
11917
rq_dlg_get_dlg_reg(struct dml2_core_internal_scratch * s,struct dml2_display_dlg_regs * disp_dlg_regs,struct dml2_display_ttu_regs * disp_ttu_regs,const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,const unsigned int pipe_idx)11918 static void rq_dlg_get_dlg_reg(
11919 struct dml2_core_internal_scratch *s,
11920 struct dml2_display_dlg_regs *disp_dlg_regs,
11921 struct dml2_display_ttu_regs *disp_ttu_regs,
11922 const struct dml2_display_cfg *display_cfg,
11923 const struct dml2_core_internal_display_mode_lib *mode_lib,
11924 const unsigned int pipe_idx)
11925 {
11926 struct dml2_core_shared_rq_dlg_get_dlg_reg_locals *l = &s->rq_dlg_get_dlg_reg_locals;
11927
11928 memset(l, 0, sizeof(struct dml2_core_shared_rq_dlg_get_dlg_reg_locals));
11929
11930 dml2_printf("DML_DLG::%s: Calculation for pipe_idx=%d\n", __func__, pipe_idx);
11931
11932 l->plane_idx = dml_get_plane_idx(mode_lib, pipe_idx);
11933 dml2_assert(l->plane_idx < DML2_MAX_PLANES);
11934
11935 l->source_format = dml2_444_8;
11936 l->odm_mode = dml2_odm_mode_bypass;
11937 l->dual_plane = false;
11938 l->htotal = 0;
11939 l->hactive = 0;
11940 l->hblank_end = 0;
11941 l->vblank_end = 0;
11942 l->interlaced = false;
11943 l->pclk_freq_in_mhz = 0.0;
11944 l->refclk_freq_in_mhz = (display_cfg->overrides.hw.dlg_ref_clk_mhz > 0) ? (double)display_cfg->overrides.hw.dlg_ref_clk_mhz : mode_lib->soc.dchub_refclk_mhz;
11945 l->ref_freq_to_pix_freq = 0.0;
11946
11947 if (l->plane_idx < DML2_MAX_PLANES) {
11948
11949 l->timing = &display_cfg->stream_descriptors[display_cfg->plane_descriptors[l->plane_idx].stream_index].timing;
11950 l->source_format = display_cfg->plane_descriptors[l->plane_idx].pixel_format;
11951 l->odm_mode = mode_lib->mp.ODMMode[l->plane_idx];
11952
11953 l->dual_plane = is_dual_plane(l->source_format);
11954
11955 l->htotal = l->timing->h_total;
11956 l->hactive = l->timing->h_active;
11957 l->hblank_end = l->timing->h_blank_end;
11958 l->vblank_end = l->timing->v_blank_end;
11959 l->interlaced = l->timing->interlaced;
11960 l->pclk_freq_in_mhz = (double)l->timing->pixel_clock_khz / 1000;
11961 l->ref_freq_to_pix_freq = l->refclk_freq_in_mhz / l->pclk_freq_in_mhz;
11962
11963 dml2_printf("DML_DLG::%s: plane_idx = %d\n", __func__, l->plane_idx);
11964 dml2_printf("DML_DLG: %s: htotal = %d\n", __func__, l->htotal);
11965 dml2_printf("DML_DLG: %s: refclk_freq_in_mhz = %3.2f\n", __func__, l->refclk_freq_in_mhz);
11966 dml2_printf("DML_DLG: %s: dlg_ref_clk_mhz = %3.2f\n", __func__, display_cfg->overrides.hw.dlg_ref_clk_mhz);
11967 dml2_printf("DML_DLG: %s: soc.refclk_mhz = %3.2f\n", __func__, mode_lib->soc.dchub_refclk_mhz);
11968 dml2_printf("DML_DLG: %s: pclk_freq_in_mhz = %3.2f\n", __func__, l->pclk_freq_in_mhz);
11969 dml2_printf("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq);
11970 dml2_printf("DML_DLG: %s: interlaced = %d\n", __func__, l->interlaced);
11971
11972 DML2_ASSERT(l->refclk_freq_in_mhz != 0);
11973 DML2_ASSERT(l->pclk_freq_in_mhz != 0);
11974 DML2_ASSERT(l->ref_freq_to_pix_freq < 4.0);
11975
11976 // Need to figure out which side of odm combine we're in
11977 // Assume the pipe instance under the same plane is in order
11978
11979 if (l->odm_mode == dml2_odm_mode_bypass) {
11980 disp_dlg_regs->refcyc_h_blank_end = (unsigned int)((double)l->hblank_end * l->ref_freq_to_pix_freq);
11981 } else if (l->odm_mode == dml2_odm_mode_combine_2to1 || l->odm_mode == dml2_odm_mode_combine_3to1 || l->odm_mode == dml2_odm_mode_combine_4to1) {
11982 // find out how many pipe are in this plane
11983 l->num_active_pipes = mode_lib->mp.num_active_pipes;
11984 l->first_pipe_idx_in_plane = DML2_MAX_PLANES;
11985 l->pipe_idx_in_combine = 0; // pipe index within the plane
11986 l->odm_combine_factor = 2;
11987
11988 if (l->odm_mode == dml2_odm_mode_combine_3to1)
11989 l->odm_combine_factor = 3;
11990 else if (l->odm_mode == dml2_odm_mode_combine_4to1)
11991 l->odm_combine_factor = 4;
11992
11993 for (unsigned int i = 0; i < l->num_active_pipes; i++) {
11994 if (dml_get_plane_idx(mode_lib, i) == l->plane_idx) {
11995 if (i < l->first_pipe_idx_in_plane) {
11996 l->first_pipe_idx_in_plane = i;
11997 }
11998 }
11999 }
12000 l->pipe_idx_in_combine = pipe_idx - l->first_pipe_idx_in_plane; // DML assumes the pipes in the same plane will have continuous indexing (i.e. plane 0 use pipe 0, 1, and plane 1 uses pipe 2, 3, etc.)
12001
12002 disp_dlg_regs->refcyc_h_blank_end = (unsigned int)(((double)l->hblank_end + (double)l->pipe_idx_in_combine * (double)l->hactive / (double)l->odm_combine_factor) * l->ref_freq_to_pix_freq);
12003 dml2_printf("DML_DLG: %s: pipe_idx = %d\n", __func__, pipe_idx);
12004 dml2_printf("DML_DLG: %s: first_pipe_idx_in_plane = %d\n", __func__, l->first_pipe_idx_in_plane);
12005 dml2_printf("DML_DLG: %s: pipe_idx_in_combine = %d\n", __func__, l->pipe_idx_in_combine);
12006 dml2_printf("DML_DLG: %s: odm_combine_factor = %d\n", __func__, l->odm_combine_factor);
12007 }
12008 dml2_printf("DML_DLG: %s: refcyc_h_blank_end = %d\n", __func__, disp_dlg_regs->refcyc_h_blank_end);
12009
12010 DML2_ASSERT(disp_dlg_regs->refcyc_h_blank_end < (unsigned int)math_pow(2, 13));
12011
12012 disp_dlg_regs->ref_freq_to_pix_freq = (unsigned int)(l->ref_freq_to_pix_freq * math_pow(2, 19));
12013 disp_dlg_regs->refcyc_per_htotal = (unsigned int)(l->ref_freq_to_pix_freq * (double)l->htotal * math_pow(2, 8));
12014 disp_dlg_regs->dlg_vblank_end = l->interlaced ? (l->vblank_end / 2) : l->vblank_end; // 15 bits
12015
12016 l->min_ttu_vblank = mode_lib->mp.MinTTUVBlank[mode_lib->mp.pipe_plane[pipe_idx]];
12017 l->min_dst_y_next_start = (unsigned int)(mode_lib->mp.MIN_DST_Y_NEXT_START[mode_lib->mp.pipe_plane[pipe_idx]]);
12018
12019 dml2_printf("DML_DLG: %s: min_ttu_vblank (us) = %3.2f\n", __func__, l->min_ttu_vblank);
12020 dml2_printf("DML_DLG: %s: min_dst_y_next_start = %d\n", __func__, l->min_dst_y_next_start);
12021 dml2_printf("DML_DLG: %s: ref_freq_to_pix_freq = %3.2f\n", __func__, l->ref_freq_to_pix_freq);
12022
12023 l->vready_after_vcount0 = (unsigned int)(mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[mode_lib->mp.pipe_plane[pipe_idx]]);
12024 disp_dlg_regs->vready_after_vcount0 = l->vready_after_vcount0;
12025
12026 dml2_printf("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, disp_dlg_regs->vready_after_vcount0);
12027
12028 l->dst_x_after_scaler = (unsigned int)(mode_lib->mp.DSTXAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]);
12029 l->dst_y_after_scaler = (unsigned int)(mode_lib->mp.DSTYAfterScaler[mode_lib->mp.pipe_plane[pipe_idx]]);
12030
12031 dml2_printf("DML_DLG: %s: dst_x_after_scaler = %d\n", __func__, l->dst_x_after_scaler);
12032 dml2_printf("DML_DLG: %s: dst_y_after_scaler = %d\n", __func__, l->dst_y_after_scaler);
12033
12034 l->dst_y_prefetch = mode_lib->mp.dst_y_prefetch[mode_lib->mp.pipe_plane[pipe_idx]];
12035 l->dst_y_per_vm_vblank = mode_lib->mp.dst_y_per_vm_vblank[mode_lib->mp.pipe_plane[pipe_idx]];
12036 l->dst_y_per_row_vblank = mode_lib->mp.dst_y_per_row_vblank[mode_lib->mp.pipe_plane[pipe_idx]];
12037 l->dst_y_per_vm_flip = mode_lib->mp.dst_y_per_vm_flip[mode_lib->mp.pipe_plane[pipe_idx]];
12038 l->dst_y_per_row_flip = mode_lib->mp.dst_y_per_row_flip[mode_lib->mp.pipe_plane[pipe_idx]];
12039
12040 dml2_printf("DML_DLG: %s: dst_y_prefetch (after rnd) = %3.2f\n", __func__, l->dst_y_prefetch);
12041 dml2_printf("DML_DLG: %s: dst_y_per_vm_flip = %3.2f\n", __func__, l->dst_y_per_vm_flip);
12042 dml2_printf("DML_DLG: %s: dst_y_per_row_flip = %3.2f\n", __func__, l->dst_y_per_row_flip);
12043 dml2_printf("DML_DLG: %s: dst_y_per_vm_vblank = %3.2f\n", __func__, l->dst_y_per_vm_vblank);
12044 dml2_printf("DML_DLG: %s: dst_y_per_row_vblank = %3.2f\n", __func__, l->dst_y_per_row_vblank);
12045
12046 if (l->dst_y_prefetch > 0 && l->dst_y_per_vm_vblank > 0 && l->dst_y_per_row_vblank > 0) {
12047 DML2_ASSERT(l->dst_y_prefetch > (l->dst_y_per_vm_vblank + l->dst_y_per_row_vblank));
12048 }
12049
12050 l->vratio_pre_l = mode_lib->mp.VRatioPrefetchY[mode_lib->mp.pipe_plane[pipe_idx]];
12051 l->vratio_pre_c = mode_lib->mp.VRatioPrefetchC[mode_lib->mp.pipe_plane[pipe_idx]];
12052
12053 dml2_printf("DML_DLG: %s: vratio_pre_l = %3.2f\n", __func__, l->vratio_pre_l);
12054 dml2_printf("DML_DLG: %s: vratio_pre_c = %3.2f\n", __func__, l->vratio_pre_c);
12055
12056 // Active
12057 l->refcyc_per_line_delivery_pre_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12058 l->refcyc_per_line_delivery_l = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12059
12060 dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_l);
12061 dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_l = %3.2f\n", __func__, l->refcyc_per_line_delivery_l);
12062
12063 l->refcyc_per_line_delivery_pre_c = 0.0;
12064 l->refcyc_per_line_delivery_c = 0.0;
12065
12066 if (l->dual_plane) {
12067 l->refcyc_per_line_delivery_pre_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12068 l->refcyc_per_line_delivery_c = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12069
12070 dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_pre_c);
12071 dml2_printf("DML_DLG: %s: refcyc_per_line_delivery_c = %3.2f\n", __func__, l->refcyc_per_line_delivery_c);
12072 }
12073
12074 disp_dlg_regs->refcyc_per_vm_dmdata = (unsigned int)(mode_lib->mp.Tdmdl_vm[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12075 disp_dlg_regs->dmdata_dl_delta = (unsigned int)(mode_lib->mp.Tdmdl[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12076
12077 l->refcyc_per_req_delivery_pre_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12078 l->refcyc_per_req_delivery_l = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12079
12080 dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_pre_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_l);
12081 dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_l = %3.2f\n", __func__, l->refcyc_per_req_delivery_l);
12082
12083 l->refcyc_per_req_delivery_pre_c = 0.0;
12084 l->refcyc_per_req_delivery_c = 0.0;
12085 if (l->dual_plane) {
12086 l->refcyc_per_req_delivery_pre_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12087 l->refcyc_per_req_delivery_c = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12088
12089 dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_pre_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_pre_c);
12090 dml2_printf("DML_DLG: %s: refcyc_per_req_delivery_c = %3.2f\n", __func__, l->refcyc_per_req_delivery_c);
12091 }
12092
12093 // TTU - Cursor
12094 DML2_ASSERT(display_cfg->plane_descriptors[l->plane_idx].cursor.num_cursors <= 1);
12095
12096 // Assign to register structures
12097 disp_dlg_regs->min_dst_y_next_start = (unsigned int)((double)l->min_dst_y_next_start * math_pow(2, 2));
12098 DML2_ASSERT(disp_dlg_regs->min_dst_y_next_start < (unsigned int)math_pow(2, 18));
12099
12100 disp_dlg_regs->dst_y_after_scaler = l->dst_y_after_scaler; // in terms of line
12101 disp_dlg_regs->refcyc_x_after_scaler = (unsigned int)((double)l->dst_x_after_scaler * l->ref_freq_to_pix_freq); // in terms of refclk
12102 disp_dlg_regs->dst_y_prefetch = (unsigned int)(l->dst_y_prefetch * math_pow(2, 2));
12103 disp_dlg_regs->dst_y_per_vm_vblank = (unsigned int)(l->dst_y_per_vm_vblank * math_pow(2, 2));
12104 disp_dlg_regs->dst_y_per_row_vblank = (unsigned int)(l->dst_y_per_row_vblank * math_pow(2, 2));
12105 disp_dlg_regs->dst_y_per_vm_flip = (unsigned int)(l->dst_y_per_vm_flip * math_pow(2, 2));
12106 disp_dlg_regs->dst_y_per_row_flip = (unsigned int)(l->dst_y_per_row_flip * math_pow(2, 2));
12107
12108 disp_dlg_regs->vratio_prefetch = (unsigned int)(l->vratio_pre_l * math_pow(2, 19));
12109 disp_dlg_regs->vratio_prefetch_c = (unsigned int)(l->vratio_pre_c * math_pow(2, 19));
12110
12111 dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_vblank);
12112 dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank);
12113 dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip);
12114 dml2_printf("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip);
12115
12116 disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(mode_lib->mp.TimePerVMGroupVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12117 disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(mode_lib->mp.TimePerVMGroupFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz);
12118 disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(mode_lib->mp.TimePerVMRequestVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10));
12119 disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(mode_lib->mp.TimePerVMRequestFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz * math_pow(2, 10));
12120
12121 l->dst_y_per_pte_row_nom_l = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]];
12122 l->dst_y_per_pte_row_nom_c = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]];
12123 l->refcyc_per_pte_group_nom_l = mode_lib->mp.time_per_pte_group_nom_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12124 l->refcyc_per_pte_group_nom_c = mode_lib->mp.time_per_pte_group_nom_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12125 l->refcyc_per_pte_group_vblank_l = mode_lib->mp.time_per_pte_group_vblank_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12126 l->refcyc_per_pte_group_vblank_c = mode_lib->mp.time_per_pte_group_vblank_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12127 l->refcyc_per_pte_group_flip_l = mode_lib->mp.time_per_pte_group_flip_luma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12128 l->refcyc_per_pte_group_flip_c = mode_lib->mp.time_per_pte_group_flip_chroma[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12129 l->refcyc_per_tdlut_group = mode_lib->mp.time_per_tdlut_group[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12130
12131 disp_dlg_regs->dst_y_per_pte_row_nom_l = (unsigned int)(l->dst_y_per_pte_row_nom_l * math_pow(2, 2));
12132 disp_dlg_regs->dst_y_per_pte_row_nom_c = (unsigned int)(l->dst_y_per_pte_row_nom_c * math_pow(2, 2));
12133
12134 disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(l->refcyc_per_pte_group_nom_l);
12135 disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(l->refcyc_per_pte_group_nom_c);
12136 disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int)(l->refcyc_per_pte_group_vblank_l);
12137 disp_dlg_regs->refcyc_per_pte_group_vblank_c = (unsigned int)(l->refcyc_per_pte_group_vblank_c);
12138 disp_dlg_regs->refcyc_per_pte_group_flip_l = (unsigned int)(l->refcyc_per_pte_group_flip_l);
12139 disp_dlg_regs->refcyc_per_pte_group_flip_c = (unsigned int)(l->refcyc_per_pte_group_flip_c);
12140 disp_dlg_regs->refcyc_per_line_delivery_pre_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_l, 1);
12141 disp_dlg_regs->refcyc_per_line_delivery_l = (unsigned int)math_floor2(l->refcyc_per_line_delivery_l, 1);
12142 disp_dlg_regs->refcyc_per_line_delivery_pre_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_pre_c, 1);
12143 disp_dlg_regs->refcyc_per_line_delivery_c = (unsigned int)math_floor2(l->refcyc_per_line_delivery_c, 1);
12144
12145 l->dst_y_per_meta_row_nom_l = mode_lib->mp.DST_Y_PER_META_ROW_NOM_L[mode_lib->mp.pipe_plane[pipe_idx]];
12146 l->dst_y_per_meta_row_nom_c = mode_lib->mp.DST_Y_PER_META_ROW_NOM_C[mode_lib->mp.pipe_plane[pipe_idx]];
12147 l->refcyc_per_meta_chunk_nom_l = mode_lib->mp.TimePerMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12148 l->refcyc_per_meta_chunk_nom_c = mode_lib->mp.TimePerChromaMetaChunkNominal[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12149 l->refcyc_per_meta_chunk_vblank_l = mode_lib->mp.TimePerMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12150 l->refcyc_per_meta_chunk_vblank_c = mode_lib->mp.TimePerChromaMetaChunkVBlank[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12151 l->refcyc_per_meta_chunk_flip_l = mode_lib->mp.TimePerMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12152 l->refcyc_per_meta_chunk_flip_c = mode_lib->mp.TimePerChromaMetaChunkFlip[mode_lib->mp.pipe_plane[pipe_idx]] * l->refclk_freq_in_mhz;
12153
12154 disp_dlg_regs->dst_y_per_meta_row_nom_l = (unsigned int)(l->dst_y_per_meta_row_nom_l * math_pow(2, 2));
12155 disp_dlg_regs->dst_y_per_meta_row_nom_c = (unsigned int)(l->dst_y_per_meta_row_nom_c * math_pow(2, 2));
12156 disp_dlg_regs->refcyc_per_meta_chunk_nom_l = (unsigned int)(l->refcyc_per_meta_chunk_nom_l);
12157 disp_dlg_regs->refcyc_per_meta_chunk_nom_c = (unsigned int)(l->refcyc_per_meta_chunk_nom_c);
12158 disp_dlg_regs->refcyc_per_meta_chunk_vblank_l = (unsigned int)(l->refcyc_per_meta_chunk_vblank_l);
12159 disp_dlg_regs->refcyc_per_meta_chunk_vblank_c = (unsigned int)(l->refcyc_per_meta_chunk_vblank_c);
12160 disp_dlg_regs->refcyc_per_meta_chunk_flip_l = (unsigned int)(l->refcyc_per_meta_chunk_flip_l);
12161 disp_dlg_regs->refcyc_per_meta_chunk_flip_c = (unsigned int)(l->refcyc_per_meta_chunk_flip_c);
12162
12163 disp_dlg_regs->refcyc_per_tdlut_group = (unsigned int)(l->refcyc_per_tdlut_group);
12164 disp_dlg_regs->dst_y_delta_drq_limit = 0x7fff; // off
12165
12166 disp_ttu_regs->refcyc_per_req_delivery_pre_l = (unsigned int)(l->refcyc_per_req_delivery_pre_l * math_pow(2, 10));
12167 disp_ttu_regs->refcyc_per_req_delivery_l = (unsigned int)(l->refcyc_per_req_delivery_l * math_pow(2, 10));
12168 disp_ttu_regs->refcyc_per_req_delivery_pre_c = (unsigned int)(l->refcyc_per_req_delivery_pre_c * math_pow(2, 10));
12169 disp_ttu_regs->refcyc_per_req_delivery_c = (unsigned int)(l->refcyc_per_req_delivery_c * math_pow(2, 10));
12170 disp_ttu_regs->qos_level_low_wm = 0;
12171
12172 disp_ttu_regs->qos_level_high_wm = (unsigned int)(4.0 * (double)l->htotal * l->ref_freq_to_pix_freq);
12173
12174 disp_ttu_regs->qos_level_flip = 14;
12175 disp_ttu_regs->qos_level_fixed_l = 8;
12176 disp_ttu_regs->qos_level_fixed_c = 8;
12177 disp_ttu_regs->qos_ramp_disable_l = 0;
12178 disp_ttu_regs->qos_ramp_disable_c = 0;
12179 disp_ttu_regs->min_ttu_vblank = (unsigned int)(l->min_ttu_vblank * l->refclk_freq_in_mhz);
12180
12181 // CHECK for HW registers' range, DML2_ASSERT or clamp
12182 DML2_ASSERT(l->refcyc_per_req_delivery_pre_l < math_pow(2, 13));
12183 DML2_ASSERT(l->refcyc_per_req_delivery_l < math_pow(2, 13));
12184 DML2_ASSERT(l->refcyc_per_req_delivery_pre_c < math_pow(2, 13));
12185 DML2_ASSERT(l->refcyc_per_req_delivery_c < math_pow(2, 13));
12186 if (disp_dlg_regs->refcyc_per_vm_group_vblank >= (unsigned int)math_pow(2, 23))
12187 disp_dlg_regs->refcyc_per_vm_group_vblank = (unsigned int)(math_pow(2, 23) - 1);
12188
12189 if (disp_dlg_regs->refcyc_per_vm_group_flip >= (unsigned int)math_pow(2, 23))
12190 disp_dlg_regs->refcyc_per_vm_group_flip = (unsigned int)(math_pow(2, 23) - 1);
12191
12192 if (disp_dlg_regs->refcyc_per_vm_req_vblank >= (unsigned int)math_pow(2, 23))
12193 disp_dlg_regs->refcyc_per_vm_req_vblank = (unsigned int)(math_pow(2, 23) - 1);
12194
12195 if (disp_dlg_regs->refcyc_per_vm_req_flip >= (unsigned int)math_pow(2, 23))
12196 disp_dlg_regs->refcyc_per_vm_req_flip = (unsigned int)(math_pow(2, 23) - 1);
12197
12198
12199 DML2_ASSERT(disp_dlg_regs->dst_y_after_scaler < (unsigned int)8);
12200 DML2_ASSERT(disp_dlg_regs->refcyc_x_after_scaler < (unsigned int)math_pow(2, 13));
12201
12202 if (disp_dlg_regs->dst_y_per_pte_row_nom_l >= (unsigned int)math_pow(2, 17)) {
12203 dml2_printf("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_L %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_l, (unsigned int)math_pow(2, 17) - 1);
12204 l->dst_y_per_pte_row_nom_l = (unsigned int)math_pow(2, 17) - 1;
12205 }
12206 if (l->dual_plane) {
12207 if (disp_dlg_regs->dst_y_per_pte_row_nom_c >= (unsigned int)math_pow(2, 17)) {
12208 dml2_printf("DML_DLG: %s: Warning DST_Y_PER_PTE_ROW_NOM_C %u > register max U15.2 %u, clamp to max\n", __func__, disp_dlg_regs->dst_y_per_pte_row_nom_c, (unsigned int)math_pow(2, 17) - 1);
12209 l->dst_y_per_pte_row_nom_c = (unsigned int)math_pow(2, 17) - 1;
12210 }
12211 }
12212
12213 if (disp_dlg_regs->refcyc_per_pte_group_nom_l >= (unsigned int)math_pow(2, 23))
12214 disp_dlg_regs->refcyc_per_pte_group_nom_l = (unsigned int)(math_pow(2, 23) - 1);
12215 if (l->dual_plane) {
12216 if (disp_dlg_regs->refcyc_per_pte_group_nom_c >= (unsigned int)math_pow(2, 23))
12217 disp_dlg_regs->refcyc_per_pte_group_nom_c = (unsigned int)(math_pow(2, 23) - 1);
12218 }
12219 DML2_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_l < (unsigned int)math_pow(2, 13));
12220 if (l->dual_plane) {
12221 DML2_ASSERT(disp_dlg_regs->refcyc_per_pte_group_vblank_c < (unsigned int)math_pow(2, 13));
12222 }
12223
12224 DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_l < (unsigned int)math_pow(2, 13));
12225 DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_l < (unsigned int)math_pow(2, 13));
12226 DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_pre_c < (unsigned int)math_pow(2, 13));
12227 DML2_ASSERT(disp_dlg_regs->refcyc_per_line_delivery_c < (unsigned int)math_pow(2, 13));
12228 DML2_ASSERT(disp_ttu_regs->qos_level_low_wm < (unsigned int)math_pow(2, 14));
12229 DML2_ASSERT(disp_ttu_regs->qos_level_high_wm < (unsigned int)math_pow(2, 14));
12230 DML2_ASSERT(disp_ttu_regs->min_ttu_vblank < (unsigned int)math_pow(2, 24));
12231
12232 dml2_printf("DML_DLG::%s: Calculation for pipe[%d] done\n", __func__, pipe_idx);
12233
12234 }
12235 }
12236
rq_dlg_get_arb_params(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_display_arb_regs * arb_param)12237 static void rq_dlg_get_arb_params(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_arb_regs *arb_param)
12238 {
12239 arb_param->max_req_outstanding = mode_lib->soc.max_outstanding_reqs;
12240 arb_param->min_req_outstanding = mode_lib->soc.max_outstanding_reqs; // turn off the sat level feature if this set to max
12241 arb_param->sdpif_request_rate_limit = (3 * mode_lib->ip.words_per_channel * mode_lib->soc.clk_table.dram_config.channel_count) / 4;
12242 arb_param->sdpif_request_rate_limit = arb_param->sdpif_request_rate_limit < 96 ? 96 : arb_param->sdpif_request_rate_limit;
12243 arb_param->sat_level_us = 60;
12244 arb_param->hvm_max_qos_commit_threshold = 0xf;
12245 arb_param->hvm_min_req_outstand_commit_threshold = 0xa;
12246 arb_param->compbuf_reserved_space_kbytes = dml_get_compbuf_reserved_space_64b(mode_lib) * 64 / 1024;
12247 arb_param->compbuf_size = mode_lib->mp.CompressedBufferSizeInkByte / mode_lib->ip.compressed_buffer_segment_size_in_kbytes;
12248 arb_param->allow_sdpif_rate_limit_when_cstate_req = dml_get_hw_debug5(mode_lib);
12249 arb_param->dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib);
12250
12251 #ifdef __DML_VBA_DEBUG__
12252 dml2_printf("DML::%s: max_req_outstanding = %d\n", __func__, arb_param->max_req_outstanding);
12253 dml2_printf("DML::%s: sdpif_request_rate_limit = %d\n", __func__, arb_param->sdpif_request_rate_limit);
12254 dml2_printf("DML::%s: compbuf_reserved_space_kbytes = %d\n", __func__, arb_param->compbuf_reserved_space_kbytes);
12255 dml2_printf("DML::%s: allow_sdpif_rate_limit_when_cstate_req = %d\n", __func__, arb_param->allow_sdpif_rate_limit_when_cstate_req);
12256 dml2_printf("DML::%s: dcfclk_deep_sleep_hysteresis = %d\n", __func__, arb_param->dcfclk_deep_sleep_hysteresis);
12257 #endif
12258
12259 }
12260
/*
 * Externally visible wrapper: fills @out by forwarding all three arguments,
 * unchanged, to rq_dlg_get_wm_regs().
 */
void dml2_core_calcs_get_watermarks(const struct dml2_display_cfg *display_cfg,
	const struct dml2_core_internal_display_mode_lib *mode_lib,
	struct dml2_dchub_watermark_regs *out)
{
	rq_dlg_get_wm_regs(display_cfg, mode_lib, out);
}
12265
/*
 * Externally visible wrapper: fills @out with the arbiter register values by
 * forwarding all three arguments, unchanged, to rq_dlg_get_arb_params().
 */
void dml2_core_calcs_get_arb_params(const struct dml2_display_cfg *display_cfg,
	const struct dml2_core_internal_display_mode_lib *mode_lib,
	struct dml2_display_arb_regs *out)
{
	rq_dlg_get_arb_params(display_cfg, mode_lib, out);
}
12270
dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg * display_cfg,struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_dchub_per_pipe_register_set * out,int pipe_index)12271 void dml2_core_calcs_get_pipe_regs(const struct dml2_display_cfg *display_cfg,
12272 struct dml2_core_internal_display_mode_lib *mode_lib,
12273 struct dml2_dchub_per_pipe_register_set *out, int pipe_index)
12274 {
12275 rq_dlg_get_rq_reg(&out->rq_regs, display_cfg, mode_lib, pipe_index);
12276 rq_dlg_get_dlg_reg(&mode_lib->scratch, &out->dlg_regs, &out->ttu_regs, display_cfg, mode_lib, pipe_index);
12277 out->det_size = dml_get_det_buffer_size_kbytes(mode_lib, pipe_index) / mode_lib->ip.config_return_buffer_segment_size_in_kbytes;
12278 }
12279
dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,union dml2_global_sync_programming * out,int pipe_index)12280 void dml2_core_calcs_get_global_sync_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, union dml2_global_sync_programming *out, int pipe_index)
12281 {
12282 out->dcn4x.vready_offset_pixels = dml_get_vready_offset(mode_lib, pipe_index);
12283 out->dcn4x.vstartup_lines = dml_get_vstartup_calculated(mode_lib, pipe_index);
12284 out->dcn4x.vupdate_offset_pixels = dml_get_vupdate_offset(mode_lib, pipe_index);
12285 out->dcn4x.vupdate_vupdate_width_pixels = dml_get_vupdate_width(mode_lib, pipe_index);
12286 out->dcn4x.pstate_keepout_start_lines = dml_get_pstate_keepout_dst_lines(mode_lib, pipe_index);
12287 }
12288
dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_per_stream_programming * out,int pipe_index)12289 void dml2_core_calcs_get_stream_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_per_stream_programming *out, int pipe_index)
12290 {
12291 dml2_core_calcs_get_global_sync_programming(mode_lib, &out->global_sync, pipe_index);
12292 }
12293
dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,const struct display_configuation_with_meta * display_cfg,struct dmub_cmd_fams2_global_config * fams2_global_config)12294 void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib,
12295 const struct display_configuation_with_meta *display_cfg,
12296 struct dmub_cmd_fams2_global_config *fams2_global_config)
12297 {
12298 fams2_global_config->features.bits.enable = display_cfg->stage3.fams2_required;
12299
12300 if (fams2_global_config->features.bits.enable) {
12301 fams2_global_config->features.bits.enable_stall_recovery = true;
12302 fams2_global_config->features.bits.allow_delay_check_mode = FAMS2_ALLOW_DELAY_CHECK_FROM_START;
12303
12304 fams2_global_config->max_allow_delay_us = mode_lib->ip_caps.fams2.max_allow_delay_us;
12305 fams2_global_config->lock_wait_time_us = mode_lib->ip_caps.fams2.lock_timeout_us;
12306 fams2_global_config->recovery_timeout_us = mode_lib->ip_caps.fams2.recovery_timeout_us;
12307 fams2_global_config->hwfq_flip_programming_delay_us = mode_lib->ip_caps.fams2.flip_programming_delay_us;
12308
12309 fams2_global_config->num_streams = display_cfg->display_config.num_streams;
12310 }
12311 }
12312
dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib * mode_lib,const struct display_configuation_with_meta * display_cfg,struct dmub_fams2_stream_static_state * fams2_programming,enum dml2_uclk_pstate_support_method pstate_method,int plane_index)12313 void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib,
12314 const struct display_configuation_with_meta *display_cfg,
12315 struct dmub_fams2_stream_static_state *fams2_programming,
12316 enum dml2_uclk_pstate_support_method pstate_method,
12317 int plane_index)
12318 {
12319 const struct dml2_plane_parameters *plane_descriptor = &display_cfg->display_config.plane_descriptors[plane_index];
12320 const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[plane_descriptor->stream_index];
12321 const struct dml2_fams2_meta *stream_fams2_meta = &display_cfg->stage3.stream_fams2_meta[plane_descriptor->stream_index];
12322
12323 unsigned int i;
12324
12325 if (display_cfg->display_config.overrides.all_streams_blanked) {
12326 /* stream is blanked, so do nothing */
12327 return;
12328 }
12329
12330 /* from display configuration */
12331 fams2_programming->htotal = (uint16_t)stream_descriptor->timing.h_total;
12332 fams2_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total;
12333 fams2_programming->vblank_start = (uint16_t)(stream_fams2_meta->nom_vtotal -
12334 stream_descriptor->timing.v_front_porch);
12335 fams2_programming->vblank_end = (uint16_t)(stream_fams2_meta->nom_vtotal -
12336 stream_descriptor->timing.v_front_porch -
12337 stream_descriptor->timing.v_active);
12338 fams2_programming->config.bits.is_drr = stream_descriptor->timing.drr_config.enabled;
12339
12340 /* from meta */
12341 fams2_programming->otg_vline_time_ns =
12342 (unsigned int)(stream_fams2_meta->otg_vline_time_us * 1000.0);
12343 fams2_programming->scheduling_delay_otg_vlines = (uint8_t)stream_fams2_meta->scheduling_delay_otg_vlines;
12344 fams2_programming->contention_delay_otg_vlines = (uint8_t)stream_fams2_meta->contention_delay_otg_vlines;
12345 fams2_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_fams2_meta->vertical_interrupt_ack_delay_otg_vlines;
12346 fams2_programming->drr_keepout_otg_vline = (uint16_t)(stream_fams2_meta->nom_vtotal -
12347 stream_descriptor->timing.v_front_porch -
12348 stream_fams2_meta->method_drr.programming_delay_otg_vlines);
12349 fams2_programming->allow_to_target_delay_otg_vlines = (uint8_t)stream_fams2_meta->allow_to_target_delay_otg_vlines;
12350 fams2_programming->max_vtotal = (uint16_t)stream_fams2_meta->max_vtotal;
12351
12352 /* from core */
12353 fams2_programming->config.bits.min_ttu_vblank_usable = true;
12354 for (i = 0; i < display_cfg->display_config.num_planes; i++) {
12355 /* check if all planes support p-state in blank */
12356 if (display_cfg->display_config.plane_descriptors[i].stream_index == plane_descriptor->stream_index &&
12357 mode_lib->mp.MinTTUVBlank[i] <= mode_lib->mp.Watermark.DRAMClockChangeWatermark) {
12358 fams2_programming->config.bits.min_ttu_vblank_usable = false;
12359 break;
12360 }
12361 }
12362
12363 switch (pstate_method) {
12364 case dml2_uclk_pstate_support_method_vactive:
12365 case dml2_uclk_pstate_support_method_fw_vactive_drr:
12366 /* legacy vactive */
12367 fams2_programming->type = FAMS2_STREAM_TYPE_VACTIVE;
12368 fams2_programming->sub_state.legacy.vactive_det_fill_delay_otg_vlines =
12369 (uint8_t)stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines;
12370 fams2_programming->allow_start_otg_vline =
12371 (uint16_t)stream_fams2_meta->method_vactive.common.allow_start_otg_vline;
12372 fams2_programming->allow_end_otg_vline =
12373 (uint16_t)stream_fams2_meta->method_vactive.common.allow_end_otg_vline;
12374 fams2_programming->config.bits.clamp_vtotal_min = true;
12375 break;
12376 case dml2_uclk_pstate_support_method_vblank:
12377 case dml2_uclk_pstate_support_method_fw_vblank_drr:
12378 /* legacy vblank */
12379 fams2_programming->type = FAMS2_STREAM_TYPE_VBLANK;
12380 fams2_programming->allow_start_otg_vline =
12381 (uint16_t)stream_fams2_meta->method_vblank.common.allow_start_otg_vline;
12382 fams2_programming->allow_end_otg_vline =
12383 (uint16_t)stream_fams2_meta->method_vblank.common.allow_end_otg_vline;
12384 fams2_programming->config.bits.clamp_vtotal_min = true;
12385 break;
12386 case dml2_uclk_pstate_support_method_fw_drr:
12387 /* drr */
12388 fams2_programming->type = FAMS2_STREAM_TYPE_DRR;
12389 fams2_programming->sub_state.drr.programming_delay_otg_vlines =
12390 (uint8_t)stream_fams2_meta->method_drr.programming_delay_otg_vlines;
12391 fams2_programming->sub_state.drr.nom_stretched_vtotal =
12392 (uint16_t)stream_fams2_meta->method_drr.stretched_vtotal;
12393 fams2_programming->allow_start_otg_vline =
12394 (uint16_t)stream_fams2_meta->method_drr.common.allow_start_otg_vline;
12395 fams2_programming->allow_end_otg_vline =
12396 (uint16_t)stream_fams2_meta->method_drr.common.allow_end_otg_vline;
12397 /* drr only clamps to vtotal min for single display */
12398 fams2_programming->config.bits.clamp_vtotal_min = display_cfg->display_config.num_streams == 1;
12399 fams2_programming->sub_state.drr.only_stretch_if_required = true;
12400 break;
12401 case dml2_uclk_pstate_support_method_fw_subvp_phantom:
12402 case dml2_uclk_pstate_support_method_fw_subvp_phantom_drr:
12403 /* subvp */
12404 fams2_programming->type = FAMS2_STREAM_TYPE_SUBVP;
12405 fams2_programming->sub_state.subvp.vratio_numerator =
12406 (uint16_t)(plane_descriptor->composition.scaler_info.plane0.v_ratio * 1000.0);
12407 fams2_programming->sub_state.subvp.vratio_denominator = 1000;
12408 fams2_programming->sub_state.subvp.programming_delay_otg_vlines =
12409 (uint8_t)stream_fams2_meta->method_subvp.programming_delay_otg_vlines;
12410 fams2_programming->sub_state.subvp.prefetch_to_mall_otg_vlines =
12411 (uint8_t)stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines;
12412 fams2_programming->sub_state.subvp.phantom_vtotal =
12413 (uint16_t)stream_fams2_meta->method_subvp.phantom_vtotal;
12414 fams2_programming->sub_state.subvp.phantom_vactive =
12415 (uint16_t)stream_fams2_meta->method_subvp.phantom_vactive;
12416 fams2_programming->sub_state.subvp.config.bits.is_multi_planar =
12417 plane_descriptor->surface.plane1.height > 0;
12418 fams2_programming->sub_state.subvp.config.bits.is_yuv420 =
12419 plane_descriptor->pixel_format == dml2_420_8 ||
12420 plane_descriptor->pixel_format == dml2_420_10 ||
12421 plane_descriptor->pixel_format == dml2_420_12;
12422
12423 fams2_programming->allow_start_otg_vline =
12424 (uint16_t)stream_fams2_meta->method_subvp.common.allow_start_otg_vline;
12425 fams2_programming->allow_end_otg_vline =
12426 (uint16_t)stream_fams2_meta->method_subvp.common.allow_end_otg_vline;
12427 fams2_programming->config.bits.clamp_vtotal_min = true;
12428 break;
12429 case dml2_uclk_pstate_support_method_reserved_hw:
12430 case dml2_uclk_pstate_support_method_reserved_fw:
12431 case dml2_uclk_pstate_support_method_reserved_fw_drr_fixed:
12432 case dml2_uclk_pstate_support_method_reserved_fw_drr_var:
12433 case dml2_uclk_pstate_support_method_not_supported:
12434 case dml2_uclk_pstate_support_method_count:
12435 default:
12436 /* this should never happen */
12437 break;
12438 }
12439 }
12440
dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_mcache_surface_allocation * out,int plane_idx)12441 void dml2_core_calcs_get_mcache_allocation(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_mcache_surface_allocation *out, int plane_idx)
12442 {
12443 unsigned int n;
12444
12445 out->num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, plane_idx);
12446 out->num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, plane_idx);
12447 out->shift_granularity.p0 = dml_get_plane_mcache_shift_granularity_plane0(mode_lib, plane_idx);
12448 out->shift_granularity.p1 = dml_get_plane_mcache_shift_granularity_plane1(mode_lib, plane_idx);
12449
12450 for (n = 0; n < out->num_mcaches_plane0; n++)
12451 out->mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, plane_idx, n);
12452
12453 for (n = 0; n < out->num_mcaches_plane1; n++)
12454 out->mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, plane_idx, n);
12455
12456 out->last_slice_sharing.mall_comb_mcache_p0 = dml_get_plane_mall_comb_mcache_l(mode_lib, plane_idx);
12457 out->last_slice_sharing.mall_comb_mcache_p1 = dml_get_plane_mall_comb_mcache_c(mode_lib, plane_idx);
12458 out->last_slice_sharing.plane0_plane1 = dml_get_plane_lc_comb_mcache(mode_lib, plane_idx);
12459 out->informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, plane_idx);
12460 out->informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, plane_idx);
12461
12462 out->valid = true;
12463 }
12464
/*
 * Store in @out the value that dml_get_surface_size_in_mall_bytes() reports
 * for @pipe_index.
 *
 * @mode_lib:   mode library instance to query
 * @out:        receives the surface size
 * @pipe_index: index passed through to the getter
 */
void dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index)
{
	const unsigned int mall_bytes = dml_get_surface_size_in_mall_bytes(mode_lib, pipe_index);

	*out = mall_bytes;
}
12469
dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct core_plane_support_info * out,int plane_idx)12470 void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_plane_support_info *out, int plane_idx)
12471 {
12472 out->mall_svp_size_requirement_ways = 0;
12473
12474 out->nominal_vblank_pstate_latency_hiding_us =
12475 (int)(display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.h_total /
12476 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_idx].stream_index].timing.pixel_clock_khz / 1000) * mode_lib->ms.TWait[plane_idx]);
12477
12478 out->dram_change_latency_hiding_margin_in_active = (int)mode_lib->ms.VActiveLatencyHidingMargin[plane_idx];
12479
12480 out->active_latency_hiding_us = (int)mode_lib->ms.VActiveLatencyHidingUs[plane_idx];
12481
12482 out->dram_change_vactive_det_fill_delay_us = (unsigned int)math_ceil(mode_lib->ms.dram_change_vactive_det_fill_delay_us[plane_idx]);
12483 }
12484
dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg * display_cfg,const struct dml2_core_internal_display_mode_lib * mode_lib,struct core_stream_support_info * out,int plane_index)12485 void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index)
12486 {
12487 double phantom_processing_delay_pix;
12488 unsigned int phantom_processing_delay_lines;
12489 unsigned int phantom_min_v_active_lines;
12490 unsigned int phantom_v_active_lines;
12491 unsigned int phantom_v_startup_lines;
12492 unsigned int phantom_v_blank_lines;
12493 unsigned int main_v_blank_lines;
12494 unsigned int rem;
12495
12496 phantom_processing_delay_pix = (double)((mode_lib->ip.subvp_fw_processing_delay_us + mode_lib->ip.subvp_pstate_allow_width_us) *
12497 ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.pixel_clock_khz / 1000));
12498 phantom_processing_delay_lines = (unsigned int)(phantom_processing_delay_pix / (double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total);
12499 dml2_core_div_rem(phantom_processing_delay_pix,
12500 display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.h_total,
12501 &rem);
12502 if (rem)
12503 phantom_processing_delay_lines++;
12504
12505 phantom_v_startup_lines = dml_get_plane_max_vstartup_lines(mode_lib, plane_index);
12506 phantom_min_v_active_lines = (unsigned int)math_ceil((double)dml_get_plane_subviewport_lines_needed_in_mall(mode_lib, plane_index) /
12507 display_cfg->plane_descriptors[plane_index].composition.scaler_info.plane0.v_ratio);
12508 phantom_v_active_lines = phantom_processing_delay_lines + phantom_min_v_active_lines + mode_lib->ip.subvp_swath_height_margin_lines;
12509
12510 // phantom_vblank = max(vbp(vstartup) + vactive + vfp(always 1) + vsync(can be 1), main_vblank)
12511 phantom_v_blank_lines = phantom_v_startup_lines + 1 + 1;
12512 main_v_blank_lines = display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_total - display_cfg->stream_descriptors[display_cfg->plane_descriptors[plane_index].stream_index].timing.v_active;
12513 if (phantom_v_blank_lines > main_v_blank_lines)
12514 phantom_v_blank_lines = main_v_blank_lines;
12515
12516 out->phantom_v_active = phantom_v_active_lines;
12517 // phantom_vtotal = vactive + vblank
12518 out->phantom_v_total = phantom_v_active_lines + phantom_v_blank_lines;
12519
12520 out->phantom_min_v_active = phantom_min_v_active_lines;
12521 out->phantom_v_startup = phantom_v_startup_lines;
12522
12523 out->vblank_reserved_time_us = display_cfg->plane_descriptors[plane_index].overrides.reserved_vblank_time_ns / 1000;
12524 #if defined(__DML_VBA_DEBUG__)
12525 dml2_printf("DML::%s: subvp_fw_processing_delay_us = %d\n", __func__, mode_lib->ip.subvp_fw_processing_delay_us);
12526 dml2_printf("DML::%s: subvp_pstate_allow_width_us = %d\n", __func__, mode_lib->ip.subvp_pstate_allow_width_us);
12527 dml2_printf("DML::%s: subvp_swath_height_margin_lines = %d\n", __func__, mode_lib->ip.subvp_swath_height_margin_lines);
12528 dml2_printf("DML::%s: vblank_reserved_time_us = %f\n", __func__, out->vblank_reserved_time_us);
12529 #endif
12530 }
12531
dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib * mode_lib,struct dml2_display_cfg_programming * out)12532 void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out)
12533 {
12534 unsigned int k, n;
12535
12536 out->informative.mode_support_info.ModeIsSupported = mode_lib->ms.support.ModeSupport;
12537 out->informative.mode_support_info.ImmediateFlipSupport = mode_lib->ms.support.ImmediateFlipSupport;
12538 out->informative.mode_support_info.WritebackLatencySupport = mode_lib->ms.support.WritebackLatencySupport;
12539 out->informative.mode_support_info.ScaleRatioAndTapsSupport = mode_lib->ms.support.ScaleRatioAndTapsSupport;
12540 out->informative.mode_support_info.SourceFormatPixelAndScanSupport = mode_lib->ms.support.SourceFormatPixelAndScanSupport;
12541 out->informative.mode_support_info.P2IWith420 = mode_lib->ms.support.P2IWith420;
12542 out->informative.mode_support_info.DSCOnlyIfNecessaryWithBPP = false;
12543 out->informative.mode_support_info.DSC422NativeNotSupported = mode_lib->ms.support.DSC422NativeNotSupported;
12544 out->informative.mode_support_info.LinkRateDoesNotMatchDPVersion = mode_lib->ms.support.LinkRateDoesNotMatchDPVersion;
12545 out->informative.mode_support_info.LinkRateForMultistreamNotIndicated = mode_lib->ms.support.LinkRateForMultistreamNotIndicated;
12546 out->informative.mode_support_info.BPPForMultistreamNotIndicated = mode_lib->ms.support.BPPForMultistreamNotIndicated;
12547 out->informative.mode_support_info.MultistreamWithHDMIOreDP = mode_lib->ms.support.MultistreamWithHDMIOreDP;
12548 out->informative.mode_support_info.MSOOrODMSplitWithNonDPLink = mode_lib->ms.support.MSOOrODMSplitWithNonDPLink;
12549 out->informative.mode_support_info.NotEnoughLanesForMSO = mode_lib->ms.support.NotEnoughLanesForMSO;
12550 out->informative.mode_support_info.NumberOfOTGSupport = mode_lib->ms.support.NumberOfOTGSupport;
12551 out->informative.mode_support_info.NumberOfHDMIFRLSupport = mode_lib->ms.support.NumberOfHDMIFRLSupport;
12552 out->informative.mode_support_info.NumberOfDP2p0Support = mode_lib->ms.support.NumberOfDP2p0Support;
12553 out->informative.mode_support_info.WritebackScaleRatioAndTapsSupport = mode_lib->ms.support.WritebackScaleRatioAndTapsSupport;
12554 out->informative.mode_support_info.CursorSupport = mode_lib->ms.support.CursorSupport;
12555 out->informative.mode_support_info.PitchSupport = mode_lib->ms.support.PitchSupport;
12556 out->informative.mode_support_info.ViewportExceedsSurface = mode_lib->ms.support.ViewportExceedsSurface;
12557 out->informative.mode_support_info.ImmediateFlipRequiredButTheRequirementForEachSurfaceIsNotSpecified = false;
12558 out->informative.mode_support_info.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe = mode_lib->ms.support.ImmediateFlipOrHostVMAndPStateWithMALLFullFrameOrPhantomPipe;
12559 out->informative.mode_support_info.InvalidCombinationOfMALLUseForPStateAndStaticScreen = mode_lib->ms.support.InvalidCombinationOfMALLUseForPStateAndStaticScreen;
12560 out->informative.mode_support_info.InvalidCombinationOfMALLUseForPState = mode_lib->ms.support.InvalidCombinationOfMALLUseForPState;
12561 out->informative.mode_support_info.ExceededMALLSize = mode_lib->ms.support.ExceededMALLSize;
12562 out->informative.mode_support_info.EnoughWritebackUnits = mode_lib->ms.support.EnoughWritebackUnits;
12563
12564 out->informative.mode_support_info.ExceededMultistreamSlots = mode_lib->ms.support.ExceededMultistreamSlots;
12565 out->informative.mode_support_info.NotEnoughDSCUnits = mode_lib->ms.support.NotEnoughDSCUnits;
12566 out->informative.mode_support_info.NotEnoughDSCSlices = mode_lib->ms.support.NotEnoughDSCSlices;
12567 out->informative.mode_support_info.PixelsPerLinePerDSCUnitSupport = mode_lib->ms.support.PixelsPerLinePerDSCUnitSupport;
12568 out->informative.mode_support_info.DSCCLKRequiredMoreThanSupported = mode_lib->ms.support.DSCCLKRequiredMoreThanSupported;
12569 out->informative.mode_support_info.DTBCLKRequiredMoreThanSupported = mode_lib->ms.support.DTBCLKRequiredMoreThanSupported;
12570 out->informative.mode_support_info.LinkCapacitySupport = mode_lib->ms.support.LinkCapacitySupport;
12571
12572 out->informative.mode_support_info.ROBSupport = mode_lib->ms.support.ROBSupport;
12573 out->informative.mode_support_info.OutstandingRequestsSupport = mode_lib->ms.support.OutstandingRequestsSupport;
12574 out->informative.mode_support_info.OutstandingRequestsUrgencyAvoidance = mode_lib->ms.support.OutstandingRequestsUrgencyAvoidance;
12575 out->informative.mode_support_info.PTEBufferSizeNotExceeded = mode_lib->ms.support.PTEBufferSizeNotExceeded;
12576 out->informative.mode_support_info.DCCMetaBufferSizeNotExceeded = mode_lib->ms.support.DCCMetaBufferSizeNotExceeded;
12577
12578 out->informative.mode_support_info.TotalVerticalActiveBandwidthSupport = mode_lib->ms.support.AvgBandwidthSupport;
12579 out->informative.mode_support_info.VActiveBandwidthSupport = mode_lib->ms.support.UrgVactiveBandwidthSupport;
12580 out->informative.mode_support_info.USRRetrainingSupport = mode_lib->ms.support.USRRetrainingSupport;
12581
12582 out->informative.mode_support_info.PrefetchSupported = mode_lib->ms.support.PrefetchSupported;
12583 out->informative.mode_support_info.DynamicMetadataSupported = mode_lib->ms.support.DynamicMetadataSupported;
12584 out->informative.mode_support_info.VRatioInPrefetchSupported = mode_lib->ms.support.VRatioInPrefetchSupported;
12585 out->informative.mode_support_info.DISPCLK_DPPCLK_Support = mode_lib->ms.support.DISPCLK_DPPCLK_Support;
12586 out->informative.mode_support_info.TotalAvailablePipesSupport = mode_lib->ms.support.TotalAvailablePipesSupport;
12587 out->informative.mode_support_info.ViewportSizeSupport = mode_lib->ms.support.ViewportSizeSupport;
12588
12589 for (k = 0; k < out->display_config.num_planes; k++) {
12590
12591 out->informative.mode_support_info.FCLKChangeSupport[k] = mode_lib->ms.support.FCLKChangeSupport[k];
12592 out->informative.mode_support_info.MPCCombineEnable[k] = mode_lib->ms.support.MPCCombineEnable[k];
12593 out->informative.mode_support_info.ODMMode[k] = mode_lib->ms.support.ODMMode[k];
12594 out->informative.mode_support_info.DPPPerSurface[k] = mode_lib->ms.support.DPPPerSurface[k];
12595 out->informative.mode_support_info.DSCEnabled[k] = mode_lib->ms.support.DSCEnabled[k];
12596 out->informative.mode_support_info.FECEnabled[k] = mode_lib->ms.support.FECEnabled[k];
12597 out->informative.mode_support_info.NumberOfDSCSlices[k] = mode_lib->ms.support.NumberOfDSCSlices[k];
12598 out->informative.mode_support_info.OutputBpp[k] = mode_lib->ms.support.OutputBpp[k];
12599
12600 if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_unknown)
12601 out->informative.mode_support_info.OutputType[k] = dml2_output_type_unknown;
12602 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp)
12603 out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp;
12604 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_edp)
12605 out->informative.mode_support_info.OutputType[k] = dml2_output_type_edp;
12606 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_dp2p0)
12607 out->informative.mode_support_info.OutputType[k] = dml2_output_type_dp2p0;
12608 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmi)
12609 out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmi;
12610 else if (mode_lib->ms.support.OutputType[k] == dml2_core_internal_output_type_hdmifrl)
12611 out->informative.mode_support_info.OutputType[k] = dml2_output_type_hdmifrl;
12612
12613 if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_unknown)
12614 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_unknown;
12615 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr)
12616 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr;
12617 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr2)
12618 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr2;
12619 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_hbr3)
12620 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_hbr3;
12621 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr10)
12622 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr10;
12623 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr13p5)
12624 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr13p5;
12625 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_dp_rate_uhbr20)
12626 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_dp_rate_uhbr20;
12627 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_3x3)
12628 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_3x3;
12629 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x3)
12630 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x3;
12631 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_6x4)
12632 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_6x4;
12633 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_8x4)
12634 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_8x4;
12635 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_10x4)
12636 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_10x4;
12637 else if (mode_lib->ms.support.OutputRate[k] == dml2_core_internal_output_rate_hdmi_rate_12x4)
12638 out->informative.mode_support_info.OutputRate[k] = dml2_output_rate_hdmi_rate_12x4;
12639
12640 out->informative.mode_support_info.AlignedYPitch[k] = mode_lib->ms.support.AlignedYPitch[k];
12641 out->informative.mode_support_info.AlignedCPitch[k] = mode_lib->ms.support.AlignedCPitch[k];
12642 }
12643
12644 out->informative.watermarks.urgent_us = dml_get_wm_urgent(mode_lib);
12645 out->informative.watermarks.writeback_urgent_us = dml_get_wm_writeback_urgent(mode_lib);
12646 out->informative.watermarks.writeback_pstate_us = dml_get_wm_writeback_dram_clock_change(mode_lib);
12647 out->informative.watermarks.writeback_fclk_pstate_us = dml_get_wm_writeback_fclk_change(mode_lib);
12648
12649 out->informative.watermarks.cstate_exit_us = dml_get_wm_stutter_exit(mode_lib);
12650 out->informative.watermarks.cstate_enter_plus_exit_us = dml_get_wm_stutter_enter_exit(mode_lib);
12651 out->informative.watermarks.z8_cstate_exit_us = dml_get_wm_z8_stutter_exit(mode_lib);
12652 out->informative.watermarks.z8_cstate_enter_plus_exit_us = dml_get_wm_z8_stutter_enter_exit(mode_lib);
12653 out->informative.watermarks.pstate_change_us = dml_get_wm_dram_clock_change(mode_lib);
12654 out->informative.watermarks.fclk_pstate_change_us = dml_get_wm_fclk_change(mode_lib);
12655 out->informative.watermarks.usr_retraining_us = dml_get_wm_usr_retraining(mode_lib);
12656 out->informative.watermarks.g6_temp_read_watermark_us = dml_get_wm_g6_temp_read(mode_lib);
12657
12658 out->informative.mall.total_surface_size_in_mall_bytes = 0;
12659 for (k = 0; k < out->display_config.num_planes; ++k)
12660 out->informative.mall.total_surface_size_in_mall_bytes += mode_lib->mp.SurfaceSizeInTheMALL[k];
12661
12662 out->informative.qos.min_return_latency_in_dcfclk = mode_lib->mp.min_return_latency_in_dcfclk;
12663 out->informative.qos.urgent_latency_us = dml_get_urgent_latency(mode_lib);
12664
12665 out->informative.qos.max_urgent_latency_us = dml_get_max_urgent_latency_us(mode_lib);
12666 out->informative.qos.avg_non_urgent_latency_us = dml_get_avg_non_urgent_latency_us(mode_lib);
12667 out->informative.qos.avg_urgent_latency_us = dml_get_avg_urgent_latency_us(mode_lib);
12668
12669 out->informative.qos.wm_memory_trip_us = dml_get_wm_memory_trip(mode_lib);
12670 out->informative.qos.meta_trip_memory_us = dml_get_meta_trip_memory_us(mode_lib);
12671 out->informative.qos.fraction_of_urgent_bandwidth = dml_get_fraction_of_urgent_bandwidth(mode_lib);
12672 out->informative.qos.fraction_of_urgent_bandwidth_immediate_flip = dml_get_fraction_of_urgent_bandwidth_imm_flip(mode_lib);
12673 out->informative.qos.fraction_of_urgent_bandwidth_mall = dml_get_fraction_of_urgent_bandwidth_mall(mode_lib);
12674
12675 out->informative.qos.avg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_required_sdp(mode_lib);
12676 out->informative.qos.avg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_required_dram(mode_lib);
12677 out->informative.qos.avg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_required_sdp(mode_lib);
12678 out->informative.qos.avg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_required_dram(mode_lib);
12679
12680 out->informative.qos.avg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_avg_bw_available_sdp(mode_lib);
12681 out->informative.qos.avg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_avg_bw_available_dram(mode_lib);
12682 out->informative.qos.avg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_avg_bw_available_sdp(mode_lib);
12683 out->informative.qos.avg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_avg_bw_available_dram(mode_lib);
12684
12685 out->informative.qos.urg_bw_available.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_available_sdp(mode_lib);
12686 out->informative.qos.urg_bw_available.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_available_dram(mode_lib);
12687 out->informative.qos.urg_bw_available.sys_active.dram_vm_only_bw_mbps = dml_get_sys_active_urg_bw_available_dram_vm_only(mode_lib);
12688
12689 out->informative.qos.urg_bw_available.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_available_sdp(mode_lib);
12690 out->informative.qos.urg_bw_available.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram(mode_lib);
12691 out->informative.qos.urg_bw_available.svp_prefetch.dram_vm_only_bw_mbps = dml_get_svp_prefetch_urg_bw_available_dram_vm_only(mode_lib);
12692
12693 out->informative.qos.urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp(mode_lib);
12694 out->informative.qos.urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram(mode_lib);
12695 out->informative.qos.urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp(mode_lib);
12696 out->informative.qos.urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram(mode_lib);
12697
12698 out->informative.qos.non_urg_bw_required.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp(mode_lib);
12699 out->informative.qos.non_urg_bw_required.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram(mode_lib);
12700 out->informative.qos.non_urg_bw_required.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp(mode_lib);
12701 out->informative.qos.non_urg_bw_required.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram(mode_lib);
12702
12703 out->informative.qos.urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_urg_bw_required_sdp_flip(mode_lib);
12704 out->informative.qos.urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_urg_bw_required_dram_flip(mode_lib);
12705 out->informative.qos.urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_urg_bw_required_sdp_flip(mode_lib);
12706 out->informative.qos.urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_urg_bw_required_dram_flip(mode_lib);
12707
12708 out->informative.qos.non_urg_bw_required_with_flip.sys_active.sdp_bw_mbps = dml_get_sys_active_non_urg_required_sdp_flip(mode_lib);
12709 out->informative.qos.non_urg_bw_required_with_flip.sys_active.dram_bw_mbps = dml_get_sys_active_non_urg_required_dram_flip(mode_lib);
12710 out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.sdp_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_sdp_flip(mode_lib);
12711 out->informative.qos.non_urg_bw_required_with_flip.svp_prefetch.dram_bw_mbps = dml_get_svp_prefetch_non_urg_bw_required_dram_flip(mode_lib);
12712
12713 out->informative.crb.comp_buffer_size_kbytes = dml_get_comp_buffer_size_kbytes(mode_lib);
12714 out->informative.crb.UnboundedRequestEnabled = dml_get_unbounded_request_enabled(mode_lib);
12715
12716 out->informative.crb.compbuf_reserved_space_64b = dml_get_compbuf_reserved_space_64b(mode_lib);
12717 out->informative.misc.hw_debug5 = dml_get_hw_debug5(mode_lib);
12718 out->informative.misc.dcfclk_deep_sleep_hysteresis = dml_get_dcfclk_deep_sleep_hysteresis(mode_lib);
12719
12720 out->informative.power_management.stutter_efficiency = dml_get_stutter_efficiency_no_vblank(mode_lib);
12721 out->informative.power_management.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib);
12722 out->informative.power_management.stutter_num_bursts = dml_get_stutter_num_bursts(mode_lib);
12723
12724 out->informative.power_management.z8.stutter_efficiency = dml_get_stutter_efficiency_z8(mode_lib);
12725 out->informative.power_management.z8.stutter_efficiency_with_vblank = dml_get_stutter_efficiency(mode_lib);
12726 out->informative.power_management.z8.stutter_num_bursts = dml_get_stutter_num_bursts_z8(mode_lib);
12727 out->informative.power_management.z8.stutter_period = dml_get_stutter_period(mode_lib);
12728
12729 out->informative.power_management.z8.bestcase.stutter_efficiency = dml_get_stutter_efficiency_z8_bestcase(mode_lib);
12730 out->informative.power_management.z8.bestcase.stutter_num_bursts = dml_get_stutter_num_bursts_z8_bestcase(mode_lib);
12731 out->informative.power_management.z8.bestcase.stutter_period = dml_get_stutter_period_bestcase(mode_lib);
12732
12733 out->informative.misc.cstate_max_cap_mode = dml_get_cstate_max_cap_mode(mode_lib);
12734
12735 out->min_clocks.dcn4x.dpprefclk_khz = (int unsigned)dml_get_global_dppclk_khz(mode_lib);
12736
12737 out->informative.qos.max_active_fclk_change_latency_supported = dml_get_fclk_change_latency(mode_lib);
12738
12739 for (k = 0; k < out->display_config.num_planes; k++) {
12740
12741 if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us)
12742 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us)
12743 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us))
12744 out->informative.misc.PrefetchMode[k] = 0;
12745 else if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.fclk_change_blackout_us)
12746 && (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us))
12747 out->informative.misc.PrefetchMode[k] = 1;
12748 else if (out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.stutter_enter_plus_exit_latency_us)
12749 out->informative.misc.PrefetchMode[k] = 2;
12750 else
12751 out->informative.misc.PrefetchMode[k] = 3;
12752
12753 out->informative.misc.min_ttu_vblank_us[k] = mode_lib->mp.MinTTUVBlank[k];
12754 out->informative.mall.subviewport_lines_needed_in_mall[k] = mode_lib->mp.SubViewportLinesNeededInMALL[k];
12755 out->informative.crb.det_size_in_kbytes[k] = mode_lib->mp.DETBufferSizeInKByte[k];
12756 out->informative.crb.DETBufferSizeY[k] = mode_lib->mp.DETBufferSizeY[k];
12757 out->informative.misc.ImmediateFlipSupportedForPipe[k] = mode_lib->mp.ImmediateFlipSupportedForPipe[k];
12758 out->informative.misc.UsesMALLForStaticScreen[k] = mode_lib->mp.is_using_mall_for_ss[k];
12759 out->informative.plane_info[k].dpte_row_height_plane0 = mode_lib->mp.dpte_row_height[k];
12760 out->informative.plane_info[k].dpte_row_height_plane1 = mode_lib->mp.dpte_row_height_chroma[k];
12761 out->informative.plane_info[k].meta_row_height_plane0 = mode_lib->mp.meta_row_height[k];
12762 out->informative.plane_info[k].meta_row_height_plane1 = mode_lib->mp.meta_row_height_chroma[k];
12763 out->informative.dcc_control[k].max_uncompressed_block_plane0 = mode_lib->mp.DCCYMaxUncompressedBlock[k];
12764 out->informative.dcc_control[k].max_compressed_block_plane0 = mode_lib->mp.DCCYMaxCompressedBlock[k];
12765 out->informative.dcc_control[k].independent_block_plane0 = mode_lib->mp.DCCYIndependentBlock[k];
12766 out->informative.dcc_control[k].max_uncompressed_block_plane1 = mode_lib->mp.DCCCMaxUncompressedBlock[k];
12767 out->informative.dcc_control[k].max_compressed_block_plane1 = mode_lib->mp.DCCCMaxCompressedBlock[k];
12768 out->informative.dcc_control[k].independent_block_plane1 = mode_lib->mp.DCCCIndependentBlock[k];
12769 out->informative.misc.dst_x_after_scaler[k] = mode_lib->mp.DSTXAfterScaler[k];
12770 out->informative.misc.dst_y_after_scaler[k] = mode_lib->mp.DSTYAfterScaler[k];
12771 out->informative.misc.prefetch_source_lines_plane0[k] = mode_lib->mp.PrefetchSourceLinesY[k];
12772 out->informative.misc.prefetch_source_lines_plane1[k] = mode_lib->mp.PrefetchSourceLinesC[k];
12773 out->informative.misc.vready_at_or_after_vsync[k] = mode_lib->mp.VREADY_AT_OR_AFTER_VSYNC[k];
12774 out->informative.misc.min_dst_y_next_start[k] = mode_lib->mp.MIN_DST_Y_NEXT_START[k];
12775 out->informative.plane_info[k].swath_width_plane0 = mode_lib->mp.SwathWidthY[k];
12776 out->informative.plane_info[k].swath_height_plane0 = mode_lib->mp.SwathHeightY[k];
12777 out->informative.plane_info[k].swath_height_plane1 = mode_lib->mp.SwathHeightC[k];
12778 out->informative.misc.CursorDstXOffset[k] = mode_lib->mp.CursorDstXOffset[k];
12779 out->informative.misc.CursorDstYOffset[k] = mode_lib->mp.CursorDstYOffset[k];
12780 out->informative.misc.CursorChunkHDLAdjust[k] = mode_lib->mp.CursorChunkHDLAdjust[k];
12781 out->informative.misc.dpte_group_bytes[k] = mode_lib->mp.dpte_group_bytes[k];
12782 out->informative.misc.vm_group_bytes[k] = mode_lib->mp.vm_group_bytes[k];
12783 out->informative.misc.DisplayPipeRequestDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLuma[k];
12784 out->informative.misc.DisplayPipeRequestDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChroma[k];
12785 out->informative.misc.DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeLumaPrefetch[k];
12786 out->informative.misc.DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeRequestDeliveryTimeChromaPrefetch[k];
12787 out->informative.misc.TimePerVMGroupVBlank[k] = mode_lib->mp.TimePerVMGroupVBlank[k];
12788 out->informative.misc.TimePerVMGroupFlip[k] = mode_lib->mp.TimePerVMGroupFlip[k];
12789 out->informative.misc.TimePerVMRequestVBlank[k] = mode_lib->mp.TimePerVMRequestVBlank[k];
12790 out->informative.misc.TimePerVMRequestFlip[k] = mode_lib->mp.TimePerVMRequestFlip[k];
12791 out->informative.misc.Tdmdl_vm[k] = mode_lib->mp.Tdmdl_vm[k];
12792 out->informative.misc.Tdmdl[k] = mode_lib->mp.Tdmdl[k];
12793 out->informative.misc.VStartup[k] = mode_lib->mp.VStartup[k];
12794 out->informative.misc.VUpdateOffsetPix[k] = mode_lib->mp.VUpdateOffsetPix[k];
12795 out->informative.misc.VUpdateWidthPix[k] = mode_lib->mp.VUpdateWidthPix[k];
12796 out->informative.misc.VReadyOffsetPix[k] = mode_lib->mp.VReadyOffsetPix[k];
12797
12798 out->informative.misc.DST_Y_PER_PTE_ROW_NOM_L[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_L[k];
12799 out->informative.misc.DST_Y_PER_PTE_ROW_NOM_C[k] = mode_lib->mp.DST_Y_PER_PTE_ROW_NOM_C[k];
12800 out->informative.misc.time_per_pte_group_nom_luma[k] = mode_lib->mp.time_per_pte_group_nom_luma[k];
12801 out->informative.misc.time_per_pte_group_nom_chroma[k] = mode_lib->mp.time_per_pte_group_nom_chroma[k];
12802 out->informative.misc.time_per_pte_group_vblank_luma[k] = mode_lib->mp.time_per_pte_group_vblank_luma[k];
12803 out->informative.misc.time_per_pte_group_vblank_chroma[k] = mode_lib->mp.time_per_pte_group_vblank_chroma[k];
12804 out->informative.misc.time_per_pte_group_flip_luma[k] = mode_lib->mp.time_per_pte_group_flip_luma[k];
12805 out->informative.misc.time_per_pte_group_flip_chroma[k] = mode_lib->mp.time_per_pte_group_flip_chroma[k];
12806 out->informative.misc.VRatioPrefetchY[k] = mode_lib->mp.VRatioPrefetchY[k];
12807 out->informative.misc.VRatioPrefetchC[k] = mode_lib->mp.VRatioPrefetchC[k];
12808 out->informative.misc.DestinationLinesForPrefetch[k] = mode_lib->mp.dst_y_prefetch[k];
12809 out->informative.misc.DestinationLinesToRequestVMInVBlank[k] = mode_lib->mp.dst_y_per_vm_vblank[k];
12810 out->informative.misc.DestinationLinesToRequestRowInVBlank[k] = mode_lib->mp.dst_y_per_row_vblank[k];
12811 out->informative.misc.DestinationLinesToRequestVMInImmediateFlip[k] = mode_lib->mp.dst_y_per_vm_flip[k];
12812 out->informative.misc.DestinationLinesToRequestRowInImmediateFlip[k] = mode_lib->mp.dst_y_per_row_flip[k];
12813 out->informative.misc.DisplayPipeLineDeliveryTimeLuma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLuma[k];
12814 out->informative.misc.DisplayPipeLineDeliveryTimeChroma[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChroma[k];
12815 out->informative.misc.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[k];
12816 out->informative.misc.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[k];
12817
12818 out->informative.misc.WritebackAllowDRAMClockChangeEndPosition[k] = mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k];
12819 out->informative.misc.WritebackAllowFCLKChangeEndPosition[k] = mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k];
12820 out->informative.misc.DSCCLK_calculated[k] = mode_lib->mp.DSCCLK[k];
12821 out->informative.misc.BIGK_FRAGMENT_SIZE[k] = mode_lib->mp.BIGK_FRAGMENT_SIZE[k];
12822 out->informative.misc.PTE_BUFFER_MODE[k] = mode_lib->mp.PTE_BUFFER_MODE[k];
12823 out->informative.misc.DSCDelay[k] = mode_lib->mp.DSCDelay[k];
12824 out->informative.misc.MaxActiveDRAMClockChangeLatencySupported[k] = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported[k];
12825 }
12826
12827 // For this DV informative layer, all pipes in the same plane simply use the same id.
12828 // The optimization and helper layer will be added later on.
12829 // This only works when we have a high enough "mcache" to fit everything without thrashing the cache.
12830 for (k = 0; k < out->display_config.num_planes; k++) {
12831 out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0 = dml_get_plane_num_mcaches_plane0(mode_lib, k);
12832 out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane0 = dml_get_plane_mcache_row_bytes_plane0(mode_lib, k);
12833
12834 for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane0; n++) {
12835 out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane0[n] = dml_get_plane_array_mcache_offsets_plane0(mode_lib, k, n);
12836 out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane0[n] = k;
12837 }
12838
12839 out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1 = dml_get_plane_num_mcaches_plane1(mode_lib, k);
12840 out->informative.non_optimized_mcache_allocation[k].informative.meta_row_bytes_plane1 = dml_get_plane_mcache_row_bytes_plane1(mode_lib, k);
12841
12842 for (n = 0; n < out->informative.non_optimized_mcache_allocation[k].num_mcaches_plane1; n++) {
12843 out->informative.non_optimized_mcache_allocation[k].mcache_x_offsets_plane1[n] = dml_get_plane_array_mcache_offsets_plane1(mode_lib, k, n);
12844 out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane1[n] = k;
12845 }
12846 }
12847
12848 out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles
12849 / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0)
12850 + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock
12851 + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock
12852 * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0);
12853
12854 if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) {
12855 if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024
12856 / mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) {
12857 out->informative.misc.ROBUrgencyAvoidance = true;
12858 } else {
12859 out->informative.misc.ROBUrgencyAvoidance = false;
12860 }
12861 } else {
12862 out->informative.misc.ROBUrgencyAvoidance = true;
12863 }
12864 }
12865