1 /* SPDX-License-Identifier: MIT */
2 /*
3 * Copyright 2023 Advanced Micro Devices, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 * OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors: AMD
24 *
25 */
26
27 #include "dml2_policy.h"
28
get_optimal_ntuple(const struct soc_bounding_box_st * socbb,struct soc_state_bounding_box_st * entry)29 static void get_optimal_ntuple(
30 const struct soc_bounding_box_st *socbb,
31 struct soc_state_bounding_box_st *entry)
32 {
33 if (entry->dcfclk_mhz > 0) {
34 float bw_on_sdp = (float)(entry->dcfclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100));
35
36 entry->fabricclk_mhz = bw_on_sdp / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100));
37 entry->dram_speed_mts = bw_on_sdp / (socbb->num_chans *
38 socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100));
39 } else if (entry->fabricclk_mhz > 0) {
40 float bw_on_fabric = (float)(entry->fabricclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100));
41
42 entry->dcfclk_mhz = bw_on_fabric / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100));
43 entry->dram_speed_mts = bw_on_fabric / (socbb->num_chans *
44 socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100));
45 } else if (entry->dram_speed_mts > 0) {
46 float bw_on_dram = (float)(entry->dram_speed_mts * socbb->num_chans *
47 socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100));
48
49 entry->fabricclk_mhz = bw_on_dram / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100));
50 entry->dcfclk_mhz = bw_on_dram / (socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100));
51 }
52 }
53
calculate_net_bw_in_mbytes_sec(const struct soc_bounding_box_st * socbb,struct soc_state_bounding_box_st * entry)54 static float calculate_net_bw_in_mbytes_sec(const struct soc_bounding_box_st *socbb,
55 struct soc_state_bounding_box_st *entry)
56 {
57 float memory_bw_mbytes_sec = (float)(entry->dram_speed_mts * socbb->num_chans *
58 socbb->dram_channel_width_bytes * ((float)socbb->pct_ideal_dram_bw_after_urgent_pixel_only / 100));
59
60 float fabric_bw_mbytes_sec = (float)(entry->fabricclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_fabric_bw_after_urgent / 100));
61
62 float sdp_bw_mbytes_sec = (float)(entry->dcfclk_mhz * socbb->return_bus_width_bytes * ((float)socbb->pct_ideal_sdp_bw_after_urgent / 100));
63
64 float limiting_bw_mbytes_sec = memory_bw_mbytes_sec;
65
66 if (fabric_bw_mbytes_sec < limiting_bw_mbytes_sec)
67 limiting_bw_mbytes_sec = fabric_bw_mbytes_sec;
68
69 if (sdp_bw_mbytes_sec < limiting_bw_mbytes_sec)
70 limiting_bw_mbytes_sec = sdp_bw_mbytes_sec;
71
72 return limiting_bw_mbytes_sec;
73 }
74
insert_entry_into_table_sorted(const struct soc_bounding_box_st * socbb,struct soc_states_st * table,struct soc_state_bounding_box_st * entry)75 static void insert_entry_into_table_sorted(const struct soc_bounding_box_st *socbb,
76 struct soc_states_st *table,
77 struct soc_state_bounding_box_st *entry)
78 {
79 int index = 0;
80 int i = 0;
81 float net_bw_of_new_state = 0;
82
83 get_optimal_ntuple(socbb, entry);
84
85 if (table->num_states == 0) {
86 index = 0;
87 } else {
88 net_bw_of_new_state = calculate_net_bw_in_mbytes_sec(socbb, entry);
89 while (net_bw_of_new_state > calculate_net_bw_in_mbytes_sec(socbb, &table->state_array[index])) {
90 index++;
91 if (index >= (int) table->num_states)
92 break;
93 }
94
95 for (i = table->num_states; i > index; i--) {
96 table->state_array[i] = table->state_array[i - 1];
97 }
98 //ASSERT(index < MAX_CLK_TABLE_SIZE);
99 }
100
101 table->state_array[index] = *entry;
102 table->state_array[index].dcfclk_mhz = (int)entry->dcfclk_mhz;
103 table->state_array[index].fabricclk_mhz = (int)entry->fabricclk_mhz;
104 table->state_array[index].dram_speed_mts = (int)entry->dram_speed_mts;
105 table->num_states++;
106 }
107
remove_entry_from_table_at_index(struct soc_states_st * table,unsigned int index)108 static void remove_entry_from_table_at_index(struct soc_states_st *table,
109 unsigned int index)
110 {
111 int i;
112
113 if (table->num_states == 0)
114 return;
115
116 for (i = index; i < (int) table->num_states - 1; i++) {
117 table->state_array[i] = table->state_array[i + 1];
118 }
119 memset(&table->state_array[--table->num_states], 0, sizeof(struct soc_state_bounding_box_st));
120 }
121
dml2_policy_build_synthetic_soc_states(struct dml2_policy_build_synthetic_soc_states_scratch * s,struct dml2_policy_build_synthetic_soc_states_params * p)122 int dml2_policy_build_synthetic_soc_states(struct dml2_policy_build_synthetic_soc_states_scratch *s,
123 struct dml2_policy_build_synthetic_soc_states_params *p)
124 {
125 int i, j;
126 unsigned int min_fclk_mhz = p->in_states->state_array[0].fabricclk_mhz;
127 unsigned int min_dcfclk_mhz = p->in_states->state_array[0].dcfclk_mhz;
128 unsigned int min_socclk_mhz = p->in_states->state_array[0].socclk_mhz;
129
130 int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0,
131 max_phyclk_mhz = 0, max_dtbclk_mhz = 0, max_fclk_mhz = 0,
132 max_uclk_mhz = 0, max_socclk_mhz = 0;
133
134 int num_uclk_dpms = 0, num_fclk_dpms = 0;
135
136 for (i = 0; i < __DML_MAX_STATE_ARRAY_SIZE__; i++) {
137 if (p->in_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz)
138 max_dcfclk_mhz = (int) p->in_states->state_array[i].dcfclk_mhz;
139 if (p->in_states->state_array[i].fabricclk_mhz > max_fclk_mhz)
140 max_fclk_mhz = (int) p->in_states->state_array[i].fabricclk_mhz;
141 if (p->in_states->state_array[i].socclk_mhz > max_socclk_mhz)
142 max_socclk_mhz = (int) p->in_states->state_array[i].socclk_mhz;
143 if (p->in_states->state_array[i].dram_speed_mts > max_uclk_mhz)
144 max_uclk_mhz = (int) p->in_states->state_array[i].dram_speed_mts;
145 if (p->in_states->state_array[i].dispclk_mhz > max_dispclk_mhz)
146 max_dispclk_mhz = (int) p->in_states->state_array[i].dispclk_mhz;
147 if (p->in_states->state_array[i].dppclk_mhz > max_dppclk_mhz)
148 max_dppclk_mhz = (int) p->in_states->state_array[i].dppclk_mhz;
149 if (p->in_states->state_array[i].phyclk_mhz > max_phyclk_mhz)
150 max_phyclk_mhz = (int)p->in_states->state_array[i].phyclk_mhz;
151 if (p->in_states->state_array[i].dtbclk_mhz > max_dtbclk_mhz)
152 max_dtbclk_mhz = (int)p->in_states->state_array[i].dtbclk_mhz;
153
154 if (p->in_states->state_array[i].fabricclk_mhz > 0)
155 num_fclk_dpms++;
156 if (p->in_states->state_array[i].dram_speed_mts > 0)
157 num_uclk_dpms++;
158 }
159
160 if (!max_dcfclk_mhz || !max_dispclk_mhz || !max_dppclk_mhz || !max_phyclk_mhz || !max_dtbclk_mhz)
161 return -1;
162
163 p->out_states->num_states = 0;
164
165 s->entry = p->in_states->state_array[0];
166
167 s->entry.dispclk_mhz = max_dispclk_mhz;
168 s->entry.dppclk_mhz = max_dppclk_mhz;
169 s->entry.dtbclk_mhz = max_dtbclk_mhz;
170 s->entry.phyclk_mhz = max_phyclk_mhz;
171
172 s->entry.dscclk_mhz = max_dispclk_mhz / 3;
173 s->entry.phyclk_mhz = max_phyclk_mhz;
174 s->entry.dtbclk_mhz = max_dtbclk_mhz;
175
176 // Insert all the DCFCLK STAs first
177 for (i = 0; i < p->num_dcfclk_stas; i++) {
178 s->entry.dcfclk_mhz = p->dcfclk_stas_mhz[i];
179 s->entry.fabricclk_mhz = 0;
180 s->entry.dram_speed_mts = 0;
181 if (i > 0)
182 s->entry.socclk_mhz = max_socclk_mhz;
183
184 insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry);
185 }
186
187 // Insert the UCLK DPMS
188 for (i = 0; i < num_uclk_dpms; i++) {
189 s->entry.dcfclk_mhz = 0;
190 s->entry.fabricclk_mhz = 0;
191 s->entry.dram_speed_mts = p->in_states->state_array[i].dram_speed_mts;
192 if (i == 0) {
193 s->entry.socclk_mhz = min_socclk_mhz;
194 } else {
195 s->entry.socclk_mhz = max_socclk_mhz;
196 }
197
198 insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry);
199 }
200
201 // Insert FCLK DPMs (if present)
202 if (num_fclk_dpms > 2) {
203 for (i = 0; i < num_fclk_dpms; i++) {
204 s->entry.dcfclk_mhz = 0;
205 s->entry.fabricclk_mhz = p->in_states->state_array[i].fabricclk_mhz;
206 s->entry.dram_speed_mts = 0;
207
208 insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry);
209 }
210 }
211 // Add max FCLK
212 else {
213 s->entry.dcfclk_mhz = 0;
214 s->entry.fabricclk_mhz = p->in_states->state_array[num_fclk_dpms - 1].fabricclk_mhz;
215 s->entry.dram_speed_mts = 0;
216
217 insert_entry_into_table_sorted(p->in_bbox, p->out_states, &s->entry);
218 }
219
220 // Remove states that require higher clocks than are supported
221 for (i = p->out_states->num_states - 1; i >= 0; i--) {
222 if (p->out_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz ||
223 p->out_states->state_array[i].fabricclk_mhz > max_fclk_mhz ||
224 p->out_states->state_array[i].dram_speed_mts > max_uclk_mhz)
225 remove_entry_from_table_at_index(p->out_states, i);
226 }
227
228 // At this point, the table contains all "points of interest" based on
229 // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock
230 // ratios (by derate, are exact).
231
232 // Round up UCLK to DPMs
233 for (i = p->out_states->num_states - 1; i >= 0; i--) {
234 for (j = 0; j < num_uclk_dpms; j++) {
235 if (p->in_states->state_array[j].dram_speed_mts >= p->out_states->state_array[i].dram_speed_mts) {
236 p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[j].dram_speed_mts;
237 break;
238 }
239 }
240 }
241
242 // If FCLK is coarse grained, round up to next DPMs
243 if (num_fclk_dpms > 2) {
244 for (i = p->out_states->num_states - 1; i >= 0; i--) {
245 for (j = 0; j < num_fclk_dpms; j++) {
246 if (p->in_states->state_array[j].fabricclk_mhz >= p->out_states->state_array[i].fabricclk_mhz) {
247 p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[j].fabricclk_mhz;
248 break;
249 }
250 }
251 }
252 }
253
254 // Clamp to min FCLK/DCFCLK
255 for (i = p->out_states->num_states - 1; i >= 0; i--) {
256 if (p->out_states->state_array[i].fabricclk_mhz < min_fclk_mhz) {
257 p->out_states->state_array[i].fabricclk_mhz = min_fclk_mhz;
258 }
259 if (p->out_states->state_array[i].dcfclk_mhz < min_dcfclk_mhz) {
260 p->out_states->state_array[i].dcfclk_mhz = min_dcfclk_mhz;
261 }
262 }
263
264 // Remove duplicate states, note duplicate states are always neighbouring since table is sorted.
265 i = 0;
266 while (i < (int) p->out_states->num_states - 1) {
267 if (p->out_states->state_array[i].dcfclk_mhz == p->out_states->state_array[i + 1].dcfclk_mhz &&
268 p->out_states->state_array[i].fabricclk_mhz == p->out_states->state_array[i + 1].fabricclk_mhz &&
269 p->out_states->state_array[i].dram_speed_mts == p->out_states->state_array[i + 1].dram_speed_mts)
270 remove_entry_from_table_at_index(p->out_states, i);
271 else
272 i++;
273 }
274
275 return 0;
276 }
277
build_unoptimized_policy_settings(enum dml_project_id project,struct dml_mode_eval_policy_st * policy)278 void build_unoptimized_policy_settings(enum dml_project_id project, struct dml_mode_eval_policy_st *policy)
279 {
280 for (int i = 0; i < __DML_NUM_PLANES__; i++) {
281 policy->MPCCombineUse[i] = dml_mpc_as_needed_for_voltage; // TOREVIEW: Is this still needed? When is MPCC useful for pstate given CRB?
282 policy->ODMUse[i] = dml_odm_use_policy_combine_as_needed;
283 policy->ImmediateFlipRequirement[i] = dml_immediate_flip_required;
284 policy->AllowForPStateChangeOrStutterInVBlank[i] = dml_prefetch_support_uclk_fclk_and_stutter_if_possible;
285 }
286
287 /* Change the default policy initializations as per spreadsheet. We might need to
288 * review and change them later on as per Jun's earlier comments.
289 */
290 policy->UseUnboundedRequesting = dml_unbounded_requesting_enable;
291 policy->UseMinimumRequiredDCFCLK = false;
292 policy->DRAMClockChangeRequirementFinal = true; // TOREVIEW: What does this mean?
293 policy->FCLKChangeRequirementFinal = true; // TOREVIEW: What does this mean?
294 policy->USRRetrainingRequiredFinal = true;
295 policy->EnhancedPrefetchScheduleAccelerationFinal = true; // TOREVIEW: What does this mean?
296 policy->NomDETInKByteOverrideEnable = false;
297 policy->NomDETInKByteOverrideValue = 0;
298 policy->DCCProgrammingAssumesScanDirectionUnknownFinal = true;
299 policy->SynchronizeTimingsFinal = true;
300 policy->SynchronizeDRRDisplaysForUCLKPStateChangeFinal = true;
301 policy->AssumeModeSupportAtMaxPwrStateEvenDRAMClockChangeNotSupported = true; // TOREVIEW: What does this mean?
302 policy->AssumeModeSupportAtMaxPwrStateEvenFClockChangeNotSupported = true; // TOREVIEW: What does this mean?
303 if (project == dml_project_dcn35 ||
304 project == dml_project_dcn351) {
305 policy->DCCProgrammingAssumesScanDirectionUnknownFinal = false;
306 policy->EnhancedPrefetchScheduleAccelerationFinal = 0;
307 policy->AllowForPStateChangeOrStutterInVBlankFinal = dml_prefetch_support_uclk_fclk_and_stutter_if_possible; /*new*/
308 policy->UseOnlyMaxPrefetchModes = 1;
309 }
310 }
311