1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
3 
4 #include "eswitch.h"
5 #include "lib/mlx5.h"
6 #include "esw/qos.h"
7 #include "en/port.h"
8 #define CREATE_TRACE_POINTS
9 #include "diag/qos_tracepoint.h"
10 
/* Smallest BW share value supported by the HW: 1 Mbit/sec */
#define MLX5_MIN_BW_SHARE 1

/* Convert @rate into a BW share: divide by @divider rounding up, raise the
 * result to at least MLX5_MIN_BW_SHARE, then cap it at @limit.
 */
#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit)		\
	min_t(u32,						\
	      max_t(u32, DIV_ROUND_UP(rate, divider),		\
		    MLX5_MIN_BW_SHARE),				\
	      limit)
16 
17 struct mlx5_esw_rate_group {
18 	u32 tsar_ix;
19 	u32 max_rate;
20 	u32 min_rate;
21 	u32 bw_share;
22 	struct list_head list;
23 };
24 
/* Program @max_rate and @bw_share into the E-Switch scheduling element
 * @tsar_ix, using the caller-provided @sched_ctx as the command payload.
 *
 * Returns -EOPNOTSUPP when the device lacks the qos or esw_scheduling
 * capability, otherwise the result of the modify command.
 */
static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
			       u32 tsar_ix, u32 max_rate, u32 bw_share)
{
	/* Both fields are always updated together */
	const u32 modify_mask =
		MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW |
		MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;

	if (!(MLX5_CAP_GEN(dev, qos) && MLX5_CAP_QOS(dev, esw_scheduling)))
		return -EOPNOTSUPP;

	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);

	return mlx5_modify_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH,
						  sched_ctx, tsar_ix, modify_mask);
}
44 
/* Apply @max_rate and @bw_share to the TSAR of @group.
 *
 * On failure an error message is stored in @extack. The tracepoint is
 * emitted whether or not the modify command succeeded.
 *
 * Returns the result of esw_qos_tsar_config().
 */
static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_core_dev *dev = esw->dev;
	int ret;

	ret = esw_qos_tsar_config(dev, sched_ctx, group->tsar_ix, max_rate, bw_share);
	if (ret)
		NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");

	trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate);

	return ret;
}
62 
/* Apply @max_rate and @bw_share to the scheduling element of @vport.
 *
 * Returns -EIO if QoS is not enabled on the vport, the error of
 * esw_qos_tsar_config() if the modification failed (also logged and
 * reported through @extack), or 0 on success.
 */
static int esw_qos_vport_config(struct mlx5_eswitch *esw,
				struct mlx5_vport *vport,
				u32 max_rate, u32 bw_share,
				struct netlink_ext_ack *extack)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	int ret;

	if (!vport->qos.enabled)
		return -EIO;

	ret = esw_qos_tsar_config(esw->dev, sched_ctx, vport->qos.esw_tsar_ix,
				  max_rate, bw_share);
	if (!ret) {
		/* Only successful updates are traced */
		trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);
		return 0;
	}

	esw_warn(esw->dev,
		 "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
		 vport->vport, ret);
	NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");

	return ret;
}
89 
esw_qos_calculate_min_rate_divider(struct mlx5_eswitch * esw,struct mlx5_esw_rate_group * group,bool group_level)90 static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
91 					      struct mlx5_esw_rate_group *group,
92 					      bool group_level)
93 {
94 	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
95 	struct mlx5_vport *evport;
96 	u32 max_guarantee = 0;
97 	unsigned long i;
98 
99 	if (group_level) {
100 		struct mlx5_esw_rate_group *group;
101 
102 		list_for_each_entry(group, &esw->qos.groups, list) {
103 			if (group->min_rate < max_guarantee)
104 				continue;
105 			max_guarantee = group->min_rate;
106 		}
107 	} else {
108 		mlx5_esw_for_each_vport(esw, i, evport) {
109 			if (!evport->enabled || !evport->qos.enabled ||
110 			    evport->qos.group != group || evport->qos.min_rate < max_guarantee)
111 				continue;
112 			max_guarantee = evport->qos.min_rate;
113 		}
114 	}
115 
116 	if (max_guarantee)
117 		return max_t(u32, max_guarantee / fw_max_bw_share, 1);
118 
119 	/* If vports min rate divider is 0 but their group has bw_share configured, then
120 	 * need to set bw_share for vports to minimal value.
121 	 */
122 	if (!group_level && !max_guarantee && group && group->bw_share)
123 		return 1;
124 	return 0;
125 }
126 
/* Translate @min_rate into a BW share using @divider, capped at @fw_max.
 * A zero @divider yields a BW share of 0.
 */
static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
{
	return divider ? MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max) : 0;
}
134 
esw_qos_normalize_vports_min_rate(struct mlx5_eswitch * esw,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)135 static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
136 					     struct mlx5_esw_rate_group *group,
137 					     struct netlink_ext_ack *extack)
138 {
139 	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
140 	u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
141 	struct mlx5_vport *evport;
142 	unsigned long i;
143 	u32 bw_share;
144 	int err;
145 
146 	mlx5_esw_for_each_vport(esw, i, evport) {
147 		if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
148 			continue;
149 		bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);
150 
151 		if (bw_share == evport->qos.bw_share)
152 			continue;
153 
154 		err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
155 		if (err)
156 			return err;
157 
158 		evport->qos.bw_share = bw_share;
159 	}
160 
161 	return 0;
162 }
163 
/* Recompute the BW share of every group on esw->qos.groups using @divider.
 * Each group whose value changed is reprogrammed and its vports are then
 * re-normalized.
 *
 * Returns 0 on success or the first error encountered.
 */
static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
					     struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	struct mlx5_esw_rate_group *grp;

	list_for_each_entry(grp, &esw->qos.groups, list) {
		u32 share = esw_qos_calc_bw_share(grp->min_rate, divider, fw_max_bw_share);
		int ret;

		if (share == grp->bw_share)
			continue;

		ret = esw_qos_group_config(esw, grp, grp->max_rate, share, extack);
		if (ret)
			return ret;

		grp->bw_share = share;

		/* The group's vports must get a default bw_share so that
		 * QoS is enabled on them.
		 */
		ret = esw_qos_normalize_vports_min_rate(esw, grp, extack);
		if (ret)
			return ret;
	}

	return 0;
}
195 
/* Set the min rate of @evport and re-normalize the BW shares of the vports
 * in its group. Must be called with esw->state_lock held.
 *
 * Returns -EOPNOTSUPP for a non-zero @min_rate when the device lacks
 * esw_bw_share or its max_tsar_bw_share is below MLX5_MIN_BW_SHARE, 0 when
 * the value is unchanged or was applied, or the normalization error (in
 * which case the cached min_rate is restored).
 */
static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
				      u32 min_rate, struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share, old_min_rate;
	int ret;

	lockdep_assert_held(&esw->state_lock);

	fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	if (min_rate &&
	    !(MLX5_CAP_QOS(esw->dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE))
		return -EOPNOTSUPP;

	old_min_rate = evport->qos.min_rate;
	if (old_min_rate == min_rate)
		return 0;

	/* Normalization reads qos.min_rate, so store the new value first */
	evport->qos.min_rate = min_rate;
	ret = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
	if (ret)
		evport->qos.min_rate = old_min_rate;

	return ret;
}
220 
/* Set the max rate of @evport. Must be called with esw->state_lock held.
 *
 * A @max_rate of 0 on a vport that has a group programs the group's max_rate
 * instead, while 0 is still what gets cached in evport->qos.max_rate.
 *
 * Returns -EOPNOTSUPP for a non-zero @max_rate when the device lacks
 * esw_rate_limit, 0 when unchanged or applied, or the configuration error.
 */
static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport,
				      u32 max_rate, struct netlink_ext_ack *extack)
{
	u32 hw_max_rate;
	int ret;

	lockdep_assert_held(&esw->state_lock);

	if (max_rate && !MLX5_CAP_QOS(esw->dev, esw_rate_limit))
		return -EOPNOTSUPP;
	if (evport->qos.max_rate == max_rate)
		return 0;

	/* An unlimited vport inherits the rate limit of its parent group */
	if (!max_rate && evport->qos.group)
		hw_max_rate = evport->qos.group->max_rate;
	else
		hw_max_rate = max_rate;

	ret = esw_qos_vport_config(esw, evport, hw_max_rate, evport->qos.bw_share, extack);
	if (ret)
		return ret;

	evport->qos.max_rate = max_rate;
	return 0;
}
249 
/* Set the min rate of @group and re-normalize the BW shares of all groups.
 *
 * If normalization fails, the previous min_rate is put back and a second
 * normalization pass attempts to restore the earlier BW shares; the original
 * error is returned either way.
 *
 * Returns -EOPNOTSUPP when the device lacks esw_bw_share or its
 * max_tsar_bw_share is below MLX5_MIN_BW_SHARE, 0 when unchanged or applied.
 */
static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
				      u32 min_rate, struct netlink_ext_ack *extack)
{
	u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
	u32 old_min_rate, divider;
	int err;

	if (!MLX5_CAP_QOS(esw->dev, esw_bw_share) || fw_max_bw_share < MLX5_MIN_BW_SHARE)
		return -EOPNOTSUPP;

	if (group->min_rate == min_rate)
		return 0;

	old_min_rate = group->min_rate;
	group->min_rate = min_rate;

	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
	err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
	if (!err)
		return 0;

	group->min_rate = old_min_rate;
	NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");

	/* Attempt restoring previous configuration */
	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
	if (esw_qos_normalize_groups_min_rate(esw, divider, extack))
		NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");

	return err;
}
280 
/* Set the max rate of @group and propagate it to the group's unlimited
 * vports (those whose own qos.max_rate is 0).
 *
 * A failure to update one vport does not stop the propagation to the
 * remaining vports, but it is reported: the first vport error is returned,
 * so a later vport succeeding can no longer mask an earlier failure.
 *
 * Returns 0 when the rate is unchanged or fully applied, the group
 * configuration error (nothing is changed in that case), or the first vport
 * configuration error.
 */
static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
				      struct mlx5_esw_rate_group *group,
				      u32 max_rate, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport;
	unsigned long i;
	int ret = 0;
	int err;

	if (group->max_rate == max_rate)
		return 0;

	err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
	if (err)
		return err;

	group->max_rate = max_rate;

	/* Any unlimited vports in the group should be set
	 * with the value of the group.
	 */
	mlx5_esw_for_each_vport(esw, i, vport) {
		if (!vport->enabled || !vport->qos.enabled ||
		    vport->qos.group != group || vport->qos.max_rate)
			continue;

		err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
		if (err) {
			NL_SET_ERR_MSG_MOD(extack,
					   "E-Switch vport implicit rate limit setting failed");
			/* Keep the first failure; keep updating the rest */
			if (!ret)
				ret = err;
		}
	}

	return ret;
}
314 
esw_qos_element_type_supported(struct mlx5_core_dev * dev,int type)315 static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
316 {
317 	switch (type) {
318 	case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
319 		return MLX5_CAP_QOS(dev, esw_element_type) &
320 		       ELEMENT_TYPE_CAP_MASK_TSAR;
321 	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
322 		return MLX5_CAP_QOS(dev, esw_element_type) &
323 		       ELEMENT_TYPE_CAP_MASK_VPORT;
324 	case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
325 		return MLX5_CAP_QOS(dev, esw_element_type) &
326 		       ELEMENT_TYPE_CAP_MASK_VPORT_TC;
327 	case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
328 		return MLX5_CAP_QOS(dev, esw_element_type) &
329 		       ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
330 	}
331 	return false;
332 }
333 
/* Create the scheduling element of @vport with the given @max_rate and
 * @bw_share. The element is parented to the TSAR of vport->qos.group, or to
 * the root TSAR when the vport has no group. The new element index is
 * stored in vport->qos.esw_tsar_ix.
 *
 * Returns -EOPNOTSUPP when vport scheduling elements are not supported,
 * otherwise the result of the create command (failures are logged).
 */
static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
					      struct mlx5_vport *vport,
					      u32 max_rate, u32 bw_share)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_esw_rate_group *group = vport->qos.group;
	u32 parent_tsar_ix;
	void *attrs;
	int ret;

	if (!esw_qos_element_type_supported(esw->dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT))
		return -EOPNOTSUPP;

	if (group)
		parent_tsar_ix = group->tsar_ix;
	else
		parent_tsar_ix = esw->qos.root_tsar_ix;

	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);

	attrs = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_element, attrs, vport_number, vport->vport);

	ret = mlx5_create_scheduling_element_cmd(esw->dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 sched_ctx,
						 &vport->qos.esw_tsar_ix);
	if (ret)
		esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, ret);

	return ret;
}
369 
esw_qos_update_group_scheduling_element(struct mlx5_eswitch * esw,struct mlx5_vport * vport,struct mlx5_esw_rate_group * curr_group,struct mlx5_esw_rate_group * new_group,struct netlink_ext_ack * extack)370 static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
371 						   struct mlx5_vport *vport,
372 						   struct mlx5_esw_rate_group *curr_group,
373 						   struct mlx5_esw_rate_group *new_group,
374 						   struct netlink_ext_ack *extack)
375 {
376 	u32 max_rate;
377 	int err;
378 
379 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
380 						  SCHEDULING_HIERARCHY_E_SWITCH,
381 						  vport->qos.esw_tsar_ix);
382 	if (err) {
383 		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
384 		return err;
385 	}
386 
387 	vport->qos.group = new_group;
388 	max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;
389 
390 	/* If vport is unlimited, we set the group's value.
391 	 * Therefore, if the group is limited it will apply to
392 	 * the vport as well and if not, vport will remain unlimited.
393 	 */
394 	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
395 	if (err) {
396 		NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
397 		goto err_sched;
398 	}
399 
400 	return 0;
401 
402 err_sched:
403 	vport->qos.group = curr_group;
404 	max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
405 	if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
406 		esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
407 			 vport->vport);
408 
409 	return err;
410 }
411 
esw_qos_vport_update_group(struct mlx5_eswitch * esw,struct mlx5_vport * vport,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)412 static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
413 				      struct mlx5_vport *vport,
414 				      struct mlx5_esw_rate_group *group,
415 				      struct netlink_ext_ack *extack)
416 {
417 	struct mlx5_esw_rate_group *new_group, *curr_group;
418 	int err;
419 
420 	if (!vport->enabled)
421 		return -EINVAL;
422 
423 	curr_group = vport->qos.group;
424 	new_group = group ?: esw->qos.group0;
425 	if (curr_group == new_group)
426 		return 0;
427 
428 	err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
429 	if (err)
430 		return err;
431 
432 	/* Recalculate bw share weights of old and new groups */
433 	if (vport->qos.bw_share || new_group->bw_share) {
434 		esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
435 		esw_qos_normalize_vports_min_rate(esw, new_group, extack);
436 	}
437 
438 	return 0;
439 }
440 
441 static struct mlx5_esw_rate_group *
__esw_qos_create_rate_group(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)442 __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
443 {
444 	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
445 	struct mlx5_esw_rate_group *group;
446 	__be32 *attr;
447 	u32 divider;
448 	int err;
449 
450 	group = kzalloc(sizeof(*group), GFP_KERNEL);
451 	if (!group)
452 		return ERR_PTR(-ENOMEM);
453 
454 	MLX5_SET(scheduling_context, tsar_ctx, element_type,
455 		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
456 
457 	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
458 	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
459 
460 	MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
461 		 esw->qos.root_tsar_ix);
462 	err = mlx5_create_scheduling_element_cmd(esw->dev,
463 						 SCHEDULING_HIERARCHY_E_SWITCH,
464 						 tsar_ctx,
465 						 &group->tsar_ix);
466 	if (err) {
467 		NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
468 		goto err_sched_elem;
469 	}
470 
471 	list_add_tail(&group->list, &esw->qos.groups);
472 
473 	divider = esw_qos_calculate_min_rate_divider(esw, group, true);
474 	if (divider) {
475 		err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
476 		if (err) {
477 			NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
478 			goto err_min_rate;
479 		}
480 	}
481 	trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);
482 
483 	return group;
484 
485 err_min_rate:
486 	list_del(&group->list);
487 	if (mlx5_destroy_scheduling_element_cmd(esw->dev,
488 						SCHEDULING_HIERARCHY_E_SWITCH,
489 						group->tsar_ix))
490 		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
491 err_sched_elem:
492 	kfree(group);
493 	return ERR_PTR(err);
494 }
495 
496 static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack);
497 static void esw_qos_put(struct mlx5_eswitch *esw);
498 
499 static struct mlx5_esw_rate_group *
esw_qos_create_rate_group(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)500 esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
501 {
502 	struct mlx5_esw_rate_group *group;
503 	int err;
504 
505 	if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
506 		return ERR_PTR(-EOPNOTSUPP);
507 
508 	err = esw_qos_get(esw, extack);
509 	if (err)
510 		return ERR_PTR(err);
511 
512 	group = __esw_qos_create_rate_group(esw, extack);
513 	if (IS_ERR(group))
514 		esw_qos_put(esw);
515 
516 	return group;
517 }
518 
__esw_qos_destroy_rate_group(struct mlx5_eswitch * esw,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)519 static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
520 					struct mlx5_esw_rate_group *group,
521 					struct netlink_ext_ack *extack)
522 {
523 	u32 divider;
524 	int err;
525 
526 	list_del(&group->list);
527 
528 	divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
529 	err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
530 	if (err)
531 		NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");
532 
533 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
534 						  SCHEDULING_HIERARCHY_E_SWITCH,
535 						  group->tsar_ix);
536 	if (err)
537 		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");
538 
539 	trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);
540 
541 	kfree(group);
542 
543 	return err;
544 }
545 
/* Destroy @group and drop the E-Switch QoS reference. The reference is
 * dropped even when the destruction reports an error.
 *
 * Returns the result of __esw_qos_destroy_rate_group().
 */
static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
				      struct mlx5_esw_rate_group *group,
				      struct netlink_ext_ack *extack)
{
	int ret = __esw_qos_destroy_rate_group(esw, group, extack);

	esw_qos_put(esw);

	return ret;
}
557 
esw_qos_create(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)558 static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
559 {
560 	u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
561 	struct mlx5_core_dev *dev = esw->dev;
562 	__be32 *attr;
563 	int err;
564 
565 	if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
566 		return -EOPNOTSUPP;
567 
568 	if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR) ||
569 	    !(MLX5_CAP_QOS(dev, esw_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR))
570 		return -EOPNOTSUPP;
571 
572 	MLX5_SET(scheduling_context, tsar_ctx, element_type,
573 		 SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
574 
575 	attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
576 	*attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
577 
578 	err = mlx5_create_scheduling_element_cmd(dev,
579 						 SCHEDULING_HIERARCHY_E_SWITCH,
580 						 tsar_ctx,
581 						 &esw->qos.root_tsar_ix);
582 	if (err) {
583 		esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
584 		return err;
585 	}
586 
587 	INIT_LIST_HEAD(&esw->qos.groups);
588 	if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
589 		esw->qos.group0 = __esw_qos_create_rate_group(esw, extack);
590 		if (IS_ERR(esw->qos.group0)) {
591 			esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
592 				 PTR_ERR(esw->qos.group0));
593 			err = PTR_ERR(esw->qos.group0);
594 			goto err_group0;
595 		}
596 	}
597 	refcount_set(&esw->qos.refcnt, 1);
598 
599 	return 0;
600 
601 err_group0:
602 	if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH,
603 						esw->qos.root_tsar_ix))
604 		esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n");
605 
606 	return err;
607 }
608 
esw_qos_destroy(struct mlx5_eswitch * esw)609 static void esw_qos_destroy(struct mlx5_eswitch *esw)
610 {
611 	int err;
612 
613 	if (esw->qos.group0)
614 		__esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);
615 
616 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
617 						  SCHEDULING_HIERARCHY_E_SWITCH,
618 						  esw->qos.root_tsar_ix);
619 	if (err)
620 		esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
621 }
622 
esw_qos_get(struct mlx5_eswitch * esw,struct netlink_ext_ack * extack)623 static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
624 {
625 	int err = 0;
626 
627 	lockdep_assert_held(&esw->state_lock);
628 
629 	if (!refcount_inc_not_zero(&esw->qos.refcnt)) {
630 		/* esw_qos_create() set refcount to 1 only on success.
631 		 * No need to decrement on failure.
632 		 */
633 		err = esw_qos_create(esw, extack);
634 	}
635 
636 	return err;
637 }
638 
esw_qos_put(struct mlx5_eswitch * esw)639 static void esw_qos_put(struct mlx5_eswitch *esw)
640 {
641 	lockdep_assert_held(&esw->state_lock);
642 	if (refcount_dec_and_test(&esw->qos.refcnt))
643 		esw_qos_destroy(esw);
644 }
645 
/* Enable QoS on @vport: take an E-Switch QoS reference, attach the vport to
 * rate group 0 and create its scheduling element with @max_rate and
 * @bw_share. Must be called with esw->state_lock held.
 *
 * Returns 0 if QoS is already enabled on the vport or was enabled now, or
 * the error of esw_qos_get() / esw_qos_vport_create_sched_element(). On
 * failure the vport is detached from the group again, because dropping the
 * QoS reference may destroy group 0.
 */
static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
				u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
{
	int err;

	lockdep_assert_held(&esw->state_lock);
	if (vport->qos.enabled)
		return 0;

	err = esw_qos_get(esw, extack);
	if (err)
		return err;

	vport->qos.group = esw->qos.group0;

	err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
	if (err)
		goto err_out;

	vport->qos.enabled = true;
	trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);

	return 0;

err_out:
	/* Clear the group before the put below can free it */
	vport->qos.group = NULL;
	esw_qos_put(esw);

	return err;
}
675 
mlx5_esw_qos_vport_disable(struct mlx5_eswitch * esw,struct mlx5_vport * vport)676 void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
677 {
678 	int err;
679 
680 	lockdep_assert_held(&esw->state_lock);
681 	if (!vport->qos.enabled)
682 		return;
683 	WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
684 	     "Disabling QoS on port before detaching it from group");
685 
686 	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
687 						  SCHEDULING_HIERARCHY_E_SWITCH,
688 						  vport->qos.esw_tsar_ix);
689 	if (err)
690 		esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
691 			 vport->vport, err);
692 
693 	memset(&vport->qos, 0, sizeof(vport->qos));
694 	trace_mlx5_esw_vport_qos_destroy(vport);
695 
696 	esw_qos_put(esw);
697 }
698 
/* Set both the min and max rate of @vport, enabling QoS on it first if
 * needed. Must be called with esw->state_lock held.
 *
 * The min rate is applied first; the max rate is applied only if that
 * succeeded. Returns 0 on success or the first error.
 */
int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
				u32 max_rate, u32 min_rate)
{
	int ret;

	lockdep_assert_held(&esw->state_lock);

	ret = esw_qos_vport_enable(esw, vport, 0, 0, NULL);
	if (ret)
		return ret;

	ret = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL);
	if (ret)
		return ret;

	return esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL);
}
715 
mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev * mdev)716 static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev)
717 {
718 	struct ethtool_link_ksettings lksettings;
719 	struct net_device *slave, *master;
720 	u32 speed = SPEED_UNKNOWN;
721 
722 	/* Lock ensures a stable reference to master and slave netdevice
723 	 * while port speed of master is queried.
724 	 */
725 	ASSERT_RTNL();
726 
727 	slave = mlx5_uplink_netdev_get(mdev);
728 	if (!slave)
729 		goto out;
730 
731 	master = netdev_master_upper_dev_get(slave);
732 	if (master && !__ethtool_get_link_ksettings(master, &lksettings))
733 		speed = lksettings.base.speed;
734 
735 out:
736 	return speed;
737 }
738 
/* Store the maximum link speed of @mdev in @link_speed_max.
 *
 * When LAG is active the speed of the LAG master is tried first, taking
 * RTNL around the query if @hold_rtnl_lock is set. If LAG is inactive or
 * that speed is unknown, the port's maximum link speed is used.
 *
 * Returns 0 on success or the error of mlx5_port_max_linkspeed() (also
 * reported through @extack).
 */
static int mlx5_esw_qos_max_link_speed_get(struct mlx5_core_dev *mdev, u32 *link_speed_max,
					   bool hold_rtnl_lock, struct netlink_ext_ack *extack)
{
	int ret;

	if (mlx5_lag_is_active(mdev)) {
		if (hold_rtnl_lock)
			rtnl_lock();

		*link_speed_max = mlx5_esw_qos_lag_link_speed_get_locked(mdev);

		if (hold_rtnl_lock)
			rtnl_unlock();

		if (*link_speed_max != (u32)SPEED_UNKNOWN)
			return 0;
	}

	ret = mlx5_port_max_linkspeed(mdev, link_speed_max);
	if (ret)
		NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");

	return ret;
}
765 
/* Check that @value does not exceed @link_speed_max.
 *
 * Returns 0 when it fits, otherwise logs the offending @name rate, sets an
 * error message in @extack and returns -EINVAL.
 */
static int mlx5_esw_qos_link_speed_verify(struct mlx5_core_dev *mdev,
					  const char *name, u32 link_speed_max,
					  u64 value, struct netlink_ext_ack *extack)
{
	if (value <= link_speed_max)
		return 0;

	pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n",
	       name, value, link_speed_max);
	NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed");

	return -EINVAL;
}
779 
/* Set the max average BW of vport @vport_num to @rate_mbps.
 *
 * A non-zero rate is first validated against the maximum link speed. Then,
 * under esw->state_lock, QoS is either enabled on the vport with that rate,
 * or, if already enabled, only max_average_bw of its scheduling element is
 * modified.
 *
 * Returns 0 on success, the vport lookup error, the link speed
 * query/validation error, or the enable/modify error.
 */
int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
{
	u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	struct mlx5_vport *vport;
	u32 link_speed_max;
	int ret;

	vport = mlx5_eswitch_get_vport(esw, vport_num);
	if (IS_ERR(vport))
		return PTR_ERR(vport);

	if (rate_mbps) {
		ret = mlx5_esw_qos_max_link_speed_get(esw->dev, &link_speed_max, false, NULL);
		if (ret)
			return ret;

		ret = mlx5_esw_qos_link_speed_verify(esw->dev, "Police",
						     link_speed_max, rate_mbps, NULL);
		if (ret)
			return ret;
	}

	mutex_lock(&esw->state_lock);
	if (vport->qos.enabled) {
		MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);

		ret = mlx5_modify_scheduling_element_cmd(esw->dev,
							 SCHEDULING_HIERARCHY_E_SWITCH,
							 ctx,
							 vport->qos.esw_tsar_ix,
							 MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW);
	} else {
		/* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */
		ret = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL);
	}
	mutex_unlock(&esw->state_lock);

	return ret;
}
821 
822 #define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
823 
824 /* Converts bytes per second value passed in a pointer into megabits per
825  * second, rewriting last. If converted rate exceed link speed or is not a
826  * fraction of Mbps - returns error.
827  */
esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev * mdev,const char * name,u64 * rate,struct netlink_ext_ack * extack)828 static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
829 					u64 *rate, struct netlink_ext_ack *extack)
830 {
831 	u32 link_speed_max, remainder;
832 	u64 value;
833 	int err;
834 
835 	value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder);
836 	if (remainder) {
837 		pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
838 		       name, *rate);
839 		NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
840 		return -EINVAL;
841 	}
842 
843 	err = mlx5_esw_qos_max_link_speed_get(mdev, &link_speed_max, true, extack);
844 	if (err)
845 		return err;
846 
847 	err = mlx5_esw_qos_link_speed_verify(mdev, name, link_speed_max, value, extack);
848 	if (err)
849 		return err;
850 
851 	*rate = value;
852 	return 0;
853 }
854 
855 /* Eswitch devlink rate API */
856 
/* devlink rate leaf callback: set the tx_share (min rate) of the vport
 * passed in @priv. @tx_share is given in bytes per second.
 *
 * Returns -EPERM when the eswitch is not allowed for this device, a rate
 * conversion/validation error, or the result of enabling vport QoS and
 * setting its min rate under esw->state_lock.
 */
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int ret;

	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	ret = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = esw_qos_vport_enable(esw, vport, 0, 0, extack);
	if (!ret)
		ret = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
882 
/* devlink rate leaf callback: set the tx_max (max rate) of the vport passed
 * in @priv. @tx_max is given in bytes per second.
 *
 * Returns -EPERM when the eswitch is not allowed for this device, a rate
 * conversion/validation error, or the result of enabling vport QoS and
 * setting its max rate under esw->state_lock.
 */
int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
	int ret;

	if (!mlx5_esw_allowed(esw))
		return -EPERM;

	ret = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = esw_qos_vport_enable(esw, vport, 0, 0, extack);
	if (!ret)
		ret = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
908 
/* devlink rate node callback: set the tx_share (min rate) of the rate group
 * passed in @priv. @tx_share is given in bytes per second.
 *
 * Returns a rate conversion/validation error, or the result of setting the
 * group's min rate under esw->state_lock.
 */
int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
					    u64 tx_share, struct netlink_ext_ack *extack)
{
	struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
	struct mlx5_esw_rate_group *group = priv;
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	int ret;

	ret = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack);
	if (ret)
		return ret;

	mutex_lock(&esw->state_lock);
	ret = esw_qos_set_group_min_rate(esw, group, tx_share, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
926 
/* devlink rate node callback: apply a tx_max value to a rate group.
 *
 * @rate_node: devlink rate node; its devlink instance yields the core device.
 * @priv:      the node's private data, interpreted as a
 *             struct mlx5_esw_rate_group.
 * @tx_max:    requested value; handed by pointer to
 *             esw_qos_devlink_rate_to_mbps(), which may rewrite it before it
 *             is used (presumably a unit conversion to Mbps - confirm against
 *             that helper).
 * @extack:    netlink extended ack forwarded to the helpers.
 *
 * Returns the non-zero result of esw_qos_devlink_rate_to_mbps() if it fails,
 * otherwise whatever esw_qos_set_group_max_rate() returns.
 */
int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
					  u64 tx_max, struct netlink_ext_ack *extack)
{
	struct mlx5_esw_rate_group *group = priv;
	struct mlx5_core_dev *mdev;
	struct mlx5_eswitch *esw;
	int ret;

	mdev = devlink_priv(rate_node->devlink);
	esw = mdev->priv.eswitch;

	ret = esw_qos_devlink_rate_to_mbps(mdev, "tx_max", &tx_max, extack);
	if (ret)
		return ret;

	/* The group max rate update runs under state_lock. */
	mutex_lock(&esw->state_lock);
	ret = esw_qos_set_group_max_rate(esw, group, tx_max, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
944 
mlx5_esw_devlink_rate_node_new(struct devlink_rate * rate_node,void ** priv,struct netlink_ext_ack * extack)945 int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
946 				   struct netlink_ext_ack *extack)
947 {
948 	struct mlx5_esw_rate_group *group;
949 	struct mlx5_eswitch *esw;
950 	int err = 0;
951 
952 	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
953 	if (IS_ERR(esw))
954 		return PTR_ERR(esw);
955 
956 	mutex_lock(&esw->state_lock);
957 	if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
958 		NL_SET_ERR_MSG_MOD(extack,
959 				   "Rate node creation supported only in switchdev mode");
960 		err = -EOPNOTSUPP;
961 		goto unlock;
962 	}
963 
964 	group = esw_qos_create_rate_group(esw, extack);
965 	if (IS_ERR(group)) {
966 		err = PTR_ERR(group);
967 		goto unlock;
968 	}
969 
970 	*priv = group;
971 unlock:
972 	mutex_unlock(&esw->state_lock);
973 	return err;
974 }
975 
mlx5_esw_devlink_rate_node_del(struct devlink_rate * rate_node,void * priv,struct netlink_ext_ack * extack)976 int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
977 				   struct netlink_ext_ack *extack)
978 {
979 	struct mlx5_esw_rate_group *group = priv;
980 	struct mlx5_eswitch *esw;
981 	int err;
982 
983 	esw = mlx5_devlink_eswitch_get(rate_node->devlink);
984 	if (IS_ERR(esw))
985 		return PTR_ERR(esw);
986 
987 	mutex_lock(&esw->state_lock);
988 	err = esw_qos_destroy_rate_group(esw, group, extack);
989 	mutex_unlock(&esw->state_lock);
990 	return err;
991 }
992 
mlx5_esw_qos_vport_update_group(struct mlx5_eswitch * esw,struct mlx5_vport * vport,struct mlx5_esw_rate_group * group,struct netlink_ext_ack * extack)993 int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
994 				    struct mlx5_vport *vport,
995 				    struct mlx5_esw_rate_group *group,
996 				    struct netlink_ext_ack *extack)
997 {
998 	int err = 0;
999 
1000 	mutex_lock(&esw->state_lock);
1001 	if (!vport->qos.enabled && !group)
1002 		goto unlock;
1003 
1004 	err = esw_qos_vport_enable(esw, vport, 0, 0, extack);
1005 	if (!err)
1006 		err = esw_qos_vport_update_group(esw, vport, group, extack);
1007 unlock:
1008 	mutex_unlock(&esw->state_lock);
1009 	return err;
1010 }
1011 
mlx5_esw_devlink_rate_parent_set(struct devlink_rate * devlink_rate,struct devlink_rate * parent,void * priv,void * parent_priv,struct netlink_ext_ack * extack)1012 int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
1013 				     struct devlink_rate *parent,
1014 				     void *priv, void *parent_priv,
1015 				     struct netlink_ext_ack *extack)
1016 {
1017 	struct mlx5_esw_rate_group *group;
1018 	struct mlx5_vport *vport = priv;
1019 
1020 	if (!parent)
1021 		return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
1022 						       vport, NULL, extack);
1023 
1024 	group = parent_priv;
1025 	return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
1026 }
1027