// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ #include "eswitch.h" #include "lib/mlx5.h" #include "esw/qos.h" #include "en/port.h" #define CREATE_TRACE_POINTS #include "diag/qos_tracepoint.h" /* Minimum supported BW share value by the HW is 1 Mbit/sec */ #define MLX5_MIN_BW_SHARE 1 #define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit) struct mlx5_esw_rate_group { u32 tsar_ix; u32 max_rate; u32 min_rate; u32 bw_share; struct list_head list; }; static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx, u32 tsar_ix, u32 max_rate, u32 bw_share) { u32 bitmask = 0; if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) return -EOPNOTSUPP; MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; return mlx5_modify_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, sched_ctx, tsar_ix, bitmask); } static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) { u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_core_dev *dev = esw->dev; int err; err = esw_qos_tsar_config(dev, sched_ctx, group->tsar_ix, max_rate, bw_share); if (err) NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed"); trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate); return err; } static int esw_qos_vport_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport, u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) { u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_core_dev *dev = esw->dev; int err; if (!vport->qos.enabled) return -EIO; err = esw_qos_tsar_config(dev, sched_ctx, vport->qos.esw_tsar_ix, max_rate, bw_share); if (err) { esw_warn(esw->dev, "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n", vport->vport, err); NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed"); return err; } trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate); return 0; } static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, bool group_level) { u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); struct mlx5_vport *evport; u32 max_guarantee = 0; unsigned long i; if (group_level) { struct mlx5_esw_rate_group *group; list_for_each_entry(group, &esw->qos.groups, list) { if (group->min_rate < max_guarantee) continue; max_guarantee = group->min_rate; } } else { mlx5_esw_for_each_vport(esw, i, evport) { if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group || evport->qos.min_rate < max_guarantee) continue; max_guarantee = evport->qos.min_rate; } } if (max_guarantee) return max_t(u32, max_guarantee / fw_max_bw_share, 1); /* If vports min rate divider is 0 but their group has bw_share configured, then * need to set bw_share for vports to minimal value. */ if (!group_level && !max_guarantee && group && group->bw_share) return 1; return 0; } static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max) { if (divider) return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max); return 0; } static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, struct netlink_ext_ack *extack) { u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false); struct mlx5_vport *evport; unsigned long i; u32 bw_share; int err; mlx5_esw_for_each_vport(esw, i, evport) { if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group) continue; bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share); if (bw_share == evport->qos.bw_share) continue; err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack); if (err) return err; evport->qos.bw_share = bw_share; } return 0; } static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider, struct netlink_ext_ack *extack) { u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); struct mlx5_esw_rate_group *group; u32 bw_share; int err; list_for_each_entry(group, &esw->qos.groups, list) { bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share); if (bw_share == group->bw_share) continue; err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack); if (err) return err; group->bw_share = bw_share; /* All the group's vports need to be set with default bw_share * to enable them with QOS */ err = esw_qos_normalize_vports_min_rate(esw, group, extack); if (err) return err; } return 0; } static int esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, u32 min_rate, struct netlink_ext_ack *extack) { u32 fw_max_bw_share, previous_min_rate; bool min_rate_supported; int err; lockdep_assert_held(&esw->state_lock); fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE; if (min_rate && !min_rate_supported) return -EOPNOTSUPP; if (min_rate == evport->qos.min_rate) return 0; previous_min_rate = evport->qos.min_rate; evport->qos.min_rate = min_rate; err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack); if (err) evport->qos.min_rate = previous_min_rate; return err; } static int esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, struct mlx5_vport *evport, u32 max_rate, struct netlink_ext_ack *extack) { u32 act_max_rate = max_rate; bool max_rate_supported; int err; lockdep_assert_held(&esw->state_lock); max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); if (max_rate && !max_rate_supported) return -EOPNOTSUPP; if (max_rate == evport->qos.max_rate) return 0; /* If parent group has rate limit need to set to group * value when new max rate is 0. */ if (evport->qos.group && !max_rate) act_max_rate = evport->qos.group->max_rate; err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack); if (!err) evport->qos.max_rate = max_rate; return err; } static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, u32 min_rate, struct netlink_ext_ack *extack) { u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); struct mlx5_core_dev *dev = esw->dev; u32 previous_min_rate, divider; int err; if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE)) return -EOPNOTSUPP; if (min_rate == group->min_rate) return 0; previous_min_rate = group->min_rate; group->min_rate = min_rate; divider = esw_qos_calculate_min_rate_divider(esw, group, true); err = esw_qos_normalize_groups_min_rate(esw, divider, extack); if (err) { group->min_rate = previous_min_rate; NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed"); /* Attempt restoring previous configuration */ divider = esw_qos_calculate_min_rate_divider(esw, group, true); if (esw_qos_normalize_groups_min_rate(esw, divider, extack)) NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed"); } return err; } static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, u32 max_rate, struct netlink_ext_ack *extack) { struct mlx5_vport *vport; unsigned long i; int err; if (group->max_rate == max_rate) return 0; err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack); if (err) return err; group->max_rate = max_rate; /* Any unlimited vports in the group should be set * with the value of the group. */ mlx5_esw_for_each_vport(esw, i, vport) { if (!vport->enabled || !vport->qos.enabled || vport->qos.group != group || vport->qos.max_rate) continue; err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack); if (err) NL_SET_ERR_MSG_MOD(extack, "E-Switch vport implicit rate limit setting failed"); } return err; } static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type) { switch (type) { case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: return MLX5_CAP_QOS(dev, esw_element_type) & ELEMENT_TYPE_CAP_MASK_TSAR; case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT: return MLX5_CAP_QOS(dev, esw_element_type) & ELEMENT_TYPE_CAP_MASK_VPORT; case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC: return MLX5_CAP_QOS(dev, esw_element_type) & ELEMENT_TYPE_CAP_MASK_VPORT_TC; case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC: return MLX5_CAP_QOS(dev, esw_element_type) & ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; } return false; } static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw, struct mlx5_vport *vport, u32 max_rate, u32 bw_share) { u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_esw_rate_group *group = vport->qos.group; struct mlx5_core_dev *dev = esw->dev; u32 parent_tsar_ix; void *vport_elem; int err; if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT)) return -EOPNOTSUPP; parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix; MLX5_SET(scheduling_context, sched_ctx, element_type, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix); MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); err = mlx5_create_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, sched_ctx, &vport->qos.esw_tsar_ix); if (err) { esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n", vport->vport, err); return err; } return 0; } static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw, struct mlx5_vport *vport, struct mlx5_esw_rate_group *curr_group, struct mlx5_esw_rate_group *new_group, struct netlink_ext_ack *extack) { u32 max_rate; int err; err = mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, vport->qos.esw_tsar_ix); if (err) { NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed"); return err; } vport->qos.group = new_group; max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate; /* If vport is unlimited, we set the group's value. * Therefore, if the group is limited it will apply to * the vport as well and if not, vport will remain unlimited. */ err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share); if (err) { NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed."); goto err_sched; } return 0; err_sched: vport->qos.group = curr_group; max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate; if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share)) esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n", vport->vport); return err; } static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, struct mlx5_vport *vport, struct mlx5_esw_rate_group *group, struct netlink_ext_ack *extack) { struct mlx5_esw_rate_group *new_group, *curr_group; int err; if (!vport->enabled) return -EINVAL; curr_group = vport->qos.group; new_group = group ?: esw->qos.group0; if (curr_group == new_group) return 0; err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack); if (err) return err; /* Recalculate bw share weights of old and new groups */ if (vport->qos.bw_share || new_group->bw_share) { esw_qos_normalize_vports_min_rate(esw, curr_group, extack); esw_qos_normalize_vports_min_rate(esw, new_group, extack); } return 0; } static struct mlx5_esw_rate_group * __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_esw_rate_group *group; __be32 *attr; u32 divider; int err; group = kzalloc(sizeof(*group), GFP_KERNEL); if (!group) return ERR_PTR(-ENOMEM); MLX5_SET(scheduling_context, tsar_ctx, element_type, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, esw->qos.root_tsar_ix); err = mlx5_create_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, tsar_ctx, &group->tsar_ix); if (err) { NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed"); goto err_sched_elem; } list_add_tail(&group->list, &esw->qos.groups); divider = esw_qos_calculate_min_rate_divider(esw, group, true); if (divider) { err = esw_qos_normalize_groups_min_rate(esw, divider, extack); if (err) { NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed"); goto err_min_rate; } } trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix); return group; err_min_rate: list_del(&group->list); if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, group->tsar_ix)) NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed"); err_sched_elem: kfree(group); return ERR_PTR(err); } static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack); static void esw_qos_put(struct mlx5_eswitch *esw); static struct mlx5_esw_rate_group * esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { struct mlx5_esw_rate_group *group; int err; if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth)) return ERR_PTR(-EOPNOTSUPP); err = esw_qos_get(esw, extack); if (err) return ERR_PTR(err); group = __esw_qos_create_rate_group(esw, extack); if (IS_ERR(group)) esw_qos_put(esw); return group; } static int __esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, struct netlink_ext_ack *extack) { u32 divider; int err; list_del(&group->list); divider = esw_qos_calculate_min_rate_divider(esw, NULL, true); err = esw_qos_normalize_groups_min_rate(esw, divider, extack); if (err) NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed"); err = mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, group->tsar_ix); if (err) NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed"); trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix); kfree(group); return err; } static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, struct netlink_ext_ack *extack) { int err; err = __esw_qos_destroy_rate_group(esw, group, extack); esw_qos_put(esw); return err; } static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_core_dev *dev = esw->dev; __be32 *attr; int err; if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) return -EOPNOTSUPP; if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR) || !(MLX5_CAP_QOS(dev, esw_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR)) return -EOPNOTSUPP; MLX5_SET(scheduling_context, tsar_ctx, element_type, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); err = mlx5_create_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, tsar_ctx, &esw->qos.root_tsar_ix); if (err) { esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err); return err; } INIT_LIST_HEAD(&esw->qos.groups); if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) { esw->qos.group0 = __esw_qos_create_rate_group(esw, extack); if (IS_ERR(esw->qos.group0)) { esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n", PTR_ERR(esw->qos.group0)); err = PTR_ERR(esw->qos.group0); goto err_group0; } } refcount_set(&esw->qos.refcnt, 1); return 0; err_group0: if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, esw->qos.root_tsar_ix)) esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n"); return err; } static void esw_qos_destroy(struct mlx5_eswitch *esw) { int err; if (esw->qos.group0) __esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL); err = mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, esw->qos.root_tsar_ix); if (err) esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err); } static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { int err = 0; lockdep_assert_held(&esw->state_lock); if (!refcount_inc_not_zero(&esw->qos.refcnt)) { /* esw_qos_create() set refcount to 1 only on success. * No need to decrement on failure. */ err = esw_qos_create(esw, extack); } return err; } static void esw_qos_put(struct mlx5_eswitch *esw) { lockdep_assert_held(&esw->state_lock); if (refcount_dec_and_test(&esw->qos.refcnt)) esw_qos_destroy(esw); } static int esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) { int err; lockdep_assert_held(&esw->state_lock); if (vport->qos.enabled) return 0; err = esw_qos_get(esw, extack); if (err) return err; vport->qos.group = esw->qos.group0; err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share); if (err) goto err_out; vport->qos.enabled = true; trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate); return 0; err_out: esw_qos_put(esw); return err; } void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { int err; lockdep_assert_held(&esw->state_lock); if (!vport->qos.enabled) return; WARN(vport->qos.group && vport->qos.group != esw->qos.group0, "Disabling QoS on port before detaching it from group"); err = mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, vport->qos.esw_tsar_ix); if (err) esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n", vport->vport, err); memset(&vport->qos, 0, sizeof(vport->qos)); trace_mlx5_esw_vport_qos_destroy(vport); esw_qos_put(esw); } int mlx5_esw_qos_set_vport_rate(struct mlx5_eswitch *esw, struct mlx5_vport *vport, u32 max_rate, u32 min_rate) { int err; lockdep_assert_held(&esw->state_lock); err = esw_qos_vport_enable(esw, vport, 0, 0, NULL); if (err) return err; err = esw_qos_set_vport_min_rate(esw, vport, min_rate, NULL); if (!err) err = esw_qos_set_vport_max_rate(esw, vport, max_rate, NULL); return err; } static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev) { struct ethtool_link_ksettings lksettings; struct net_device *slave, *master; u32 speed = SPEED_UNKNOWN; /* Lock ensures a stable reference to master and slave netdevice * while port speed of master is queried. */ ASSERT_RTNL(); slave = mlx5_uplink_netdev_get(mdev); if (!slave) goto out; master = netdev_master_upper_dev_get(slave); if (master && !__ethtool_get_link_ksettings(master, &lksettings)) speed = lksettings.base.speed; out: return speed; } static int mlx5_esw_qos_max_link_speed_get(struct mlx5_core_dev *mdev, u32 *link_speed_max, bool hold_rtnl_lock, struct netlink_ext_ack *extack) { int err; if (!mlx5_lag_is_active(mdev)) goto skip_lag; if (hold_rtnl_lock) rtnl_lock(); *link_speed_max = mlx5_esw_qos_lag_link_speed_get_locked(mdev); if (hold_rtnl_lock) rtnl_unlock(); if (*link_speed_max != (u32)SPEED_UNKNOWN) return 0; skip_lag: err = mlx5_port_max_linkspeed(mdev, link_speed_max); if (err) NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed"); return err; } static int mlx5_esw_qos_link_speed_verify(struct mlx5_core_dev *mdev, const char *name, u32 link_speed_max, u64 value, struct netlink_ext_ack *extack) { if (value > link_speed_max) { pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n", name, value, link_speed_max); NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed"); return -EINVAL; } return 0; } int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps) { u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_vport *vport; u32 link_speed_max; u32 bitmask; int err; vport = mlx5_eswitch_get_vport(esw, vport_num); if (IS_ERR(vport)) return PTR_ERR(vport); if (rate_mbps) { err = mlx5_esw_qos_max_link_speed_get(esw->dev, &link_speed_max, false, NULL); if (err) return err; err = mlx5_esw_qos_link_speed_verify(esw->dev, "Police", link_speed_max, rate_mbps, NULL); if (err) return err; } mutex_lock(&esw->state_lock); if (!vport->qos.enabled) { /* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */ err = esw_qos_vport_enable(esw, vport, rate_mbps, vport->qos.bw_share, NULL); } else { MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; err = mlx5_modify_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, ctx, vport->qos.esw_tsar_ix, bitmask); } mutex_unlock(&esw->state_lock); return err; } #define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */ /* Converts bytes per second value passed in a pointer into megabits per * second, rewriting last. If converted rate exceed link speed or is not a * fraction of Mbps - returns error. */ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name, u64 *rate, struct netlink_ext_ack *extack) { u32 link_speed_max, remainder; u64 value; int err; value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &remainder); if (remainder) { pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n", name, *rate); NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps"); return -EINVAL; } err = mlx5_esw_qos_max_link_speed_get(mdev, &link_speed_max, true, extack); if (err) return err; err = mlx5_esw_qos_link_speed_verify(mdev, name, link_speed_max, value, extack); if (err) return err; *rate = value; return 0; } /* Eswitch devlink rate API */ int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, u64 tx_share, struct netlink_ext_ack *extack) { struct mlx5_vport *vport = priv; struct mlx5_eswitch *esw; int err; esw = vport->dev->priv.eswitch; if (!mlx5_esw_allowed(esw)) return -EPERM; err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack); if (err) return err; mutex_lock(&esw->state_lock); err = esw_qos_vport_enable(esw, vport, 0, 0, extack); if (err) goto unlock; err = esw_qos_set_vport_min_rate(esw, vport, tx_share, extack); unlock: mutex_unlock(&esw->state_lock); return err; } int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, u64 tx_max, struct netlink_ext_ack *extack) { struct mlx5_vport *vport = priv; struct mlx5_eswitch *esw; int err; esw = vport->dev->priv.eswitch; if (!mlx5_esw_allowed(esw)) return -EPERM; err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack); if (err) return err; mutex_lock(&esw->state_lock); err = esw_qos_vport_enable(esw, vport, 0, 0, extack); if (err) goto unlock; err = esw_qos_set_vport_max_rate(esw, vport, tx_max, extack); unlock: mutex_unlock(&esw->state_lock); return err; } int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, u64 tx_share, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5_esw_rate_group *group = priv; int err; err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack); if (err) return err; mutex_lock(&esw->state_lock); err = esw_qos_set_group_min_rate(esw, group, tx_share, extack); mutex_unlock(&esw->state_lock); return err; } int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, u64 tx_max, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5_esw_rate_group *group = priv; int err; err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack); if (err) return err; mutex_lock(&esw->state_lock); err = esw_qos_set_group_max_rate(esw, group, tx_max, extack); mutex_unlock(&esw->state_lock); return err; } int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, struct netlink_ext_ack *extack) { struct mlx5_esw_rate_group *group; struct mlx5_eswitch *esw; int err = 0; esw = mlx5_devlink_eswitch_get(rate_node->devlink); if (IS_ERR(esw)) return PTR_ERR(esw); mutex_lock(&esw->state_lock); if (esw->mode != MLX5_ESWITCH_OFFLOADS) { NL_SET_ERR_MSG_MOD(extack, "Rate node creation supported only in switchdev mode"); err = -EOPNOTSUPP; goto unlock; } group = esw_qos_create_rate_group(esw, extack); if (IS_ERR(group)) { err = PTR_ERR(group); goto unlock; } *priv = group; unlock: mutex_unlock(&esw->state_lock); return err; } int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, struct netlink_ext_ack *extack) { struct mlx5_esw_rate_group *group = priv; struct mlx5_eswitch *esw; int err; esw = mlx5_devlink_eswitch_get(rate_node->devlink); if (IS_ERR(esw)) return PTR_ERR(esw); mutex_lock(&esw->state_lock); err = esw_qos_destroy_rate_group(esw, group, extack); mutex_unlock(&esw->state_lock); return err; } int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw, struct mlx5_vport *vport, struct mlx5_esw_rate_group *group, struct netlink_ext_ack *extack) { int err = 0; mutex_lock(&esw->state_lock); if (!vport->qos.enabled && !group) goto unlock; err = esw_qos_vport_enable(esw, vport, 0, 0, extack); if (!err) err = esw_qos_vport_update_group(esw, vport, group, extack); unlock: mutex_unlock(&esw->state_lock); return err; } int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, struct devlink_rate *parent, void *priv, void *parent_priv, struct netlink_ext_ack *extack) { struct mlx5_esw_rate_group *group; struct mlx5_vport *vport = priv; if (!parent) return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, NULL, extack); group = parent_priv; return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack); }