diff options
author | Paolo Abeni <pabeni@redhat.com> | 2024-10-21 14:26:52 +0300 |
---|---|---|
committer | Paolo Abeni <pabeni@redhat.com> | 2024-10-21 14:26:53 +0300 |
commit | 6f07cd8301706b661776074ddc97c991d107cc91 (patch) | |
tree | 955eba050fb4011315101cf1e5e4a82aa4e0ffb8 | |
parent | 7cb08476e19fb3d0dce618df7c11713434553e27 (diff) | |
parent | 7b919caaeb18f6d44133cd6f948a137af30daa4f (diff) | |
download | linux-6f07cd8301706b661776074ddc97c991d107cc91.tar.xz |
Merge branch 'net-mlx5-refactor-esw-qos-to-support-generalized-operations'
Tariq Toukan says:
====================
net/mlx5: Refactor esw QoS to support generalized operations
This patch series from the team to mlx5 core driver consists of one main
QoS part followed by small misc patches.
This main part (patches 1 to 11) by Carolina refactors the QoS handling
to generalize operations on scheduling groups and vports. These changes
are necessary to support new features that will extend group
functionality, introduce new group types, and support deeper
hierarchies.
Additionally, this refactor updates the terminology from "group" to
"node" to better reflect the hardware’s rate hierarchy and its use
of scheduling element nodes.
Simplify group scheduling element creation:
- net/mlx5: Refactor QoS group scheduling element creation
Refactor to support generalized operations for QoS:
- net/mlx5: Introduce node type to rate group structure
- net/mlx5: Add parent group support in rate group structure
- net/mlx5: Restrict domain list insertion to root TSAR ancestors
- net/mlx5: Rename vport QoS group reference to parent
- net/mlx5: Introduce node struct and rename group terminology to node
- net/mlx5: Refactor vport scheduling element creation function
- net/mlx5: Refactor vport QoS to use scheduling node structure
- net/mlx5: Remove vport QoS enabled flag
Support generalized operations for QoS elements:
- net/mlx5: Simplify QoS scheduling element configuration
- net/mlx5: Generalize QoS operations for nodes and vports
On top, patch 12 by Moshe handles FW request to move to drop mode.
In patch 13, Benjamin Poirier removes an empty eswitch flow table when
not used, which improves packet processing performance.
Patches 14 and 15 by Moshe are small field renamings as preparation for
future fields addition to these structures.
Series generated against:
commit c531f2269a53 ("net: bcmasp: enable SW timestamping")
====================
Link: https://patch.msgid.link/20241016173617.217736-1-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
11 files changed, 435 insertions, 398 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c index 1c062a2e8996..45737d039252 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c @@ -318,7 +318,7 @@ mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, } actions[num_actions++] = smfs_rule->count_action; - actions[num_actions++] = attr->modify_hdr->action.dr_action; + actions[num_actions++] = attr->modify_hdr->fs_dr_action.dr_action; actions[num_actions++] = fs_smfs->fwd_action; nat = (attr->ft == fs_smfs->ct_nat); @@ -379,7 +379,7 @@ static int mlx5_ct_fs_smfs_ct_rule_update(struct mlx5_ct_fs *fs, struct mlx5_ct_ struct mlx5dr_rule *rule; actions[0] = smfs_rule->count_action; - actions[1] = attr->modify_hdr->action.dr_action; + actions[1] = attr->modify_hdr->fs_dr_action.dr_action; actions[2] = fs_smfs->fwd_action; rule = mlx5_smfs_rule_create(smfs_rule->smfs_matcher->dr_matcher, spec, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index 86af1891395f..d0f38818363f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -195,7 +195,7 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_vport *vport) return; dl_port = vport->dl_port; - mlx5_esw_qos_vport_update_group(vport, NULL, NULL); + mlx5_esw_qos_vport_update_node(vport, NULL, NULL); devl_rate_leaf_destroy(&dl_port->dl_port); devl_port_unregister(&dl_port->dl_port); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h index 645bad0d625f..43550a416a6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h @@ -9,6 +9,7 @@ #include <linux/tracepoint.h> #include "eswitch.h" +#include "qos.h" TRACE_EVENT(mlx5_esw_vport_qos_destroy, TP_PROTO(const struct mlx5_core_dev *dev, const struct mlx5_vport *vport), @@ -19,7 +20,7 @@ TRACE_EVENT(mlx5_esw_vport_qos_destroy, ), TP_fast_assign(__assign_str(devname); __entry->vport_id = vport->vport; - __entry->sched_elem_ix = vport->qos.esw_sched_elem_ix; + __entry->sched_elem_ix = mlx5_esw_qos_vport_get_sched_elem_ix(vport); ), TP_printk("(%s) vport=%hu sched_elem_ix=%u\n", __get_str(devname), __entry->vport_id, __entry->sched_elem_ix @@ -35,18 +36,18 @@ DECLARE_EVENT_CLASS(mlx5_esw_vport_qos_template, __field(unsigned int, sched_elem_ix) __field(unsigned int, bw_share) __field(unsigned int, max_rate) - __field(void *, group) + __field(void *, parent) ), TP_fast_assign(__assign_str(devname); __entry->vport_id = vport->vport; - __entry->sched_elem_ix = vport->qos.esw_sched_elem_ix; + __entry->sched_elem_ix = mlx5_esw_qos_vport_get_sched_elem_ix(vport); __entry->bw_share = bw_share; __entry->max_rate = max_rate; - __entry->group = vport->qos.group; + __entry->parent = mlx5_esw_qos_vport_get_parent(vport); ), - TP_printk("(%s) vport=%hu sched_elem_ix=%u bw_share=%u, max_rate=%u group=%p\n", + TP_printk("(%s) vport=%hu sched_elem_ix=%u bw_share=%u, max_rate=%u parent=%p\n", __get_str(devname), __entry->vport_id, __entry->sched_elem_ix, - __entry->bw_share, __entry->max_rate, __entry->group + __entry->bw_share, __entry->max_rate, __entry->parent ) ); @@ -62,57 +63,57 @@ DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_config, TP_ARGS(dev, vport, bw_share, max_rate) ); -DECLARE_EVENT_CLASS(mlx5_esw_group_qos_template, +DECLARE_EVENT_CLASS(mlx5_esw_node_qos_template, TP_PROTO(const struct mlx5_core_dev *dev, - const struct mlx5_esw_rate_group *group, + const struct mlx5_esw_sched_node *node, unsigned int tsar_ix), - TP_ARGS(dev, group, tsar_ix), + TP_ARGS(dev, node, tsar_ix), TP_STRUCT__entry(__string(devname, dev_name(dev->device)) - __field(const void *, group) + __field(const void *, node) __field(unsigned int, tsar_ix) ), TP_fast_assign(__assign_str(devname); - __entry->group = group; + __entry->node = node; __entry->tsar_ix = tsar_ix; ), - TP_printk("(%s) group=%p tsar_ix=%u\n", - __get_str(devname), __entry->group, __entry->tsar_ix + TP_printk("(%s) node=%p tsar_ix=%u\n", + __get_str(devname), __entry->node, __entry->tsar_ix ) ); -DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_create, +DEFINE_EVENT(mlx5_esw_node_qos_template, mlx5_esw_node_qos_create, TP_PROTO(const struct mlx5_core_dev *dev, - const struct mlx5_esw_rate_group *group, + const struct mlx5_esw_sched_node *node, unsigned int tsar_ix), - TP_ARGS(dev, group, tsar_ix) + TP_ARGS(dev, node, tsar_ix) ); -DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_destroy, +DEFINE_EVENT(mlx5_esw_node_qos_template, mlx5_esw_node_qos_destroy, TP_PROTO(const struct mlx5_core_dev *dev, - const struct mlx5_esw_rate_group *group, + const struct mlx5_esw_sched_node *node, unsigned int tsar_ix), - TP_ARGS(dev, group, tsar_ix) + TP_ARGS(dev, node, tsar_ix) ); -TRACE_EVENT(mlx5_esw_group_qos_config, +TRACE_EVENT(mlx5_esw_node_qos_config, TP_PROTO(const struct mlx5_core_dev *dev, - const struct mlx5_esw_rate_group *group, + const struct mlx5_esw_sched_node *node, unsigned int tsar_ix, u32 bw_share, u32 max_rate), - TP_ARGS(dev, group, tsar_ix, bw_share, max_rate), + TP_ARGS(dev, node, tsar_ix, bw_share, max_rate), TP_STRUCT__entry(__string(devname, dev_name(dev->device)) - __field(const void *, group) + __field(const void *, node) __field(unsigned int, tsar_ix) __field(unsigned int, bw_share) __field(unsigned int, max_rate) ), TP_fast_assign(__assign_str(devname); - __entry->group = group; + __entry->node = node; __entry->tsar_ix = tsar_ix; __entry->bw_share = bw_share; __entry->max_rate = max_rate; ), - TP_printk("(%s) group=%p tsar_ix=%u bw_share=%u max_rate=%u\n", - __get_str(devname), __entry->group, __entry->tsar_ix, + TP_printk("(%s) node=%p tsar_ix=%u bw_share=%u max_rate=%u\n", + __get_str(devname), __entry->node, __entry->tsar_ix, __entry->bw_share, __entry->max_rate ) ); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index 288c797e4a78..45183de424f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -176,20 +176,10 @@ static void esw_destroy_legacy_vepa_table(struct mlx5_eswitch *esw) static int esw_create_legacy_table(struct mlx5_eswitch *esw) { - int err; - memset(&esw->fdb_table.legacy, 0, sizeof(struct legacy_fdb)); atomic64_set(&esw->user_count, 0); - err = esw_create_legacy_vepa_table(esw); - if (err) - return err; - - err = esw_create_legacy_fdb_table(esw); - if (err) - esw_destroy_legacy_vepa_table(esw); - - return err; + return esw_create_legacy_fdb_table(esw); } static void esw_cleanup_vepa_rules(struct mlx5_eswitch *esw) @@ -259,15 +249,22 @@ static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, if (!setting) { esw_cleanup_vepa_rules(esw); + esw_destroy_legacy_vepa_table(esw); return 0; } if (esw->fdb_table.legacy.vepa_uplink_rule) return 0; + err = esw_create_legacy_vepa_table(esw); + if (err) + return err; + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return -ENOMEM; + if (!spec) { + err = -ENOMEM; + goto out; + } /* Uplink rule forward uplink traffic to FDB */ misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); @@ -303,8 +300,10 @@ static int _mlx5_eswitch_set_vepa_locked(struct mlx5_eswitch *esw, out: kvfree(spec); - if (err) + if (err) { esw_cleanup_vepa_rules(esw); + esw_destroy_legacy_vepa_table(esw); + } return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index ee6f76a6f0b5..7e7f99b38a37 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -11,12 +11,12 @@ /* Minimum supported BW share value by the HW is 1 Mbit/sec */ #define MLX5_MIN_BW_SHARE 1 -/* Holds rate groups associated with an E-Switch. */ +/* Holds rate nodes associated with an E-Switch. */ struct mlx5_qos_domain { /* Serializes access to all qos changes in the qos domain. */ struct mutex lock; - /* List of all mlx5_esw_rate_groups. */ - struct list_head groups; + /* List of all mlx5_esw_sched_nodes. */ + struct list_head nodes; }; static void esw_qos_lock(struct mlx5_eswitch *esw) @@ -43,7 +43,7 @@ static struct mlx5_qos_domain *esw_qos_domain_alloc(void) return NULL; mutex_init(&qos_domain->lock); - INIT_LIST_HEAD(&qos_domain->groups); + INIT_LIST_HEAD(&qos_domain->nodes); return qos_domain; } @@ -61,134 +61,142 @@ static void esw_qos_domain_release(struct mlx5_eswitch *esw) esw->qos.domain = NULL; } -struct mlx5_esw_rate_group { - u32 tsar_ix; +enum sched_node_type { + SCHED_NODE_TYPE_VPORTS_TSAR, + SCHED_NODE_TYPE_VPORT, +}; + +static const char * const sched_node_type_str[] = { + [SCHED_NODE_TYPE_VPORTS_TSAR] = "vports TSAR", + [SCHED_NODE_TYPE_VPORT] = "vport", +}; + +struct mlx5_esw_sched_node { + u32 ix; /* Bandwidth parameters. */ u32 max_rate; u32 min_rate; - /* A computed value indicating relative min_rate between group members. */ + /* A computed value indicating relative min_rate between node's children. */ u32 bw_share; - /* Membership in the qos domain 'groups' list. */ - struct list_head parent_entry; - /* The eswitch this group belongs to. */ + /* The parent node in the rate hierarchy. */ + struct mlx5_esw_sched_node *parent; + /* Entry in the parent node's children list. */ + struct list_head entry; + /* The type of this node in the rate hierarchy. */ + enum sched_node_type type; + /* The eswitch this node belongs to. */ struct mlx5_eswitch *esw; - /* Vport members of this group.*/ - struct list_head members; + /* The children nodes of this node, empty list for leaf nodes. */ + struct list_head children; + /* Valid only if this node is associated with a vport. */ + struct mlx5_vport *vport; }; -static void esw_qos_vport_set_group(struct mlx5_vport *vport, struct mlx5_esw_rate_group *group) +static void +esw_qos_node_set_parent(struct mlx5_esw_sched_node *node, struct mlx5_esw_sched_node *parent) { - list_del_init(&vport->qos.group_entry); - vport->qos.group = group; - list_add_tail(&vport->qos.group_entry, &group->members); + list_del_init(&node->entry); + node->parent = parent; + list_add_tail(&node->entry, &parent->children); + node->esw = parent->esw; } -static int esw_qos_sched_elem_config(struct mlx5_core_dev *dev, u32 sched_elem_ix, - u32 max_rate, u32 bw_share) +u32 mlx5_esw_qos_vport_get_sched_elem_ix(const struct mlx5_vport *vport) { - u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; - u32 bitmask = 0; - - if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) - return -EOPNOTSUPP; - - MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); - MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); - bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; - bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; + if (!vport->qos.sched_node) + return 0; - return mlx5_modify_scheduling_element_cmd(dev, - SCHEDULING_HIERARCHY_E_SWITCH, - sched_ctx, - sched_elem_ix, - bitmask); + return vport->qos.sched_node->ix; } -static int esw_qos_group_config(struct mlx5_esw_rate_group *group, - u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) +struct mlx5_esw_sched_node * +mlx5_esw_qos_vport_get_parent(const struct mlx5_vport *vport) { - struct mlx5_core_dev *dev = group->esw->dev; - int err; + if (!vport->qos.sched_node) + return NULL; - err = esw_qos_sched_elem_config(dev, group->tsar_ix, max_rate, bw_share); - if (err) - NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed"); + return vport->qos.sched_node->parent; +} - trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate); +static void esw_qos_sched_elem_config_warn(struct mlx5_esw_sched_node *node, int err) +{ + if (node->vport) { + esw_warn(node->esw->dev, + "E-Switch modify %s scheduling element failed (vport=%d,err=%d)\n", + sched_node_type_str[node->type], node->vport->vport, err); + return; + } - return err; + esw_warn(node->esw->dev, + "E-Switch modify %s scheduling element failed (err=%d)\n", + sched_node_type_str[node->type], err); } -static int esw_qos_vport_config(struct mlx5_vport *vport, - u32 max_rate, u32 bw_share, - struct netlink_ext_ack *extack) +static int esw_qos_sched_elem_config(struct mlx5_esw_sched_node *node, u32 max_rate, u32 bw_share, + struct netlink_ext_ack *extack) { - struct mlx5_core_dev *dev = vport->qos.group->esw->dev; + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_core_dev *dev = node->esw->dev; + u32 bitmask = 0; int err; - err = esw_qos_sched_elem_config(dev, vport->qos.esw_sched_elem_ix, max_rate, bw_share); + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) + return -EOPNOTSUPP; + + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; + + err = mlx5_modify_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + sched_ctx, + node->ix, + bitmask); if (err) { - esw_warn(dev, - "E-Switch modify vport scheduling element failed (vport=%d,err=%d)\n", - vport->vport, err); - NL_SET_ERR_MSG_MOD(extack, "E-Switch modify vport scheduling element failed"); + esw_qos_sched_elem_config_warn(node, err); + NL_SET_ERR_MSG_MOD(extack, "E-Switch modify scheduling element failed"); + return err; } - trace_mlx5_esw_vport_qos_config(dev, vport, bw_share, max_rate); + if (node->type == SCHED_NODE_TYPE_VPORTS_TSAR) + trace_mlx5_esw_node_qos_config(dev, node, node->ix, bw_share, max_rate); + else if (node->type == SCHED_NODE_TYPE_VPORT) + trace_mlx5_esw_vport_qos_config(dev, node->vport, bw_share, max_rate); return 0; } -static u32 esw_qos_calculate_group_min_rate_divider(struct mlx5_esw_rate_group *group) +static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, + struct mlx5_esw_sched_node *parent) { - u32 fw_max_bw_share = MLX5_CAP_QOS(group->esw->dev, max_tsar_bw_share); - struct mlx5_vport *vport; + struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes; + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_esw_sched_node *node; u32 max_guarantee = 0; - /* Find max min_rate across all vports in this group. + /* Find max min_rate across all nodes. * This will correspond to fw_max_bw_share in the final bw_share calculation. */ - list_for_each_entry(vport, &group->members, qos.group_entry) { - if (vport->qos.min_rate > max_guarantee) - max_guarantee = vport->qos.min_rate; + list_for_each_entry(node, nodes, entry) { + if (node->esw == esw && node->ix != esw->qos.root_tsar_ix && + node->min_rate > max_guarantee) + max_guarantee = node->min_rate; } if (max_guarantee) return max_t(u32, max_guarantee / fw_max_bw_share, 1); - /* If vports max min_rate divider is 0 but their group has bw_share - * configured, then set bw_share for vports to minimal value. + /* If nodes max min_rate divider is 0 but their parent has bw_share + * configured, then set bw_share for nodes to minimal value. */ - if (group->bw_share) - return 1; - - /* A divider of 0 sets bw_share for all group vports to 0, - * effectively disabling min guarantees. - */ - return 0; -} - -static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw) -{ - u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); - struct mlx5_esw_rate_group *group; - u32 max_guarantee = 0; - - /* Find max min_rate across all esw groups. - * This will correspond to fw_max_bw_share in the final bw_share calculation. - */ - list_for_each_entry(group, &esw->qos.domain->groups, parent_entry) { - if (group->esw == esw && group->tsar_ix != esw->qos.root_tsar_ix && - group->min_rate > max_guarantee) - max_guarantee = group->min_rate; - } - if (max_guarantee) - return max_t(u32, max_guarantee / fw_max_bw_share, 1); + if (parent && parent->bw_share) + return 1; - /* If no group has min_rate configured, a divider of 0 sets all - * groups' bw_share to 0, effectively disabling min guarantees. + /* If the node nodes has min_rate configured, a divider of 0 sets all + * nodes' bw_share to 0, effectively disabling min guarantees. */ return 0; } @@ -200,58 +208,50 @@ static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max) return min_t(u32, max_t(u32, DIV_ROUND_UP(min_rate, divider), MLX5_MIN_BW_SHARE), fw_max); } -static int esw_qos_normalize_group_min_rate(struct mlx5_esw_rate_group *group, - struct netlink_ext_ack *extack) +static int esw_qos_update_sched_node_bw_share(struct mlx5_esw_sched_node *node, + u32 divider, + struct netlink_ext_ack *extack) { - u32 fw_max_bw_share = MLX5_CAP_QOS(group->esw->dev, max_tsar_bw_share); - u32 divider = esw_qos_calculate_group_min_rate_divider(group); - struct mlx5_vport *vport; + u32 fw_max_bw_share = MLX5_CAP_QOS(node->esw->dev, max_tsar_bw_share); u32 bw_share; int err; - list_for_each_entry(vport, &group->members, qos.group_entry) { - bw_share = esw_qos_calc_bw_share(vport->qos.min_rate, divider, fw_max_bw_share); + bw_share = esw_qos_calc_bw_share(node->min_rate, divider, fw_max_bw_share); - if (bw_share == vport->qos.bw_share) - continue; + if (bw_share == node->bw_share) + return 0; - err = esw_qos_vport_config(vport, vport->qos.max_rate, bw_share, extack); - if (err) - return err; + err = esw_qos_sched_elem_config(node, node->max_rate, bw_share, extack); + if (err) + return err; - vport->qos.bw_share = bw_share; - } + node->bw_share = bw_share; - return 0; + return err; } -static int esw_qos_normalize_min_rate(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +static int esw_qos_normalize_min_rate(struct mlx5_eswitch *esw, + struct mlx5_esw_sched_node *parent, + struct netlink_ext_ack *extack) { - u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); - u32 divider = esw_qos_calculate_min_rate_divider(esw); - struct mlx5_esw_rate_group *group; - u32 bw_share; - int err; + struct list_head *nodes = parent ? &parent->children : &esw->qos.domain->nodes; + u32 divider = esw_qos_calculate_min_rate_divider(esw, parent); + struct mlx5_esw_sched_node *node; - list_for_each_entry(group, &esw->qos.domain->groups, parent_entry) { - if (group->esw != esw || group->tsar_ix == esw->qos.root_tsar_ix) - continue; - bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share); + list_for_each_entry(node, nodes, entry) { + int err; - if (bw_share == group->bw_share) + if (node->esw != esw || node->ix == esw->qos.root_tsar_ix) continue; - err = esw_qos_group_config(group, group->max_rate, bw_share, extack); + err = esw_qos_update_sched_node_bw_share(node, divider, extack); if (err) return err; - group->bw_share = bw_share; - - /* All the group's vports need to be set with default bw_share - * to enable them with QOS - */ - err = esw_qos_normalize_group_min_rate(group, extack); + if (list_empty(&node->children)) + continue; + err = esw_qos_normalize_min_rate(node->esw, node, extack); if (err) return err; } @@ -262,25 +262,25 @@ static int esw_qos_normalize_min_rate(struct mlx5_eswitch *esw, struct netlink_e static int esw_qos_set_vport_min_rate(struct mlx5_vport *vport, u32 min_rate, struct netlink_ext_ack *extack) { - struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; u32 fw_max_bw_share, previous_min_rate; bool min_rate_supported; int err; - esw_assert_qos_lock_held(esw); + esw_assert_qos_lock_held(vport_node->esw); fw_max_bw_share = MLX5_CAP_QOS(vport->dev, max_tsar_bw_share); min_rate_supported = MLX5_CAP_QOS(vport->dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE; if (min_rate && !min_rate_supported) return -EOPNOTSUPP; - if (min_rate == vport->qos.min_rate) + if (min_rate == vport_node->min_rate) return 0; - previous_min_rate = vport->qos.min_rate; - vport->qos.min_rate = min_rate; - err = esw_qos_normalize_group_min_rate(vport->qos.group, extack); + previous_min_rate = vport_node->min_rate; + vport_node->min_rate = min_rate; + err = esw_qos_normalize_min_rate(vport_node->parent->esw, vport_node->parent, extack); if (err) - vport->qos.min_rate = previous_min_rate; + vport_node->min_rate = previous_min_rate; return err; } @@ -288,35 +288,34 @@ static int esw_qos_set_vport_min_rate(struct mlx5_vport *vport, static int esw_qos_set_vport_max_rate(struct mlx5_vport *vport, u32 max_rate, struct netlink_ext_ack *extack) { - struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; u32 act_max_rate = max_rate; bool max_rate_supported; int err; - esw_assert_qos_lock_held(esw); + esw_assert_qos_lock_held(vport_node->esw); max_rate_supported = MLX5_CAP_QOS(vport->dev, esw_rate_limit); if (max_rate && !max_rate_supported) return -EOPNOTSUPP; - if (max_rate == vport->qos.max_rate) + if (max_rate == vport_node->max_rate) return 0; - /* Use parent group limit if new max rate is 0. */ + /* Use parent node limit if new max rate is 0. */ if (!max_rate) - act_max_rate = vport->qos.group->max_rate; - - err = esw_qos_vport_config(vport, act_max_rate, vport->qos.bw_share, extack); + act_max_rate = vport_node->parent->max_rate; + err = esw_qos_sched_elem_config(vport_node, act_max_rate, vport_node->bw_share, extack); if (!err) - vport->qos.max_rate = max_rate; + vport_node->max_rate = max_rate; return err; } -static int esw_qos_set_group_min_rate(struct mlx5_esw_rate_group *group, - u32 min_rate, struct netlink_ext_ack *extack) +static int esw_qos_set_node_min_rate(struct mlx5_esw_sched_node *node, + u32 min_rate, struct netlink_ext_ack *extack) { - struct mlx5_eswitch *esw = group->esw; + struct mlx5_eswitch *esw = node->esw; u32 previous_min_rate; int err; @@ -324,45 +323,45 @@ static int esw_qos_set_group_min_rate(struct mlx5_esw_rate_group *group, MLX5_CAP_QOS(esw->dev, max_tsar_bw_share) < MLX5_MIN_BW_SHARE) return -EOPNOTSUPP; - if (min_rate == group->min_rate) + if (min_rate == node->min_rate) return 0; - previous_min_rate = group->min_rate; - group->min_rate = min_rate; - err = esw_qos_normalize_min_rate(esw, extack); + previous_min_rate = node->min_rate; + node->min_rate = min_rate; + err = esw_qos_normalize_min_rate(esw, NULL, extack); if (err) { - NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed"); + NL_SET_ERR_MSG_MOD(extack, "E-Switch node min rate setting failed"); /* Attempt restoring previous configuration */ - group->min_rate = previous_min_rate; - if (esw_qos_normalize_min_rate(esw, extack)) + node->min_rate = previous_min_rate; + if (esw_qos_normalize_min_rate(esw, NULL, extack)) NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed"); } return err; } -static int esw_qos_set_group_max_rate(struct mlx5_esw_rate_group *group, - u32 max_rate, struct netlink_ext_ack *extack) +static int esw_qos_set_node_max_rate(struct mlx5_esw_sched_node *node, + u32 max_rate, struct netlink_ext_ack *extack) { - struct mlx5_vport *vport; + struct mlx5_esw_sched_node *vport_node; int err; - if (group->max_rate == max_rate) + if (node->max_rate == max_rate) return 0; - err = esw_qos_group_config(group, max_rate, group->bw_share, extack); + err = esw_qos_sched_elem_config(node, max_rate, node->bw_share, extack); if (err) return err; - group->max_rate = max_rate; + node->max_rate = max_rate; - /* Any unlimited vports in the group should be set with the value of the group. */ - list_for_each_entry(vport, &group->members, qos.group_entry) { - if (vport->qos.max_rate) + /* Any unlimited vports in the node should be set with the value of the node. */ + list_for_each_entry(vport_node, &node->children, entry) { + if (vport_node->max_rate) continue; - err = esw_qos_vport_config(vport, max_rate, vport->qos.bw_share, extack); + err = esw_qos_sched_elem_config(vport_node, max_rate, vport_node->bw_share, extack); if (err) NL_SET_ERR_MSG_MOD(extack, "E-Switch vport implicit rate limit setting failed"); @@ -371,12 +370,39 @@ static int esw_qos_set_group_max_rate(struct mlx5_esw_rate_group *group, return err; } -static int esw_qos_vport_create_sched_element(struct mlx5_vport *vport, - u32 max_rate, u32 bw_share) +static int esw_qos_create_node_sched_elem(struct mlx5_core_dev *dev, u32 parent_element_id, + u32 *tsar_ix) +{ + u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + void *attr; + + if (!mlx5_qos_element_type_supported(dev, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR, + SCHEDULING_HIERARCHY_E_SWITCH) || + !mlx5_qos_tsar_type_supported(dev, + TSAR_ELEMENT_TSAR_TYPE_DWRR, + SCHEDULING_HIERARCHY_E_SWITCH)) + return -EOPNOTSUPP; + + MLX5_SET(scheduling_context, tsar_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, + parent_element_id); + attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); + MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); + + return mlx5_create_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + tsar_ctx, + tsar_ix); +} + +static int +esw_qos_vport_create_sched_element(struct mlx5_vport *vport, struct mlx5_esw_sched_node *parent, + u32 max_rate, u32 bw_share, u32 *sched_elem_ix) { u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; - struct mlx5_esw_rate_group *group = vport->qos.group; - struct mlx5_core_dev *dev = group->esw->dev; + struct mlx5_core_dev *dev = parent->esw->dev; void *attr; int err; @@ -389,14 +415,14 @@ static int esw_qos_vport_create_sched_element(struct mlx5_vport *vport, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); MLX5_SET(vport_element, attr, vport_number, vport->vport); - MLX5_SET(scheduling_context, sched_ctx, parent_element_id, group->tsar_ix); + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent->ix); MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); err = mlx5_create_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, sched_ctx, - &vport->qos.esw_sched_elem_ix); + sched_elem_ix); if (err) { esw_warn(dev, "E-Switch create vport scheduling element failed (vport=%d,err=%d)\n", @@ -407,148 +433,151 @@ static int esw_qos_vport_create_sched_element(struct mlx5_vport *vport, return 0; } -static int esw_qos_update_group_scheduling_element(struct mlx5_vport *vport, - struct mlx5_esw_rate_group *curr_group, - struct mlx5_esw_rate_group *new_group, - struct netlink_ext_ack *extack) +static int esw_qos_update_node_scheduling_element(struct mlx5_vport *vport, + struct mlx5_esw_sched_node *curr_node, + struct mlx5_esw_sched_node *new_node, + struct netlink_ext_ack *extack) { + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; u32 max_rate; int err; - err = mlx5_destroy_scheduling_element_cmd(curr_group->esw->dev, + err = mlx5_destroy_scheduling_element_cmd(curr_node->esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, - vport->qos.esw_sched_elem_ix); + vport_node->ix); if (err) { NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy vport scheduling element failed"); return err; } - esw_qos_vport_set_group(vport, new_group); - /* Use new group max rate if vport max rate is unlimited. */ - max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate; - err = esw_qos_vport_create_sched_element(vport, max_rate, vport->qos.bw_share); + /* Use new node max rate if vport max rate is unlimited. */ + max_rate = vport_node->max_rate ? vport_node->max_rate : new_node->max_rate; + err = esw_qos_vport_create_sched_element(vport, new_node, max_rate, + vport_node->bw_share, + &vport_node->ix); if (err) { - NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed."); + NL_SET_ERR_MSG_MOD(extack, "E-Switch vport node set failed."); goto err_sched; } + esw_qos_node_set_parent(vport->qos.sched_node, new_node); + return 0; err_sched: - esw_qos_vport_set_group(vport, curr_group); - max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate; - if (esw_qos_vport_create_sched_element(vport, max_rate, vport->qos.bw_share)) - esw_warn(curr_group->esw->dev, "E-Switch vport group restore failed (vport=%d)\n", + max_rate = vport_node->max_rate ? vport_node->max_rate : curr_node->max_rate; + if (esw_qos_vport_create_sched_element(vport, curr_node, max_rate, + vport_node->bw_share, + &vport_node->ix)) + esw_warn(curr_node->esw->dev, "E-Switch vport node restore failed (vport=%d)\n", vport->vport); return err; } -static int esw_qos_vport_update_group(struct mlx5_vport *vport, - struct mlx5_esw_rate_group *group, - struct netlink_ext_ack *extack) +static int esw_qos_vport_update_node(struct mlx5_vport *vport, + struct mlx5_esw_sched_node *node, + struct netlink_ext_ack *extack) { + struct mlx5_esw_sched_node *vport_node = vport->qos.sched_node; struct mlx5_eswitch *esw = vport->dev->priv.eswitch; - struct mlx5_esw_rate_group *new_group, *curr_group; + struct mlx5_esw_sched_node *new_node, *curr_node; int err; esw_assert_qos_lock_held(esw); - curr_group = vport->qos.group; - new_group = group ?: esw->qos.group0; - if (curr_group == new_group) + curr_node = vport_node->parent; + new_node = node ?: esw->qos.node0; + if (curr_node == new_node) return 0; - err = esw_qos_update_group_scheduling_element(vport, curr_group, new_group, extack); + err = esw_qos_update_node_scheduling_element(vport, curr_node, new_node, extack); if (err) return err; - /* Recalculate bw share weights of old and new groups */ - if (vport->qos.bw_share || new_group->bw_share) { - esw_qos_normalize_group_min_rate(curr_group, extack); - esw_qos_normalize_group_min_rate(new_group, extack); + /* Recalculate bw share weights of old and new nodes */ + if (vport_node->bw_share || new_node->bw_share) { + esw_qos_normalize_min_rate(curr_node->esw, curr_node, extack); + esw_qos_normalize_min_rate(new_node->esw, new_node, extack); } return 0; } -static struct mlx5_esw_rate_group * -__esw_qos_alloc_rate_group(struct mlx5_eswitch *esw, u32 tsar_ix) +static struct mlx5_esw_sched_node * +__esw_qos_alloc_node(struct mlx5_eswitch *esw, u32 tsar_ix, enum sched_node_type type, + struct mlx5_esw_sched_node *parent) { - struct mlx5_esw_rate_group *group; + struct list_head *parent_children; + struct mlx5_esw_sched_node *node; - group = kzalloc(sizeof(*group), GFP_KERNEL); - if (!group) + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) return NULL; - group->esw = esw; - group->tsar_ix = tsar_ix; - INIT_LIST_HEAD(&group->members); - list_add_tail(&group->parent_entry, &esw->qos.domain->groups); - return group; + node->esw = esw; + node->ix = tsar_ix; + node->type = type; + node->parent = parent; + INIT_LIST_HEAD(&node->children); + parent_children = parent ? &parent->children : &esw->qos.domain->nodes; + list_add_tail(&node->entry, parent_children); + + return node; } -static void __esw_qos_free_rate_group(struct mlx5_esw_rate_group *group) +static void __esw_qos_free_node(struct mlx5_esw_sched_node *node) { - list_del(&group->parent_entry); - kfree(group); + list_del(&node->entry); + kfree(node); } -static struct mlx5_esw_rate_group * -__esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +static struct mlx5_esw_sched_node * +__esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct mlx5_esw_sched_node *parent, + struct netlink_ext_ack *extack) { - u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; - struct mlx5_esw_rate_group *group; - int tsar_ix, err; - void *attr; + struct mlx5_esw_sched_node *node; + u32 tsar_ix; + int err; - MLX5_SET(scheduling_context, tsar_ctx, element_type, - SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); - MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, - esw->qos.root_tsar_ix); - attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); - MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); - err = mlx5_create_scheduling_element_cmd(esw->dev, - SCHEDULING_HIERARCHY_E_SWITCH, - tsar_ctx, - &tsar_ix); + err = esw_qos_create_node_sched_elem(esw->dev, esw->qos.root_tsar_ix, &tsar_ix); if (err) { - NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed"); + NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for node failed"); return ERR_PTR(err); } - group = __esw_qos_alloc_rate_group(esw, tsar_ix); - if (!group) { - NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc group failed"); + node = __esw_qos_alloc_node(esw, tsar_ix, SCHED_NODE_TYPE_VPORTS_TSAR, parent); + if (!node) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch alloc node failed"); err = -ENOMEM; - goto err_alloc_group; + goto err_alloc_node; } - err = esw_qos_normalize_min_rate(esw, extack); + err = esw_qos_normalize_min_rate(esw, NULL, extack); if (err) { - NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed"); + NL_SET_ERR_MSG_MOD(extack, "E-Switch nodes normalization failed"); goto err_min_rate; } - trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix); + trace_mlx5_esw_node_qos_create(esw->dev, node, node->ix); - return group; + return node; err_min_rate: - __esw_qos_free_rate_group(group); -err_alloc_group: + __esw_qos_free_node(node); +err_alloc_node: if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, tsar_ix)) - NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed"); + NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for node failed"); return ERR_PTR(err); } static int esw_qos_get(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack); static void esw_qos_put(struct mlx5_eswitch *esw); -static struct mlx5_esw_rate_group * -esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +static struct mlx5_esw_sched_node * +esw_qos_create_vports_sched_node(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { - struct mlx5_esw_rate_group *group; + struct mlx5_esw_sched_node *node; int err; esw_assert_qos_lock_held(esw); @@ -559,31 +588,30 @@ esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta if (err) return ERR_PTR(err); - group = __esw_qos_create_rate_group(esw, extack); - if (IS_ERR(group)) + node = __esw_qos_create_vports_sched_node(esw, NULL, extack); + if (IS_ERR(node)) esw_qos_put(esw); - return group; + return node; } -static int __esw_qos_destroy_rate_group(struct mlx5_esw_rate_group *group, - struct netlink_ext_ack *extack) +static int __esw_qos_destroy_node(struct mlx5_esw_sched_node *node, struct netlink_ext_ack *extack) { - struct mlx5_eswitch *esw = group->esw; + struct mlx5_eswitch *esw = node->esw; int err; - trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix); + trace_mlx5_esw_node_qos_destroy(esw->dev, node, node->ix); err = mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, - group->tsar_ix); + node->ix); if (err) NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed"); - __esw_qos_free_rate_group(group); + __esw_qos_free_node(node); - err = esw_qos_normalize_min_rate(esw, extack); + err = esw_qos_normalize_min_rate(esw, NULL, extack); if (err) - NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed"); + NL_SET_ERR_MSG_MOD(extack, "E-Switch nodes normalization failed"); return err; @@ -591,56 +619,40 @@ static int __esw_qos_destroy_rate_group(struct mlx5_esw_rate_group *group, static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) { - u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; struct mlx5_core_dev *dev = esw->dev; - void *attr; int err; if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) return -EOPNOTSUPP; - if (!mlx5_qos_element_type_supported(dev, - SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR, - SCHEDULING_HIERARCHY_E_SWITCH) || - !mlx5_qos_tsar_type_supported(dev, - TSAR_ELEMENT_TSAR_TYPE_DWRR, - SCHEDULING_HIERARCHY_E_SWITCH)) - return -EOPNOTSUPP; - - MLX5_SET(scheduling_context, tsar_ctx, element_type, - SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); - - attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); - MLX5_SET(tsar_element, attr, tsar_type, TSAR_ELEMENT_TSAR_TYPE_DWRR); - - err = mlx5_create_scheduling_element_cmd(dev, - SCHEDULING_HIERARCHY_E_SWITCH, - tsar_ctx, - &esw->qos.root_tsar_ix); + err = esw_qos_create_node_sched_elem(esw->dev, 0, &esw->qos.root_tsar_ix); if (err) { esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err); return err; } if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) { - esw->qos.group0 = __esw_qos_create_rate_group(esw, extack); + esw->qos.node0 = __esw_qos_create_vports_sched_node(esw, NULL, extack); } else { - /* The eswitch doesn't support scheduling groups. - * Create a software-only group0 using the root TSAR to attach vport QoS to. + /* The eswitch doesn't support scheduling nodes. + * Create a software-only node0 using the root TSAR to attach vport QoS to. */ - if (!__esw_qos_alloc_rate_group(esw, esw->qos.root_tsar_ix)) - esw->qos.group0 = ERR_PTR(-ENOMEM); + if (!__esw_qos_alloc_node(esw, + esw->qos.root_tsar_ix, + SCHED_NODE_TYPE_VPORTS_TSAR, + NULL)) + esw->qos.node0 = ERR_PTR(-ENOMEM); } - if (IS_ERR(esw->qos.group0)) { - err = PTR_ERR(esw->qos.group0); - esw_warn(dev, "E-Switch create rate group 0 failed (%d)\n", err); - goto err_group0; + if (IS_ERR(esw->qos.node0)) { + err = PTR_ERR(esw->qos.node0); + esw_warn(dev, "E-Switch create rate node 0 failed (%d)\n", err); + goto err_node0; } refcount_set(&esw->qos.refcnt, 1); return 0; -err_group0: +err_node0: if (mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, esw->qos.root_tsar_ix)) esw_warn(esw->dev, "E-Switch destroy root TSAR failed.\n"); @@ -652,11 +664,11 @@ static void esw_qos_destroy(struct mlx5_eswitch *esw) { int err; - if (esw->qos.group0->tsar_ix != esw->qos.root_tsar_ix) - __esw_qos_destroy_rate_group(esw->qos.group0, NULL); + if (esw->qos.node0->ix != esw->qos.root_tsar_ix) + __esw_qos_destroy_node(esw->qos.node0, NULL); else - __esw_qos_free_rate_group(esw->qos.group0); - esw->qos.group0 = NULL; + __esw_qos_free_node(esw->qos.node0); + esw->qos.node0 = NULL; err = mlx5_destroy_scheduling_element_cmd(esw->dev, SCHEDULING_HIERARCHY_E_SWITCH, @@ -691,28 +703,39 @@ static int esw_qos_vport_enable(struct mlx5_vport *vport, u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + u32 sched_elem_ix; int err; esw_assert_qos_lock_held(esw); - if (vport->qos.enabled) + if (vport->qos.sched_node) return 0; err = esw_qos_get(esw, extack); if (err) return err; - INIT_LIST_HEAD(&vport->qos.group_entry); - esw_qos_vport_set_group(vport, esw->qos.group0); - - err = esw_qos_vport_create_sched_element(vport, max_rate, bw_share); + err = esw_qos_vport_create_sched_element(vport, esw->qos.node0, max_rate, bw_share, + &sched_elem_ix); if (err) goto err_out; - vport->qos.enabled = true; + vport->qos.sched_node = __esw_qos_alloc_node(esw, sched_elem_ix, SCHED_NODE_TYPE_VPORT, + esw->qos.node0); + if (!vport->qos.sched_node) { + err = -ENOMEM; + goto err_alloc; + } + + vport->qos.sched_node->vport = vport; + trace_mlx5_esw_vport_qos_create(vport->dev, vport, bw_share, max_rate); return 0; +err_alloc: + if (mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, sched_elem_ix)) + esw_warn(esw->dev, "E-Switch destroy vport scheduling element failed.\n"); err_out: esw_qos_put(esw); @@ -722,27 +745,31 @@ err_out: void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport) { struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + struct mlx5_esw_sched_node *vport_node; struct mlx5_core_dev *dev; int err; lockdep_assert_held(&esw->state_lock); esw_qos_lock(esw); - if (!vport->qos.enabled) + vport_node = vport->qos.sched_node; + if (!vport_node) goto unlock; - WARN(vport->qos.group != esw->qos.group0, - "Disabling QoS on port before detaching it from group"); + WARN(vport_node->parent != esw->qos.node0, + "Disabling QoS on port before detaching it from node"); + + dev = vport_node->esw->dev; + trace_mlx5_esw_vport_qos_destroy(dev, vport); - dev = vport->qos.group->esw->dev; err = mlx5_destroy_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, - vport->qos.esw_sched_elem_ix); + vport_node->ix); if (err) esw_warn(dev, "E-Switch destroy vport scheduling element failed (vport=%d,err=%d)\n", vport->vport, err); + __esw_qos_free_node(vport_node); memset(&vport->qos, 0, sizeof(vport->qos)); - trace_mlx5_esw_vport_qos_destroy(dev, vport); esw_qos_put(esw); unlock: @@ -773,10 +800,10 @@ bool mlx5_esw_qos_get_vport_rate(struct mlx5_vport *vport, u32 *max_rate, u32 *m bool enabled; esw_qos_lock(esw); - enabled = vport->qos.enabled; + enabled = !!vport->qos.sched_node; if (enabled) { - *max_rate = vport->qos.max_rate; - *min_rate = vport->qos.min_rate; + *max_rate = vport->qos.sched_node->max_rate; + *min_rate = vport->qos.sched_node->min_rate; } esw_qos_unlock(esw); return enabled; @@ -870,18 +897,18 @@ int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 } esw_qos_lock(esw); - if (!vport->qos.enabled) { + if (!vport->qos.sched_node) { /* Eswitch QoS wasn't enabled yet. Enable it and vport QoS. */ - err = esw_qos_vport_enable(vport, rate_mbps, vport->qos.bw_share, NULL); + err = esw_qos_vport_enable(vport, rate_mbps, 0, NULL); } else { - struct mlx5_core_dev *dev = vport->qos.group->esw->dev; + struct mlx5_core_dev *dev = vport->qos.sched_node->parent->esw->dev; MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; err = mlx5_modify_scheduling_element_cmd(dev, SCHEDULING_HIERARCHY_E_SWITCH, ctx, - vport->qos.esw_sched_elem_ix, + vport->qos.sched_node->ix, bitmask); } esw_qos_unlock(esw); @@ -990,8 +1017,8 @@ unlock: int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, u64 tx_share, struct netlink_ext_ack *extack) { - struct mlx5_esw_rate_group *group = priv; - struct mlx5_eswitch *esw = group->esw; + struct mlx5_esw_sched_node *node = priv; + struct mlx5_eswitch *esw = node->esw; int err; err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_share", &tx_share, extack); @@ -999,7 +1026,7 @@ int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void return err; esw_qos_lock(esw); - err = esw_qos_set_group_min_rate(group, tx_share, extack); + err = esw_qos_set_node_min_rate(node, tx_share, extack); esw_qos_unlock(esw); return err; } @@ -1007,8 +1034,8 @@ int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, u64 tx_max, struct netlink_ext_ack *extack) { - struct mlx5_esw_rate_group *group = priv; - struct mlx5_eswitch *esw = group->esw; + struct mlx5_esw_sched_node *node = priv; + struct mlx5_eswitch *esw = node->esw; int err; err = esw_qos_devlink_rate_to_mbps(esw->dev, "tx_max", &tx_max, extack); @@ -1016,7 +1043,7 @@ int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void * return err; esw_qos_lock(esw); - err = esw_qos_set_group_max_rate(group, tx_max, extack); + err = esw_qos_set_node_max_rate(node, tx_max, extack); esw_qos_unlock(esw); return err; } @@ -1024,7 +1051,7 @@ int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void * int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, struct netlink_ext_ack *extack) { - struct mlx5_esw_rate_group *group; + struct mlx5_esw_sched_node *node; struct mlx5_eswitch *esw; int err = 0; @@ -1040,13 +1067,13 @@ int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, goto unlock; } - group = esw_qos_create_rate_group(esw, extack); - if (IS_ERR(group)) { - err = PTR_ERR(group); + node = esw_qos_create_vports_sched_node(esw, extack); + if (IS_ERR(node)) { + err = PTR_ERR(node); goto unlock; } - *priv = group; + *priv = node; unlock: esw_qos_unlock(esw); return err; @@ -1055,36 +1082,36 @@ unlock: int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, struct netlink_ext_ack *extack) { - struct mlx5_esw_rate_group *group = priv; - struct mlx5_eswitch *esw = group->esw; + struct mlx5_esw_sched_node *node = priv; + struct mlx5_eswitch *esw = node->esw; int err; esw_qos_lock(esw); - err = __esw_qos_destroy_rate_group(group, extack); + err = __esw_qos_destroy_node(node, extack); esw_qos_put(esw); esw_qos_unlock(esw); return err; } -int mlx5_esw_qos_vport_update_group(struct mlx5_vport *vport, - struct mlx5_esw_rate_group *group, - struct netlink_ext_ack *extack) +int mlx5_esw_qos_vport_update_node(struct mlx5_vport *vport, + struct mlx5_esw_sched_node *node, + struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = vport->dev->priv.eswitch; int err = 0; - if (group && group->esw != esw) { + if (node && node->esw != esw) { NL_SET_ERR_MSG_MOD(extack, "Cross E-Switch scheduling is not supported"); return -EOPNOTSUPP; } esw_qos_lock(esw); - if (!vport->qos.enabled && !group) + if (!vport->qos.sched_node && !node) goto unlock; err = esw_qos_vport_enable(vport, 0, 0, extack); if (!err) - err = esw_qos_vport_update_group(vport, group, extack); + err = esw_qos_vport_update_node(vport, node, extack); unlock: esw_qos_unlock(esw); return err; @@ -1095,12 +1122,12 @@ int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, void *priv, void *parent_priv, struct netlink_ext_ack *extack) { - struct mlx5_esw_rate_group *group; + struct mlx5_esw_sched_node *node; struct mlx5_vport *vport = priv; if (!parent) - return mlx5_esw_qos_vport_update_group(vport, NULL, extack); + return mlx5_esw_qos_vport_update_node(vport, NULL, extack); - group = parent_priv; - return mlx5_esw_qos_vport_update_group(vport, group, extack); + node = parent_priv; + return mlx5_esw_qos_vport_update_node(vport, node, extack); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h index b4045efbaf9e..61a6fdd5c267 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h @@ -13,6 +13,9 @@ int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *evport, u32 max_rate, u32 min bool mlx5_esw_qos_get_vport_rate(struct mlx5_vport *vport, u32 *max_rate, u32 *min_rate); void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport); +u32 mlx5_esw_qos_vport_get_sched_elem_ix(const struct mlx5_vport *vport); +struct mlx5_esw_sched_node *mlx5_esw_qos_vport_get_parent(const struct mlx5_vport *vport); + int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, u64 tx_share, struct netlink_ext_ack *extack); int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 2bcd42305f46..09719e9b8611 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1061,6 +1061,7 @@ static void mlx5_eswitch_clear_vf_vports_info(struct mlx5_eswitch *esw) unsigned long i; mlx5_esw_for_each_vf_vport(esw, i, vport, esw->esw_funcs.num_vfs) { + kfree(vport->qos.sched_node); memset(&vport->qos, 0, sizeof(vport->qos)); memset(&vport->info, 0, sizeof(vport->info)); vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; @@ -1073,6 +1074,7 @@ static void mlx5_eswitch_clear_ec_vf_vports_info(struct mlx5_eswitch *esw) unsigned long i; mlx5_esw_for_each_ec_vf_vport(esw, i, vport, esw->esw_funcs.num_ec_vfs) { + kfree(vport->qos.sched_node); memset(&vport->qos, 0, sizeof(vport->qos)); memset(&vport->info, 0, sizeof(vport->info)); vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 3b901bd36d4b..14dd42d44e6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -214,15 +214,8 @@ struct mlx5_vport { /* Protected with the E-Switch qos domain lock. */ struct { - /* Initially false, set to true whenever any QoS features are used. */ - bool enabled; - u32 esw_sched_elem_ix; - u32 min_rate; - u32 max_rate; - /* A computed value indicating relative min_rate between vports in a group. */ - u32 bw_share; - struct mlx5_esw_rate_group *group; - struct list_head group_entry; + /* Vport scheduling element node. */ + struct mlx5_esw_sched_node *sched_node; } qos; u16 vport; @@ -370,11 +363,11 @@ struct mlx5_eswitch { refcount_t refcnt; u32 root_tsar_ix; struct mlx5_qos_domain *domain; - /* Contains all vports with QoS enabled but no explicit group. - * Cannot be NULL if QoS is enabled, but may be a fake group - * referencing the root TSAR if the esw doesn't support groups. + /* Contains all vports with QoS enabled but no explicit node. + * Cannot be NULL if QoS is enabled, but may be a fake node + * referencing the root TSAR if the esw doesn't support nodes. */ - struct mlx5_esw_rate_group *group0; + struct mlx5_esw_sched_node *node0; } qos; struct mlx5_esw_bridge_offloads *br_offloads; @@ -434,9 +427,9 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, u16 vport_num, bool setting); int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, u32 max_rate, u32 min_rate); -int mlx5_esw_qos_vport_update_group(struct mlx5_vport *vport, - struct mlx5_esw_rate_group *group, - struct netlink_ext_ack *extack); +int mlx5_esw_qos_vport_update_node(struct mlx5_vport *vport, + struct mlx5_esw_sched_node *node, + struct netlink_ext_ack *extack); int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting); int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting); int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 964937f17cf5..b30976627c6b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -63,7 +63,7 @@ struct mlx5_modify_hdr { enum mlx5_flow_namespace_type ns_type; enum mlx5_flow_resource_owner owner; union { - struct mlx5_fs_dr_action action; + struct mlx5_fs_dr_action fs_dr_action; u32 id; }; }; @@ -73,7 +73,7 @@ struct mlx5_pkt_reformat { int reformat_type; /* from mlx5_ifc */ enum mlx5_flow_resource_owner owner; union { - struct mlx5_fs_dr_action action; + struct mlx5_fs_dr_action fs_dr_action; u32 id; }; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 4f55e55ecb55..566710d34a7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -35,6 +35,7 @@ struct mlx5_fw_reset { enum { MLX5_FW_RST_STATE_IDLE = 0, MLX5_FW_RST_STATE_TOGGLE_REQ = 4, + MLX5_FW_RST_STATE_DROP_MODE = 5, }; enum { @@ -616,6 +617,7 @@ static void mlx5_sync_reset_unload_event(struct work_struct *work) struct mlx5_fw_reset *fw_reset; struct mlx5_core_dev *dev; unsigned long timeout; + int poll_freq = 20; bool reset_action; u8 rst_state; int err; @@ -651,7 +653,12 @@ static void mlx5_sync_reset_unload_event(struct work_struct *work) reset_action = true; break; } - msleep(20); + if (rst_state == MLX5_FW_RST_STATE_DROP_MODE) { + mlx5_core_info(dev, "Sync Reset Drop mode ack\n"); + mlx5_set_fw_rst_ack(dev); + poll_freq = 1000; + } + msleep(poll_freq); } while (!time_after(jiffies, timeout)); if (!reset_action) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 833cb68c744f..4b349d4005e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -256,6 +256,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, { struct mlx5dr_domain *domain = ns->fs_dr_domain.dr_domain; struct mlx5dr_action_dest *term_actions; + struct mlx5_pkt_reformat *pkt_reformat; struct mlx5dr_match_parameters params; struct mlx5_core_dev *dev = ns->dev; struct mlx5dr_action **fs_dr_actions; @@ -332,18 +333,19 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, if (fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { bool is_decap; - if (fte->act_dests.action.pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_FW) { + pkt_reformat = fte->act_dests.action.pkt_reformat; + if (pkt_reformat->owner == MLX5_FLOW_RESOURCE_OWNER_FW) { err = -EINVAL; mlx5dr_err(domain, "FW-owned reformat can't be used in SW rule\n"); goto free_actions; } - is_decap = fte->act_dests.action.pkt_reformat->reformat_type == + is_decap = pkt_reformat->reformat_type == MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; if (is_decap) actions[num_actions++] = - fte->act_dests.action.pkt_reformat->action.dr_action; + pkt_reformat->fs_dr_action.dr_action; else delay_encap_set = true; } @@ -370,9 +372,11 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, actions[num_actions++] = tmp_action; } - if (fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - actions[num_actions++] = - fte->act_dests.action.modify_hdr->action.dr_action; + if (fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + struct mlx5_modify_hdr *modify_hdr = fte->act_dests.action.modify_hdr; + + actions[num_actions++] = modify_hdr->fs_dr_action.dr_action; + } if (fte->act_dests.action.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { tmp_action = create_action_push_vlan(domain, &fte->act_dests.action.vlan[0]); @@ -395,8 +399,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, } if (delay_encap_set) - actions[num_actions++] = - fte->act_dests.action.pkt_reformat->action.dr_action; + actions[num_actions++] = pkt_reformat->fs_dr_action.dr_action; /* The order of the actions below is not important */ @@ -458,9 +461,11 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, term_actions[num_term_actions].dest = tmp_action; if (dst->dest_attr.vport.flags & - MLX5_FLOW_DEST_VPORT_REFORMAT_ID) + MLX5_FLOW_DEST_VPORT_REFORMAT_ID) { + pkt_reformat = dst->dest_attr.vport.pkt_reformat; term_actions[num_term_actions].reformat = - dst->dest_attr.vport.pkt_reformat->action.dr_action; + pkt_reformat->fs_dr_action.dr_action; + } num_term_actions++; break; @@ -671,7 +676,7 @@ static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns } pkt_reformat->owner = MLX5_FLOW_RESOURCE_OWNER_SW; - pkt_reformat->action.dr_action = action; + pkt_reformat->fs_dr_action.dr_action = action; return 0; } @@ -679,7 +684,7 @@ static int mlx5_cmd_dr_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns static void mlx5_cmd_dr_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns, struct mlx5_pkt_reformat *pkt_reformat) { - mlx5dr_action_destroy(pkt_reformat->action.dr_action); + mlx5dr_action_destroy(pkt_reformat->fs_dr_action.dr_action); } static int mlx5_cmd_dr_modify_header_alloc(struct mlx5_flow_root_namespace *ns, @@ -702,7 +707,7 @@ static int mlx5_cmd_dr_modify_header_alloc(struct mlx5_flow_root_namespace *ns, } modify_hdr->owner = MLX5_FLOW_RESOURCE_OWNER_SW; - modify_hdr->action.dr_action = action; + modify_hdr->fs_dr_action.dr_action = action; return 0; } @@ -710,7 +715,7 @@ static int mlx5_cmd_dr_modify_header_alloc(struct mlx5_flow_root_namespace *ns, static void mlx5_cmd_dr_modify_header_dealloc(struct mlx5_flow_root_namespace *ns, struct mlx5_modify_hdr *modify_hdr) { - mlx5dr_action_destroy(modify_hdr->action.dr_action); + mlx5dr_action_destroy(modify_hdr->fs_dr_action.dr_action); } static int @@ -836,7 +841,7 @@ int mlx5_fs_dr_action_get_pkt_reformat_id(struct mlx5_pkt_reformat *pkt_reformat case MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL: case MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL: case MLX5_REFORMAT_TYPE_INSERT_HDR: - return mlx5dr_action_get_pkt_reformat_id(pkt_reformat->action.dr_action); + return mlx5dr_action_get_pkt_reformat_id(pkt_reformat->fs_dr_action.dr_action); } return -EOPNOTSUPP; } |