summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst44
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/Makefile13
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c48
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c164
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h35
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c (renamed from drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c)474
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h41
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c154
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c57
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c26
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h123
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c869
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h41
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c310
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h21
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c11
-rw-r--r--include/linux/mlx5/mlx5_ifc.h3
24 files changed, 1815 insertions, 703 deletions
diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
index ef8cb62e82a1..4b59cf2c599f 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst
@@ -656,3 +656,47 @@ Bridge offloads tracepoints:
$ cat /sys/kernel/debug/tracing/trace
...
ip-5387 [000] ...1 573713: mlx5_esw_bridge_vport_cleanup: vport_num=1
+
+Eswitch QoS tracepoints:
+
+- mlx5_esw_vport_qos_create: trace creation of transmit scheduler arbiter for vport::
+
+ $ echo mlx5:mlx5_esw_vport_qos_create >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ <...>-23496 [018] .... 73136.838831: mlx5_esw_vport_qos_create: (0000:82:00.0) vport=2 tsar_ix=4 bw_share=0, max_rate=0 group=000000007b576bb3
+
+- mlx5_esw_vport_qos_config: trace configuration of transmit scheduler arbiter for vport::
+
+ $ echo mlx5:mlx5_esw_vport_qos_config >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ <...>-26548 [023] .... 75754.223823: mlx5_esw_vport_qos_config: (0000:82:00.0) vport=1 tsar_ix=3 bw_share=34, max_rate=10000 group=000000007b576bb3
+
+- mlx5_esw_vport_qos_destroy: trace deletion of transmit scheduler arbiter for vport::
+
+ $ echo mlx5:mlx5_esw_vport_qos_destroy >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ <...>-27418 [004] .... 76546.680901: mlx5_esw_vport_qos_destroy: (0000:82:00.0) vport=1 tsar_ix=3
+
+- mlx5_esw_group_qos_create: trace creation of transmit scheduler arbiter for rate group::
+
+ $ echo mlx5:mlx5_esw_group_qos_create >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ <...>-26578 [008] .... 75776.022112: mlx5_esw_group_qos_create: (0000:82:00.0) group=000000008dac63ea tsar_ix=5
+
+- mlx5_esw_group_qos_config: trace configuration of transmit scheduler arbiter for rate group::
+
+ $ echo mlx5:mlx5_esw_group_qos_config >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ <...>-27303 [020] .... 76461.455356: mlx5_esw_group_qos_config: (0000:82:00.0) group=000000008dac63ea tsar_ix=5 bw_share=100 max_rate=20000
+
+- mlx5_esw_group_qos_destroy: trace deletion of transmit scheduler arbiter for group::
+
+ $ echo mlx5:mlx5_esw_group_qos_destroy >> /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace
+ ...
+ <...>-27418 [006] .... 76547.187258: mlx5_esw_group_qos_destroy: (0000:82:00.0) group=000000007b576bb3 tsar_ix=1
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 4fccc9bc0328..63032cd6efb1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -44,19 +44,22 @@ mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \
lib/fs_chains.o en/tc_tun.o \
esw/indir_table.o en/tc_tun_encap.o \
en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
- en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o
+ en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o \
+ en/tc/post_act.o
mlx5_core-$(CONFIG_MLX5_TC_CT) += en/tc_ct.o
+mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o
#
# Core extra
#
mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
- ecpf.o rdma.o esw/legacy.o
+ ecpf.o rdma.o esw/legacy.o \
+ esw/devlink_port.o esw/vporttbl.o esw/qos.o
+
mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \
esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
- esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o \
- esw/devlink_port.o esw/vporttbl.o
-mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += esw/sample.o
+ esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o
+
mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o en/rep/bridge.o
mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 6f4d7c7f06e0..e84287ffc7ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -7,6 +7,7 @@
#include "fw_reset.h"
#include "fs_core.h"
#include "eswitch.h"
+#include "esw/qos.h"
#include "sf/dev/dev.h"
#include "sf/sf.h"
@@ -292,6 +293,13 @@ static const struct devlink_ops mlx5_devlink_ops = {
.eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get,
.port_function_hw_addr_get = mlx5_devlink_port_function_hw_addr_get,
.port_function_hw_addr_set = mlx5_devlink_port_function_hw_addr_set,
+ .rate_leaf_tx_share_set = mlx5_esw_devlink_rate_leaf_tx_share_set,
+ .rate_leaf_tx_max_set = mlx5_esw_devlink_rate_leaf_tx_max_set,
+ .rate_node_tx_share_set = mlx5_esw_devlink_rate_node_tx_share_set,
+ .rate_node_tx_max_set = mlx5_esw_devlink_rate_node_tx_max_set,
+ .rate_node_new = mlx5_esw_devlink_rate_node_new,
+ .rate_node_del = mlx5_esw_devlink_rate_node_del,
+ .rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set,
#endif
#ifdef CONFIG_MLX5_SF_MANAGER
.port_new = mlx5_devlink_sf_port_new,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index e348c276eaa1..41684a6c44e9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -7,6 +7,8 @@
#include "mod_hdr.h"
#include "lib/fs_ttc.h"
+struct mlx5e_post_act;
+
enum {
MLX5E_TC_FT_LEVEL = 0,
MLX5E_TC_TTC_FT_LEVEL,
@@ -19,6 +21,7 @@ struct mlx5e_tc_table {
struct mutex t_lock;
struct mlx5_flow_table *t;
struct mlx5_fs_chains *chains;
+ struct mlx5e_post_act *post_act;
struct rhashtable ht;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index 059799e4f483..51a4d80f7fa3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -17,7 +17,7 @@
#include "en/mapping.h"
#include "en/tc_tun.h"
#include "lib/port_tun.h"
-#include "esw/sample.h"
+#include "en/tc/sample.h"
struct mlx5e_rep_indr_block_priv {
struct net_device *netdev;
@@ -516,7 +516,6 @@ void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv)
mlx5e_rep_indr_block_unbind);
}
-#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
struct mlx5e_tc_update_priv *tc_priv,
u32 tunnel_id)
@@ -609,12 +608,13 @@ static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
return true;
}
-static bool mlx5e_restore_skb(struct sk_buff *skb, u32 chain, u32 reg_c1,
- struct mlx5e_tc_update_priv *tc_priv)
+static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1,
+ struct mlx5e_tc_update_priv *tc_priv)
{
struct mlx5e_priv *priv = netdev_priv(skb->dev);
u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
if (chain) {
struct mlx5_rep_uplink_priv *uplink_priv;
struct mlx5e_rep_priv *uplink_rpriv;
@@ -636,9 +636,25 @@ static bool mlx5e_restore_skb(struct sk_buff *skb, u32 chain, u32 reg_c1,
zone_restore_id))
return false;
}
+#endif /* CONFIG_NET_TC_SKB_EXT */
+
return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
}
-#endif /* CONFIG_NET_TC_SKB_EXT */
+
+static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb,
+ struct mlx5_mapped_obj *mapped_obj,
+ struct mlx5e_tc_update_priv *tc_priv)
+{
+ if (!mlx5e_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) {
+ netdev_dbg(priv->netdev,
+ "Failed to restore tunnel info for sampled packet\n");
+ return;
+ }
+#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
+ mlx5e_tc_sample_skb(skb, mapped_obj);
+#endif /* CONFIG_MLX5_TC_SAMPLE */
+ mlx5_rep_tc_post_napi_receive(tc_priv);
+}
bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
struct sk_buff *skb,
@@ -647,7 +663,7 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
struct mlx5_mapped_obj mapped_obj;
struct mlx5_eswitch *esw;
struct mlx5e_priv *priv;
- u32 reg_c0, reg_c1;
+ u32 reg_c0;
int err;
reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
@@ -659,8 +675,6 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
*/
skb->mark = 0;
- reg_c1 = be32_to_cpu(cqe->ft_metadata);
-
priv = netdev_priv(skb->dev);
esw = priv->mdev->priv.eswitch;
err = mapping_find(esw->offloads.reg_c0_obj_pool, reg_c0, &mapped_obj);
@@ -671,18 +685,14 @@ bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
return false;
}
-#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
- if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN)
- return mlx5e_restore_skb(skb, mapped_obj.chain, reg_c1, tc_priv);
-#endif /* CONFIG_NET_TC_SKB_EXT */
-#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
- if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) {
- mlx5_esw_sample_skb(skb, &mapped_obj);
+ if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
+ u32 reg_c1 = be32_to_cpu(cqe->ft_metadata);
+
+ return mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, tc_priv);
+ } else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) {
+ mlx5e_restore_skb_sample(priv, skb, &mapped_obj, tc_priv);
return false;
- }
-#endif /* CONFIG_MLX5_TC_SAMPLE */
- if (mapped_obj.type != MLX5_MAPPED_OBJ_SAMPLE &&
- mapped_obj.type != MLX5_MAPPED_OBJ_CHAIN) {
+ } else {
netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
return false;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
new file mode 100644
index 000000000000..a3e43e898a56
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+// Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+
+#include "en_tc.h"
+#include "post_act.h"
+#include "mlx5_core.h"
+
+struct mlx5e_post_act {
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_fs_chains *chains;
+ struct mlx5_flow_table *ft;
+ struct mlx5e_priv *priv;
+ struct xarray ids;
+};
+
+struct mlx5e_post_act_handle {
+ enum mlx5_flow_namespace_type ns_type;
+ struct mlx5_flow_attr *attr;
+ struct mlx5_flow_handle *rule;
+ u32 id;
+};
+
+#define MLX5_POST_ACTION_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen)
+#define MLX5_POST_ACTION_MAX GENMASK(MLX5_POST_ACTION_BITS - 1, 0)
+#define MLX5_POST_ACTION_MASK MLX5_POST_ACTION_MAX
+
+struct mlx5e_post_act *
+mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ enum mlx5_flow_namespace_type ns_type)
+{
+ struct mlx5e_post_act *post_act;
+ int err;
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB &&
+ !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ignore_flow_level)) {
+ mlx5_core_warn(priv->mdev, "firmware level support is missing\n");
+ err = -EOPNOTSUPP;
+ goto err_check;
+ } else if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
+ mlx5_core_warn(priv->mdev, "firmware level support is missing\n");
+ err = -EOPNOTSUPP;
+ goto err_check;
+ }
+
+ post_act = kzalloc(sizeof(*post_act), GFP_KERNEL);
+ if (!post_act) {
+ err = -ENOMEM;
+ goto err_check;
+ }
+ post_act->ft = mlx5_chains_create_global_table(chains);
+ if (IS_ERR(post_act->ft)) {
+ err = PTR_ERR(post_act->ft);
+ mlx5_core_warn(priv->mdev, "failed to create post action table, err: %d\n", err);
+ goto err_ft;
+ }
+ post_act->chains = chains;
+ post_act->ns_type = ns_type;
+ post_act->priv = priv;
+ xa_init_flags(&post_act->ids, XA_FLAGS_ALLOC1);
+ return post_act;
+
+err_ft:
+ kfree(post_act);
+err_check:
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act)
+{
+ if (IS_ERR_OR_NULL(post_act))
+ return;
+
+ xa_destroy(&post_act->ids);
+ mlx5_chains_destroy_global_table(post_act->chains, post_act->ft);
+ kfree(post_act);
+}
+
+struct mlx5e_post_act_handle *
+mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr)
+{
+ u32 attr_sz = ns_to_attr_sz(post_act->ns_type);
+ struct mlx5e_post_act_handle *handle = NULL;
+ struct mlx5_flow_attr *post_attr = NULL;
+ struct mlx5_flow_spec *spec = NULL;
+ int err;
+
+ handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+ spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+ post_attr = mlx5_alloc_flow_attr(post_act->ns_type);
+ if (!handle || !spec || !post_attr) {
+ kfree(post_attr);
+ kvfree(spec);
+ kfree(handle);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ memcpy(post_attr, attr, attr_sz);
+ post_attr->chain = 0;
+ post_attr->prio = 0;
+ post_attr->ft = post_act->ft;
+ post_attr->inner_match_level = MLX5_MATCH_NONE;
+ post_attr->outer_match_level = MLX5_MATCH_NONE;
+ post_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
+
+ handle->ns_type = post_act->ns_type;
+ /* Splits were handled before post action */
+ if (handle->ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ post_attr->esw_attr->split_count = 0;
+
+ err = xa_alloc(&post_act->ids, &handle->id, post_attr,
+ XA_LIMIT(1, MLX5_POST_ACTION_MAX), GFP_KERNEL);
+ if (err)
+ goto err_xarray;
+
+ /* Post action rule matches on fte_id and executes original rule's
+ * tc rule action
+ */
+ mlx5e_tc_match_to_reg_match(spec, FTEID_TO_REG,
+ handle->id, MLX5_POST_ACTION_MASK);
+
+ handle->rule = mlx5_tc_rule_insert(post_act->priv, spec, post_attr);
+ if (IS_ERR(handle->rule)) {
+ err = PTR_ERR(handle->rule);
+ netdev_warn(post_act->priv->netdev, "Failed to add post action rule");
+ goto err_rule;
+ }
+ handle->attr = post_attr;
+
+ kvfree(spec);
+ return handle;
+
+err_rule:
+ xa_erase(&post_act->ids, handle->id);
+err_xarray:
+ kfree(post_attr);
+ kvfree(spec);
+ kfree(handle);
+ return ERR_PTR(err);
+}
+
+void
+mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle)
+{
+ mlx5_tc_rule_delete(post_act->priv, handle->rule, handle->attr);
+ xa_erase(&post_act->ids, handle->id);
+ kfree(handle->attr);
+ kfree(handle);
+}
+
+struct mlx5_flow_table *
+mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act)
+{
+ return post_act->ft;
+}
+
+/* Allocate a header modify action to write the post action handle fte id to a register. */
+int
+mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev,
+ struct mlx5e_post_act_handle *handle,
+ struct mlx5e_tc_mod_hdr_acts *acts)
+{
+ return mlx5e_tc_match_to_reg_set(dev, acts, handle->ns_type, FTEID_TO_REG, handle->id);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
new file mode 100644
index 000000000000..b530ec1981a5
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_POST_ACTION_H__
+#define __MLX5_POST_ACTION_H__
+
+#include "en.h"
+#include "lib/fs_chains.h"
+
+struct mlx5_flow_attr;
+struct mlx5e_priv;
+struct mlx5e_tc_mod_hdr_acts;
+
+struct mlx5e_post_act *
+mlx5e_tc_post_act_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
+ enum mlx5_flow_namespace_type ns_type);
+
+void
+mlx5e_tc_post_act_destroy(struct mlx5e_post_act *post_act);
+
+struct mlx5e_post_act_handle *
+mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *attr);
+
+void
+mlx5e_tc_post_act_del(struct mlx5e_post_act *post_act, struct mlx5e_post_act_handle *handle);
+
+struct mlx5_flow_table *
+mlx5e_tc_post_act_get_ft(struct mlx5e_post_act *post_act);
+
+int
+mlx5e_tc_post_act_set_handle(struct mlx5_core_dev *dev,
+ struct mlx5e_post_act_handle *handle,
+ struct mlx5e_tc_mod_hdr_acts *acts);
+
+#endif /* __MLX5_POST_ACTION_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
index d3ad78aa9d45..6552ecee3f9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c
@@ -4,7 +4,8 @@
#include <linux/skbuff.h>
#include <net/psample.h>
#include "en/mapping.h"
-#include "esw/sample.h"
+#include "en/tc/post_act.h"
+#include "sample.h"
#include "eswitch.h"
#include "en_tc.h"
#include "fs_core.h"
@@ -17,17 +18,18 @@ static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = {
.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP,
};
-struct mlx5_esw_psample {
- struct mlx5e_priv *priv;
+struct mlx5e_tc_psample {
+ struct mlx5_eswitch *esw;
struct mlx5_flow_table *termtbl;
struct mlx5_flow_handle *termtbl_rule;
DECLARE_HASHTABLE(hashtbl, 8);
struct mutex ht_lock; /* protect hashtbl */
DECLARE_HASHTABLE(restore_hashtbl, 8);
struct mutex restore_lock; /* protect restore_hashtbl */
+ struct mlx5e_post_act *post_act;
};
-struct mlx5_sampler {
+struct mlx5e_sampler {
struct hlist_node hlist;
u32 sampler_id;
u32 sample_ratio;
@@ -36,29 +38,32 @@ struct mlx5_sampler {
int count;
};
-struct mlx5_sample_flow {
- struct mlx5_sampler *sampler;
- struct mlx5_sample_restore *restore;
+struct mlx5e_sample_flow {
+ struct mlx5e_sampler *sampler;
+ struct mlx5e_sample_restore *restore;
struct mlx5_flow_attr *pre_attr;
struct mlx5_flow_handle *pre_rule;
- struct mlx5_flow_handle *rule;
+ struct mlx5_flow_attr *post_attr;
+ struct mlx5_flow_handle *post_rule;
+ struct mlx5e_post_act_handle *post_act_handle;
};
-struct mlx5_sample_restore {
+struct mlx5e_sample_restore {
struct hlist_node hlist;
struct mlx5_modify_hdr *modify_hdr;
struct mlx5_flow_handle *rule;
+ struct mlx5e_post_act_handle *post_act_handle;
u32 obj_id;
int count;
};
static int
-sampler_termtbl_create(struct mlx5_esw_psample *esw_psample)
+sampler_termtbl_create(struct mlx5e_tc_psample *tc_psample)
{
- struct mlx5_core_dev *dev = esw_psample->priv->mdev;
- struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_eswitch *esw = tc_psample->esw;
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_destination dest = {};
+ struct mlx5_core_dev *dev = esw->dev;
struct mlx5_flow_namespace *root_ns;
struct mlx5_flow_act act = {};
int err;
@@ -79,20 +84,20 @@ sampler_termtbl_create(struct mlx5_esw_psample *esw_psample)
ft_attr.prio = FDB_SLOW_PATH;
ft_attr.max_fte = 1;
ft_attr.level = 1;
- esw_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
- if (IS_ERR(esw_psample->termtbl)) {
- err = PTR_ERR(esw_psample->termtbl);
+ tc_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
+ if (IS_ERR(tc_psample->termtbl)) {
+ err = PTR_ERR(tc_psample->termtbl);
mlx5_core_warn(dev, "failed to create termtbl, err: %d\n", err);
return err;
}
act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
dest.vport.num = esw->manager_vport;
- esw_psample->termtbl_rule = mlx5_add_flow_rules(esw_psample->termtbl, NULL, &act, &dest, 1);
- if (IS_ERR(esw_psample->termtbl_rule)) {
- err = PTR_ERR(esw_psample->termtbl_rule);
+ tc_psample->termtbl_rule = mlx5_add_flow_rules(tc_psample->termtbl, NULL, &act, &dest, 1);
+ if (IS_ERR(tc_psample->termtbl_rule)) {
+ err = PTR_ERR(tc_psample->termtbl_rule);
mlx5_core_warn(dev, "failed to create termtbl rule, err: %d\n", err);
- mlx5_destroy_flow_table(esw_psample->termtbl);
+ mlx5_destroy_flow_table(tc_psample->termtbl);
return err;
}
@@ -100,14 +105,14 @@ sampler_termtbl_create(struct mlx5_esw_psample *esw_psample)
}
static void
-sampler_termtbl_destroy(struct mlx5_esw_psample *esw_psample)
+sampler_termtbl_destroy(struct mlx5e_tc_psample *tc_psample)
{
- mlx5_del_flow_rules(esw_psample->termtbl_rule);
- mlx5_destroy_flow_table(esw_psample->termtbl);
+ mlx5_del_flow_rules(tc_psample->termtbl_rule);
+ mlx5_destroy_flow_table(tc_psample->termtbl);
}
static int
-sampler_obj_create(struct mlx5_core_dev *mdev, struct mlx5_sampler *sampler)
+sampler_obj_create(struct mlx5_core_dev *mdev, struct mlx5e_sampler *sampler)
{
u32 in[MLX5_ST_SZ_DW(create_sampler_obj_in)] = {};
u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
@@ -163,16 +168,16 @@ sampler_cmp(u32 sample_ratio1, u32 default_table_id1, u32 sample_ratio2, u32 def
return sample_ratio1 != sample_ratio2 || default_table_id1 != default_table_id2;
}
-static struct mlx5_sampler *
-sampler_get(struct mlx5_esw_psample *esw_psample, u32 sample_ratio, u32 default_table_id)
+static struct mlx5e_sampler *
+sampler_get(struct mlx5e_tc_psample *tc_psample, u32 sample_ratio, u32 default_table_id)
{
- struct mlx5_sampler *sampler;
+ struct mlx5e_sampler *sampler;
u32 hash_key;
int err;
- mutex_lock(&esw_psample->ht_lock);
+ mutex_lock(&tc_psample->ht_lock);
hash_key = sampler_hash(sample_ratio, default_table_id);
- hash_for_each_possible(esw_psample->hashtbl, sampler, hlist, hash_key)
+ hash_for_each_possible(tc_psample->hashtbl, sampler, hlist, hash_key)
if (!sampler_cmp(sampler->sample_ratio, sampler->default_table_id,
sample_ratio, default_table_id))
goto add_ref;
@@ -183,42 +188,49 @@ sampler_get(struct mlx5_esw_psample *esw_psample, u32 sample_ratio, u32 default_
goto err_alloc;
}
- sampler->sample_table_id = esw_psample->termtbl->id;
+ sampler->sample_table_id = tc_psample->termtbl->id;
sampler->default_table_id = default_table_id;
sampler->sample_ratio = sample_ratio;
- err = sampler_obj_create(esw_psample->priv->mdev, sampler);
+ err = sampler_obj_create(tc_psample->esw->dev, sampler);
if (err)
goto err_create;
- hash_add(esw_psample->hashtbl, &sampler->hlist, hash_key);
+ hash_add(tc_psample->hashtbl, &sampler->hlist, hash_key);
add_ref:
sampler->count++;
- mutex_unlock(&esw_psample->ht_lock);
+ mutex_unlock(&tc_psample->ht_lock);
return sampler;
err_create:
kfree(sampler);
err_alloc:
- mutex_unlock(&esw_psample->ht_lock);
+ mutex_unlock(&tc_psample->ht_lock);
return ERR_PTR(err);
}
static void
-sampler_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sampler *sampler)
+sampler_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sampler *sampler)
{
- mutex_lock(&esw_psample->ht_lock);
+ mutex_lock(&tc_psample->ht_lock);
if (--sampler->count == 0) {
hash_del(&sampler->hlist);
- sampler_obj_destroy(esw_psample->priv->mdev, sampler->sampler_id);
+ sampler_obj_destroy(tc_psample->esw->dev, sampler->sampler_id);
kfree(sampler);
}
- mutex_unlock(&esw_psample->ht_lock);
+ mutex_unlock(&tc_psample->ht_lock);
}
+/* obj_id is used to restore the sample parameters.
+ * Set fte_id in original flow table, then match it in the default table.
+ * Only set it for NICs can preserve reg_c or decap action. For other cases,
+ * use the same match in the default table.
+ * Use one header rewrite for both obj_id and fte_id.
+ */
static struct mlx5_modify_hdr *
-sample_metadata_rule_get(struct mlx5_core_dev *mdev, u32 obj_id)
+sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id,
+ struct mlx5e_post_act_handle *handle)
{
struct mlx5e_tc_mod_hdr_acts mod_acts = {};
struct mlx5_modify_hdr *modify_hdr;
@@ -229,6 +241,12 @@ sample_metadata_rule_get(struct mlx5_core_dev *mdev, u32 obj_id)
if (err)
goto err_set_regc0;
+ if (handle) {
+ err = mlx5e_tc_post_act_set_handle(mdev, handle, &mod_acts);
+ if (err)
+ goto err_post_act;
+ }
+
modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB,
mod_acts.num_actions,
mod_acts.actions);
@@ -241,23 +259,40 @@ sample_metadata_rule_get(struct mlx5_core_dev *mdev, u32 obj_id)
return modify_hdr;
err_modify_hdr:
+err_post_act:
dealloc_mod_hdr_actions(&mod_acts);
err_set_regc0:
return ERR_PTR(err);
}
-static struct mlx5_sample_restore *
-sample_restore_get(struct mlx5_esw_psample *esw_psample, u32 obj_id)
+static u32
+restore_hash(u32 obj_id, struct mlx5e_post_act_handle *post_act_handle)
{
- struct mlx5_core_dev *mdev = esw_psample->priv->mdev;
- struct mlx5_eswitch *esw = mdev->priv.eswitch;
- struct mlx5_sample_restore *restore;
+ return jhash_2words(obj_id, hash32_ptr(post_act_handle), 0);
+}
+
+static bool
+restore_equal(struct mlx5e_sample_restore *restore, u32 obj_id,
+ struct mlx5e_post_act_handle *post_act_handle)
+{
+ return restore->obj_id == obj_id && restore->post_act_handle == post_act_handle;
+}
+
+static struct mlx5e_sample_restore *
+sample_restore_get(struct mlx5e_tc_psample *tc_psample, u32 obj_id,
+ struct mlx5e_post_act_handle *post_act_handle)
+{
+ struct mlx5_eswitch *esw = tc_psample->esw;
+ struct mlx5_core_dev *mdev = esw->dev;
+ struct mlx5e_sample_restore *restore;
struct mlx5_modify_hdr *modify_hdr;
+ u32 hash_key;
int err;
- mutex_lock(&esw_psample->restore_lock);
- hash_for_each_possible(esw_psample->restore_hashtbl, restore, hlist, obj_id)
- if (restore->obj_id == obj_id)
+ mutex_lock(&tc_psample->restore_lock);
+ hash_key = restore_hash(obj_id, post_act_handle);
+ hash_for_each_possible(tc_psample->restore_hashtbl, restore, hlist, hash_key)
+ if (restore_equal(restore, obj_id, post_act_handle))
goto add_ref;
restore = kzalloc(sizeof(*restore), GFP_KERNEL);
@@ -266,8 +301,9 @@ sample_restore_get(struct mlx5_esw_psample *esw_psample, u32 obj_id)
goto err_alloc;
}
restore->obj_id = obj_id;
+ restore->post_act_handle = post_act_handle;
- modify_hdr = sample_metadata_rule_get(mdev, obj_id);
+ modify_hdr = sample_modify_hdr_get(mdev, obj_id, post_act_handle);
if (IS_ERR(modify_hdr)) {
err = PTR_ERR(modify_hdr);
goto err_modify_hdr;
@@ -280,10 +316,10 @@ sample_restore_get(struct mlx5_esw_psample *esw_psample, u32 obj_id)
goto err_restore;
}
- hash_add(esw_psample->restore_hashtbl, &restore->hlist, obj_id);
+ hash_add(tc_psample->restore_hashtbl, &restore->hlist, hash_key);
add_ref:
restore->count++;
- mutex_unlock(&esw_psample->restore_lock);
+ mutex_unlock(&tc_psample->restore_lock);
return restore;
err_restore:
@@ -291,26 +327,26 @@ err_restore:
err_modify_hdr:
kfree(restore);
err_alloc:
- mutex_unlock(&esw_psample->restore_lock);
+ mutex_unlock(&tc_psample->restore_lock);
return ERR_PTR(err);
}
static void
-sample_restore_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sample_restore *restore)
+sample_restore_put(struct mlx5e_tc_psample *tc_psample, struct mlx5e_sample_restore *restore)
{
- mutex_lock(&esw_psample->restore_lock);
+ mutex_lock(&tc_psample->restore_lock);
if (--restore->count == 0)
hash_del(&restore->hlist);
- mutex_unlock(&esw_psample->restore_lock);
+ mutex_unlock(&tc_psample->restore_lock);
if (!restore->count) {
mlx5_del_flow_rules(restore->rule);
- mlx5_modify_header_dealloc(esw_psample->priv->mdev, restore->modify_hdr);
+ mlx5_modify_header_dealloc(tc_psample->esw->dev, restore->modify_hdr);
kfree(restore);
}
}
-void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj)
+void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj)
{
u32 trunc_size = mapped_obj->sample.trunc_size;
struct psample_group psample_group = {};
@@ -325,6 +361,87 @@ void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj
psample_sample_packet(&psample_group, skb, mapped_obj->sample.rate, &md);
}
+static int
+add_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
+ struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr,
+ u32 *default_tbl_id)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ u32 attr_sz = ns_to_attr_sz(MLX5_FLOW_NAMESPACE_FDB);
+ struct mlx5_vport_tbl_attr per_vport_tbl_attr;
+ struct mlx5_flow_table *default_tbl;
+ struct mlx5_flow_attr *post_attr;
+ int err;
+
+ /* Allocate default table per vport, chain and prio. Otherwise, there is
+ * only one default table for the same sampler object. Rules with different
+ * prio and chain may overlap. For CT sample action, per vport default
+ * table is needed to resotre the metadata.
+ */
+ per_vport_tbl_attr.chain = attr->chain;
+ per_vport_tbl_attr.prio = attr->prio;
+ per_vport_tbl_attr.vport = esw_attr->in_rep->vport;
+ per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
+ default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr);
+ if (IS_ERR(default_tbl)) {
+ err = PTR_ERR(default_tbl);
+ goto err_default_tbl;
+ }
+ *default_tbl_id = default_tbl->id;
+
+ post_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
+ if (!post_attr) {
+ err = -ENOMEM;
+ goto err_attr;
+ }
+ sample_flow->post_attr = post_attr;
+ memcpy(post_attr, attr, attr_sz);
+ /* Perform the original matches on the default table.
+ * Offload all actions except the sample action.
+ */
+ post_attr->chain = 0;
+ post_attr->prio = 0;
+ post_attr->ft = default_tbl;
+ post_attr->flags = MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
+
+ /* When offloading sample and encap action, if there is no valid
+ * neigh data struct, a slow path rule is offloaded first. Source
+ * port metadata match is set at that time. A per vport table is
+ * already allocated. No need to match it again. So clear the source
+ * port metadata match.
+ */
+ mlx5_eswitch_clear_rule_source_port(esw, spec);
+ sample_flow->post_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, post_attr);
+ if (IS_ERR(sample_flow->post_rule)) {
+ err = PTR_ERR(sample_flow->post_rule);
+ goto err_rule;
+ }
+ return 0;
+
+err_rule:
+ kfree(post_attr);
+err_attr:
+ mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr);
+err_default_tbl:
+ return err;
+}
+
+static void
+del_post_rule(struct mlx5_eswitch *esw, struct mlx5e_sample_flow *sample_flow,
+ struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ struct mlx5_vport_tbl_attr tbl_attr;
+
+ mlx5_eswitch_del_offloaded_rule(esw, sample_flow->post_rule, sample_flow->post_attr);
+ kfree(sample_flow->post_attr);
+ tbl_attr.chain = attr->chain;
+ tbl_attr.prio = attr->prio;
+ tbl_attr.vport = esw_attr->in_rep->vport;
+ tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
+ mlx5_esw_vporttbl_put(esw, &tbl_attr);
+}
+
/* For the following typical flow table:
*
* +-------------------------------+
@@ -342,8 +459,9 @@ void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj
* +---------------------+
* + original match +
* +---------------------+
- * |
- * v
+ * | set fte_id (if reg_c preserve cap)
+ * | do decap (if required)
+ * v
* +------------------------------------------------+
* + Flow Sampler Object +
* +------------------------------------------------+
@@ -353,80 +471,82 @@ void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj
* +------------------------------------------------+
* | |
* v v
- * +-----------------------------+ +----------------------------------------+
- * + sample table + + default table per <vport, chain, prio> +
- * +-----------------------------+ +----------------------------------------+
- * + forward to management vport + + original match +
- * +-----------------------------+ +----------------------------------------+
- * + other actions +
- * +----------------------------------------+
+ * +-----------------------------+ +-------------------+
+ * + sample table + + default table +
+ * +-----------------------------+ +-------------------+
+ * + forward to management vport + |
+ * +-----------------------------+ |
+ * +-------+------+
+ * | |reg_c preserve cap
+ * | |or decap action
+ * v v
+ * +-----------------+ +-------------+
+ * + per vport table + + post action +
+ * +-----------------+ +-------------+
+ * + original match +
+ * +-----------------+
+ * + other actions +
+ * +-----------------+
*/
struct mlx5_flow_handle *
-mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample,
+mlx5e_tc_sample_offload(struct mlx5e_tc_psample *tc_psample,
struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr)
+ struct mlx5_flow_attr *attr,
+ u32 tunnel_id)
{
+ struct mlx5e_post_act_handle *post_act_handle = NULL;
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
- struct mlx5_vport_tbl_attr per_vport_tbl_attr;
struct mlx5_esw_flow_attr *pre_esw_attr;
struct mlx5_mapped_obj restore_obj = {};
- struct mlx5_sample_flow *sample_flow;
- struct mlx5_sample_attr *sample_attr;
- struct mlx5_flow_table *default_tbl;
+ struct mlx5e_sample_flow *sample_flow;
+ struct mlx5e_sample_attr *sample_attr;
struct mlx5_flow_attr *pre_attr;
struct mlx5_eswitch *esw;
+ u32 default_tbl_id;
u32 obj_id;
int err;
- if (IS_ERR_OR_NULL(esw_psample))
+ if (IS_ERR_OR_NULL(tc_psample))
return ERR_PTR(-EOPNOTSUPP);
/* If slow path flag is set, eg. when the neigh is invalid for encap,
* don't offload sample action.
*/
- esw = esw_psample->priv->mdev->priv.eswitch;
+ esw = tc_psample->esw;
if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL);
if (!sample_flow)
return ERR_PTR(-ENOMEM);
- esw_attr->sample->sample_flow = sample_flow;
-
- /* Allocate default table per vport, chain and prio. Otherwise, there is
- * only one default table for the same sampler object. Rules with different
- * prio and chain may overlap. For CT sample action, per vport default
- * table is needed to resotre the metadata.
- */
- per_vport_tbl_attr.chain = attr->chain;
- per_vport_tbl_attr.prio = attr->prio;
- per_vport_tbl_attr.vport = esw_attr->in_rep->vport;
- per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
- default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr);
- if (IS_ERR(default_tbl)) {
- err = PTR_ERR(default_tbl);
- goto err_default_tbl;
- }
+ sample_attr = attr->sample_attr;
+ sample_attr->sample_flow = sample_flow;
- /* Perform the original matches on the default table.
- * Offload all actions except the sample action.
- */
- esw_attr->sample->sample_default_tbl = default_tbl;
- /* When offloading sample and encap action, if there is no valid
- * neigh data struct, a slow path rule is offloaded first. Source
- * port metadata match is set at that time. A per vport table is
- * already allocated. No need to match it again. So clear the source
- * port metadata match.
+ /* For NICs with reg_c_preserve support or decap action, use
+ * post action instead of the per vport, chain and prio table.
+ * Only match the fte id instead of the same match in the
+ * original flow table.
*/
- mlx5_eswitch_clear_rule_source_port(esw, spec);
- sample_flow->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
- if (IS_ERR(sample_flow->rule)) {
- err = PTR_ERR(sample_flow->rule);
- goto err_offload_rule;
+ if (MLX5_CAP_GEN(esw->dev, reg_c_preserve) ||
+ attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) {
+ struct mlx5_flow_table *ft;
+
+ ft = mlx5e_tc_post_act_get_ft(tc_psample->post_act);
+ default_tbl_id = ft->id;
+ post_act_handle = mlx5e_tc_post_act_add(tc_psample->post_act, attr);
+ if (IS_ERR(post_act_handle)) {
+ err = PTR_ERR(post_act_handle);
+ goto err_post_act;
+ }
+ sample_flow->post_act_handle = post_act_handle;
+ } else {
+ err = add_post_rule(esw, sample_flow, spec, attr, &default_tbl_id);
+ if (err)
+ goto err_post_rule;
}
/* Create sampler object. */
- sample_flow->sampler = sampler_get(esw_psample, esw_attr->sample->rate, default_tbl->id);
+ sample_flow->sampler = sampler_get(tc_psample, sample_attr->rate, default_tbl_id);
if (IS_ERR(sample_flow->sampler)) {
err = PTR_ERR(sample_flow->sampler);
goto err_sampler;
@@ -434,16 +554,17 @@ mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample,
/* Create an id mapping reg_c0 value to sample object. */
restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE;
- restore_obj.sample.group_id = esw_attr->sample->group_num;
- restore_obj.sample.rate = esw_attr->sample->rate;
- restore_obj.sample.trunc_size = esw_attr->sample->trunc_size;
+ restore_obj.sample.group_id = sample_attr->group_num;
+ restore_obj.sample.rate = sample_attr->rate;
+ restore_obj.sample.trunc_size = sample_attr->trunc_size;
+ restore_obj.sample.tunnel_id = tunnel_id;
err = mapping_add(esw->offloads.reg_c0_obj_pool, &restore_obj, &obj_id);
if (err)
goto err_obj_id;
- esw_attr->sample->restore_obj_id = obj_id;
+ sample_attr->restore_obj_id = obj_id;
/* Create sample restore context. */
- sample_flow->restore = sample_restore_get(esw_psample, obj_id);
+ sample_flow->restore = sample_restore_get(tc_psample, obj_id, post_act_handle);
if (IS_ERR(sample_flow->restore)) {
err = PTR_ERR(sample_flow->restore);
goto err_sample_restore;
@@ -455,21 +576,23 @@ mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample,
pre_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
if (!pre_attr) {
err = -ENOMEM;
- goto err_alloc_flow_attr;
- }
- sample_attr = kzalloc(sizeof(*sample_attr), GFP_KERNEL);
- if (!sample_attr) {
- err = -ENOMEM;
- goto err_alloc_sample_attr;
+ goto err_alloc_pre_flow_attr;
}
- pre_esw_attr = pre_attr->esw_attr;
pre_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+ /* For decap action, do decap in the original flow table instead of the
+ * default flow table.
+ */
+ if (tunnel_id)
+ pre_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
pre_attr->modify_hdr = sample_flow->restore->modify_hdr;
pre_attr->flags = MLX5_ESW_ATTR_FLAG_SAMPLE;
+ pre_attr->inner_match_level = attr->inner_match_level;
+ pre_attr->outer_match_level = attr->outer_match_level;
pre_attr->chain = attr->chain;
pre_attr->prio = attr->prio;
- pre_esw_attr->sample = sample_attr;
- pre_esw_attr->sample->sampler_id = sample_flow->sampler->sampler_id;
+ pre_attr->sample_attr = attr->sample_attr;
+ sample_attr->sampler_id = sample_flow->sampler->sampler_id;
+ pre_esw_attr = pre_attr->esw_attr;
pre_esw_attr->in_mdev = esw_attr->in_mdev;
pre_esw_attr->in_rep = esw_attr->in_rep;
sample_flow->pre_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, pre_attr);
@@ -479,108 +602,113 @@ mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample,
}
sample_flow->pre_attr = pre_attr;
- return sample_flow->rule;
+ return sample_flow->post_rule;
err_pre_offload_rule:
- kfree(sample_attr);
-err_alloc_sample_attr:
kfree(pre_attr);
-err_alloc_flow_attr:
- sample_restore_put(esw_psample, sample_flow->restore);
+err_alloc_pre_flow_attr:
+ sample_restore_put(tc_psample, sample_flow->restore);
err_sample_restore:
mapping_remove(esw->offloads.reg_c0_obj_pool, obj_id);
err_obj_id:
- sampler_put(esw_psample, sample_flow->sampler);
+ sampler_put(tc_psample, sample_flow->sampler);
err_sampler:
- /* For sample offload, rule is added in default_tbl. No need to call
- * mlx5_esw_chains_put_table()
- */
- attr->prio = 0;
- attr->chain = 0;
- mlx5_eswitch_del_offloaded_rule(esw, sample_flow->rule, attr);
-err_offload_rule:
- mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr);
-err_default_tbl:
+ if (!post_act_handle)
+ del_post_rule(esw, sample_flow, attr);
+err_post_rule:
+ if (post_act_handle)
+ mlx5e_tc_post_act_del(tc_psample->post_act, post_act_handle);
+err_post_act:
kfree(sample_flow);
return ERR_PTR(err);
}
void
-mlx5_esw_sample_unoffload(struct mlx5_esw_psample *esw_psample,
+mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *tc_psample,
struct mlx5_flow_handle *rule,
struct mlx5_flow_attr *attr)
{
struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
- struct mlx5_sample_flow *sample_flow;
+ struct mlx5e_sample_flow *sample_flow;
struct mlx5_vport_tbl_attr tbl_attr;
- struct mlx5_flow_attr *pre_attr;
struct mlx5_eswitch *esw;
- if (IS_ERR_OR_NULL(esw_psample))
+ if (IS_ERR_OR_NULL(tc_psample))
return;
/* If slow path flag is set, sample action is not offloaded.
* No need to delete sample rule.
*/
- esw = esw_psample->priv->mdev->priv.eswitch;
+ esw = tc_psample->esw;
if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) {
mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
return;
}
- sample_flow = esw_attr->sample->sample_flow;
- pre_attr = sample_flow->pre_attr;
- memset(pre_attr, 0, sizeof(*pre_attr));
- esw = esw_psample->priv->mdev->priv.eswitch;
- mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, pre_attr);
- mlx5_eswitch_del_offloaded_rule(esw, sample_flow->rule, attr);
-
- sample_restore_put(esw_psample, sample_flow->restore);
- mapping_remove(esw->offloads.reg_c0_obj_pool, esw_attr->sample->restore_obj_id);
- sampler_put(esw_psample, sample_flow->sampler);
- tbl_attr.chain = attr->chain;
- tbl_attr.prio = attr->prio;
- tbl_attr.vport = esw_attr->in_rep->vport;
- tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
- mlx5_esw_vporttbl_put(esw, &tbl_attr);
+ /* The following delete order can't be changed, otherwise,
+ * will hit fw syndromes.
+ */
+ sample_flow = attr->sample_attr->sample_flow;
+ mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, sample_flow->pre_attr);
+ if (!sample_flow->post_act_handle)
+ mlx5_eswitch_del_offloaded_rule(esw, sample_flow->post_rule,
+ sample_flow->post_attr);
+
+ sample_restore_put(tc_psample, sample_flow->restore);
+ mapping_remove(esw->offloads.reg_c0_obj_pool, attr->sample_attr->restore_obj_id);
+ sampler_put(tc_psample, sample_flow->sampler);
+ if (sample_flow->post_act_handle) {
+ mlx5e_tc_post_act_del(tc_psample->post_act, sample_flow->post_act_handle);
+ } else {
+ tbl_attr.chain = attr->chain;
+ tbl_attr.prio = attr->prio;
+ tbl_attr.vport = esw_attr->in_rep->vport;
+ tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
+ mlx5_esw_vporttbl_put(esw, &tbl_attr);
+ kfree(sample_flow->post_attr);
+ }
- kfree(pre_attr->esw_attr->sample);
- kfree(pre_attr);
+ kfree(sample_flow->pre_attr);
kfree(sample_flow);
}
-struct mlx5_esw_psample *
-mlx5_esw_sample_init(struct mlx5e_priv *priv)
+struct mlx5e_tc_psample *
+mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act)
{
- struct mlx5_esw_psample *esw_psample;
+ struct mlx5e_tc_psample *tc_psample;
int err;
- esw_psample = kzalloc(sizeof(*esw_psample), GFP_KERNEL);
- if (!esw_psample)
+ tc_psample = kzalloc(sizeof(*tc_psample), GFP_KERNEL);
+ if (!tc_psample)
return ERR_PTR(-ENOMEM);
- esw_psample->priv = priv;
- err = sampler_termtbl_create(esw_psample);
+ if (IS_ERR_OR_NULL(post_act)) {
+ err = PTR_ERR(post_act);
+ goto err_post_act;
+ }
+ tc_psample->post_act = post_act;
+ tc_psample->esw = esw;
+ err = sampler_termtbl_create(tc_psample);
if (err)
- goto err_termtbl;
+ goto err_post_act;
- mutex_init(&esw_psample->ht_lock);
- mutex_init(&esw_psample->restore_lock);
+ mutex_init(&tc_psample->ht_lock);
+ mutex_init(&tc_psample->restore_lock);
- return esw_psample;
+ return tc_psample;
-err_termtbl:
- kfree(esw_psample);
+err_post_act:
+ kfree(tc_psample);
return ERR_PTR(err);
}
void
-mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample)
+mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample)
{
- if (IS_ERR_OR_NULL(esw_psample))
+ if (IS_ERR_OR_NULL(tc_psample))
return;
- mutex_destroy(&esw_psample->restore_lock);
- mutex_destroy(&esw_psample->ht_lock);
- sampler_termtbl_destroy(esw_psample);
- kfree(esw_psample);
+ mutex_destroy(&tc_psample->restore_lock);
+ mutex_destroy(&tc_psample->ht_lock);
+ sampler_termtbl_destroy(tc_psample);
+ kfree(tc_psample);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
new file mode 100644
index 000000000000..db0146df9b30
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_TC_SAMPLE_H__
+#define __MLX5_EN_TC_SAMPLE_H__
+
+#include "eswitch.h"
+
+struct mlx5_flow_attr;
+struct mlx5e_tc_psample;
+struct mlx5e_post_act;
+
+struct mlx5e_sample_attr {
+ u32 group_num;
+ u32 rate;
+ u32 trunc_size;
+ u32 restore_obj_id;
+ u32 sampler_id;
+ struct mlx5e_sample_flow *sample_flow;
+};
+
+void mlx5e_tc_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj);
+
+struct mlx5_flow_handle *
+mlx5e_tc_sample_offload(struct mlx5e_tc_psample *sample_priv,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_flow_attr *attr,
+ u32 tunnel_id);
+
+void
+mlx5e_tc_sample_unoffload(struct mlx5e_tc_psample *sample_priv,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_flow_attr *attr);
+
+struct mlx5e_tc_psample *
+mlx5e_tc_sample_init(struct mlx5_eswitch *esw, struct mlx5e_post_act *post_act);
+
+void
+mlx5e_tc_sample_cleanup(struct mlx5e_tc_psample *tc_psample);
+
+#endif /* __MLX5_EN_TC_SAMPLE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index b1707b86aa16..6c949abcd2e1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -19,6 +19,7 @@
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
+#include "en/tc/post_act.h"
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"
@@ -32,10 +33,6 @@
#define MLX5_CT_STATE_RELATED_BIT BIT(5)
#define MLX5_CT_STATE_INVALID_BIT BIT(6)
-#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen)
-#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
-#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX
-
#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen)
#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)
@@ -46,14 +43,13 @@ struct mlx5_tc_ct_priv {
struct mlx5_core_dev *dev;
const struct net_device *netdev;
struct mod_hdr_tbl *mod_hdr_tbl;
- struct idr fte_ids;
struct xarray tuple_ids;
struct rhashtable zone_ht;
struct rhashtable ct_tuples_ht;
struct rhashtable ct_tuples_nat_ht;
struct mlx5_flow_table *ct;
struct mlx5_flow_table *ct_nat;
- struct mlx5_flow_table *post_ct;
+ struct mlx5e_post_act *post_act;
struct mutex control_lock; /* guards parallel adds/dels */
struct mapping_ctx *zone_mapping;
struct mapping_ctx *labels_mapping;
@@ -64,11 +60,9 @@ struct mlx5_tc_ct_priv {
struct mlx5_ct_flow {
struct mlx5_flow_attr *pre_ct_attr;
- struct mlx5_flow_attr *post_ct_attr;
struct mlx5_flow_handle *pre_ct_rule;
- struct mlx5_flow_handle *post_ct_rule;
+ struct mlx5e_post_act_handle *post_act_handle;
struct mlx5_ct_ft *ft;
- u32 fte_id;
u32 chain_mapping;
};
@@ -768,7 +762,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
attr->dest_chain = 0;
- attr->dest_ft = ct_priv->post_ct;
+ attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
attr->outer_match_level = MLX5_MATCH_L4;
attr->counter = entry->counter->counter;
@@ -1432,7 +1426,7 @@ static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
ctstate |= MLX5_CT_STATE_NAT_BIT;
mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
- dest.ft = ct_priv->post_ct;
+ dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@@ -1716,9 +1710,9 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
* | do decap
* v
* +---------------------+
- * + pre_ct/pre_ct_nat + if matches +---------------------+
- * + zone+nat match +---------------->+ post_ct (see below) +
- * +---------------------+ set zone +---------------------+
+ * + pre_ct/pre_ct_nat + if matches +-------------------------+
+ * + zone+nat match +---------------->+ post_act (see below) +
+ * +---------------------+ set zone +-------------------------+
* | set zone
* v
* +--------------------+
@@ -1732,7 +1726,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
* | do nat (if needed)
* v
* +--------------+
- * + post_ct + original filter actions
+ * + post_act + original filter actions
* + fte_id match +------------------------>
* +--------------+
*/
@@ -1746,19 +1740,15 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
- struct mlx5_flow_spec *post_ct_spec = NULL;
+ struct mlx5e_post_act_handle *handle;
struct mlx5_flow_attr *pre_ct_attr;
struct mlx5_modify_hdr *mod_hdr;
- struct mlx5_flow_handle *rule;
struct mlx5_ct_flow *ct_flow;
int chain_mapping = 0, err;
struct mlx5_ct_ft *ft;
- u32 fte_id = 1;
- post_ct_spec = kvzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
- if (!post_ct_spec || !ct_flow) {
- kvfree(post_ct_spec);
+ if (!ct_flow) {
kfree(ct_flow);
return ERR_PTR(-ENOMEM);
}
@@ -1773,14 +1763,13 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
}
ct_flow->ft = ft;
- err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
- MLX5_FTE_ID_MAX, GFP_KERNEL);
- if (err) {
- netdev_warn(priv->netdev,
- "Failed to allocate fte id, err: %d\n", err);
- goto err_idr;
+ handle = mlx5e_tc_post_act_add(ct_priv->post_act, attr);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ ct_dbg("Failed to allocate post action handle");
+ goto err_post_act_handle;
}
- ct_flow->fte_id = fte_id;
+ ct_flow->post_act_handle = handle;
/* Base flow attributes of both rules on original rule attribute */
ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
@@ -1789,15 +1778,8 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
goto err_alloc_pre;
}
- ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
- if (!ct_flow->post_ct_attr) {
- err = -ENOMEM;
- goto err_alloc_post;
- }
-
pre_ct_attr = ct_flow->pre_ct_attr;
memcpy(pre_ct_attr, attr, attr_sz);
- memcpy(ct_flow->post_ct_attr, attr, attr_sz);
/* Modify the original rule's action to fwd and modify, leave decap */
pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
@@ -1823,10 +1805,9 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
goto err_mapping;
}
- err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
- FTEID_TO_REG, fte_id);
+ err = mlx5e_tc_post_act_set_handle(priv->mdev, handle, &pre_mod_acts);
if (err) {
- ct_dbg("Failed to set fte_id register mapping");
+ ct_dbg("Failed to set post action handle");
goto err_mapping;
}
@@ -1857,33 +1838,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
}
pre_ct_attr->modify_hdr = mod_hdr;
- /* Post ct rule matches on fte_id and executes original rule's
- * tc rule action
- */
- mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
- fte_id, MLX5_FTE_ID_MASK);
-
- /* Put post_ct rule on post_ct flow table */
- ct_flow->post_ct_attr->chain = 0;
- ct_flow->post_ct_attr->prio = 0;
- ct_flow->post_ct_attr->ft = ct_priv->post_ct;
-
- /* Splits were handled before CT */
- if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
- ct_flow->post_ct_attr->esw_attr->split_count = 0;
-
- ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE;
- ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE;
- ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
- rule = mlx5_tc_rule_insert(priv, post_ct_spec,
- ct_flow->post_ct_attr);
- ct_flow->post_ct_rule = rule;
- if (IS_ERR(ct_flow->post_ct_rule)) {
- err = PTR_ERR(ct_flow->post_ct_rule);
- ct_dbg("Failed to add post ct rule");
- goto err_insert_post_ct;
- }
-
/* Change original rule point to ct table */
pre_ct_attr->dest_chain = 0;
pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
@@ -1897,28 +1851,21 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
attr->ct_attr.ct_flow = ct_flow;
dealloc_mod_hdr_actions(&pre_mod_acts);
- kvfree(post_ct_spec);
- return rule;
+ return ct_flow->pre_ct_rule;
err_insert_orig:
- mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
- ct_flow->post_ct_attr);
-err_insert_post_ct:
mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
err_mapping:
dealloc_mod_hdr_actions(&pre_mod_acts);
mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
err_get_chain:
- kfree(ct_flow->post_ct_attr);
-err_alloc_post:
kfree(ct_flow->pre_ct_attr);
err_alloc_pre:
- idr_remove(&ct_priv->fte_ids, fte_id);
-err_idr:
+ mlx5e_tc_post_act_del(ct_priv->post_act, handle);
+err_post_act_handle:
mlx5_tc_ct_del_ft_cb(ct_priv, ft);
err_ft:
- kvfree(post_ct_spec);
kfree(ct_flow);
netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
return ERR_PTR(err);
@@ -2029,16 +1976,13 @@ __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
pre_ct_attr);
mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
- if (ct_flow->post_ct_rule) {
- mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
- ct_flow->post_ct_attr);
+ if (ct_flow->post_act_handle) {
mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
- idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
+ mlx5e_tc_post_act_del(ct_priv->post_act, ct_flow->post_act_handle);
mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
}
kfree(ct_flow->pre_ct_attr);
- kfree(ct_flow->post_ct_attr);
kfree(ct_flow);
}
@@ -2064,11 +2008,6 @@ static int
mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
const char **err_msg)
{
- if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
- *err_msg = "firmware level support is missing";
- return -EOPNOTSUPP;
- }
-
if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
/* vlan workaround should be avoided for multi chain rules.
* This is just a sanity check as pop vlan action should
@@ -2098,20 +2037,9 @@ mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
}
static int
-mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv,
- const char **err_msg)
-{
- if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
- *err_msg = "firmware level support is missing";
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
-static int
mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act,
const char **err_msg)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -2122,10 +2050,14 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
*err_msg = "tc skb extension missing";
return -EOPNOTSUPP;
#endif
+ if (IS_ERR_OR_NULL(post_act)) {
+ *err_msg = "tc ct offload not supported, post action is missing";
+ return -EOPNOTSUPP;
+ }
+
if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
return mlx5_tc_ct_init_check_esw_support(esw, err_msg);
- else
- return mlx5_tc_ct_init_check_nic_support(priv, err_msg);
+ return 0;
}
#define INIT_ERR_PREFIX "tc ct offload init failed"
@@ -2133,7 +2065,8 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
struct mlx5_tc_ct_priv *
mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
struct mod_hdr_tbl *mod_hdr,
- enum mlx5_flow_namespace_type ns_type)
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
{
struct mlx5_tc_ct_priv *ct_priv;
struct mlx5_core_dev *dev;
@@ -2142,11 +2075,9 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
int err;
dev = priv->mdev;
- err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg);
+ err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act, &msg);
if (err) {
- mlx5_core_warn(dev,
- "tc ct offload not supported, %s\n",
- msg);
+ mlx5_core_warn(dev, "tc ct offload not supported, %s\n", msg);
goto err_support;
}
@@ -2194,16 +2125,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
goto err_ct_nat_tbl;
}
- ct_priv->post_ct = mlx5_chains_create_global_table(chains);
- if (IS_ERR(ct_priv->post_ct)) {
- err = PTR_ERR(ct_priv->post_ct);
- mlx5_core_warn(dev,
- "%s, failed to create post ct table err: %d\n",
- INIT_ERR_PREFIX, err);
- goto err_post_ct_tbl;
- }
-
- idr_init(&ct_priv->fte_ids);
+ ct_priv->post_act = post_act;
mutex_init(&ct_priv->control_lock);
rhashtable_init(&ct_priv->zone_ht, &zone_params);
rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
@@ -2211,8 +2133,6 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
return ct_priv;
-err_post_ct_tbl:
- mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
err_ct_nat_tbl:
mlx5_chains_destroy_global_table(chains, ct_priv->ct);
err_ct_tbl:
@@ -2237,7 +2157,6 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
chains = ct_priv->chains;
- mlx5_chains_destroy_global_table(chains, ct_priv->post_ct);
mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
mlx5_chains_destroy_global_table(chains, ct_priv->ct);
mapping_destroy(ct_priv->zone_mapping);
@@ -2247,7 +2166,6 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
rhashtable_destroy(&ct_priv->zone_ht);
mutex_destroy(&ct_priv->control_lock);
- idr_destroy(&ct_priv->fte_ids);
kfree(ct_priv);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
index 644cf1641cde..363329f4aac6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h
@@ -92,7 +92,8 @@ struct mlx5_ct_attr {
struct mlx5_tc_ct_priv *
mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
struct mod_hdr_tbl *mod_hdr,
- enum mlx5_flow_namespace_type ns_type);
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act);
void
mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv);
@@ -132,7 +133,8 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
static inline struct mlx5_tc_ct_priv *
mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
struct mod_hdr_tbl *mod_hdr,
- enum mlx5_flow_namespace_type ns_type)
+ enum mlx5_flow_namespace_type ns_type,
+ struct mlx5e_post_act *post_act)
{
return NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 756f806401d7..48a203a9e7d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -60,6 +60,7 @@ struct mlx5e_neigh_update_table {
struct mlx5_tc_ct_priv;
struct mlx5e_rep_bond;
struct mlx5e_tc_tun_encap;
+struct mlx5e_post_act;
struct mlx5_rep_uplink_priv {
/* Filters DB - instantiated by the uplink representor and shared by
@@ -88,8 +89,9 @@ struct mlx5_rep_uplink_priv {
/* maps tun_enc_opts to a unique id*/
struct mapping_ctx *tunnel_enc_opts_mapping;
+ struct mlx5e_post_act *post_act;
struct mlx5_tc_ct_priv *ct_priv;
- struct mlx5_esw_psample *esw_psample;
+ struct mlx5e_tc_psample *tc_psample;
/* support eswitch vports bonding */
struct mlx5e_rep_bond *bond;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 9465a51b6e66..6603d9c823a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -47,6 +47,7 @@
#include <net/bareudp.h>
#include <net/bonding.h>
#include "en.h"
+#include "en/tc/post_act.h"
#include "en_rep.h"
#include "en/rep/tc.h"
#include "en/rep/neigh.h"
@@ -60,7 +61,7 @@
#include "en/mod_hdr.h"
#include "en/tc_priv.h"
#include "en/tc_tun_encap.h"
-#include "esw/sample.h"
+#include "en/tc/sample.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "lib/fs_chains.h"
@@ -246,7 +247,7 @@ get_ct_priv(struct mlx5e_priv *priv)
}
#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
-static struct mlx5_esw_psample *
+static struct mlx5e_tc_psample *
get_sample_priv(struct mlx5e_priv *priv)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -257,7 +258,7 @@ get_sample_priv(struct mlx5e_priv *priv)
uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
uplink_priv = &uplink_rpriv->uplink_priv;
- return uplink_priv->esw_psample;
+ return uplink_priv->tc_psample;
}
return NULL;
@@ -1147,7 +1148,8 @@ mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
mod_hdr_acts);
#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
} else if (flow_flag_test(flow, SAMPLE)) {
- rule = mlx5_esw_sample_offload(get_sample_priv(flow->priv), spec, attr);
+ rule = mlx5e_tc_sample_offload(get_sample_priv(flow->priv), spec, attr,
+ mlx5e_tc_get_flow_tun_id(flow));
#endif
} else {
rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
@@ -1186,7 +1188,7 @@ void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
if (flow_flag_test(flow, SAMPLE)) {
- mlx5_esw_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
+ mlx5e_tc_sample_unoffload(get_sample_priv(flow->priv), flow->rule[0], attr);
return;
}
#endif
@@ -1550,6 +1552,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
else
mlx5e_detach_mod_hdr(priv, flow);
}
+ kfree(attr->sample_attr);
kvfree(attr->parse_attr);
kvfree(attr->esw_attr->rx_tun_attr);
@@ -1559,7 +1562,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
if (flow_flag_test(flow, L3_TO_L2_DECAP))
mlx5e_detach_decap(priv, flow);
- kfree(flow->attr->esw_attr->sample);
kfree(flow->attr);
}
@@ -1624,17 +1626,22 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
}
}
-static int flow_has_tc_fwd_action(struct flow_cls_offload *f)
+static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f)
{
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
struct flow_action *flow_action = &rule->action;
const struct flow_action_entry *act;
int i;
+ if (chain)
+ return false;
+
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
case FLOW_ACTION_GOTO:
return true;
+ case FLOW_ACTION_SAMPLE:
+ return true;
default:
continue;
}
@@ -1875,7 +1882,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
return -EOPNOTSUPP;
needs_mapping = !!flow->attr->chain;
- sets_mapping = !flow->attr->chain && flow_has_tc_fwd_action(f);
+ sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f);
*match_inner = !needs_mapping;
if ((needs_mapping || sets_mapping) &&
@@ -3716,13 +3723,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5e_sample_attr sample_attr = {};
const struct ip_tunnel_info *info = NULL;
struct mlx5_flow_attr *attr = flow->attr;
int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
bool ft_flow = mlx5e_is_ft_flow(flow);
const struct flow_action_entry *act;
struct mlx5_esw_flow_attr *esw_attr;
- struct mlx5_sample_attr sample = {};
bool encap = false, decap = false;
u32 action = attr->action;
int err, i, if_count = 0;
@@ -3993,10 +4000,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
NL_SET_ERR_MSG_MOD(extack, "Sample action with connection tracking is not supported");
return -EOPNOTSUPP;
}
- sample.rate = act->sample.rate;
- sample.group_num = act->sample.psample_group->group_num;
+ sample_attr.rate = act->sample.rate;
+ sample_attr.group_num = act->sample.psample_group->group_num;
if (act->sample.truncate)
- sample.trunc_size = act->sample.trunc_size;
+ sample_attr.trunc_size = act->sample.trunc_size;
flow_flag_set(flow, SAMPLE);
break;
default:
@@ -4081,10 +4088,10 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
* no errors after parsing.
*/
if (flow_flag_test(flow, SAMPLE)) {
- esw_attr->sample = kzalloc(sizeof(*esw_attr->sample), GFP_KERNEL);
- if (!esw_attr->sample)
+ attr->sample_attr = kzalloc(sizeof(*attr->sample_attr), GFP_KERNEL);
+ if (!attr->sample_attr)
return -ENOMEM;
- *esw_attr->sample = sample;
+ *attr->sample_attr = sample_attr;
}
return 0;
@@ -4682,7 +4689,7 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
rate_mbps = max_t(u32, rate, 1);
}
- err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
+ err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps);
if (err)
NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
@@ -4895,8 +4902,9 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
goto err_chains;
}
+ tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
- MLX5_FLOW_NAMESPACE_KERNEL);
+ MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);
tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
err = register_netdevice_notifier_dev_net(priv->netdev,
@@ -4912,6 +4920,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
err_reg:
mlx5_tc_ct_clean(tc->ct);
+ mlx5e_tc_post_act_destroy(tc->post_act);
mlx5_chains_destroy(tc->chains);
err_chains:
mapping_destroy(chains_mapping);
@@ -4950,6 +4959,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
mutex_destroy(&tc->t_lock);
mlx5_tc_ct_clean(tc->ct);
+ mlx5e_tc_post_act_destroy(tc->post_act);
mapping_destroy(tc->mapping);
mlx5_chains_destroy(tc->chains);
}
@@ -4970,13 +4980,16 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
priv = netdev_priv(rpriv->netdev);
esw = priv->mdev->priv.eswitch;
+ uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw),
+ MLX5_FLOW_NAMESPACE_FDB);
uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
esw_chains(esw),
&esw->offloads.mod_hdr,
- MLX5_FLOW_NAMESPACE_FDB);
+ MLX5_FLOW_NAMESPACE_FDB,
+ uplink_priv->post_act);
#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
- uplink_priv->esw_psample = mlx5_esw_sample_init(netdev_priv(priv->netdev));
+ uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);
#endif
mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
@@ -5022,11 +5035,12 @@ err_enc_opts_mapping:
mapping_destroy(uplink_priv->tunnel_mapping);
err_tun_mapping:
#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
- mlx5_esw_sample_cleanup(uplink_priv->esw_psample);
+ mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
#endif
mlx5_tc_ct_clean(uplink_priv->ct_priv);
netdev_warn(priv->netdev,
"Failed to initialize tc (eswitch), err: %d", err);
+ mlx5e_tc_post_act_destroy(uplink_priv->post_act);
return err;
}
@@ -5043,9 +5057,10 @@ void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht)
mapping_destroy(uplink_priv->tunnel_mapping);
#if IS_ENABLED(CONFIG_MLX5_TC_SAMPLE)
- mlx5_esw_sample_cleanup(uplink_priv->esw_psample);
+ mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
#endif
mlx5_tc_ct_clean(uplink_priv->ct_priv);
+ mlx5e_tc_post_act_destroy(uplink_priv->post_act);
}
int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index f7cbeb0b66d2..1a4cd882f0fb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -70,6 +70,7 @@ struct mlx5_flow_attr {
struct mlx5_fc *counter;
struct mlx5_modify_hdr *modify_hdr;
struct mlx5_ct_attr ct_attr;
+ struct mlx5e_sample_attr *sample_attr;
struct mlx5e_tc_flow_parse_attr *parse_attr;
u32 chain;
u16 prio;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index 1703384eca95..20af557ae30c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -91,9 +91,15 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_
if (err)
goto reg_err;
+ err = devlink_rate_leaf_create(dl_port, vport);
+ if (err)
+ goto rate_err;
+
vport->dl_port = dl_port;
return 0;
+rate_err:
+ devlink_port_unregister(dl_port);
reg_err:
mlx5_esw_dl_port_free(dl_port);
return err;
@@ -109,6 +115,12 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo
vport = mlx5_eswitch_get_vport(esw, vport_num);
if (IS_ERR(vport))
return;
+
+ if (vport->dl_port->devlink_rate) {
+ mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL);
+ devlink_rate_leaf_destroy(vport->dl_port);
+ }
+
devlink_port_unregister(vport->dl_port);
mlx5_esw_dl_port_free(vport->dl_port);
vport->dl_port = NULL;
@@ -148,8 +160,16 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p
if (err)
return err;
+ err = devlink_rate_leaf_create(dl_port, vport);
+ if (err)
+ goto rate_err;
+
vport->dl_port = dl_port;
return 0;
+
+rate_err:
+ devlink_port_unregister(dl_port);
+ return err;
}
void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num)
@@ -159,6 +179,12 @@ void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num
vport = mlx5_eswitch_get_vport(esw, vport_num);
if (IS_ERR(vport))
return;
+
+ if (vport->dl_port->devlink_rate) {
+ mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL);
+ devlink_rate_leaf_destroy(vport->dl_port);
+ }
+
devlink_port_unregister(vport->dl_port);
vport->dl_port = NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h
new file mode 100644
index 000000000000..458baf0c6415
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mlx5
+
+#if !defined(_MLX5_ESW_TP_) || defined(TRACE_HEADER_MULTI_READ)
+#define _MLX5_ESW_TP_
+
+#include <linux/tracepoint.h>
+#include "eswitch.h"
+
+TRACE_EVENT(mlx5_esw_vport_qos_destroy,
+ TP_PROTO(const struct mlx5_vport *vport),
+ TP_ARGS(vport),
+ TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device))
+ __field(unsigned short, vport_id)
+ __field(unsigned int, tsar_ix)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device));
+ __entry->vport_id = vport->vport;
+ __entry->tsar_ix = vport->qos.esw_tsar_ix;
+ ),
+ TP_printk("(%s) vport=%hu tsar_ix=%u\n",
+ __get_str(devname), __entry->vport_id, __entry->tsar_ix
+ )
+);
+
+DECLARE_EVENT_CLASS(mlx5_esw_vport_qos_template,
+ TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate),
+ TP_ARGS(vport, bw_share, max_rate),
+ TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device))
+ __field(unsigned short, vport_id)
+ __field(unsigned int, tsar_ix)
+ __field(unsigned int, bw_share)
+ __field(unsigned int, max_rate)
+ __field(void *, group)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device));
+ __entry->vport_id = vport->vport;
+ __entry->tsar_ix = vport->qos.esw_tsar_ix;
+ __entry->bw_share = bw_share;
+ __entry->max_rate = max_rate;
+ __entry->group = vport->qos.group;
+ ),
+ TP_printk("(%s) vport=%hu tsar_ix=%u bw_share=%u, max_rate=%u group=%p\n",
+ __get_str(devname), __entry->vport_id, __entry->tsar_ix,
+ __entry->bw_share, __entry->max_rate, __entry->group
+ )
+);
+
+DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_create,
+ TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate),
+ TP_ARGS(vport, bw_share, max_rate)
+ );
+
+DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_config,
+ TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate),
+ TP_ARGS(vport, bw_share, max_rate)
+ );
+
+DECLARE_EVENT_CLASS(mlx5_esw_group_qos_template,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_esw_rate_group *group,
+ unsigned int tsar_ix),
+ TP_ARGS(dev, group, tsar_ix),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(const void *, group)
+ __field(unsigned int, tsar_ix)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->group = group;
+ __entry->tsar_ix = tsar_ix;
+ ),
+ TP_printk("(%s) group=%p tsar_ix=%u\n",
+ __get_str(devname), __entry->group, __entry->tsar_ix
+ )
+);
+
+DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_create,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_esw_rate_group *group,
+ unsigned int tsar_ix),
+ TP_ARGS(dev, group, tsar_ix)
+ );
+
+DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_destroy,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_esw_rate_group *group,
+ unsigned int tsar_ix),
+ TP_ARGS(dev, group, tsar_ix)
+ );
+
+TRACE_EVENT(mlx5_esw_group_qos_config,
+ TP_PROTO(const struct mlx5_core_dev *dev,
+ const struct mlx5_esw_rate_group *group,
+ unsigned int tsar_ix, u32 bw_share, u32 max_rate),
+ TP_ARGS(dev, group, tsar_ix, bw_share, max_rate),
+ TP_STRUCT__entry(__string(devname, dev_name(dev->device))
+ __field(const void *, group)
+ __field(unsigned int, tsar_ix)
+ __field(unsigned int, bw_share)
+ __field(unsigned int, max_rate)
+ ),
+ TP_fast_assign(__assign_str(devname, dev_name(dev->device));
+ __entry->group = group;
+ __entry->tsar_ix = tsar_ix;
+ __entry->bw_share = bw_share;
+ __entry->max_rate = max_rate;
+ ),
+ TP_printk("(%s) group=%p tsar_ix=%u bw_share=%u max_rate=%u\n",
+ __get_str(devname), __entry->group, __entry->tsar_ix,
+ __entry->bw_share, __entry->max_rate
+ )
+);
+#endif /* _MLX5_ESW_TP_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH esw/diag
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE qos_tracepoint
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
index d9041b16611d..df277a6cddc0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
@@ -11,6 +11,7 @@
#include "mlx5_core.h"
#include "eswitch.h"
#include "fs_core.h"
+#include "esw/qos.h"
enum {
LEGACY_VEPA_PRIO = 0,
@@ -508,3 +509,22 @@ unlock:
mutex_unlock(&esw->state_lock);
return err;
}
+
+int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
+ u32 max_rate, u32 min_rate)
+{
+ struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+ int err;
+
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+ if (IS_ERR(evport))
+ return PTR_ERR(evport);
+
+ mutex_lock(&esw->state_lock);
+ err = mlx5_esw_qos_set_vport_min_rate(esw, evport, min_rate, NULL);
+ if (!err)
+ err = mlx5_esw_qos_set_vport_max_rate(esw, evport, max_rate, NULL);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
new file mode 100644
index 000000000000..985e305179d1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -0,0 +1,869 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "eswitch.h"
+#include "esw/qos.h"
+#include "en/port.h"
+#define CREATE_TRACE_POINTS
+#include "diag/qos_tracepoint.h"
+
+/* Minimum supported BW share value by the HW is 1 Mbit/sec */
+#define MLX5_MIN_BW_SHARE 1
+
+#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
+ min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit)
+
+struct mlx5_esw_rate_group {
+ u32 tsar_ix;
+ u32 max_rate;
+ u32 min_rate;
+ u32 bw_share;
+ struct list_head list;
+};
+
+static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx,
+ u32 parent_ix, u32 tsar_ix,
+ u32 max_rate, u32 bw_share)
+{
+ u32 bitmask = 0;
+
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_ix);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+ bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+ bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
+
+ return mlx5_modify_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ sched_ctx,
+ tsar_ix,
+ bitmask);
+}
+
+static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
+ u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ int err;
+
+ err = esw_qos_tsar_config(dev, sched_ctx,
+ esw->qos.root_tsar_ix, group->tsar_ix,
+ max_rate, bw_share);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed");
+
+ trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate);
+
+ return err;
+}
+
+static int esw_qos_vport_config(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ u32 max_rate, u32 bw_share,
+ struct netlink_ext_ack *extack)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_esw_rate_group *group = vport->qos.group;
+ struct mlx5_core_dev *dev = esw->dev;
+ u32 parent_tsar_ix;
+ void *vport_elem;
+ int err;
+
+ if (!vport->qos.enabled)
+ return -EIO;
+
+ parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+ vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
+ element_attributes);
+ MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
+
+ err = esw_qos_tsar_config(dev, sched_ctx, parent_tsar_ix, vport->qos.esw_tsar_ix,
+ max_rate, bw_share);
+ if (err) {
+ esw_warn(esw->dev,
+ "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
+ vport->vport, err);
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed");
+ return err;
+ }
+
+ trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate);
+
+ return 0;
+}
+
+static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ bool group_level)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ struct mlx5_vport *evport;
+ u32 max_guarantee = 0;
+ unsigned long i;
+
+ if (group_level) {
+ struct mlx5_esw_rate_group *group;
+
+ list_for_each_entry(group, &esw->qos.groups, list) {
+ if (group->min_rate < max_guarantee)
+ continue;
+ max_guarantee = group->min_rate;
+ }
+ } else {
+ mlx5_esw_for_each_vport(esw, i, evport) {
+ if (!evport->enabled || !evport->qos.enabled ||
+ evport->qos.group != group || evport->qos.min_rate < max_guarantee)
+ continue;
+ max_guarantee = evport->qos.min_rate;
+ }
+ }
+
+ if (max_guarantee)
+ return max_t(u32, max_guarantee / fw_max_bw_share, 1);
+
+ /* If vports min rate divider is 0 but their group has bw_share configured, then
+ * need to set bw_share for vports to minimal value.
+ */
+ if (!group_level && !max_guarantee && group->bw_share)
+ return 1;
+ return 0;
+}
+
+static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max)
+{
+ if (divider)
+ return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max);
+
+ return 0;
+}
+
+static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false);
+ struct mlx5_vport *evport;
+ unsigned long i;
+ u32 bw_share;
+ int err;
+
+ mlx5_esw_for_each_vport(esw, i, evport) {
+ if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
+ continue;
+ bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);
+
+ if (bw_share == evport->qos.bw_share)
+ continue;
+
+ err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack);
+ if (err)
+ return err;
+
+ evport->qos.bw_share = bw_share;
+ }
+
+ return 0;
+}
+
+static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider,
+ struct netlink_ext_ack *extack)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ struct mlx5_esw_rate_group *group;
+ u32 bw_share;
+ int err;
+
+ list_for_each_entry(group, &esw->qos.groups, list) {
+ bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share);
+
+ if (bw_share == group->bw_share)
+ continue;
+
+ err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack);
+ if (err)
+ return err;
+
+ group->bw_share = bw_share;
+
+ /* All the group's vports need to be set with default bw_share
+ * to enable them with QOS
+ */
+ err = esw_qos_normalize_vports_min_rate(esw, group, extack);
+
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
+ struct mlx5_vport *evport,
+ u32 min_rate,
+ struct netlink_ext_ack *extack)
+{
+ u32 fw_max_bw_share, previous_min_rate;
+ bool min_rate_supported;
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
+ fw_max_bw_share >= MLX5_MIN_BW_SHARE;
+ if (min_rate && !min_rate_supported)
+ return -EOPNOTSUPP;
+ if (min_rate == evport->qos.min_rate)
+ return 0;
+
+ previous_min_rate = evport->qos.min_rate;
+ evport->qos.min_rate = min_rate;
+ err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
+ if (err)
+ evport->qos.min_rate = previous_min_rate;
+
+ return err;
+}
+
+int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
+ struct mlx5_vport *evport,
+ u32 max_rate,
+ struct netlink_ext_ack *extack)
+{
+ u32 act_max_rate = max_rate;
+ bool max_rate_supported;
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
+
+ if (max_rate && !max_rate_supported)
+ return -EOPNOTSUPP;
+ if (max_rate == evport->qos.max_rate)
+ return 0;
+
+ /* If parent group has rate limit need to set to group
+ * value when new max rate is 0.
+ */
+ if (evport->qos.group && !max_rate)
+ act_max_rate = evport->qos.group->max_rate;
+
+ err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack);
+
+ if (!err)
+ evport->qos.max_rate = max_rate;
+
+ return err;
+}
+
+static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group,
+ u32 min_rate, struct netlink_ext_ack *extack)
+{
+ u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
+ struct mlx5_core_dev *dev = esw->dev;
+ u32 previous_min_rate, divider;
+ int err;
+
+ if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE))
+ return -EOPNOTSUPP;
+
+ if (min_rate == group->min_rate)
+ return 0;
+
+ previous_min_rate = group->min_rate;
+ group->min_rate = min_rate;
+ divider = esw_qos_calculate_min_rate_divider(esw, group, true);
+ err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
+ if (err) {
+ group->min_rate = previous_min_rate;
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed");
+
+ /* Attempt restoring previous configuration */
+ divider = esw_qos_calculate_min_rate_divider(esw, group, true);
+ if (esw_qos_normalize_groups_min_rate(esw, divider, extack))
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed");
+ }
+
+ return err;
+}
+
+static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ u32 max_rate, struct netlink_ext_ack *extack)
+{
+ struct mlx5_vport *vport;
+ unsigned long i;
+ int err;
+
+ if (group->max_rate == max_rate)
+ return 0;
+
+ err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack);
+ if (err)
+ return err;
+
+ group->max_rate = max_rate;
+
+ /* Any unlimited vports in the group should be set
+ * with the value of the group.
+ */
+ mlx5_esw_for_each_vport(esw, i, vport) {
+ if (!vport->enabled || !vport->qos.enabled ||
+ vport->qos.group != group || vport->qos.max_rate)
+ continue;
+
+ err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack,
+ "E-Switch vport implicit rate limit setting failed");
+ }
+
+ return err;
+}
+
+static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ u32 max_rate, u32 bw_share)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_esw_rate_group *group = vport->qos.group;
+ struct mlx5_core_dev *dev = esw->dev;
+ u32 parent_tsar_ix;
+ void *vport_elem;
+ int err;
+
+ parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+ vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
+ MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+
+ err = mlx5_create_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ sched_ctx,
+ &vport->qos.esw_tsar_ix);
+ if (err) {
+ esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
+ vport->vport, err);
+ return err;
+ }
+
+ return 0;
+}
+
+static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *curr_group,
+ struct mlx5_esw_rate_group *new_group,
+ struct netlink_ext_ack *extack)
+{
+ u32 max_rate;
+ int err;
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ vport->qos.esw_tsar_ix);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
+ return err;
+ }
+
+ vport->qos.group = new_group;
+ max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;
+
+ /* If vport is unlimited, we set the group's value.
+ * Therefore, if the group is limited it will apply to
+ * the vport as well and if not, vport will remain unlimited.
+ */
+ err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
+ goto err_sched;
+ }
+
+ return 0;
+
+err_sched:
+ vport->qos.group = curr_group;
+ max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
+ if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
+ esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
+ vport->vport);
+
+ return err;
+}
+
+static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *new_group, *curr_group;
+ int err;
+
+ if (!vport->enabled)
+ return -EINVAL;
+
+ curr_group = vport->qos.group;
+ new_group = group ?: esw->qos.group0;
+ if (curr_group == new_group)
+ return 0;
+
+ err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
+ if (err)
+ return err;
+
+ /* Recalculate bw share weights of old and new groups */
+ if (vport->qos.bw_share) {
+ esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
+ esw_qos_normalize_vports_min_rate(esw, new_group, extack);
+ }
+
+ return 0;
+}
+
+static struct mlx5_esw_rate_group *
+esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
+{
+ u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_esw_rate_group *group;
+ u32 divider;
+ int err;
+
+ if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ group = kzalloc(sizeof(*group), GFP_KERNEL);
+ if (!group)
+ return ERR_PTR(-ENOMEM);
+
+ MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
+ esw->qos.root_tsar_ix);
+ err = mlx5_create_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ tsar_ctx,
+ &group->tsar_ix);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed");
+ goto err_sched_elem;
+ }
+
+ list_add_tail(&group->list, &esw->qos.groups);
+
+ divider = esw_qos_calculate_min_rate_divider(esw, group, true);
+ if (divider) {
+ err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed");
+ goto err_min_rate;
+ }
+ }
+ trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix);
+
+ return group;
+
+err_min_rate:
+ list_del(&group->list);
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ group->tsar_ix);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed");
+err_sched_elem:
+ kfree(group);
+ return ERR_PTR(err);
+}
+
+static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ u32 divider;
+ int err;
+
+ list_del(&group->list);
+
+ divider = esw_qos_calculate_min_rate_divider(esw, NULL, true);
+ err = esw_qos_normalize_groups_min_rate(esw, divider, extack);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed");
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ group->tsar_ix);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed");
+
+ trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix);
+ kfree(group);
+ return err;
+}
+
+static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
+{
+ switch (type) {
+ case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
+ return MLX5_CAP_QOS(dev, esw_element_type) &
+ ELEMENT_TYPE_CAP_MASK_TASR;
+ case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
+ return MLX5_CAP_QOS(dev, esw_element_type) &
+ ELEMENT_TYPE_CAP_MASK_VPORT;
+ case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
+ return MLX5_CAP_QOS(dev, esw_element_type) &
+ ELEMENT_TYPE_CAP_MASK_VPORT_TC;
+ case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
+ return MLX5_CAP_QOS(dev, esw_element_type) &
+ ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
+ }
+ return false;
+}
+
+void mlx5_esw_qos_create(struct mlx5_eswitch *esw)
+{
+ u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_core_dev *dev = esw->dev;
+ __be32 *attr;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
+ return;
+
+ if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
+ return;
+
+ mutex_lock(&esw->state_lock);
+ if (esw->qos.enabled)
+ goto unlock;
+
+ MLX5_SET(scheduling_context, tsar_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+
+ attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
+ *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
+
+ err = mlx5_create_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ tsar_ctx,
+ &esw->qos.root_tsar_ix);
+ if (err) {
+ esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err);
+ goto unlock;
+ }
+
+ INIT_LIST_HEAD(&esw->qos.groups);
+ if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
+ esw->qos.group0 = esw_qos_create_rate_group(esw, NULL);
+ if (IS_ERR(esw->qos.group0)) {
+ esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n",
+ PTR_ERR(esw->qos.group0));
+ goto err_group0;
+ }
+ }
+ esw->qos.enabled = true;
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return;
+
+err_group0:
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ esw->qos.root_tsar_ix);
+ if (err)
+ esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
+ mutex_unlock(&esw->state_lock);
+}
+
+void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw)
+{
+ struct devlink *devlink = priv_to_devlink(esw->dev);
+ int err;
+
+ devlink_rate_nodes_destroy(devlink);
+ mutex_lock(&esw->state_lock);
+ if (!esw->qos.enabled)
+ goto unlock;
+
+ if (esw->qos.group0)
+ esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL);
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ esw->qos.root_tsar_ix);
+ if (err)
+ esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err);
+
+ esw->qos.enabled = false;
+unlock:
+ mutex_unlock(&esw->state_lock);
+}
+
+int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+ u32 max_rate, u32 bw_share)
+{
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ if (!esw->qos.enabled)
+ return 0;
+
+ if (vport->qos.enabled)
+ return -EEXIST;
+
+ vport->qos.group = esw->qos.group0;
+
+ err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
+ if (!err) {
+ vport->qos.enabled = true;
+ trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate);
+ }
+
+ return err;
+}
+
+void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+ int err;
+
+ lockdep_assert_held(&esw->state_lock);
+ if (!esw->qos.enabled || !vport->qos.enabled)
+ return;
+ WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
+ "Disabling QoS on port before detaching it from group");
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ vport->qos.esw_tsar_ix);
+ if (err)
+ esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
+ vport->vport, err);
+
+ vport->qos.enabled = false;
+ trace_mlx5_esw_vport_qos_destroy(vport);
+}
+
+int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps)
+{
+ u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_vport *vport;
+ u32 bitmask;
+
+ vport = mlx5_eswitch_get_vport(esw, vport_num);
+ if (IS_ERR(vport))
+ return PTR_ERR(vport);
+
+ if (!vport->qos.enabled)
+ return -EOPNOTSUPP;
+
+ MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
+ bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+
+ return mlx5_modify_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ ctx,
+ vport->qos.esw_tsar_ix,
+ bitmask);
+}
+
+#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */
+
+/* Converts bytes per second value passed in a pointer into megabits per
+ * second, rewriting last. If converted rate exceed link speed or is not a
+ * fraction of Mbps - returns error.
+ */
+static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name,
+ u64 *rate, struct netlink_ext_ack *extack)
+{
+ u32 link_speed_max, reminder;
+ u64 value;
+ int err;
+
+ err = mlx5e_port_max_linkspeed(mdev, &link_speed_max);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed");
+ return err;
+ }
+
+ value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &reminder);
+ if (reminder) {
+ pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n",
+ name, *rate);
+ NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps");
+ return -EINVAL;
+ }
+
+ if (value > link_speed_max) {
+ pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n",
+ name, value, link_speed_max);
+ NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed");
+ return -EINVAL;
+ }
+
+ *rate = value;
+ return 0;
+}
+
+/* Eswitch devlink rate API */
+
+int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack)
+{
+ struct mlx5_vport *vport = priv;
+ struct mlx5_eswitch *esw;
+ int err;
+
+ esw = vport->dev->priv.eswitch;
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+
+ err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack);
+ if (err)
+ return err;
+
+ mutex_lock(&esw->state_lock);
+ err = mlx5_esw_qos_set_vport_min_rate(esw, vport, tx_share, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack)
+{
+ struct mlx5_vport *vport = priv;
+ struct mlx5_eswitch *esw;
+ int err;
+
+ esw = vport->dev->priv.eswitch;
+ if (!mlx5_esw_allowed(esw))
+ return -EPERM;
+
+ err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack);
+ if (err)
+ return err;
+
+ mutex_lock(&esw->state_lock);
+ err = mlx5_esw_qos_set_vport_max_rate(esw, vport, tx_max, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_esw_rate_group *group = priv;
+ int err;
+
+ err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack);
+ if (err)
+ return err;
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_set_group_min_rate(esw, group, tx_share, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink);
+ struct mlx5_eswitch *esw = dev->priv.eswitch;
+ struct mlx5_esw_rate_group *group = priv;
+ int err;
+
+ err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack);
+ if (err)
+ return err;
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_set_group_max_rate(esw, group, tx_max, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *group;
+ struct mlx5_eswitch *esw;
+ int err = 0;
+
+ esw = mlx5_devlink_eswitch_get(rate_node->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ mutex_lock(&esw->state_lock);
+ if (esw->mode != MLX5_ESWITCH_OFFLOADS) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Rate node creation supported only in switchdev mode");
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
+
+ group = esw_qos_create_rate_group(esw, extack);
+ if (IS_ERR(group)) {
+ err = PTR_ERR(group);
+ goto unlock;
+ }
+
+ *priv = group;
+unlock:
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *group = priv;
+ struct mlx5_eswitch *esw;
+ int err;
+
+ esw = mlx5_devlink_eswitch_get(rate_node->devlink);
+ if (IS_ERR(esw))
+ return PTR_ERR(esw);
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_destroy_rate_group(esw, group, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_vport_update_group(esw, vport, group, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
+ struct devlink_rate *parent,
+ void *priv, void *parent_priv,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *group;
+ struct mlx5_vport *vport = priv;
+
+ if (!parent)
+ return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
+ vport, NULL, extack);
+
+ group = parent_priv;
+ return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
new file mode 100644
index 000000000000..28451abe2d2f
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#ifndef __MLX5_ESW_QOS_H__
+#define __MLX5_ESW_QOS_H__
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
+ struct mlx5_vport *evport,
+ u32 min_rate,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
+ struct mlx5_vport *evport,
+ u32 max_rate,
+ struct netlink_ext_ack *extack);
+void mlx5_esw_qos_create(struct mlx5_eswitch *esw);
+void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw);
+int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+ u32 max_rate, u32 bw_share);
+void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
+int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv,
+ u64 tx_share, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv,
+ u64 tx_max, struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
+ struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
+ struct devlink_rate *parent,
+ void *priv, void *parent_priv,
+ struct netlink_ext_ack *extack);
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h
deleted file mode 100644
index 2a3f4be10030..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2021 Mellanox Technologies. */
-
-#ifndef __MLX5_EN_TC_SAMPLE_H__
-#define __MLX5_EN_TC_SAMPLE_H__
-
-#include "en.h"
-#include "eswitch.h"
-
-struct mlx5e_priv;
-struct mlx5_flow_attr;
-struct mlx5_esw_psample;
-
-struct mlx5_sample_attr {
- u32 group_num;
- u32 rate;
- u32 trunc_size;
- u32 restore_obj_id;
- u32 sampler_id;
- struct mlx5_flow_table *sample_default_tbl;
- struct mlx5_sample_flow *sample_flow;
-};
-
-void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj);
-
-struct mlx5_flow_handle *
-mlx5_esw_sample_offload(struct mlx5_esw_psample *sample_priv,
- struct mlx5_flow_spec *spec,
- struct mlx5_flow_attr *attr);
-
-void
-mlx5_esw_sample_unoffload(struct mlx5_esw_psample *sample_priv,
- struct mlx5_flow_handle *rule,
- struct mlx5_flow_attr *attr);
-
-struct mlx5_esw_psample *
-mlx5_esw_sample_init(struct mlx5e_priv *priv);
-
-void
-mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample);
-
-#endif /* __MLX5_EN_TC_SAMPLE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 2fde9f59e8b4..ec136b499204 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -38,6 +38,7 @@
#include <linux/mlx5/mpfs.h>
#include "esw/acl/lgcy.h"
#include "esw/legacy.h"
+#include "esw/qos.h"
#include "mlx5_core.h"
#include "lib/eq.h"
#include "eswitch.h"
@@ -740,201 +741,6 @@ static void esw_vport_change_handler(struct work_struct *work)
mutex_unlock(&esw->state_lock);
}
-static bool element_type_supported(struct mlx5_eswitch *esw, int type)
-{
- const struct mlx5_core_dev *dev = esw->dev;
-
- switch (type) {
- case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
- return MLX5_CAP_QOS(dev, esw_element_type) &
- ELEMENT_TYPE_CAP_MASK_TASR;
- case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
- return MLX5_CAP_QOS(dev, esw_element_type) &
- ELEMENT_TYPE_CAP_MASK_VPORT;
- case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
- return MLX5_CAP_QOS(dev, esw_element_type) &
- ELEMENT_TYPE_CAP_MASK_VPORT_TC;
- case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
- return MLX5_CAP_QOS(dev, esw_element_type) &
- ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
- }
- return false;
-}
-
-/* Vport QoS management */
-static void esw_create_tsar(struct mlx5_eswitch *esw)
-{
- u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
- struct mlx5_core_dev *dev = esw->dev;
- __be32 *attr;
- int err;
-
- if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
- return;
-
- if (!element_type_supported(esw, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
- return;
-
- if (esw->qos.enabled)
- return;
-
- MLX5_SET(scheduling_context, tsar_ctx, element_type,
- SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
-
- attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
- *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
-
- err = mlx5_create_scheduling_element_cmd(dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- tsar_ctx,
- &esw->qos.root_tsar_id);
- if (err) {
- esw_warn(esw->dev, "E-Switch create TSAR failed (%d)\n", err);
- return;
- }
-
- esw->qos.enabled = true;
-}
-
-static void esw_destroy_tsar(struct mlx5_eswitch *esw)
-{
- int err;
-
- if (!esw->qos.enabled)
- return;
-
- err = mlx5_destroy_scheduling_element_cmd(esw->dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- esw->qos.root_tsar_id);
- if (err)
- esw_warn(esw->dev, "E-Switch destroy TSAR failed (%d)\n", err);
-
- esw->qos.enabled = false;
-}
-
-static int esw_vport_enable_qos(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport,
- u32 initial_max_rate, u32 initial_bw_share)
-{
- u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
- struct mlx5_core_dev *dev = esw->dev;
- void *vport_elem;
- int err = 0;
-
- if (!esw->qos.enabled)
- return 0;
-
- if (vport->qos.enabled)
- return -EEXIST;
-
- MLX5_SET(scheduling_context, sched_ctx, element_type,
- SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
- vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
- element_attributes);
- MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
- MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
- esw->qos.root_tsar_id);
- MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
- initial_max_rate);
- MLX5_SET(scheduling_context, sched_ctx, bw_share, initial_bw_share);
-
- err = mlx5_create_scheduling_element_cmd(dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- sched_ctx,
- &vport->qos.esw_tsar_ix);
- if (err) {
- esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
- vport->vport, err);
- return err;
- }
-
- vport->qos.enabled = true;
- return 0;
-}
-
-static void esw_vport_disable_qos(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport)
-{
- int err;
-
- if (!vport->qos.enabled)
- return;
-
- err = mlx5_destroy_scheduling_element_cmd(esw->dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- vport->qos.esw_tsar_ix);
- if (err)
- esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
- vport->vport, err);
-
- vport->qos.enabled = false;
-}
-
-static int esw_vport_qos_config(struct mlx5_eswitch *esw,
- struct mlx5_vport *vport,
- u32 max_rate, u32 bw_share)
-{
- u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
- struct mlx5_core_dev *dev = esw->dev;
- void *vport_elem;
- u32 bitmask = 0;
- int err = 0;
-
- if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
- return -EOPNOTSUPP;
-
- if (!vport->qos.enabled)
- return -EIO;
-
- MLX5_SET(scheduling_context, sched_ctx, element_type,
- SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
- vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
- element_attributes);
- MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
- MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
- esw->qos.root_tsar_id);
- MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
- max_rate);
- MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
- bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
- bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE;
-
- err = mlx5_modify_scheduling_element_cmd(dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- sched_ctx,
- vport->qos.esw_tsar_ix,
- bitmask);
- if (err) {
- esw_warn(esw->dev, "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
- vport->vport, err);
- return err;
- }
-
- return 0;
-}
-
-int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
- u32 rate_mbps)
-{
- u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
- struct mlx5_vport *vport;
-
- vport = mlx5_eswitch_get_vport(esw, vport_num);
- if (IS_ERR(vport))
- return PTR_ERR(vport);
-
- if (!vport->qos.enabled)
- return -EOPNOTSUPP;
-
- MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps);
-
- return mlx5_modify_scheduling_element_cmd(esw->dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- ctx,
- vport->qos.esw_tsar_ix,
- MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW);
-}
-
static void node_guid_gen_from_mac(u64 *node_guid, const u8 *mac)
{
((u8 *)node_guid)[7] = mac[0];
@@ -976,7 +782,7 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
return err;
/* Attach vport to the eswitch rate limiter */
- esw_vport_enable_qos(esw, vport, vport->qos.max_rate, vport->qos.bw_share);
+ mlx5_esw_qos_vport_enable(esw, vport, vport->qos.max_rate, vport->qos.bw_share);
if (mlx5_esw_is_manager_vport(esw, vport_num))
return 0;
@@ -1013,7 +819,7 @@ static void esw_vport_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport
vport_num, 1,
MLX5_VPORT_ADMIN_STATE_DOWN);
- esw_vport_disable_qos(esw, vport);
+ mlx5_esw_qos_vport_disable(esw, vport);
esw_vport_cleanup_acl(esw, vport);
}
@@ -1454,7 +1260,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs)
mlx5_eswitch_update_num_of_vfs(esw, num_vfs);
- esw_create_tsar(esw);
+ mlx5_esw_qos_create(esw);
esw->mode = mode;
@@ -1484,7 +1290,7 @@ abort:
if (mode == MLX5_ESWITCH_OFFLOADS)
mlx5_rescan_drivers(esw->dev);
- esw_destroy_tsar(esw);
+ mlx5_esw_qos_destroy(esw);
mlx5_esw_acls_ns_cleanup(esw);
return err;
}
@@ -1553,7 +1359,7 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf)
if (old_mode == MLX5_ESWITCH_OFFLOADS)
mlx5_rescan_drivers(esw->dev);
- esw_destroy_tsar(esw);
+ mlx5_esw_qos_destroy(esw);
mlx5_esw_acls_ns_cleanup(esw);
if (clear_vf)
@@ -2050,110 +1856,6 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
return err;
}
-static u32 calculate_vports_min_rate_divider(struct mlx5_eswitch *esw)
-{
- u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
- struct mlx5_vport *evport;
- u32 max_guarantee = 0;
- unsigned long i;
-
- mlx5_esw_for_each_vport(esw, i, evport) {
- if (!evport->enabled || evport->qos.min_rate < max_guarantee)
- continue;
- max_guarantee = evport->qos.min_rate;
- }
-
- if (max_guarantee)
- return max_t(u32, max_guarantee / fw_max_bw_share, 1);
- return 0;
-}
-
-static int normalize_vports_min_rate(struct mlx5_eswitch *esw)
-{
- u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
- u32 divider = calculate_vports_min_rate_divider(esw);
- struct mlx5_vport *evport;
- u32 vport_max_rate;
- u32 vport_min_rate;
- unsigned long i;
- u32 bw_share;
- int err;
-
- mlx5_esw_for_each_vport(esw, i, evport) {
- if (!evport->enabled)
- continue;
- vport_min_rate = evport->qos.min_rate;
- vport_max_rate = evport->qos.max_rate;
- bw_share = 0;
-
- if (divider)
- bw_share = MLX5_RATE_TO_BW_SHARE(vport_min_rate,
- divider,
- fw_max_bw_share);
-
- if (bw_share == evport->qos.bw_share)
- continue;
-
- err = esw_vport_qos_config(esw, evport, vport_max_rate,
- bw_share);
- if (!err)
- evport->qos.bw_share = bw_share;
- else
- return err;
- }
-
- return 0;
-}
-
-int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
- u32 max_rate, u32 min_rate)
-{
- struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
- u32 fw_max_bw_share;
- u32 previous_min_rate;
- bool min_rate_supported;
- bool max_rate_supported;
- int err = 0;
-
- if (!mlx5_esw_allowed(esw))
- return -EPERM;
- if (IS_ERR(evport))
- return PTR_ERR(evport);
-
- fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share);
- min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) &&
- fw_max_bw_share >= MLX5_MIN_BW_SHARE;
- max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit);
-
- if ((min_rate && !min_rate_supported) || (max_rate && !max_rate_supported))
- return -EOPNOTSUPP;
-
- mutex_lock(&esw->state_lock);
-
- if (min_rate == evport->qos.min_rate)
- goto set_max_rate;
-
- previous_min_rate = evport->qos.min_rate;
- evport->qos.min_rate = min_rate;
- err = normalize_vports_min_rate(esw);
- if (err) {
- evport->qos.min_rate = previous_min_rate;
- goto unlock;
- }
-
-set_max_rate:
- if (max_rate == evport->qos.max_rate)
- goto unlock;
-
- err = esw_vport_qos_config(esw, evport, max_rate, evport->qos.bw_share);
- if (!err)
- evport->qos.max_rate = max_rate;
-
-unlock:
- mutex_unlock(&esw->state_lock);
- return err;
-}
-
int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
u16 vport_num,
struct ifla_vf_stats *vf_stats)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index d3a5ff4f6140..2c7444101bb9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -46,7 +46,7 @@
#include "lib/fs_chains.h"
#include "sf/sf.h"
#include "en/tc_ct.h"
-#include "esw/sample.h"
+#include "en/tc/sample.h"
enum mlx5_mapped_obj_type {
MLX5_MAPPED_OBJ_CHAIN,
@@ -61,6 +61,7 @@ struct mlx5_mapped_obj {
u32 group_id;
u32 rate;
u32 trunc_size;
+ u32 tunnel_id;
} sample;
};
};
@@ -75,11 +76,6 @@ struct mlx5_mapped_obj {
#define MLX5_MAX_MC_PER_VPORT(dev) \
(1 << MLX5_CAP_GEN(dev, log_max_current_mc_list))
-#define MLX5_MIN_BW_SHARE 1
-
-#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \
- min_t(u32, max_t(u32, (rate) / (divider), MLX5_MIN_BW_SHARE), limit)
-
#define mlx5_esw_has_fwd_fdb(dev) \
MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table)
@@ -181,6 +177,7 @@ struct mlx5_vport {
u32 bw_share;
u32 min_rate;
u32 max_rate;
+ struct mlx5_esw_rate_group *group;
} qos;
u16 vport;
@@ -309,7 +306,9 @@ struct mlx5_eswitch {
struct {
bool enabled;
- u32 root_tsar_id;
+ u32 root_tsar_ix;
+ struct mlx5_esw_rate_group *group0;
+ struct list_head groups; /* Protected by esw->state_lock */
} qos;
struct mlx5_esw_bridge_offloads *br_offloads;
@@ -335,8 +334,7 @@ int mlx5_esw_offloads_vport_metadata_set(struct mlx5_eswitch *esw, bool enable);
u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw);
void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata);
-int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
- u32 rate_mbps);
+int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps);
/* E-Switch API */
int mlx5_eswitch_init(struct mlx5_core_dev *dev);
@@ -359,6 +357,10 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
u16 vport_num, bool setting);
int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
u32 max_rate, u32 min_rate);
+int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack);
int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting);
int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting);
int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
@@ -469,7 +471,6 @@ struct mlx5_esw_flow_attr {
} dests[MLX5_MAX_FLOW_FWD_VPORTS];
struct mlx5_rx_tun_attr *rx_tun_attr;
struct mlx5_pkt_reformat *decap_pkt_reformat;
- struct mlx5_sample_attr *sample;
};
int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 49c7bf94332c..0d461e38add3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -187,12 +187,12 @@ esw_cleanup_decap_indir(struct mlx5_eswitch *esw,
static int
esw_setup_sampler_dest(struct mlx5_flow_destination *dest,
struct mlx5_flow_act *flow_act,
- struct mlx5_esw_flow_attr *esw_attr,
+ struct mlx5_flow_attr *attr,
int i)
{
flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
- dest[i].sampler_id = esw_attr->sample->sampler_id;
+ dest[i].sampler_id = attr->sample_attr->sampler_id;
return 0;
}
@@ -435,7 +435,7 @@ esw_setup_dests(struct mlx5_flow_destination *dest,
attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
if (attr->flags & MLX5_ESW_ATTR_FLAG_SAMPLE) {
- esw_setup_sampler_dest(dest, flow_act, esw_attr, *i);
+ esw_setup_sampler_dest(dest, flow_act, attr, *i);
(*i)++;
} else if (attr->dest_ft) {
esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i);
@@ -540,10 +540,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
flow_act.modify_hdr = attr->modify_hdr;
- /* esw_attr->sample is allocated only when there is a sample action */
- if (esw_attr->sample && esw_attr->sample->sample_default_tbl) {
- fdb = esw_attr->sample->sample_default_tbl;
- } else if (split) {
+ if (split) {
fwd_attr.chain = attr->chain;
fwd_attr.prio = attr->prio;
fwd_attr.vport = esw_attr->in_rep->vport;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index fce3cbae0b99..f3638d09ba77 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -865,7 +865,8 @@ struct mlx5_ifc_qos_cap_bits {
u8 nic_bw_share[0x1];
u8 nic_rate_limit[0x1];
u8 packet_pacing_uid[0x1];
- u8 reserved_at_c[0x14];
+ u8 log_esw_max_sched_depth[0x4];
+ u8 reserved_at_10[0x10];
u8 reserved_at_20[0xb];
u8 log_max_qos_nic_queue_group[0x5];