diff options
author | Jason Gunthorpe <jgg@nvidia.com> | 2023-02-17 23:24:14 +0300 |
---|---|---|
committer | Jason Gunthorpe <jgg@nvidia.com> | 2023-02-17 23:24:14 +0300 |
commit | 5ef17179da7b779b3029d9a7d3871ba09279d56c (patch) | |
tree | dc687df8392e0c635a282ccc865b3851731cd548 /drivers/net/ethernet | |
parent | a77a52385e9a761f896a88a4162e69fb7ccafe3f (diff) | |
parent | 594cac11ab6a1be8022a3c96d181dde7cfb0b8cf (diff) | |
download | linux-5ef17179da7b779b3029d9a7d3871ba09279d56c.tar.xz |
Merge mlx5-next into rdma.git for-next
Synchronize the shared mlx5 branch with net:
- From Jiri: fixe a deadlock in mlx5_ib's netdev notifier unregister.
- From Mark and Patrisious: add IPsec RoCEv2 support.
- From Or: Rely on firmware to get special mkeys
* branch mlx5-next:
RDMA/mlx5: Use query_special_contexts for mkeys
net/mlx5e: Use query_special_contexts for mkeys
net/mlx5: Change define name for 0x100 lkey value
net/mlx5: Expose bits for querying special mkeys
net/mlx5: Configure IPsec steering for egress RoCEv2 traffic
net/mlx5: Configure IPsec steering for ingress RoCEv2 traffic
net/mlx5: Add IPSec priorities in RDMA namespaces
net/mlx5: Implement new destination type TABLE_TYPE
net/mlx5: Introduce new destination type TABLE_TYPE
RDMA/mlx5: Track netdev to avoid deadlock during netdev notifier unregister
net/mlx5e: Propagate an internal event in case uplink netdev changes
net/mlx5e: Fix trap event handling
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers/net/ethernet')
16 files changed, 568 insertions, 36 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index cd4a1ab0ea78..8415a44fb965 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -97,7 +97,7 @@ mlx5_core-$(CONFIG_MLX5_EN_MACSEC) += en_accel/macsec.o en_accel/macsec_fs.o \ mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ en_accel/ipsec_stats.o en_accel/ipsec_fs.o \ - en_accel/ipsec_offload.o + en_accel/ipsec_offload.o lib/ipsec_fs_roce.o mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/ktls_stats.o \ en_accel/fs_tcp.o en_accel/ktls.o en_accel/ktls_txrx.o \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 5bd83c0275f8..c06baa317fbd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -263,9 +263,10 @@ static int mlx5_devlink_trap_action_set(struct devlink *devlink, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_devlink_trap_event_ctx trap_event_ctx; enum devlink_trap_action action_orig; struct mlx5_devlink_trap *dl_trap; - int err = 0; + int err; if (is_mdev_switchdev_mode(dev)) { NL_SET_ERR_MSG_MOD(extack, "Devlink traps can't be set in switchdev mode"); @@ -275,26 +276,25 @@ static int mlx5_devlink_trap_action_set(struct devlink *devlink, dl_trap = mlx5_find_trap_by_id(dev, trap->id); if (!dl_trap) { mlx5_core_err(dev, "Devlink trap: Set action on invalid trap id 0x%x", trap->id); - err = -EINVAL; - goto out; + return -EINVAL; } - if (action != DEVLINK_TRAP_ACTION_DROP && action != DEVLINK_TRAP_ACTION_TRAP) { - err = -EOPNOTSUPP; - goto out; - } + if (action != DEVLINK_TRAP_ACTION_DROP && action != DEVLINK_TRAP_ACTION_TRAP) + return -EOPNOTSUPP; if (action == dl_trap->trap.action) - goto out; + return 0; action_orig = dl_trap->trap.action; dl_trap->trap.action = action; + trap_event_ctx.trap = &dl_trap->trap; + trap_event_ctx.err = 0; err = mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_TYPE_TRAP, - &dl_trap->trap); - if (err) + &trap_event_ctx); + if (err == NOTIFY_BAD) dl_trap->trap.action = action_orig; -out: - return err; + + return trap_event_ctx.err; } static const struct devlink_ops mlx5_devlink_ops = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index fd033df24856..b84cb70eb3ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -24,6 +24,11 @@ struct mlx5_devlink_trap { struct list_head list; }; +struct mlx5_devlink_trap_event_ctx { + struct mlx5_trap_ctx *trap; + int err; +}; + struct mlx5_core_dev; void mlx5_devlink_trap_report(struct mlx5_core_dev *dev, int trap_id, struct sk_buff *skb, struct devlink_port *dl_port); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index 2732128e7a6e..6d73127b7217 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -275,6 +275,10 @@ const char *parse_fs_dst(struct trace_seq *p, fs_dest_range_field_to_str(dst->range.field), dst->range.min, dst->range.max); break; + case MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE: + trace_seq_printf(p, "flow_table_type=%u id:%u\n", dst->ft->type, + dst->ft->id); + break; case MLX5_FLOW_DESTINATION_TYPE_NONE: trace_seq_printf(p, "none\n"); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 379c6dc9a3be..d2149f0138d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -87,6 +87,7 @@ enum { MLX5E_ACCEL_FS_POL_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, MLX5E_ACCEL_FS_ESP_FT_LEVEL, MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL, + MLX5E_ACCEL_FS_ESP_FT_ROCE_LEVEL, #endif }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index a92e19c4c499..35992866f83b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -84,6 +84,7 @@ enum mlx5_ipsec_cap { MLX5_IPSEC_CAP_CRYPTO = 1 << 0, MLX5_IPSEC_CAP_ESN = 1 << 1, MLX5_IPSEC_CAP_PACKET_OFFLOAD = 1 << 2, + MLX5_IPSEC_CAP_ROCE = 1 << 3, }; struct mlx5e_priv; @@ -141,6 +142,7 @@ struct mlx5e_ipsec { struct mlx5e_ipsec_tx *tx; struct mlx5e_ipsec_aso *aso; struct notifier_block nb; + struct mlx5_ipsec_fs *roce; }; struct mlx5e_ipsec_esn_state { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 9f19f4b59a70..8cc3d3794ec7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -6,6 +6,7 @@ #include "en/fs.h" #include "ipsec.h" #include "fs_core.h" +#include "lib/ipsec_fs_roce.h" #define NUM_IPSEC_FTE BIT(15) @@ -166,7 +167,8 @@ out: return err; } -static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx) +static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, u32 family) { mlx5_del_flow_rules(rx->pol.rule); mlx5_destroy_flow_group(rx->pol.group); @@ -179,6 +181,8 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx) mlx5_del_flow_rules(rx->status.rule); mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr); mlx5_destroy_flow_table(rx->ft.status); + + mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family); } static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, @@ -186,18 +190,35 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, { struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(ipsec->fs, false); struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(ipsec->fs, false); + struct mlx5_flow_destination default_dest; struct mlx5_flow_destination dest[2]; struct mlx5_flow_table *ft; int err; + default_dest = mlx5_ttc_get_default_dest(ttc, family2tt(family)); + err = mlx5_ipsec_fs_roce_rx_create(mdev, ipsec->roce, ns, &default_dest, + family, MLX5E_ACCEL_FS_ESP_FT_ROCE_LEVEL, + MLX5E_NIC_PRIO); + if (err) + return err; + ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL, MLX5E_NIC_PRIO, 1); - if (IS_ERR(ft)) - return PTR_ERR(ft); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_fs_ft_status; + } rx->ft.status = ft; - dest[0] = mlx5_ttc_get_default_dest(ttc, family2tt(family)); + ft = mlx5_ipsec_fs_roce_ft_get(ipsec->roce, family); + if (ft) { + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[0].ft = ft; + } else { + dest[0] = default_dest; + } + dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[1].counter_id = mlx5_fc_id(rx->fc->cnt); err = ipsec_status_rule(mdev, rx, dest); @@ -245,6 +266,8 @@ err_fs_ft: mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr); err_add: mlx5_destroy_flow_table(rx->ft.status); +err_fs_ft_status: + mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family); return err; } @@ -304,14 +327,15 @@ static void rx_ft_put(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, mlx5_ttc_fwd_default_dest(ttc, family2tt(family)); /* remove FT */ - rx_destroy(mdev, rx); + rx_destroy(mdev, ipsec, rx, family); out: mutex_unlock(&rx->ft.mutex); } /* IPsec TX flow steering */ -static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx) +static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx, + struct mlx5_ipsec_fs *roce) { struct mlx5_flow_destination dest = {}; struct mlx5_flow_table *ft; @@ -334,8 +358,15 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx) err = ipsec_miss_create(mdev, tx->ft.pol, &tx->pol, &dest); if (err) goto err_pol_miss; + + err = mlx5_ipsec_fs_roce_tx_create(mdev, roce, tx->ft.pol); + if (err) + goto err_roce; return 0; +err_roce: + mlx5_del_flow_rules(tx->pol.rule); + mlx5_destroy_flow_group(tx->pol.group); err_pol_miss: mlx5_destroy_flow_table(tx->ft.pol); err_pol_ft: @@ -353,9 +384,10 @@ static struct mlx5e_ipsec_tx *tx_ft_get(struct mlx5_core_dev *mdev, if (tx->ft.refcnt) goto skip; - err = tx_create(mdev, tx); + err = tx_create(mdev, tx, ipsec->roce); if (err) goto out; + skip: tx->ft.refcnt++; out: @@ -374,6 +406,7 @@ static void tx_ft_put(struct mlx5e_ipsec *ipsec) if (tx->ft.refcnt) goto out; + mlx5_ipsec_fs_roce_tx_destroy(ipsec->roce); mlx5_del_flow_rules(tx->pol.rule); mlx5_destroy_flow_group(tx->pol.group); mlx5_destroy_flow_table(tx->ft.pol); @@ -1008,6 +1041,9 @@ void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec) if (!ipsec->tx) return; + if (mlx5_ipsec_device_caps(ipsec->mdev) & MLX5_IPSEC_CAP_ROCE) + mlx5_ipsec_fs_roce_cleanup(ipsec->roce); + ipsec_fs_destroy_counters(ipsec); mutex_destroy(&ipsec->tx->ft.mutex); WARN_ON(ipsec->tx->ft.refcnt); @@ -1024,6 +1060,7 @@ void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec) int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec) { + struct mlx5_core_dev *mdev = ipsec->mdev; struct mlx5_flow_namespace *ns; int err = -ENOMEM; @@ -1053,6 +1090,9 @@ int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec) mutex_init(&ipsec->rx_ipv6->ft.mutex); ipsec->tx->ns = ns; + if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ROCE) + ipsec->roce = mlx5_ipsec_fs_roce_init(mdev); + return 0; err_counters: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 8e3614218fc4..b5da6f71ee88 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -42,6 +42,11 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) MLX5_CAP_FLOWTABLE_NIC_RX(mdev, decap)) caps |= MLX5_IPSEC_CAP_PACKET_OFFLOAD; + if (mlx5_get_roce_state(mdev) && + MLX5_CAP_GEN_2(mdev, flow_table_type_2_type) & MLX5_FT_NIC_RX_2_NIC_RX_RDMA && + MLX5_CAP_GEN_2(mdev, flow_table_type_2_type) & MLX5_FT_NIC_TX_RDMA_2_NIC_TX) + caps |= MLX5_IPSEC_CAP_ROCE; + if (!caps) return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index cff5f2e29e1e..dd9f44dd79cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -179,17 +179,21 @@ static void mlx5e_disable_async_events(struct mlx5e_priv *priv) static int blocking_event(struct notifier_block *nb, unsigned long event, void *data) { struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, blocking_events_nb); + struct mlx5_devlink_trap_event_ctx *trap_event_ctx = data; int err; switch (event) { case MLX5_DRIVER_EVENT_TYPE_TRAP: - err = mlx5e_handle_trap_event(priv, data); + err = mlx5e_handle_trap_event(priv, trap_event_ctx->trap); + if (err) { + trap_event_ctx->err = err; + return NOTIFY_BAD; + } break; default: - netdev_warn(priv->netdev, "Sync event: Unknown event %ld\n", event); - err = -EINVAL; + return NOTIFY_DONE; } - return err; + return NOTIFY_OK; } static void mlx5e_enable_blocking_events(struct mlx5e_priv *priv) @@ -661,6 +665,26 @@ static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq) mlx5e_rq_shampo_hd_free(rq); } +static __be32 mlx5e_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; + int res; + + if (!MLX5_CAP_GEN(dev, terminate_scatter_list_mkey)) + return MLX5_TERMINATE_SCATTER_LIST_LKEY; + + MLX5_SET(query_special_contexts_in, in, opcode, + MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); + res = mlx5_cmd_exec_inout(dev, query_special_contexts, in, out); + if (res) + return MLX5_TERMINATE_SCATTER_LIST_LKEY; + + res = MLX5_GET(query_special_contexts_out, out, + terminate_scatter_list_mkey); + return cpu_to_be32(res); +} + static int mlx5e_alloc_rq(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, struct mlx5e_rq_param *rqp, @@ -825,7 +849,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, /* check if num_frags is not a pow of two */ if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) { wqe->data[f].byte_count = 0; - wqe->data[f].lkey = cpu_to_be32(MLX5_INVALID_LKEY); + wqe->data[f].lkey = mlx5e_get_terminate_scatter_list_mkey(mdev); wqe->data[f].addr = 0; } } @@ -5957,7 +5981,7 @@ static int mlx5e_probe(struct auxiliary_device *adev, } mlx5e_dcbnl_init_app(priv); - mlx5_uplink_netdev_set(mdev, netdev); + mlx5_core_uplink_netdev_set(mdev, netdev); mlx5e_params_print_info(mdev, &priv->channels.params); return 0; @@ -5977,6 +6001,7 @@ static void mlx5e_remove(struct auxiliary_device *adev) struct mlx5e_priv *priv = auxiliary_get_drvdata(adev); pm_message_t state = {}; + mlx5_core_uplink_netdev_set(priv->mdev, NULL); mlx5e_dcbnl_delete_app(priv); unregister_netdev(priv->netdev); mlx5e_suspend(adev, state); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 9459e56ee90a..718cf09c28ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -424,6 +424,7 @@ int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_b return blocking_notifier_chain_register(&events->sw_nh, nb); } +EXPORT_SYMBOL(mlx5_blocking_notifier_register); int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) { @@ -431,6 +432,7 @@ int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier return blocking_notifier_chain_unregister(&events->sw_nh, nb); } +EXPORT_SYMBOL(mlx5_blocking_notifier_unregister); int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event, void *data) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 32d4c967469c..a3a9cc6f15ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -653,6 +653,12 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, id = dst->dest_attr.sampler_id; ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; break; + case MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE: + MLX5_SET(dest_format_struct, in_dests, + destination_table_type, dst->dest_attr.ft->type); + id = dst->dest_attr.ft->id; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TABLE_TYPE; + break; default: id = dst->dest_attr.tir_num; ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TIR; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 5a85d8c1e797..cb28cdb59c17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -111,8 +111,10 @@ #define ETHTOOL_PRIO_NUM_LEVELS 1 #define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) -/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy */ -#define KERNEL_NIC_PRIO_NUM_LEVELS 8 +/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy, + * IPsec RoCE policy + */ +#define KERNEL_NIC_PRIO_NUM_LEVELS 9 #define KERNEL_NIC_NUM_PRIOS 1 /* One more level for tc */ #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) @@ -219,19 +221,30 @@ static struct init_tree_node egress_root_fs = { }; enum { + RDMA_RX_IPSEC_PRIO, RDMA_RX_COUNTERS_PRIO, RDMA_RX_BYPASS_PRIO, RDMA_RX_KERNEL_PRIO, }; +#define RDMA_RX_IPSEC_NUM_PRIOS 1 +#define RDMA_RX_IPSEC_NUM_LEVELS 2 +#define RDMA_RX_IPSEC_MIN_LEVEL (RDMA_RX_IPSEC_NUM_LEVELS) + #define RDMA_RX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_REGULAR_PRIOS #define RDMA_RX_KERNEL_MIN_LEVEL (RDMA_RX_BYPASS_MIN_LEVEL + 1) #define RDMA_RX_COUNTERS_MIN_LEVEL (RDMA_RX_KERNEL_MIN_LEVEL + 2) static struct init_tree_node rdma_rx_root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 3, + .ar_size = 4, .children = (struct init_tree_node[]) { + [RDMA_RX_IPSEC_PRIO] = + ADD_PRIO(0, RDMA_RX_IPSEC_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(RDMA_RX_IPSEC_NUM_PRIOS, + RDMA_RX_IPSEC_NUM_LEVELS))), [RDMA_RX_COUNTERS_PRIO] = ADD_PRIO(0, RDMA_RX_COUNTERS_MIN_LEVEL, 0, FS_CHAINING_CAPS, @@ -254,15 +267,20 @@ static struct init_tree_node rdma_rx_root_fs = { enum { RDMA_TX_COUNTERS_PRIO, + RDMA_TX_IPSEC_PRIO, RDMA_TX_BYPASS_PRIO, }; #define RDMA_TX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_PRIOS #define RDMA_TX_COUNTERS_MIN_LEVEL (RDMA_TX_BYPASS_MIN_LEVEL + 1) +#define RDMA_TX_IPSEC_NUM_PRIOS 1 +#define RDMA_TX_IPSEC_PRIO_NUM_LEVELS 1 +#define RDMA_TX_IPSEC_MIN_LEVEL (RDMA_TX_COUNTERS_MIN_LEVEL + RDMA_TX_IPSEC_NUM_PRIOS) + static struct init_tree_node rdma_tx_root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 2, + .ar_size = 3, .children = (struct init_tree_node[]) { [RDMA_TX_COUNTERS_PRIO] = ADD_PRIO(0, RDMA_TX_COUNTERS_MIN_LEVEL, 0, @@ -270,6 +288,13 @@ static struct init_tree_node rdma_tx_root_fs = { ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(MLX5_RDMA_TX_NUM_COUNTERS_PRIOS, RDMA_TX_COUNTERS_PRIO_NUM_LEVELS))), + [RDMA_TX_IPSEC_PRIO] = + ADD_PRIO(0, RDMA_TX_IPSEC_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(RDMA_TX_IPSEC_NUM_PRIOS, + RDMA_TX_IPSEC_PRIO_NUM_LEVELS))), + [RDMA_TX_BYPASS_PRIO] = ADD_PRIO(0, RDMA_TX_BYPASS_MIN_LEVEL, 0, FS_CHAINING_CAPS_RDMA_TX, @@ -449,7 +474,8 @@ static bool is_fwd_dest_type(enum mlx5_flow_destination_type type) type == MLX5_FLOW_DESTINATION_TYPE_VPORT || type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER || type == MLX5_FLOW_DESTINATION_TYPE_TIR || - type == MLX5_FLOW_DESTINATION_TYPE_RANGE; + type == MLX5_FLOW_DESTINATION_TYPE_RANGE || + type == MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; } static bool check_valid_spec(const struct mlx5_flow_spec *spec) @@ -2367,6 +2393,14 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, root_ns = steering->rdma_tx_root_ns; prio = RDMA_TX_COUNTERS_PRIO; break; + case MLX5_FLOW_NAMESPACE_RDMA_RX_IPSEC: + root_ns = steering->rdma_rx_root_ns; + prio = RDMA_RX_IPSEC_PRIO; + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX_IPSEC: + root_ns = steering->rdma_tx_root_ns; + prio = RDMA_TX_IPSEC_PRIO; + break; default: /* Must be NIC RX */ WARN_ON(!is_nic_rx_ns(type)); root_ns = steering->root_ns; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c new file mode 100644 index 000000000000..2c53589b765d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c @@ -0,0 +1,368 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "fs_core.h" +#include "lib/ipsec_fs_roce.h" +#include "mlx5_core.h" + +struct mlx5_ipsec_miss { + struct mlx5_flow_group *group; + struct mlx5_flow_handle *rule; +}; + +struct mlx5_ipsec_rx_roce { + struct mlx5_flow_group *g; + struct mlx5_flow_table *ft; + struct mlx5_flow_handle *rule; + struct mlx5_ipsec_miss roce_miss; + + struct mlx5_flow_table *ft_rdma; + struct mlx5_flow_namespace *ns_rdma; +}; + +struct mlx5_ipsec_tx_roce { + struct mlx5_flow_group *g; + struct mlx5_flow_table *ft; + struct mlx5_flow_handle *rule; + struct mlx5_flow_namespace *ns; +}; + +struct mlx5_ipsec_fs { + struct mlx5_ipsec_rx_roce ipv4_rx; + struct mlx5_ipsec_rx_roce ipv6_rx; + struct mlx5_ipsec_tx_roce tx; +}; + +static void ipsec_fs_roce_setup_udp_dport(struct mlx5_flow_spec *spec, + u16 dport) +{ + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_UDP); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_dport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, dport); +} + +static int +ipsec_fs_roce_rx_rule_setup(struct mlx5_core_dev *mdev, + struct mlx5_flow_destination *default_dst, + struct mlx5_ipsec_rx_roce *roce) +{ + struct mlx5_flow_destination dst = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + ipsec_fs_roce_setup_udp_dport(spec, ROCE_V2_UDP_DPORT); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dst.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; + dst.ft = roce->ft_rdma; + rule = mlx5_add_flow_rules(roce->ft, spec, &flow_act, &dst, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Fail to add RX RoCE IPsec rule err=%d\n", + err); + goto fail_add_rule; + } + + roce->rule = rule; + + memset(spec, 0, sizeof(*spec)); + rule = mlx5_add_flow_rules(roce->ft, spec, &flow_act, default_dst, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Fail to add RX RoCE IPsec miss rule err=%d\n", + err); + goto fail_add_default_rule; + } + + roce->roce_miss.rule = rule; + + kvfree(spec); + return 0; + +fail_add_default_rule: + mlx5_del_flow_rules(roce->rule); +fail_add_rule: + kvfree(spec); + return err; +} + +static int ipsec_fs_roce_tx_rule_setup(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_tx_roce *roce, + struct mlx5_flow_table *pol_ft) +{ + struct mlx5_flow_destination dst = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + int err = 0; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dst.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; + dst.ft = pol_ft; + rule = mlx5_add_flow_rules(roce->ft, NULL, &flow_act, &dst, + 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Fail to add TX RoCE IPsec rule err=%d\n", + err); + goto out; + } + roce->rule = rule; + +out: + return err; +} + +void mlx5_ipsec_fs_roce_tx_destroy(struct mlx5_ipsec_fs *ipsec_roce) +{ + struct mlx5_ipsec_tx_roce *tx_roce; + + if (!ipsec_roce) + return; + + tx_roce = &ipsec_roce->tx; + + mlx5_del_flow_rules(tx_roce->rule); + mlx5_destroy_flow_group(tx_roce->g); + mlx5_destroy_flow_table(tx_roce->ft); +} + +#define MLX5_TX_ROCE_GROUP_SIZE BIT(0) + +int mlx5_ipsec_fs_roce_tx_create(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_fs *ipsec_roce, + struct mlx5_flow_table *pol_ft) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_ipsec_tx_roce *roce; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + int ix = 0; + int err; + u32 *in; + + if (!ipsec_roce) + return 0; + + roce = &ipsec_roce->tx; + + in = kvzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + ft_attr.max_fte = 1; + ft = mlx5_create_flow_table(roce->ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "Fail to create RoCE IPsec tx ft err=%d\n", err); + return err; + } + + roce->ft = ft; + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TX_ROCE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + g = mlx5_create_flow_group(ft, in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, "Fail to create RoCE IPsec tx group err=%d\n", err); + goto fail; + } + roce->g = g; + + err = ipsec_fs_roce_tx_rule_setup(mdev, roce, pol_ft); + if (err) { + mlx5_core_err(mdev, "Fail to create RoCE IPsec tx rules err=%d\n", err); + goto rule_fail; + } + + return 0; + +rule_fail: + mlx5_destroy_flow_group(roce->g); +fail: + mlx5_destroy_flow_table(ft); + return err; +} + +struct mlx5_flow_table *mlx5_ipsec_fs_roce_ft_get(struct mlx5_ipsec_fs *ipsec_roce, u32 family) +{ + struct mlx5_ipsec_rx_roce *rx_roce; + + if (!ipsec_roce) + return NULL; + + rx_roce = (family == AF_INET) ? &ipsec_roce->ipv4_rx : + &ipsec_roce->ipv6_rx; + + return rx_roce->ft; +} + +void mlx5_ipsec_fs_roce_rx_destroy(struct mlx5_ipsec_fs *ipsec_roce, u32 family) +{ + struct mlx5_ipsec_rx_roce *rx_roce; + + if (!ipsec_roce) + return; + + rx_roce = (family == AF_INET) ? &ipsec_roce->ipv4_rx : + &ipsec_roce->ipv6_rx; + + mlx5_del_flow_rules(rx_roce->roce_miss.rule); + mlx5_del_flow_rules(rx_roce->rule); + mlx5_destroy_flow_table(rx_roce->ft_rdma); + mlx5_destroy_flow_group(rx_roce->roce_miss.group); + mlx5_destroy_flow_group(rx_roce->g); + mlx5_destroy_flow_table(rx_roce->ft); +} + +#define MLX5_RX_ROCE_GROUP_SIZE BIT(0) + +int mlx5_ipsec_fs_roce_rx_create(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_fs *ipsec_roce, + struct mlx5_flow_namespace *ns, + struct mlx5_flow_destination *default_dst, + u32 family, u32 level, u32 prio) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_ipsec_rx_roce *roce; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + if (!ipsec_roce) + return 0; + + roce = (family == AF_INET) ? &ipsec_roce->ipv4_rx : + &ipsec_roce->ipv6_rx; + + ft_attr.max_fte = 2; + ft_attr.level = level; + ft_attr.prio = prio; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx ft at nic err=%d\n", err); + return err; + } + + roce->ft = ft; + + in = kvzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in), GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto fail_nomem; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport); + + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_RX_ROCE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + g = mlx5_create_flow_group(ft, in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx group at nic err=%d\n", err); + goto fail_group; + } + roce->g = g; + + memset(in, 0, MLX5_ST_SZ_BYTES(create_flow_group_in)); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_RX_ROCE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + g = mlx5_create_flow_group(ft, in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx miss group at nic err=%d\n", err); + goto fail_mgroup; + } + roce->roce_miss.group = g; + + memset(&ft_attr, 0, sizeof(ft_attr)); + if (family == AF_INET) + ft_attr.level = 1; + ft = mlx5_create_flow_table(roce->ns_rdma, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx ft at rdma err=%d\n", err); + goto fail_rdma_table; + } + + roce->ft_rdma = ft; + + err = ipsec_fs_roce_rx_rule_setup(mdev, default_dst, roce); + if (err) { + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx rules err=%d\n", err); + goto fail_setup_rule; + } + + kvfree(in); + return 0; + +fail_setup_rule: + mlx5_destroy_flow_table(roce->ft_rdma); +fail_rdma_table: + mlx5_destroy_flow_group(roce->roce_miss.group); +fail_mgroup: + mlx5_destroy_flow_group(roce->g); +fail_group: + kvfree(in); +fail_nomem: + mlx5_destroy_flow_table(roce->ft); + return err; +} + +void mlx5_ipsec_fs_roce_cleanup(struct mlx5_ipsec_fs *ipsec_roce) +{ + kfree(ipsec_roce); +} + +struct mlx5_ipsec_fs *mlx5_ipsec_fs_roce_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_ipsec_fs *roce_ipsec; + struct mlx5_flow_namespace *ns; + + ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_RDMA_RX_IPSEC); + if (!ns) { + mlx5_core_err(mdev, "Failed to get RoCE rx ns\n"); + return NULL; + } + + roce_ipsec = kzalloc(sizeof(*roce_ipsec), GFP_KERNEL); + if (!roce_ipsec) + return NULL; + + roce_ipsec->ipv4_rx.ns_rdma = ns; + roce_ipsec->ipv6_rx.ns_rdma = ns; + + ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_RDMA_TX_IPSEC); + if (!ns) { + mlx5_core_err(mdev, "Failed to get RoCE tx ns\n"); + goto err_tx; + } + + roce_ipsec->tx.ns = ns; + + return roce_ipsec; + +err_tx: + kfree(roce_ipsec); + return NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.h new file mode 100644 index 000000000000..9712d705fe48 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_LIB_IPSEC_H__ +#define __MLX5_LIB_IPSEC_H__ + +struct mlx5_ipsec_fs; + +struct mlx5_flow_table * +mlx5_ipsec_fs_roce_ft_get(struct mlx5_ipsec_fs *ipsec_roce, u32 family); +void mlx5_ipsec_fs_roce_rx_destroy(struct mlx5_ipsec_fs *ipsec_roce, + u32 family); +int mlx5_ipsec_fs_roce_rx_create(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_fs *ipsec_roce, + struct mlx5_flow_namespace *ns, + struct mlx5_flow_destination *default_dst, + u32 family, u32 level, u32 prio); +void mlx5_ipsec_fs_roce_tx_destroy(struct mlx5_ipsec_fs *ipsec_roce); +int mlx5_ipsec_fs_roce_tx_create(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_fs *ipsec_roce, + struct mlx5_flow_table *pol_ft); +void mlx5_ipsec_fs_roce_cleanup(struct mlx5_ipsec_fs *ipsec_roce); +struct mlx5_ipsec_fs *mlx5_ipsec_fs_roce_init(struct mlx5_core_dev *mdev); + +#endif /* __MLX5_LIB_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 032adb21ad4b..bfd3a1121ed8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -96,11 +96,6 @@ static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) return devlink_net(priv_to_devlink(dev)); } -static inline void mlx5_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev) -{ - mdev->mlx5e_res.uplink_netdev = netdev; -} - static inline struct net_device *mlx5_uplink_netdev_get(struct mlx5_core_dev *mdev) { return mdev->mlx5e_res.uplink_netdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index df134f6d32dc..72716d1f8b37 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -336,6 +336,24 @@ static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size) } } +void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *dev, struct net_device *netdev) +{ + mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); + dev->mlx5e_res.uplink_netdev = netdev; + mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_UPLINK_NETDEV, + netdev); + mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); +} + +void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *dev) +{ + mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); + mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_UPLINK_NETDEV, + dev->mlx5e_res.uplink_netdev); + mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); +} +EXPORT_SYMBOL(mlx5_core_uplink_netdev_event_replay); + static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, enum mlx5_cap_mode cap_mode) @@ -1608,6 +1626,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) lockdep_register_key(&dev->lock_key); mutex_init(&dev->intf_state_mutex); lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key); + mutex_init(&dev->mlx5e_res.uplink_netdev_lock); mutex_init(&priv->bfregs.reg_head.lock); mutex_init(&priv->bfregs.wc_head.lock); @@ -1696,6 +1715,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) mutex_destroy(&priv->alloc_mutex); mutex_destroy(&priv->bfregs.wc_head.lock); mutex_destroy(&priv->bfregs.reg_head.lock); + mutex_destroy(&dev->mlx5e_res.uplink_netdev_lock); mutex_destroy(&dev->intf_state_mutex); lockdep_unregister_key(&dev->lock_key); } |