diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5')
103 files changed, 4148 insertions, 1822 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 26685fd0fdaa..bb1d7b039a7e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -85,7 +85,7 @@ config MLX5_BRIDGE config MLX5_CLS_ACT bool "MLX5 TC classifier action support" - depends on MLX5_ESWITCH && NET_CLS_ACT + depends on MLX5_ESWITCH && NET_CLS_ACT && NET_TC_SKB_EXT default y help mlx5 ConnectX offloads support for TC classifier action (NET_CLS_ACT), @@ -100,7 +100,7 @@ config MLX5_CLS_ACT config MLX5_TC_CT bool "MLX5 TC connection tracking offload support" - depends on MLX5_CLS_ACT && NF_FLOW_TABLE && NET_ACT_CT && NET_TC_SKB_EXT + depends on MLX5_CLS_ACT && NF_FLOW_TABLE && NET_ACT_CT default y help Say Y here if you want to support offloading connection tracking rules diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index cd4a1ab0ea78..8d4e25cc54ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -47,7 +47,7 @@ mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \ en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \ en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o \ en/tc/post_act.o en/tc/int_port.o en/tc/meter.o \ - en/tc/post_meter.o + en/tc/post_meter.o en/tc/act_stats.o mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en/tc/act/act.o en/tc/act/drop.o en/tc/act/trap.o \ en/tc/act/accept.o en/tc/act/mark.o en/tc/act/goto.o \ @@ -97,7 +97,7 @@ mlx5_core-$(CONFIG_MLX5_EN_MACSEC) += en_accel/macsec.o en_accel/macsec_fs.o \ mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ en_accel/ipsec_stats.o en_accel/ipsec_fs.o \ - en_accel/ipsec_offload.o + en_accel/ipsec_offload.o lib/ipsec_fs_roce.o mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/ktls_stats.o \ en_accel/fs_tcp.o en_accel/ktls.o en_accel/ktls_txrx.o \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index c837103a9ee3..b00e33ed05e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -47,6 +47,25 @@ #define CREATE_TRACE_POINTS #include "diag/cmd_tracepoint.h" +struct mlx5_ifc_mbox_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_mbox_in_bits { + u8 opcode[0x10]; + u8 uid[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + enum { CMD_IF_REV = 5, }; @@ -70,6 +89,27 @@ enum { MLX5_CMD_DELIVERY_STAT_CMD_DESCR_ERR = 0x10, }; +static u16 in_to_opcode(void *in) +{ + return MLX5_GET(mbox_in, in, opcode); +} + +/* Returns true for opcodes that might be triggered very frequently and throttle + * the command interface. Limit their command slots usage. + */ +static bool mlx5_cmd_is_throttle_opcode(u16 op) +{ + switch (op) { + case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: + case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: + case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: + case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + case MLX5_CMD_OP_SYNC_CRYPTO: + return true; + } + return false; +} + static struct mlx5_cmd_work_ent * cmd_alloc_ent(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in, struct mlx5_cmd_msg *out, void *uout, int uout_size, @@ -91,6 +131,7 @@ cmd_alloc_ent(struct mlx5_cmd *cmd, struct mlx5_cmd_msg *in, ent->context = context; ent->cmd = cmd; ent->page_queue = page_queue; + ent->op = in_to_opcode(in->first.data); refcount_set(&ent->refcnt, 1); return ent; @@ -483,6 +524,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE: case MLX5_CMD_OP_SAVE_VHCA_STATE: case MLX5_CMD_OP_LOAD_VHCA_STATE: + case MLX5_CMD_OP_SYNC_CRYPTO: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -ENOLINK; @@ -685,6 +727,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(QUERY_VHCA_MIGRATION_STATE); MLX5_COMMAND_STR_CASE(SAVE_VHCA_STATE); MLX5_COMMAND_STR_CASE(LOAD_VHCA_STATE); + MLX5_COMMAND_STR_CASE(SYNC_CRYPTO); default: return "unknown command opcode"; } } @@ -752,25 +795,6 @@ static int cmd_status_to_err(u8 status) } } -struct mlx5_ifc_mbox_out_bits { - u8 status[0x8]; - u8 reserved_at_8[0x18]; - - u8 syndrome[0x20]; - - u8 reserved_at_40[0x40]; -}; - -struct mlx5_ifc_mbox_in_bits { - u8 opcode[0x10]; - u8 uid[0x10]; - - u8 reserved_at_20[0x10]; - u8 op_mod[0x10]; - - u8 reserved_at_40[0x40]; -}; - void mlx5_cmd_out_err(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out) { u32 syndrome = MLX5_GET(mbox_out, out, syndrome); @@ -788,11 +812,12 @@ static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out) u16 opcode, op_mod; u16 uid; - opcode = MLX5_GET(mbox_in, in, opcode); + opcode = in_to_opcode(in); op_mod = MLX5_GET(mbox_in, in, op_mod); uid = MLX5_GET(mbox_in, in, uid); - if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY) + if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY && + opcode != MLX5_CMD_OP_CREATE_UCTX) mlx5_cmd_out_err(dev, opcode, op_mod, out); } @@ -800,7 +825,7 @@ int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out) { /* aborted due to PCI error or via reset flow mlx5_cmd_trigger_completions() */ if (err == -ENXIO) { - u16 opcode = MLX5_GET(mbox_in, in, opcode); + u16 opcode = in_to_opcode(in); u32 syndrome; u8 status; @@ -829,9 +854,9 @@ static void dump_command(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent, int input) { struct mlx5_cmd_msg *msg = input ? ent->in : ent->out; - u16 op = MLX5_GET(mbox_in, ent->lay->in, opcode); struct mlx5_cmd_mailbox *next = msg->next; int n = mlx5_calc_cmd_blocks(msg); + u16 op = ent->op; int data_only; u32 offset = 0; int dump_len; @@ -883,11 +908,6 @@ static void dump_command(struct mlx5_core_dev *dev, mlx5_core_dbg(dev, "cmd[%d]: end dump\n", ent->idx); } -static u16 msg_to_opcode(struct mlx5_cmd_msg *in) -{ - return MLX5_GET(mbox_in, in->first.data, opcode); -} - static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); static void cb_timeout_handler(struct work_struct *work) @@ -905,13 +925,13 @@ static void cb_timeout_handler(struct work_struct *work) /* Maybe got handled by eq recover ? */ if (!test_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state)) { mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, recovered after timeout\n", ent->idx, - mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + mlx5_command_str(ent->op), ent->op); goto out; /* phew, already handled */ } ent->ret = -ETIMEDOUT; mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) Async, timeout. Will cause a leak of a command resource\n", - ent->idx, mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + ent->idx, mlx5_command_str(ent->op), ent->op); mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); out: @@ -985,7 +1005,6 @@ static void cmd_work_handler(struct work_struct *work) ent->lay = lay; memset(lay, 0, sizeof(*lay)); memcpy(lay->in, ent->in->first.data, sizeof(lay->in)); - ent->op = be32_to_cpu(lay->in[0]) >> 16; if (ent->in->next) lay->in_ptr = cpu_to_be64(ent->in->next->dma); lay->inlen = cpu_to_be32(ent->in->len); @@ -1098,12 +1117,12 @@ static void wait_func_handle_exec_timeout(struct mlx5_core_dev *dev, */ if (wait_for_completion_timeout(&ent->done, timeout)) { mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) recovered after timeout\n", ent->idx, - mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + mlx5_command_str(ent->op), ent->op); return; } mlx5_core_warn(dev, "cmd[%d]: %s(0x%x) No done completion\n", ent->idx, - mlx5_command_str(msg_to_opcode(ent->in)), msg_to_opcode(ent->in)); + mlx5_command_str(ent->op), ent->op); ent->ret = -ETIMEDOUT; mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, true); @@ -1130,12 +1149,10 @@ out_err: if (err == -ETIMEDOUT) { mlx5_core_warn(dev, "%s(0x%x) timeout. Will cause a leak of a command resource\n", - mlx5_command_str(msg_to_opcode(ent->in)), - msg_to_opcode(ent->in)); + mlx5_command_str(ent->op), ent->op); } else if (err == -ECANCELED) { mlx5_core_warn(dev, "%s(0x%x) canceled on out of queue timeout.\n", - mlx5_command_str(msg_to_opcode(ent->in)), - msg_to_opcode(ent->in)); + mlx5_command_str(ent->op), ent->op); } mlx5_core_dbg(dev, "err %d, delivery status %s(%d)\n", err, deliv_status_to_str(ent->status), ent->status); @@ -1169,7 +1186,6 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, u8 status = 0; int err = 0; s64 ds; - u16 op; if (callback && page_queue) return -EINVAL; @@ -1209,9 +1225,8 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, goto out_free; ds = ent->ts2 - ent->ts1; - op = MLX5_GET(mbox_in, in->first.data, opcode); - if (op < MLX5_CMD_OP_MAX) { - stats = &cmd->stats[op]; + if (ent->op < MLX5_CMD_OP_MAX) { + stats = &cmd->stats[ent->op]; spin_lock_irq(&stats->lock); stats->sum += ds; ++stats->n; @@ -1219,7 +1234,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, } mlx5_core_dbg_mask(dev, 1 << MLX5_CMD_TIME, "fw exec time for %s is %lld nsec\n", - mlx5_command_str(op), ds); + mlx5_command_str(ent->op), ds); out_free: status = ent->status; @@ -1816,7 +1831,7 @@ cache_miss: static int is_manage_pages(void *in) { - return MLX5_GET(mbox_in, in, opcode) == MLX5_CMD_OP_MANAGE_PAGES; + return in_to_opcode(in) == MLX5_CMD_OP_MANAGE_PAGES; } /* Notes: @@ -1827,8 +1842,9 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size, mlx5_cmd_cbk_t callback, void *context, bool force_polling) { - u16 opcode = MLX5_GET(mbox_in, in, opcode); struct mlx5_cmd_msg *inb, *outb; + u16 opcode = in_to_opcode(in); + bool throttle_op; int pages_queue; gfp_t gfp; u8 token; @@ -1837,13 +1853,21 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, opcode)) return -ENXIO; + throttle_op = mlx5_cmd_is_throttle_opcode(opcode); + if (throttle_op) { + /* atomic context may not sleep */ + if (callback) + return -EINVAL; + down(&dev->cmd.throttle_sem); + } + pages_queue = is_manage_pages(in); gfp = callback ? GFP_ATOMIC : GFP_KERNEL; inb = alloc_msg(dev, in_size, gfp); if (IS_ERR(inb)) { err = PTR_ERR(inb); - return err; + goto out_up; } token = alloc_token(&dev->cmd); @@ -1877,6 +1901,9 @@ out_out: mlx5_free_cmd_msg(dev, outb); out_in: free_msg(dev, inb); +out_up: + if (throttle_op) + up(&dev->cmd.throttle_sem); return err; } @@ -1950,8 +1977,8 @@ static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, u16 op int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size) { int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false); - u16 opcode = MLX5_GET(mbox_in, in, opcode); u16 op_mod = MLX5_GET(mbox_in, in, op_mod); + u16 opcode = in_to_opcode(in); return cmd_status_err(dev, err, opcode, op_mod, out); } @@ -1996,8 +2023,8 @@ int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size) { int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true); - u16 opcode = MLX5_GET(mbox_in, in, opcode); u16 op_mod = MLX5_GET(mbox_in, in, op_mod); + u16 opcode = in_to_opcode(in); err = cmd_status_err(dev, err, opcode, op_mod, out); return mlx5_cmd_check(dev, err, in, out); @@ -2049,7 +2076,7 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size, work->ctx = ctx; work->user_callback = callback; - work->opcode = MLX5_GET(mbox_in, in, opcode); + work->opcode = in_to_opcode(in); work->op_mod = MLX5_GET(mbox_in, in, op_mod); work->out = out; if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight))) @@ -2220,6 +2247,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev) sema_init(&cmd->sem, cmd->max_reg_cmds); sema_init(&cmd->pages_sem, 1); + sema_init(&cmd->throttle_sem, DIV_ROUND_UP(cmd->max_reg_cmds, 2)); cmd_h = (u32)((u64)(cmd->dma) >> 32); cmd_l = (u32)(cmd->dma); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 3e232a65a0c3..bb95b40d25eb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -245,8 +245,9 @@ void mlx5_pages_debugfs_init(struct mlx5_core_dev *dev) pages = dev->priv.dbg.pages_debugfs; debugfs_create_u32("fw_pages_total", 0400, pages, &dev->priv.fw_pages); - debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.vfs_pages); - debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.host_pf_pages); + debugfs_create_u32("fw_pages_vfs", 0400, pages, &dev->priv.page_counters[MLX5_VF]); + debugfs_create_u32("fw_pages_sfs", 0400, pages, &dev->priv.page_counters[MLX5_SF]); + debugfs_create_u32("fw_pages_host_pf", 0400, pages, &dev->priv.page_counters[MLX5_HOST_PF]); debugfs_create_u32("fw_pages_alloc_failed", 0400, pages, &dev->priv.fw_pages_alloc_failed); debugfs_create_u32("fw_pages_give_dropped", 0400, pages, &dev->priv.give_pages_dropped); debugfs_create_u32("fw_pages_reclaim_discard", 0400, pages, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 0571e40c6ee5..445fe30c3d0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -59,6 +59,9 @@ bool mlx5_eth_supported(struct mlx5_core_dev *dev) if (!IS_ENABLED(CONFIG_MLX5_CORE_EN)) return false; + if (mlx5_core_is_management_pf(dev)) + return false; + if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return false; @@ -111,9 +114,9 @@ static bool is_eth_enabled(struct mlx5_core_dev *dev) union devlink_param_value val; int err; - err = devlink_param_driverinit_value_get(priv_to_devlink(dev), - DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, - &val); + err = devl_param_driverinit_value_get(priv_to_devlink(dev), + DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, + &val); return err ? false : val.vbool; } @@ -144,9 +147,9 @@ static bool is_vnet_enabled(struct mlx5_core_dev *dev) union devlink_param_value val; int err; - err = devlink_param_driverinit_value_get(priv_to_devlink(dev), - DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, - &val); + err = devl_param_driverinit_value_get(priv_to_devlink(dev), + DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, + &val); return err ? false : val.vbool; } @@ -198,6 +201,9 @@ bool mlx5_rdma_supported(struct mlx5_core_dev *dev) if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) return false; + if (mlx5_core_is_management_pf(dev)) + return false; + if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV) return false; @@ -215,9 +221,9 @@ static bool is_ib_enabled(struct mlx5_core_dev *dev) union devlink_param_value val; int err; - err = devlink_param_driverinit_value_get(priv_to_devlink(dev), - DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, - &val); + err = devl_param_driverinit_value_get(priv_to_devlink(dev), + DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, + &val); return err ? false : val.vbool; } @@ -343,7 +349,6 @@ int mlx5_attach_device(struct mlx5_core_dev *dev) devl_assert_locked(priv_to_devlink(dev)); mutex_lock(&mlx5_intf_mutex); priv->flags &= ~MLX5_PRIV_FLAGS_DETACH; - priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; for (i = 0; i < ARRAY_SIZE(mlx5_adev_devices); i++) { if (!priv->adev[i]) { bool is_supported = false; @@ -372,10 +377,6 @@ int mlx5_attach_device(struct mlx5_core_dev *dev) /* Pay attention that this is not PCI driver that * mlx5_core_dev is connected, but auxiliary driver. - * - * Here we can race of module unload with devlink - * reload, but we don't need to take extra lock because - * we are holding global mlx5_intf_mutex. */ if (!adev->dev.driver) continue; @@ -391,12 +392,11 @@ int mlx5_attach_device(struct mlx5_core_dev *dev) break; } } - priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; mutex_unlock(&mlx5_intf_mutex); return ret; } -void mlx5_detach_device(struct mlx5_core_dev *dev) +void mlx5_detach_device(struct mlx5_core_dev *dev, bool suspend) { struct mlx5_priv *priv = &dev->priv; struct auxiliary_device *adev; @@ -406,7 +406,6 @@ void mlx5_detach_device(struct mlx5_core_dev *dev) devl_assert_locked(priv_to_devlink(dev)); mutex_lock(&mlx5_intf_mutex); - priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; for (i = ARRAY_SIZE(mlx5_adev_devices) - 1; i >= 0; i--) { if (!priv->adev[i]) continue; @@ -426,7 +425,7 @@ void mlx5_detach_device(struct mlx5_core_dev *dev) adrv = to_auxiliary_drv(adev->dev.driver); - if (adrv->suspend) { + if (adrv->suspend && suspend) { adrv->suspend(adev, pm); continue; } @@ -435,7 +434,6 @@ skip_suspend: del_adev(&priv->adev[i]->adev); priv->adev[i] = NULL; } - priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; priv->flags |= MLX5_PRIV_FLAGS_DETACH; mutex_unlock(&mlx5_intf_mutex); } @@ -534,22 +532,16 @@ del_adev: int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; - int err = 0; lockdep_assert_held(&mlx5_intf_mutex); if (priv->flags & MLX5_PRIV_FLAGS_DETACH) return 0; - priv->flags |= MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; delete_drivers(dev); if (priv->flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) - goto out; - - err = add_drivers(dev); + return 0; -out: - priv->flags &= ~MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW; - return err; + return add_drivers(dev); } bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev *peer_dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 5bd83c0275f8..c5d2fdcabd56 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -7,6 +7,7 @@ #include "fw_reset.h" #include "fs_core.h" #include "eswitch.h" +#include "lag/lag.h" #include "esw/qos.h" #include "sf/dev/dev.h" #include "sf/sf.h" @@ -104,7 +105,7 @@ static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netli if (err) return err; - mlx5_unload_one_devl_locked(dev); + mlx5_unload_one_devl_locked(dev, true); err = mlx5_health_wait_pci_up(dev); if (err) NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset"); @@ -156,13 +157,18 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, return -EOPNOTSUPP; } + if (mlx5_core_is_mp_slave(dev)) { + NL_SET_ERR_MSG_MOD(extack, "reload is unsupported for multi port slave"); + return -EOPNOTSUPP; + } + if (pci_num_vf(pdev)) { NL_SET_ERR_MSG_MOD(extack, "reload while VFs are present is unfavorable"); } switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: - mlx5_unload_one_devl_locked(dev); + mlx5_unload_one_devl_locked(dev, false); break; case DEVLINK_RELOAD_ACTION_FW_ACTIVATE: if (limit == DEVLINK_RELOAD_LIMIT_NO_RESET) @@ -263,9 +269,10 @@ static int mlx5_devlink_trap_action_set(struct devlink *devlink, struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_devlink_trap_event_ctx trap_event_ctx; enum devlink_trap_action action_orig; struct mlx5_devlink_trap *dl_trap; - int err = 0; + int err; if (is_mdev_switchdev_mode(dev)) { NL_SET_ERR_MSG_MOD(extack, "Devlink traps can't be set in switchdev mode"); @@ -275,26 +282,25 @@ static int mlx5_devlink_trap_action_set(struct devlink *devlink, dl_trap = mlx5_find_trap_by_id(dev, trap->id); if (!dl_trap) { mlx5_core_err(dev, "Devlink trap: Set action on invalid trap id 0x%x", trap->id); - err = -EINVAL; - goto out; + return -EINVAL; } - if (action != DEVLINK_TRAP_ACTION_DROP && action != DEVLINK_TRAP_ACTION_TRAP) { - err = -EOPNOTSUPP; - goto out; - } + if (action != DEVLINK_TRAP_ACTION_DROP && action != DEVLINK_TRAP_ACTION_TRAP) + return -EOPNOTSUPP; if (action == dl_trap->trap.action) - goto out; + return 0; action_orig = dl_trap->trap.action; dl_trap->trap.action = action; + trap_event_ctx.trap = &dl_trap->trap; + trap_event_ctx.err = 0; err = mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_TYPE_TRAP, - &dl_trap->trap); - if (err) + &trap_event_ctx); + if (err == NOTIFY_BAD) dl_trap->trap.action = action_orig; -out: - return err; + + return trap_event_ctx.err; } static const struct devlink_ops mlx5_devlink_ops = { @@ -396,70 +402,6 @@ void mlx5_devlink_free(struct devlink *devlink) devlink_free(devlink); } -static int mlx5_devlink_fs_mode_validate(struct devlink *devlink, u32 id, - union devlink_param_value val, - struct netlink_ext_ack *extack) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - char *value = val.vstr; - int err = 0; - - if (!strcmp(value, "dmfs")) { - return 0; - } else if (!strcmp(value, "smfs")) { - u8 eswitch_mode; - bool smfs_cap; - - eswitch_mode = mlx5_eswitch_mode(dev); - smfs_cap = mlx5_fs_dr_is_supported(dev); - - if (!smfs_cap) { - err = -EOPNOTSUPP; - NL_SET_ERR_MSG_MOD(extack, - "Software managed steering is not supported by current device"); - } - - else if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) { - NL_SET_ERR_MSG_MOD(extack, - "Software managed steering is not supported when eswitch offloads enabled."); - err = -EOPNOTSUPP; - } - } else { - NL_SET_ERR_MSG_MOD(extack, - "Bad parameter: supported values are [\"dmfs\", \"smfs\"]"); - err = -EINVAL; - } - - return err; -} - -static int mlx5_devlink_fs_mode_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - enum mlx5_flow_steering_mode mode; - - if (!strcmp(ctx->val.vstr, "smfs")) - mode = MLX5_FLOW_STEERING_MODE_SMFS; - else - mode = MLX5_FLOW_STEERING_MODE_DMFS; - dev->priv.steering->mode = mode; - - return 0; -} - -static int mlx5_devlink_fs_mode_get(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - - if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) - strcpy(ctx->val.vstr, "smfs"); - else - strcpy(ctx->val.vstr, "dmfs"); - return 0; -} - static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id, union devlink_param_value val, struct netlink_ext_ack *extack) @@ -496,68 +438,54 @@ static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id return 0; } -static int mlx5_devlink_esw_port_metadata_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) +static int mlx5_devlink_esw_multiport_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) { struct mlx5_core_dev *dev = devlink_priv(devlink); if (!MLX5_ESWITCH_MANAGER(dev)) return -EOPNOTSUPP; - return mlx5_esw_offloads_vport_metadata_set(dev->priv.eswitch, ctx->val.vbool); + if (ctx->val.vbool) + return mlx5_lag_mpesw_enable(dev); + + mlx5_lag_mpesw_disable(dev); + return 0; } -static int mlx5_devlink_esw_port_metadata_get(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) +static int mlx5_devlink_esw_multiport_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) { struct mlx5_core_dev *dev = devlink_priv(devlink); if (!MLX5_ESWITCH_MANAGER(dev)) return -EOPNOTSUPP; - ctx->val.vbool = mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch); + ctx->val.vbool = mlx5_lag_is_mpesw(dev); return 0; } -static int mlx5_devlink_esw_port_metadata_validate(struct devlink *devlink, u32 id, - union devlink_param_value val, - struct netlink_ext_ack *extack) +static int mlx5_devlink_esw_multiport_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); - u8 esw_mode; if (!MLX5_ESWITCH_MANAGER(dev)) { NL_SET_ERR_MSG_MOD(extack, "E-Switch is unsupported"); return -EOPNOTSUPP; } - esw_mode = mlx5_eswitch_mode(dev); - if (esw_mode == MLX5_ESWITCH_OFFLOADS) { + + if (mlx5_eswitch_mode(dev) != MLX5_ESWITCH_OFFLOADS) { NL_SET_ERR_MSG_MOD(extack, - "E-Switch must either disabled or non switchdev mode"); + "E-Switch must be in switchdev mode"); return -EBUSY; } - return 0; -} - -#endif - -static int mlx5_devlink_enable_remote_dev_reset_set(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - mlx5_fw_reset_enable_remote_dev_reset_set(dev, ctx->val.vbool); return 0; } -static int mlx5_devlink_enable_remote_dev_reset_get(struct devlink *devlink, u32 id, - struct devlink_param_gset_ctx *ctx) -{ - struct mlx5_core_dev *dev = devlink_priv(devlink); - - ctx->val.vbool = mlx5_fw_reset_enable_remote_dev_reset_get(dev); - return 0; -} +#endif static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, union devlink_param_value val, @@ -567,11 +495,6 @@ static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id, } static const struct devlink_param mlx5_devlink_params[] = { - DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, - "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING, - BIT(DEVLINK_PARAM_CMODE_RUNTIME), - mlx5_devlink_fs_mode_get, mlx5_devlink_fs_mode_set, - mlx5_devlink_fs_mode_validate), DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_enable_roce_validate), #ifdef CONFIG_MLX5_ESWITCH @@ -580,16 +503,13 @@ static const struct devlink_param mlx5_devlink_params[] = { BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_large_group_num_validate), - DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, - "esw_port_metadata", DEVLINK_PARAM_TYPE_BOOL, + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, + "esw_multiport", DEVLINK_PARAM_TYPE_BOOL, BIT(DEVLINK_PARAM_CMODE_RUNTIME), - mlx5_devlink_esw_port_metadata_get, - mlx5_devlink_esw_port_metadata_set, - mlx5_devlink_esw_port_metadata_validate), + mlx5_devlink_esw_multiport_get, + mlx5_devlink_esw_multiport_set, + mlx5_devlink_esw_multiport_validate), #endif - DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, BIT(DEVLINK_PARAM_CMODE_RUNTIME), - mlx5_devlink_enable_remote_dev_reset_get, - mlx5_devlink_enable_remote_dev_reset_set, NULL), DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_eq_depth_validate), DEVLINK_PARAM_GENERIC(EVENT_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), @@ -602,33 +522,34 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) union devlink_param_value value; value.vbool = MLX5_CAP_GEN(dev, roce); - devlink_param_driverinit_value_set(devlink, - DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, - value); + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + value); #ifdef CONFIG_MLX5_ESWITCH value.vu32 = ESW_OFFLOADS_DEFAULT_NUM_GROUPS; - devlink_param_driverinit_value_set(devlink, - MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, - value); + devl_param_driverinit_value_set(devlink, + MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + value); #endif value.vu32 = MLX5_COMP_EQ_SIZE; - devlink_param_driverinit_value_set(devlink, - DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, - value); + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, + value); value.vu32 = MLX5_NUM_ASYNC_EQE; - devlink_param_driverinit_value_set(devlink, - DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, - value); + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, + value); } -static const struct devlink_param enable_eth_param = +static const struct devlink_param mlx5_devlink_eth_params[] = { DEVLINK_PARAM_GENERIC(ENABLE_ETH, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), - NULL, NULL, NULL); + NULL, NULL, NULL), +}; -static int mlx5_devlink_eth_param_register(struct devlink *devlink) +static int mlx5_devlink_eth_params_register(struct devlink *devlink) { struct mlx5_core_dev *dev = devlink_priv(devlink); union devlink_param_value value; @@ -637,25 +558,27 @@ static int mlx5_devlink_eth_param_register(struct devlink *devlink) if (!mlx5_eth_supported(dev)) return 0; - err = devlink_param_register(devlink, &enable_eth_param); + err = devl_params_register(devlink, mlx5_devlink_eth_params, + ARRAY_SIZE(mlx5_devlink_eth_params)); if (err) return err; value.vbool = true; - devlink_param_driverinit_value_set(devlink, - DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, - value); + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, + value); return 0; } -static void mlx5_devlink_eth_param_unregister(struct devlink *devlink) +static void mlx5_devlink_eth_params_unregister(struct devlink *devlink) { struct mlx5_core_dev *dev = devlink_priv(devlink); if (!mlx5_eth_supported(dev)) return; - devlink_param_unregister(devlink, &enable_eth_param); + devl_params_unregister(devlink, mlx5_devlink_eth_params, + ARRAY_SIZE(mlx5_devlink_eth_params)); } static int mlx5_devlink_enable_rdma_validate(struct devlink *devlink, u32 id, @@ -670,11 +593,12 @@ static int mlx5_devlink_enable_rdma_validate(struct devlink *devlink, u32 id, return 0; } -static const struct devlink_param enable_rdma_param = +static const struct devlink_param mlx5_devlink_rdma_params[] = { DEVLINK_PARAM_GENERIC(ENABLE_RDMA, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), - NULL, NULL, mlx5_devlink_enable_rdma_validate); + NULL, NULL, mlx5_devlink_enable_rdma_validate), +}; -static int mlx5_devlink_rdma_param_register(struct devlink *devlink) +static int mlx5_devlink_rdma_params_register(struct devlink *devlink) { union devlink_param_value value; int err; @@ -682,30 +606,33 @@ static int mlx5_devlink_rdma_param_register(struct devlink *devlink) if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) return 0; - err = devlink_param_register(devlink, &enable_rdma_param); + err = devl_params_register(devlink, mlx5_devlink_rdma_params, + ARRAY_SIZE(mlx5_devlink_rdma_params)); if (err) return err; value.vbool = true; - devlink_param_driverinit_value_set(devlink, - DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, - value); + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, + value); return 0; } -static void mlx5_devlink_rdma_param_unregister(struct devlink *devlink) +static void mlx5_devlink_rdma_params_unregister(struct devlink *devlink) { if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) return; - devlink_param_unregister(devlink, &enable_rdma_param); + devl_params_unregister(devlink, mlx5_devlink_rdma_params, + ARRAY_SIZE(mlx5_devlink_rdma_params)); } -static const struct devlink_param enable_vnet_param = +static const struct devlink_param mlx5_devlink_vnet_params[] = { DEVLINK_PARAM_GENERIC(ENABLE_VNET, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), - NULL, NULL, NULL); + NULL, NULL, NULL), +}; -static int mlx5_devlink_vnet_param_register(struct devlink *devlink) +static int mlx5_devlink_vnet_params_register(struct devlink *devlink) { struct mlx5_core_dev *dev = devlink_priv(devlink); union devlink_param_value value; @@ -714,56 +641,58 @@ static int mlx5_devlink_vnet_param_register(struct devlink *devlink) if (!mlx5_vnet_supported(dev)) return 0; - err = devlink_param_register(devlink, &enable_vnet_param); + err = devl_params_register(devlink, mlx5_devlink_vnet_params, + ARRAY_SIZE(mlx5_devlink_vnet_params)); if (err) return err; value.vbool = true; - devlink_param_driverinit_value_set(devlink, - DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, - value); + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, + value); return 0; } -static void mlx5_devlink_vnet_param_unregister(struct devlink *devlink) +static void mlx5_devlink_vnet_params_unregister(struct devlink *devlink) { struct mlx5_core_dev *dev = devlink_priv(devlink); if (!mlx5_vnet_supported(dev)) return; - devlink_param_unregister(devlink, &enable_vnet_param); + devl_params_unregister(devlink, mlx5_devlink_vnet_params, + ARRAY_SIZE(mlx5_devlink_vnet_params)); } static int mlx5_devlink_auxdev_params_register(struct devlink *devlink) { int err; - err = mlx5_devlink_eth_param_register(devlink); + err = mlx5_devlink_eth_params_register(devlink); if (err) return err; - err = mlx5_devlink_rdma_param_register(devlink); + err = mlx5_devlink_rdma_params_register(devlink); if (err) goto rdma_err; - err = mlx5_devlink_vnet_param_register(devlink); + err = mlx5_devlink_vnet_params_register(devlink); if (err) goto vnet_err; return 0; vnet_err: - mlx5_devlink_rdma_param_unregister(devlink); + mlx5_devlink_rdma_params_unregister(devlink); rdma_err: - mlx5_devlink_eth_param_unregister(devlink); + mlx5_devlink_eth_params_unregister(devlink); return err; } static void mlx5_devlink_auxdev_params_unregister(struct devlink *devlink) { - mlx5_devlink_vnet_param_unregister(devlink); - mlx5_devlink_rdma_param_unregister(devlink); - mlx5_devlink_eth_param_unregister(devlink); + mlx5_devlink_vnet_params_unregister(devlink); + mlx5_devlink_rdma_params_unregister(devlink); + mlx5_devlink_eth_params_unregister(devlink); } static int mlx5_devlink_max_uc_list_validate(struct devlink *devlink, u32 id, @@ -791,11 +720,12 @@ static int mlx5_devlink_max_uc_list_validate(struct devlink *devlink, u32 id, return 0; } -static const struct devlink_param max_uc_list_param = +static const struct devlink_param mlx5_devlink_max_uc_list_params[] = { DEVLINK_PARAM_GENERIC(MAX_MACS, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), - NULL, NULL, mlx5_devlink_max_uc_list_validate); + NULL, NULL, mlx5_devlink_max_uc_list_validate), +}; -static int mlx5_devlink_max_uc_list_param_register(struct devlink *devlink) +static int mlx5_devlink_max_uc_list_params_register(struct devlink *devlink) { struct mlx5_core_dev *dev = devlink_priv(devlink); union devlink_param_value value; @@ -804,26 +734,28 @@ static int mlx5_devlink_max_uc_list_param_register(struct devlink *devlink) if (!MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list_wr_supported)) return 0; - err = devlink_param_register(devlink, &max_uc_list_param); + err = devl_params_register(devlink, mlx5_devlink_max_uc_list_params, + ARRAY_SIZE(mlx5_devlink_max_uc_list_params)); if (err) return err; value.vu32 = 1 << MLX5_CAP_GEN(dev, log_max_current_uc_list); - devlink_param_driverinit_value_set(devlink, - DEVLINK_PARAM_GENERIC_ID_MAX_MACS, - value); + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + value); return 0; } static void -mlx5_devlink_max_uc_list_param_unregister(struct devlink *devlink) +mlx5_devlink_max_uc_list_params_unregister(struct devlink *devlink) { struct mlx5_core_dev *dev = devlink_priv(devlink); if (!MLX5_CAP_GEN_MAX(dev, log_max_current_uc_list_wr_supported)) return; - devlink_param_unregister(devlink, &max_uc_list_param); + devl_params_unregister(devlink, mlx5_devlink_max_uc_list_params, + ARRAY_SIZE(mlx5_devlink_max_uc_list_params)); } #define MLX5_TRAP_DROP(_id, _group_id) \ @@ -869,13 +801,12 @@ void mlx5_devlink_traps_unregister(struct devlink *devlink) ARRAY_SIZE(mlx5_trap_groups_arr)); } -int mlx5_devlink_register(struct devlink *devlink) +int mlx5_devlink_params_register(struct devlink *devlink) { - struct mlx5_core_dev *dev = devlink_priv(devlink); int err; - err = devlink_params_register(devlink, mlx5_devlink_params, - ARRAY_SIZE(mlx5_devlink_params)); + err = devl_params_register(devlink, mlx5_devlink_params, + ARRAY_SIZE(mlx5_devlink_params)); if (err) return err; @@ -885,27 +816,24 @@ int mlx5_devlink_register(struct devlink *devlink) if (err) goto auxdev_reg_err; - err = mlx5_devlink_max_uc_list_param_register(devlink); + err = mlx5_devlink_max_uc_list_params_register(devlink); if (err) goto max_uc_list_err; - if (!mlx5_core_is_mp_slave(dev)) - devlink_set_features(devlink, DEVLINK_F_RELOAD); - return 0; max_uc_list_err: mlx5_devlink_auxdev_params_unregister(devlink); auxdev_reg_err: - devlink_params_unregister(devlink, mlx5_devlink_params, - ARRAY_SIZE(mlx5_devlink_params)); + devl_params_unregister(devlink, mlx5_devlink_params, + ARRAY_SIZE(mlx5_devlink_params)); return err; } -void mlx5_devlink_unregister(struct devlink *devlink) +void mlx5_devlink_params_unregister(struct devlink *devlink) { - mlx5_devlink_max_uc_list_param_unregister(devlink); + mlx5_devlink_max_uc_list_params_unregister(devlink); mlx5_devlink_auxdev_params_unregister(devlink); - devlink_params_unregister(devlink, mlx5_devlink_params, - ARRAY_SIZE(mlx5_devlink_params)); + devl_params_unregister(devlink, mlx5_devlink_params, + ARRAY_SIZE(mlx5_devlink_params)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index fd033df24856..212b12424146 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -11,6 +11,7 @@ enum mlx5_devlink_param_id { MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, + MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, }; struct mlx5_trap_ctx { @@ -24,6 +25,11 @@ struct mlx5_devlink_trap { struct list_head list; }; +struct mlx5_devlink_trap_event_ctx { + struct mlx5_trap_ctx *trap; + int err; +}; + struct mlx5_core_dev; void mlx5_devlink_trap_report(struct mlx5_core_dev *dev, int trap_id, struct sk_buff *skb, struct devlink_port *dl_port); @@ -35,7 +41,7 @@ void mlx5_devlink_traps_unregister(struct devlink *devlink); struct devlink *mlx5_devlink_alloc(struct device *dev); void mlx5_devlink_free(struct devlink *devlink); -int mlx5_devlink_register(struct devlink *devlink); -void mlx5_devlink_unregister(struct devlink *devlink); +int mlx5_devlink_params_register(struct devlink *devlink); +void mlx5_devlink_params_unregister(struct devlink *devlink); #endif /* __MLX5_DEVLINK_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index 2732128e7a6e..6d73127b7217 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -275,6 +275,10 @@ const char *parse_fs_dst(struct trace_seq *p, fs_dest_range_field_to_str(dst->range.field), dst->range.min, dst->range.max); break; + case MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE: + trace_seq_printf(p, "flow_table_type=%u id:%u\n", dst->ft->type, + dst->ft->id); + break; case MLX5_FLOW_DESTINATION_TYPE_NONE: trace_seq_printf(p, "none\n"); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 21831386b26e..f40497823e65 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -64,6 +64,7 @@ static int mlx5_query_mtrc_caps(struct mlx5_fw_tracer *tracer) MLX5_GET(mtrc_cap, out, num_string_trace); tracer->str_db.num_string_db = MLX5_GET(mtrc_cap, out, num_string_db); tracer->owner = !!MLX5_GET(mtrc_cap, out, trace_owner); + tracer->str_db.loaded = false; for (i = 0; i < tracer->str_db.num_string_db; i++) { mtrc_cap_sp = MLX5_ADDR_OF(mtrc_cap, out, string_db_param[i]); @@ -233,6 +234,8 @@ static int mlx5_fw_tracer_allocate_strings_db(struct mlx5_fw_tracer *tracer) int i; for (i = 0; i < num_string_db; i++) { + if (!string_db_size_out[i]) + continue; tracer->str_db.buffer[i] = kzalloc(string_db_size_out[i], GFP_KERNEL); if (!tracer->str_db.buffer[i]) goto free_strings_db; @@ -278,6 +281,8 @@ static void mlx5_tracer_read_strings_db(struct work_struct *work) } for (i = 0; i < num_string_db; i++) { + if (!tracer->str_db.size_out[i]) + continue; offset = 0; MLX5_SET(mtrc_stdb, in, string_db_index, i); num_of_reads = tracer->str_db.size_out[i] / @@ -384,6 +389,8 @@ static struct tracer_string_format *mlx5_tracer_get_string(struct mlx5_fw_tracer str_ptr = tracer_event->string_event.string_param; for (i = 0; i < tracer->str_db.num_string_db; i++) { + if (!tracer->str_db.size_out[i]) + continue; if (str_ptr > tracer->str_db.base_address_out[i] && str_ptr < tracer->str_db.base_address_out[i] + tracer->str_db.size_out[i]) { @@ -459,6 +466,7 @@ static void poll_trace(struct mlx5_fw_tracer *tracer, tracer_event->event_id = MLX5_GET(tracer_event, trace, event_id); tracer_event->lost_event = MLX5_GET(tracer_event, trace, lost); + tracer_event->out = trace; switch (tracer_event->event_id) { case TRACER_EVENT_TYPE_TIMESTAMP: @@ -581,6 +589,26 @@ void mlx5_tracer_print_trace(struct tracer_string_format *str_frmt, mlx5_tracer_clean_message(str_frmt); } +static int mlx5_tracer_handle_raw_string(struct mlx5_fw_tracer *tracer, + struct tracer_event *tracer_event) +{ + struct tracer_string_format *cur_string; + + cur_string = mlx5_tracer_message_insert(tracer, tracer_event); + if (!cur_string) + return -1; + + cur_string->event_id = tracer_event->event_id; + cur_string->timestamp = tracer_event->string_event.timestamp; + cur_string->lost = tracer_event->lost_event; + cur_string->string = "0x%08x%08x"; + cur_string->num_of_params = 2; + cur_string->params[0] = upper_32_bits(*tracer_event->out); + cur_string->params[1] = lower_32_bits(*tracer_event->out); + list_add_tail(&cur_string->list, &tracer->ready_strings_list); + return 0; +} + static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer, struct tracer_event *tracer_event) { @@ -589,7 +617,7 @@ static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer, if (tracer_event->string_event.tdsn == 0) { cur_string = mlx5_tracer_get_string(tracer, tracer_event); if (!cur_string) - return -1; + return mlx5_tracer_handle_raw_string(tracer, tracer_event); cur_string->num_of_params = mlx5_tracer_get_num_of_params(cur_string->string); cur_string->last_param_num = 0; @@ -602,9 +630,9 @@ static int mlx5_tracer_handle_string_trace(struct mlx5_fw_tracer *tracer, } else { cur_string = mlx5_tracer_message_get(tracer, tracer_event); if (!cur_string) { - pr_debug("%s Got string event for unknown string tdsm: %d\n", + pr_debug("%s Got string event for unknown string tmsn: %d\n", __func__, tracer_event->string_event.tmsn); - return -1; + return mlx5_tracer_handle_raw_string(tracer, tracer_event); } cur_string->last_param_num += 1; if (cur_string->last_param_num > TRACER_MAX_PARAMS) { @@ -756,6 +784,7 @@ static int mlx5_fw_tracer_set_mtrc_conf(struct mlx5_fw_tracer *tracer) if (err) mlx5_core_warn(dev, "FWTracer: Failed to set tracer configurations %d\n", err); + tracer->buff.consumer_index = 0; return err; } @@ -820,7 +849,6 @@ static void mlx5_fw_tracer_ownership_change(struct work_struct *work) mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner); if (tracer->owner) { tracer->owner = false; - tracer->buff.consumer_index = 0; return; } @@ -930,6 +958,14 @@ unlock: return err; } +static void mlx5_fw_tracer_update_db(struct work_struct *work) +{ + struct mlx5_fw_tracer *tracer = + container_of(work, struct mlx5_fw_tracer, update_db_work); + + mlx5_fw_tracer_reload(tracer); +} + /* Create software resources (Buffers, etc ..) */ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev) { @@ -957,6 +993,8 @@ struct mlx5_fw_tracer *mlx5_fw_tracer_create(struct mlx5_core_dev *dev) INIT_WORK(&tracer->ownership_change_work, mlx5_fw_tracer_ownership_change); INIT_WORK(&tracer->read_fw_strings_work, mlx5_tracer_read_strings_db); INIT_WORK(&tracer->handle_traces_work, mlx5_fw_tracer_handle_traces); + INIT_WORK(&tracer->update_db_work, mlx5_fw_tracer_update_db); + mutex_init(&tracer->state_lock); err = mlx5_query_mtrc_caps(tracer); @@ -1003,11 +1041,15 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) if (IS_ERR_OR_NULL(tracer)) return 0; - dev = tracer->dev; - if (!tracer->str_db.loaded) queue_work(tracer->work_queue, &tracer->read_fw_strings_work); + mutex_lock(&tracer->state_lock); + if (test_and_set_bit(MLX5_TRACER_STATE_UP, &tracer->state)) + goto unlock; + + dev = tracer->dev; + err = mlx5_core_alloc_pd(dev, &tracer->buff.pdn); if (err) { mlx5_core_warn(dev, "FWTracer: Failed to allocate PD %d\n", err); @@ -1028,6 +1070,8 @@ int mlx5_fw_tracer_init(struct mlx5_fw_tracer *tracer) mlx5_core_warn(dev, "FWTracer: Failed to start tracer %d\n", err); goto err_notifier_unregister; } +unlock: + mutex_unlock(&tracer->state_lock); return 0; err_notifier_unregister: @@ -1037,6 +1081,7 @@ err_dealloc_pd: mlx5_core_dealloc_pd(dev, tracer->buff.pdn); err_cancel_work: cancel_work_sync(&tracer->read_fw_strings_work); + mutex_unlock(&tracer->state_lock); return err; } @@ -1046,17 +1091,27 @@ void mlx5_fw_tracer_cleanup(struct mlx5_fw_tracer *tracer) if (IS_ERR_OR_NULL(tracer)) return; + mutex_lock(&tracer->state_lock); + if (!test_and_clear_bit(MLX5_TRACER_STATE_UP, &tracer->state)) + goto unlock; + mlx5_core_dbg(tracer->dev, "FWTracer: Cleanup, is owner ? (%d)\n", tracer->owner); mlx5_eq_notifier_unregister(tracer->dev, &tracer->nb); cancel_work_sync(&tracer->ownership_change_work); cancel_work_sync(&tracer->handle_traces_work); + /* It is valid to get here from update_db_work. Hence, don't wait for + * update_db_work to finished. + */ + cancel_work(&tracer->update_db_work); if (tracer->owner) mlx5_fw_tracer_ownership_release(tracer); mlx5_core_destroy_mkey(tracer->dev, tracer->buff.mkey); mlx5_core_dealloc_pd(tracer->dev, tracer->buff.pdn); +unlock: + mutex_unlock(&tracer->state_lock); } /* Free software resources (Buffers, etc ..) */ @@ -1073,6 +1128,7 @@ void mlx5_fw_tracer_destroy(struct mlx5_fw_tracer *tracer) mlx5_fw_tracer_clean_saved_traces_array(tracer); mlx5_fw_tracer_free_strings_db(tracer); mlx5_fw_tracer_destroy_log_buf(tracer); + mutex_destroy(&tracer->state_lock); destroy_workqueue(tracer->work_queue); kvfree(tracer); } @@ -1082,6 +1138,8 @@ static int mlx5_fw_tracer_recreate_strings_db(struct mlx5_fw_tracer *tracer) struct mlx5_core_dev *dev; int err; + if (test_and_set_bit(MLX5_TRACER_RECREATE_DB, &tracer->state)) + return 0; cancel_work_sync(&tracer->read_fw_strings_work); mlx5_fw_tracer_clean_ready_list(tracer); mlx5_fw_tracer_clean_print_hash(tracer); @@ -1092,17 +1150,18 @@ static int mlx5_fw_tracer_recreate_strings_db(struct mlx5_fw_tracer *tracer) err = mlx5_query_mtrc_caps(tracer); if (err) { mlx5_core_dbg(dev, "FWTracer: Failed to query capabilities %d\n", err); - return err; + goto out; } err = mlx5_fw_tracer_allocate_strings_db(tracer); if (err) { mlx5_core_warn(dev, "FWTracer: Allocate strings DB failed %d\n", err); - return err; + goto out; } mlx5_fw_tracer_init_saved_traces_array(tracer); - - return 0; +out: + clear_bit(MLX5_TRACER_RECREATE_DB, &tracer->state); + return err; } int mlx5_fw_tracer_reload(struct mlx5_fw_tracer *tracer) @@ -1142,6 +1201,9 @@ static int fw_tracer_event(struct notifier_block *nb, unsigned long action, void case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE: queue_work(tracer->work_queue, &tracer->handle_traces_work); break; + case MLX5_TRACER_SUBTYPE_STRINGS_DB_UPDATE: + queue_work(tracer->work_queue, &tracer->update_db_work); + break; default: mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n", eqe->sub_type); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h index 4762b55b0b0e..5c548bb74f07 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.h @@ -63,6 +63,11 @@ struct mlx5_fw_trace_data { char msg[TRACE_STR_MSG]; }; +enum mlx5_fw_tracer_state { + MLX5_TRACER_STATE_UP = BIT(0), + MLX5_TRACER_RECREATE_DB = BIT(1), +}; + struct mlx5_fw_tracer { struct mlx5_core_dev *dev; struct mlx5_nb nb; @@ -104,6 +109,9 @@ struct mlx5_fw_tracer { struct work_struct handle_traces_work; struct hlist_head hash[MESSAGE_HASH_SIZE]; struct list_head ready_strings_list; + struct work_struct update_db_work; + struct mutex state_lock; /* Synchronize update work with reload flows */ + unsigned long state; }; struct tracer_string_format { @@ -158,6 +166,7 @@ struct tracer_event { struct tracer_string_event string_event; struct tracer_timestamp_event timestamp_event; }; + u64 *out; }; struct mlx5_ifc_tracer_event_bits { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 464eb3a18450..9a3878f9e582 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -75,6 +75,10 @@ int mlx5_ec_init(struct mlx5_core_dev *dev) if (!mlx5_core_is_ecpf(dev)) return 0; + /* Management PF don't have a peer PF */ + if (mlx5_core_is_management_pf(dev)) + return 0; + return mlx5_host_pf_init(dev); } @@ -85,9 +89,13 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev) if (!mlx5_core_is_ecpf(dev)) return; + /* Management PF don't have a peer PF */ + if (mlx5_core_is_management_pf(dev)) + return; + mlx5_host_pf_cleanup(dev); - err = mlx5_wait_for_pages(dev, &dev->priv.host_pf_pages); + err = mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_HOST_PF]); if (err) mlx5_core_warn(dev, "Timeout reclaiming external host PF pages err(%d)\n", err); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 2d77fb8a8a01..88460b7796e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -247,7 +247,7 @@ struct mlx5e_rx_wqe_ll { }; struct mlx5e_rx_wqe_cyc { - struct mlx5_wqe_data_seg data[0]; + DECLARE_FLEX_ARRAY(struct mlx5_wqe_data_seg, data); }; struct mlx5e_umr_wqe { @@ -454,6 +454,7 @@ struct mlx5e_txqsq { struct mlx5_clock *clock; struct net_device *netdev; struct mlx5_core_dev *mdev; + struct mlx5e_channel *channel; struct mlx5e_priv *priv; /* control path */ @@ -626,10 +627,11 @@ struct mlx5e_rq; typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*); typedef struct sk_buff * (*mlx5e_fp_skb_from_cqe_mpwrq)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, - u16 cqe_bcnt, u32 head_offset, u32 page_idx); + struct mlx5_cqe64 *cqe, u16 cqe_bcnt, + u32 head_offset, u32 page_idx); typedef struct sk_buff * (*mlx5e_fp_skb_from_cqe)(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, - u32 cqe_bcnt); + struct mlx5_cqe64 *cqe, u32 cqe_bcnt); typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq); typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16); typedef void (*mlx5e_fp_shampo_dealloc_hd)(struct mlx5e_rq*, u16, u16, bool); @@ -968,6 +970,12 @@ struct mlx5e_priv { struct mlx5e_scratchpad scratchpad; struct mlx5e_htb *htb; struct mlx5e_mqprio_rl *mqprio_rl; + struct dentry *dfs_root; +}; + +struct mlx5e_dev { + struct mlx5e_priv *priv; + struct devlink_port dl_port; }; struct mlx5e_rx_handlers { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c index 83adaabf59f5..c6b6e290fd79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.c @@ -4,6 +4,31 @@ #include "en/devlink.h" #include "eswitch.h" +static const struct devlink_ops mlx5e_devlink_ops = { +}; + +struct mlx5e_dev *mlx5e_create_devlink(struct device *dev, + struct mlx5_core_dev *mdev) +{ + struct mlx5e_dev *mlx5e_dev; + struct devlink *devlink; + + devlink = devlink_alloc_ns(&mlx5e_devlink_ops, sizeof(*mlx5e_dev), + devlink_net(priv_to_devlink(mdev)), dev); + if (!devlink) + return ERR_PTR(-ENOMEM); + devlink_register(devlink); + return devlink_priv(devlink); +} + +void mlx5e_destroy_devlink(struct mlx5e_dev *mlx5e_dev) +{ + struct devlink *devlink = priv_to_devlink(mlx5e_dev); + + devlink_unregister(devlink); + devlink_free(devlink); +} + static void mlx5e_devlink_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_item_id *ppid) { @@ -14,51 +39,36 @@ mlx5e_devlink_get_port_parent_id(struct mlx5_core_dev *dev, struct netdev_phys_i memcpy(ppid->id, &parent_id, sizeof(parent_id)); } -int mlx5e_devlink_port_register(struct mlx5e_priv *priv) +int mlx5e_devlink_port_register(struct mlx5e_dev *mlx5e_dev, + struct mlx5_core_dev *mdev) { - struct devlink *devlink = priv_to_devlink(priv->mdev); + struct devlink *devlink = priv_to_devlink(mlx5e_dev); struct devlink_port_attrs attrs = {}; struct netdev_phys_item_id ppid = {}; - struct devlink_port *dl_port; unsigned int dl_port_index; - int ret; - if (mlx5_core_is_pf(priv->mdev)) { + if (mlx5_core_is_pf(mdev)) { attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - attrs.phys.port_number = mlx5_get_dev_index(priv->mdev); - if (MLX5_ESWITCH_MANAGER(priv->mdev)) { - mlx5e_devlink_get_port_parent_id(priv->mdev, &ppid); + attrs.phys.port_number = mlx5_get_dev_index(mdev); + if (MLX5_ESWITCH_MANAGER(mdev)) { + mlx5e_devlink_get_port_parent_id(mdev, &ppid); memcpy(attrs.switch_id.id, ppid.id, ppid.id_len); attrs.switch_id.id_len = ppid.id_len; } - dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev, + dl_port_index = mlx5_esw_vport_to_devlink_port_index(mdev, MLX5_VPORT_UPLINK); } else { attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL; - dl_port_index = mlx5_esw_vport_to_devlink_port_index(priv->mdev, 0); + dl_port_index = mlx5_esw_vport_to_devlink_port_index(mdev, 0); } - dl_port = mlx5e_devlink_get_dl_port(priv); - memset(dl_port, 0, sizeof(*dl_port)); - devlink_port_attrs_set(dl_port, &attrs); + devlink_port_attrs_set(&mlx5e_dev->dl_port, &attrs); - if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW)) - devl_lock(devlink); - ret = devl_port_register(devlink, dl_port, dl_port_index); - if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW)) - devl_unlock(devlink); - - return ret; + return devlink_port_register(devlink, &mlx5e_dev->dl_port, + dl_port_index); } -void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv) +void mlx5e_devlink_port_unregister(struct mlx5e_dev *mlx5e_dev) { - struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); - struct devlink *devlink = priv_to_devlink(priv->mdev); - - if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW)) - devl_lock(devlink); - devl_port_unregister(dl_port); - if (!(priv->mdev->priv.flags & MLX5_PRIV_FLAGS_MLX5E_LOCKED_FLOW)) - devl_unlock(devlink); + devlink_port_unregister(&mlx5e_dev->dl_port); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h index 4f238d4fff55..d5ec4461f300 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/devlink.h @@ -7,13 +7,11 @@ #include <net/devlink.h> #include "en.h" -int mlx5e_devlink_port_register(struct mlx5e_priv *priv); -void mlx5e_devlink_port_unregister(struct mlx5e_priv *priv); - -static inline struct devlink_port * -mlx5e_devlink_get_dl_port(struct mlx5e_priv *priv) -{ - return &priv->mdev->mlx5e_res.dl_port; -} +struct mlx5e_dev *mlx5e_create_devlink(struct device *dev, + struct mlx5_core_dev *mdev); +void mlx5e_destroy_devlink(struct mlx5e_dev *mlx5e_dev); +int mlx5e_devlink_port_register(struct mlx5e_dev *mlx5e_dev, + struct mlx5_core_dev *mdev); +void mlx5e_devlink_port_unregister(struct mlx5e_dev *mlx5e_dev); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 379c6dc9a3be..e5a44b0b9616 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -87,6 +87,7 @@ enum { MLX5E_ACCEL_FS_POL_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, MLX5E_ACCEL_FS_ESP_FT_LEVEL, MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL, + MLX5E_ACCEL_FS_ESP_FT_ROCE_LEVEL, #endif }; @@ -145,7 +146,8 @@ void mlx5e_destroy_flow_steering(struct mlx5e_flow_steering *fs, bool ntuple, struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile, struct mlx5_core_dev *mdev, - bool state_destroy); + bool state_destroy, + struct dentry *dfs_root); void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs); struct mlx5e_vlan_table *mlx5e_fs_get_vlan(struct mlx5e_flow_steering *fs); void mlx5e_fs_set_tc(struct mlx5e_flow_steering *fs, struct mlx5e_tc_table *tc); @@ -189,6 +191,8 @@ int mlx5e_fs_vlan_rx_kill_vid(struct mlx5e_flow_steering *fs, __be16 proto, u16 vid); void mlx5e_fs_init_l2_addr(struct mlx5e_flow_steering *fs, struct net_device *netdev); +struct dentry *mlx5e_fs_get_debugfs_root(struct mlx5e_flow_steering *fs); + #define fs_err(fs, fmt, ...) \ mlx5_core_err(mlx5e_fs_get_mdev(fs), fmt, ##__VA_ARGS__) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c index 17325c5d6516..cf60f0a3ff23 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mod_hdr.c @@ -47,6 +47,7 @@ void mlx5e_mod_hdr_tbl_init(struct mod_hdr_tbl *tbl) void mlx5e_mod_hdr_tbl_destroy(struct mod_hdr_tbl *tbl) { + WARN_ON(!hash_empty(tbl->hlist)); mutex_destroy(&tbl->lock); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 4ad19c981294..a21bd1179477 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -411,9 +411,14 @@ u8 mlx5e_mpwqe_get_log_num_strides(struct mlx5_core_dev *mdev, { enum mlx5e_mpwrq_umr_mode umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk); u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); + u8 log_wqe_size, log_stride_size; - return mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode) - - mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + log_wqe_size = mlx5e_mpwrq_log_wqe_sz(mdev, page_shift, umr_mode); + log_stride_size = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk); + WARN(log_wqe_size < log_stride_size, + "Log WQE size %u < log stride size %u (page shift %u, umr mode %d, xsk on? %d)\n", + log_wqe_size, log_stride_size, page_shift, umr_mode, !!xsk); + return log_wqe_size - log_stride_size; } u8 mlx5e_mpwqe_get_min_wqe_bulk(unsigned int wq_sz) @@ -580,11 +585,16 @@ int mlx5e_mpwrq_validate_xsk(struct mlx5_core_dev *mdev, struct mlx5e_params *pa u8 page_shift = mlx5e_mpwrq_page_shift(mdev, xsk); u16 max_mtu_pkts; - if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode)) + if (!mlx5e_check_fragmented_striding_rq_cap(mdev, page_shift, umr_mode)) { + mlx5_core_err(mdev, "Striding RQ for XSK can't be activated with page_shift %u and umr_mode %d\n", + page_shift, umr_mode); return -EOPNOTSUPP; + } - if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) + if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, xsk)) { + mlx5_core_err(mdev, "Striding RQ linear mode for XSK can't be activated with current params\n"); return -EINVAL; + } /* Current RQ length is too big for the given frame size, the * needed number of WQEs exceeds the maximum. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 89510cac46c2..505ba41195b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -287,6 +287,78 @@ int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in) return err; } +int mlx5e_port_query_sbpr(struct mlx5_core_dev *mdev, u32 desc, u8 dir, + u8 pool_idx, void *out, int size_out) +{ + u32 in[MLX5_ST_SZ_DW(sbpr_reg)] = {}; + + MLX5_SET(sbpr_reg, in, desc, desc); + MLX5_SET(sbpr_reg, in, dir, dir); + MLX5_SET(sbpr_reg, in, pool, pool_idx); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, size_out, MLX5_REG_SBPR, 0, 0); +} + +int mlx5e_port_set_sbpr(struct mlx5_core_dev *mdev, u32 desc, u8 dir, + u8 pool_idx, u32 infi_size, u32 size) +{ + u32 out[MLX5_ST_SZ_DW(sbpr_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(sbpr_reg)] = {}; + + MLX5_SET(sbpr_reg, in, desc, desc); + MLX5_SET(sbpr_reg, in, dir, dir); + MLX5_SET(sbpr_reg, in, pool, pool_idx); + MLX5_SET(sbpr_reg, in, infi_size, infi_size); + MLX5_SET(sbpr_reg, in, size, size); + MLX5_SET(sbpr_reg, in, mode, 1); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), MLX5_REG_SBPR, 0, 1); +} + +static int mlx5e_port_query_sbcm(struct mlx5_core_dev *mdev, u32 desc, + u8 pg_buff_idx, u8 dir, void *out, + int size_out) +{ + u32 in[MLX5_ST_SZ_DW(sbcm_reg)] = {}; + + MLX5_SET(sbcm_reg, in, desc, desc); + MLX5_SET(sbcm_reg, in, local_port, 1); + MLX5_SET(sbcm_reg, in, pg_buff, pg_buff_idx); + MLX5_SET(sbcm_reg, in, dir, dir); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, size_out, MLX5_REG_SBCM, 0, 0); +} + +int mlx5e_port_set_sbcm(struct mlx5_core_dev *mdev, u32 desc, u8 pg_buff_idx, + u8 dir, u8 infi_size, u32 max_buff, u8 pool_idx) +{ + u32 out[MLX5_ST_SZ_DW(sbcm_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(sbcm_reg)] = {}; + u32 min_buff; + int err; + u8 exc; + + err = mlx5e_port_query_sbcm(mdev, desc, pg_buff_idx, dir, out, + sizeof(out)); + if (err) + return err; + + exc = MLX5_GET(sbcm_reg, out, exc); + min_buff = MLX5_GET(sbcm_reg, out, min_buff); + + MLX5_SET(sbcm_reg, in, desc, desc); + MLX5_SET(sbcm_reg, in, local_port, 1); + MLX5_SET(sbcm_reg, in, pg_buff, pg_buff_idx); + MLX5_SET(sbcm_reg, in, dir, dir); + MLX5_SET(sbcm_reg, in, exc, exc); + MLX5_SET(sbcm_reg, in, min_buff, min_buff); + MLX5_SET(sbcm_reg, in, infi_max, infi_size); + MLX5_SET(sbcm_reg, in, max_buff, max_buff); + MLX5_SET(sbcm_reg, in, pool, pool_idx); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), MLX5_REG_SBCM, 0, 1); +} + /* buffer[i]: buffer that priority i mapped to */ int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index 7a7defe60792..3f474e370828 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -57,6 +57,12 @@ u32 mlx5e_port_speed2linkmodes(struct mlx5_core_dev *mdev, u32 speed, bool mlx5e_ptys_ext_supported(struct mlx5_core_dev *mdev); int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out); int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); +int mlx5e_port_query_sbpr(struct mlx5_core_dev *mdev, u32 desc, u8 dir, + u8 pool_idx, void *out, int size_out); +int mlx5e_port_set_sbpr(struct mlx5_core_dev *mdev, u32 desc, u8 dir, + u8 pool_idx, u32 infi_size, u32 size); +int mlx5e_port_set_sbcm(struct mlx5_core_dev *mdev, u32 desc, u8 pg_buff_idx, + u8 dir, u8 infi_size, u32 max_buff, u8 pool_idx); int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index c9d5d8d93994..7ac1ad9c46de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -73,6 +73,7 @@ int mlx5e_port_query_buffer(struct mlx5e_priv *priv, port_buffer->buffer[i].lossy); } + port_buffer->headroom_size = total_used; port_buffer->port_buffer_size = MLX5_GET(pbmc_reg, out, port_buffer_size) * port_buff_cell_sz; port_buffer->spare_buffer_size = @@ -86,16 +87,204 @@ out: return err; } +struct mlx5e_buffer_pool { + u32 infi_size; + u32 size; + u32 buff_occupancy; +}; + +static int mlx5e_port_query_pool(struct mlx5_core_dev *mdev, + struct mlx5e_buffer_pool *buffer_pool, + u32 desc, u8 dir, u8 pool_idx) +{ + u32 out[MLX5_ST_SZ_DW(sbpr_reg)] = {}; + int err; + + err = mlx5e_port_query_sbpr(mdev, desc, dir, pool_idx, out, + sizeof(out)); + if (err) + return err; + + buffer_pool->size = MLX5_GET(sbpr_reg, out, size); + buffer_pool->infi_size = MLX5_GET(sbpr_reg, out, infi_size); + buffer_pool->buff_occupancy = MLX5_GET(sbpr_reg, out, buff_occupancy); + + return err; +} + +enum { + MLX5_INGRESS_DIR = 0, + MLX5_EGRESS_DIR = 1, +}; + +enum { + MLX5_LOSSY_POOL = 0, + MLX5_LOSSLESS_POOL = 1, +}; + +/* No limit on usage of shared buffer pool (max_buff=0) */ +#define MLX5_SB_POOL_NO_THRESHOLD 0 +/* Shared buffer pool usage threshold when calculated + * dynamically in alpha units. alpha=13 is equivalent to + * HW_alpha of [(1/128) * 2 ^ (alpha-1)] = 32, where HW_alpha + * equates to the following portion of the shared buffer pool: + * [32 / (1 + n * 32)] While *n* is the number of buffers + * that are using the shared buffer pool. + */ +#define MLX5_SB_POOL_THRESHOLD 13 + +/* Shared buffer class management parameters */ +struct mlx5_sbcm_params { + u8 pool_idx; + u8 max_buff; + u8 infi_size; +}; + +static const struct mlx5_sbcm_params sbcm_default = { + .pool_idx = MLX5_LOSSY_POOL, + .max_buff = MLX5_SB_POOL_NO_THRESHOLD, + .infi_size = 0, +}; + +static const struct mlx5_sbcm_params sbcm_lossy = { + .pool_idx = MLX5_LOSSY_POOL, + .max_buff = MLX5_SB_POOL_NO_THRESHOLD, + .infi_size = 1, +}; + +static const struct mlx5_sbcm_params sbcm_lossless = { + .pool_idx = MLX5_LOSSLESS_POOL, + .max_buff = MLX5_SB_POOL_THRESHOLD, + .infi_size = 0, +}; + +static const struct mlx5_sbcm_params sbcm_lossless_no_threshold = { + .pool_idx = MLX5_LOSSLESS_POOL, + .max_buff = MLX5_SB_POOL_NO_THRESHOLD, + .infi_size = 1, +}; + +/** + * select_sbcm_params() - selects the shared buffer pool configuration + * + * @buffer: <input> port buffer to retrieve params of + * @lossless_buff_count: <input> number of lossless buffers in total + * + * The selection is based on the following rules: + * 1. If buffer size is 0, no shared buffer pool is used. + * 2. If buffer is lossy, use lossy shared buffer pool. + * 3. If there are more than 1 lossless buffers, use lossless shared buffer pool + * with threshold. + * 4. If there is only 1 lossless buffer, use lossless shared buffer pool + * without threshold. + * + * @return const struct mlx5_sbcm_params* selected values + */ +static const struct mlx5_sbcm_params * +select_sbcm_params(struct mlx5e_bufferx_reg *buffer, u8 lossless_buff_count) +{ + if (buffer->size == 0) + return &sbcm_default; + + if (buffer->lossy) + return &sbcm_lossy; + + if (lossless_buff_count > 1) + return &sbcm_lossless; + + return &sbcm_lossless_no_threshold; +} + +static int port_update_pool_cfg(struct mlx5_core_dev *mdev, + struct mlx5e_port_buffer *port_buffer) +{ + const struct mlx5_sbcm_params *p; + u8 lossless_buff_count = 0; + int err; + int i; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return 0; + + for (i = 0; i < MLX5E_MAX_BUFFER; i++) + lossless_buff_count += ((port_buffer->buffer[i].size) && + (!(port_buffer->buffer[i].lossy))); + + for (i = 0; i < MLX5E_MAX_BUFFER; i++) { + p = select_sbcm_params(&port_buffer->buffer[i], lossless_buff_count); + err = mlx5e_port_set_sbcm(mdev, 0, i, + MLX5_INGRESS_DIR, + p->infi_size, + p->max_buff, + p->pool_idx); + if (err) + return err; + } + + return 0; +} + +static int port_update_shared_buffer(struct mlx5_core_dev *mdev, + u32 current_headroom_size, + u32 new_headroom_size) +{ + struct mlx5e_buffer_pool lossless_ipool; + struct mlx5e_buffer_pool lossy_epool; + u32 lossless_ipool_size; + u32 shared_buffer_size; + u32 total_buffer_size; + u32 lossy_epool_size; + int err; + + if (!MLX5_CAP_GEN(mdev, sbcam_reg)) + return 0; + + err = mlx5e_port_query_pool(mdev, &lossy_epool, 0, MLX5_EGRESS_DIR, + MLX5_LOSSY_POOL); + if (err) + return err; + + err = mlx5e_port_query_pool(mdev, &lossless_ipool, 0, MLX5_INGRESS_DIR, + MLX5_LOSSLESS_POOL); + if (err) + return err; + + total_buffer_size = current_headroom_size + lossy_epool.size + + lossless_ipool.size; + shared_buffer_size = total_buffer_size - new_headroom_size; + + if (shared_buffer_size < 4) { + pr_err("Requested port buffer is too large, not enough space left for shared buffer\n"); + return -EINVAL; + } + + /* Total shared buffer size is split in a ratio of 3:1 between + * lossy and lossless pools respectively. + */ + lossy_epool_size = (shared_buffer_size / 4) * 3; + lossless_ipool_size = shared_buffer_size / 4; + + mlx5e_port_set_sbpr(mdev, 0, MLX5_EGRESS_DIR, MLX5_LOSSY_POOL, 0, + lossy_epool_size); + mlx5e_port_set_sbpr(mdev, 0, MLX5_INGRESS_DIR, MLX5_LOSSLESS_POOL, 0, + lossless_ipool_size); + return 0; +} + static int port_set_buffer(struct mlx5e_priv *priv, struct mlx5e_port_buffer *port_buffer) { u16 port_buff_cell_sz = priv->dcbx.port_buff_cell_sz; struct mlx5_core_dev *mdev = priv->mdev; int sz = MLX5_ST_SZ_BYTES(pbmc_reg); + u32 new_headroom_size = 0; + u32 current_headroom_size; void *in; int err; int i; + current_headroom_size = port_buffer->headroom_size; + in = kzalloc(sz, GFP_KERNEL); if (!in) return -ENOMEM; @@ -110,6 +299,7 @@ static int port_set_buffer(struct mlx5e_priv *priv, u64 xoff = port_buffer->buffer[i].xoff; u64 xon = port_buffer->buffer[i].xon; + new_headroom_size += size; do_div(size, port_buff_cell_sz); do_div(xoff, port_buff_cell_sz); do_div(xon, port_buff_cell_sz); @@ -119,6 +309,17 @@ static int port_set_buffer(struct mlx5e_priv *priv, MLX5_SET(bufferx_reg, buffer, xon_threshold, xon); } + new_headroom_size /= port_buff_cell_sz; + current_headroom_size /= port_buff_cell_sz; + err = port_update_shared_buffer(priv->mdev, current_headroom_size, + new_headroom_size); + if (err) + goto out; + + err = port_update_pool_cfg(priv->mdev, port_buffer); + if (err) + goto out; + err = mlx5e_port_set_pbmc(mdev, in); out: kfree(in); @@ -174,6 +375,7 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, /** * update_buffer_lossy - Update buffer configuration based on pfc + * @mdev: port function core device * @max_mtu: netdev's max_mtu * @pfc_en: <input> current pfc configuration * @buffer: <input> current prio to buffer mapping @@ -192,7 +394,8 @@ static int update_xoff_threshold(struct mlx5e_port_buffer *port_buffer, * @return: 0 if no error, * sets change to true if buffer configuration was modified. */ -static int update_buffer_lossy(unsigned int max_mtu, +static int update_buffer_lossy(struct mlx5_core_dev *mdev, + unsigned int max_mtu, u8 pfc_en, u8 *buffer, u32 xoff, u16 port_buff_cell_sz, struct mlx5e_port_buffer *port_buffer, bool *change) @@ -229,6 +432,10 @@ static int update_buffer_lossy(unsigned int max_mtu, } if (changed) { + err = port_update_pool_cfg(mdev, port_buffer); + if (err) + return err; + err = update_xoff_threshold(port_buffer, xoff, max_mtu, port_buff_cell_sz); if (err) return err; @@ -293,23 +500,30 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, } if (change & MLX5E_PORT_BUFFER_PFC) { + mlx5e_dbg(HW, priv, "%s: requested PFC per priority bitmask: 0x%x\n", + __func__, pfc->pfc_en); err = mlx5e_port_query_priority2buffer(priv->mdev, buffer); if (err) return err; - err = update_buffer_lossy(max_mtu, pfc->pfc_en, buffer, xoff, port_buff_cell_sz, - &port_buffer, &update_buffer); + err = update_buffer_lossy(priv->mdev, max_mtu, pfc->pfc_en, buffer, xoff, + port_buff_cell_sz, &port_buffer, + &update_buffer); if (err) return err; } if (change & MLX5E_PORT_BUFFER_PRIO2BUFFER) { update_prio2buffer = true; + for (i = 0; i < MLX5E_MAX_BUFFER; i++) + mlx5e_dbg(HW, priv, "%s: requested to map prio[%d] to buffer %d\n", + __func__, i, prio2buffer[i]); + err = fill_pfc_en(priv->mdev, &curr_pfc_en); if (err) return err; - err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, xoff, + err = update_buffer_lossy(priv->mdev, max_mtu, curr_pfc_en, prio2buffer, xoff, port_buff_cell_sz, &port_buffer, &update_buffer); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h index 80af7a5ac604..a6ef118de758 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.h @@ -60,6 +60,7 @@ struct mlx5e_bufferx_reg { struct mlx5e_port_buffer { u32 port_buffer_size; u32 spare_buffer_size; + u32 headroom_size; struct mlx5e_bufferx_reg buffer[MLX5E_MAX_BUFFER]; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 8469e9c38670..9a1bc93b7dc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -771,8 +771,8 @@ void mlx5e_ptp_activate_channel(struct mlx5e_ptp *c) if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { mlx5e_ptp_rx_set_fs(c->priv); mlx5e_activate_rq(&c->rq); - mlx5e_trigger_napi_sched(&c->napi); } + mlx5e_trigger_napi_sched(&c->napi); } void mlx5e_ptp_deactivate_channel(struct mlx5e_ptp *c) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c index b6f5c1bcdbcd..016a61c52c45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c @@ -120,8 +120,8 @@ int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev, priv = netdev_priv(netdev); rpriv = priv->ppriv; - err = mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, - mdata->metadata_reg_c_0); + err = mlx5_esw_acl_ingress_vport_metadata_update(esw, rpriv->rep->vport, + mdata->metadata_reg_c_0); if (err) goto ingress_err; @@ -167,7 +167,7 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, /* Reset bond_metadata to zero first then reset all ingress/egress * acls and rx rules of unslave representor's vport */ - mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0); + mlx5_esw_acl_ingress_vport_metadata_update(esw, rpriv->rep->vport, 0); mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport); mlx5e_rep_bond_update(priv, false); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index 8099a21e674c..ce85b48d327d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -438,10 +438,6 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, switch (event) { case SWITCHDEV_FDB_ADD_TO_BRIDGE: - /* only handle the event on native eswtich of representor */ - if (!mlx5_esw_bridge_is_local(dev, rep, esw)) - break; - fdb_info = container_of(info, struct switchdev_notifier_fdb_info, info); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index b08339d986d5..e24b46953542 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2020 Mellanox Technologies. */ -#include <net/dst_metadata.h> #include <linux/netdevice.h> #include <linux/if_macvlan.h> #include <linux/list.h> @@ -589,7 +588,7 @@ mlx5e_rep_indr_stats_act(struct mlx5e_rep_priv *rpriv, act = mlx5e_tc_act_get(fl_act->id, ns_type); if (!act || !act->stats_action) - return -EOPNOTSUPP; + return mlx5e_tc_fill_action_stats(priv, fl_act); return act->stats_action(priv, fl_act); } @@ -665,232 +664,54 @@ void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) mlx5e_rep_indr_block_unbind); } -static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, - struct mlx5e_tc_update_priv *tc_priv, - u32 tunnel_id) -{ - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct tunnel_match_enc_opts enc_opts = {}; - struct mlx5_rep_uplink_priv *uplink_priv; - struct mlx5e_rep_priv *uplink_rpriv; - struct metadata_dst *tun_dst; - struct tunnel_match_key key; - u32 tun_id, enc_opts_id; - struct net_device *dev; - int err; - - enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK; - tun_id = tunnel_id >> ENC_OPTS_BITS; - - if (!tun_id) - return true; - - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - uplink_priv = &uplink_rpriv->uplink_priv; - - err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key); - if (err) { - netdev_dbg(priv->netdev, - "Couldn't find tunnel for tun_id: %d, err: %d\n", - tun_id, err); - return false; - } - - if (enc_opts_id) { - err = mapping_find(uplink_priv->tunnel_enc_opts_mapping, - enc_opts_id, &enc_opts); - if (err) { - netdev_dbg(priv->netdev, - "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n", - enc_opts_id, err); - return false; - } - } - - if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst, - key.enc_ip.tos, key.enc_ip.ttl, - key.enc_tp.dst, TUNNEL_KEY, - key32_to_tunnel_id(key.enc_key_id.keyid), - enc_opts.key.len); - } else if (key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst, - key.enc_ip.tos, key.enc_ip.ttl, - key.enc_tp.dst, 0, TUNNEL_KEY, - key32_to_tunnel_id(key.enc_key_id.keyid), - enc_opts.key.len); - } else { - netdev_dbg(priv->netdev, - "Couldn't restore tunnel, unsupported addr_type: %d\n", - key.enc_control.addr_type); - return false; - } - - if (!tun_dst) { - netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n"); - return false; - } - - tun_dst->u.tun_info.key.tp_src = key.enc_tp.src; - - if (enc_opts.key.len) - ip_tunnel_info_opts_set(&tun_dst->u.tun_info, - enc_opts.key.data, - enc_opts.key.len, - enc_opts.key.dst_opt_type); - - skb_dst_set(skb, (struct dst_entry *)tun_dst); - dev = dev_get_by_index(&init_net, key.filter_ifindex); - if (!dev) { - netdev_dbg(priv->netdev, - "Couldn't find tunnel device with ifindex: %d\n", - key.filter_ifindex); - return false; - } - - /* Set fwd_dev so we do dev_put() after datapath */ - tc_priv->fwd_dev = dev; - - skb->dev = dev; - - return true; -} - -static bool mlx5e_restore_skb_chain(struct sk_buff *skb, u32 chain, u32 reg_c1, - struct mlx5e_tc_update_priv *tc_priv) -{ - struct mlx5e_priv *priv = netdev_priv(skb->dev); - u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK; - -#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - if (chain) { - struct mlx5_rep_uplink_priv *uplink_priv; - struct mlx5e_rep_priv *uplink_rpriv; - struct tc_skb_ext *tc_skb_ext; - struct mlx5_eswitch *esw; - u32 zone_restore_id; - - tc_skb_ext = tc_skb_ext_alloc(skb); - if (!tc_skb_ext) { - WARN_ON(1); - return false; - } - tc_skb_ext->chain = chain; - zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK; - esw = priv->mdev->priv.eswitch; - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - uplink_priv = &uplink_rpriv->uplink_priv; - if (!mlx5e_tc_ct_restore_flow(uplink_priv->ct_priv, skb, - zone_restore_id)) - return false; - } -#endif /* CONFIG_NET_TC_SKB_EXT */ - - return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); -} - -static void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) -{ - if (tc_priv->fwd_dev) - dev_put(tc_priv->fwd_dev); -} - -static void mlx5e_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb, - struct mlx5_mapped_obj *mapped_obj, - struct mlx5e_tc_update_priv *tc_priv) -{ - if (!mlx5e_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) { - netdev_dbg(priv->netdev, - "Failed to restore tunnel info for sampled packet\n"); - return; - } - mlx5e_tc_sample_skb(skb, mapped_obj); - mlx5_rep_tc_post_napi_receive(tc_priv); -} - -static bool mlx5e_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb, - struct mlx5_mapped_obj *mapped_obj, - struct mlx5e_tc_update_priv *tc_priv, - bool *forward_tx, - u32 reg_c1) -{ - u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_rep_uplink_priv *uplink_priv; - struct mlx5e_rep_priv *uplink_rpriv; - - /* Tunnel restore takes precedence over int port restore */ - if (tunnel_id) - return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id); - - uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); - uplink_priv = &uplink_rpriv->uplink_priv; - - if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb, - mapped_obj->int_port_metadata, forward_tx)) { - /* Set fwd_dev for future dev_put */ - tc_priv->fwd_dev = skb->dev; - - return true; - } - - return false; -} - void mlx5e_rep_tc_receive(struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, struct sk_buff *skb) { - u32 reg_c1 = be32_to_cpu(cqe->ft_metadata); + u32 reg_c0, reg_c1, zone_restore_id, tunnel_id; struct mlx5e_tc_update_priv tc_priv = {}; - struct mlx5_mapped_obj mapped_obj; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct mlx5_tc_ct_priv *ct_priv; + struct mapping_ctx *mapping_ctx; struct mlx5_eswitch *esw; - bool forward_tx = false; struct mlx5e_priv *priv; - u32 reg_c0; - int err; reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK); if (!reg_c0 || reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG) goto forward; - /* If reg_c0 is not equal to the default flow tag then skb->mark + /* If mapped_obj_id is not equal to the default flow tag then skb->mark * is not supported and must be reset back to 0. */ skb->mark = 0; priv = netdev_priv(skb->dev); esw = priv->mdev->priv.eswitch; - err = mapping_find(esw->offloads.reg_c0_obj_pool, reg_c0, &mapped_obj); - if (err) { - netdev_dbg(priv->netdev, - "Couldn't find mapped object for reg_c0: %d, err: %d\n", - reg_c0, err); - goto free_skb; - } + mapping_ctx = esw->offloads.reg_c0_obj_pool; + reg_c1 = be32_to_cpu(cqe->ft_metadata); + zone_restore_id = reg_c1 & ESW_ZONE_ID_MASK; + tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK; - if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) { - if (!mlx5e_restore_skb_chain(skb, mapped_obj.chain, reg_c1, &tc_priv) && - !mlx5_ipsec_is_rx_flow(cqe)) - goto free_skb; - } else if (mapped_obj.type == MLX5_MAPPED_OBJ_SAMPLE) { - mlx5e_restore_skb_sample(priv, skb, &mapped_obj, &tc_priv); - goto free_skb; - } else if (mapped_obj.type == MLX5_MAPPED_OBJ_INT_PORT_METADATA) { - if (!mlx5e_restore_skb_int_port(priv, skb, &mapped_obj, &tc_priv, - &forward_tx, reg_c1)) - goto free_skb; - } else { - netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type); + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + ct_priv = uplink_priv->ct_priv; + + if (!mlx5_ipsec_is_rx_flow(cqe) && + !mlx5e_tc_update_skb(cqe, skb, mapping_ctx, reg_c0, ct_priv, zone_restore_id, tunnel_id, + &tc_priv)) goto free_skb; - } forward: - if (forward_tx) + if (tc_priv.skb_done) + goto free_skb; + + if (tc_priv.forward_tx) dev_queue_xmit(skb); else napi_gro_receive(rq->cq.napi, skb); - mlx5_rep_tc_post_napi_receive(&tc_priv); + if (tc_priv.fwd_dev) + dev_put(tc_priv.fwd_dev); return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 1ae15b8536a8..c462fe76495b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -736,10 +736,10 @@ static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { void mlx5e_reporter_rx_create(struct mlx5e_priv *priv) { - struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); struct devlink_health_reporter *reporter; - reporter = devlink_port_health_reporter_create(dl_port, &mlx5_rx_reporter_ops, + reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port, + &mlx5_rx_reporter_ops, MLX5E_REPORTER_RX_GRACEFUL_PERIOD, priv); if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n", @@ -754,6 +754,6 @@ void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv) if (!priv->rx_reporter) return; - devlink_port_health_reporter_destroy(priv->rx_reporter); + devlink_health_reporter_destroy(priv->rx_reporter); priv->rx_reporter = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 60bc5b577ab9..34666e2b3871 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -81,6 +81,10 @@ static int mlx5e_tx_reporter_err_cqe_recover(void *ctx) sq->stats->recover++; clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); mlx5e_activate_txqsq(sq); + if (sq->channel) + mlx5e_trigger_napi_icosq(sq->channel); + else + mlx5e_trigger_napi_sched(sq->cq.napi); return 0; out: @@ -590,10 +594,10 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { void mlx5e_reporter_tx_create(struct mlx5e_priv *priv) { - struct devlink_port *dl_port = mlx5e_devlink_get_dl_port(priv); struct devlink_health_reporter *reporter; - reporter = devlink_port_health_reporter_create(dl_port, &mlx5_tx_reporter_ops, + reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port, + &mlx5_tx_reporter_ops, MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv); if (IS_ERR(reporter)) { netdev_warn(priv->netdev, @@ -609,6 +613,6 @@ void mlx5e_reporter_tx_destroy(struct mlx5e_priv *priv) if (!priv->tx_reporter) return; - devlink_port_health_reporter_destroy(priv->tx_reporter); + devlink_health_reporter_destroy(priv->tx_reporter); priv->tx_reporter = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c index 78c427b38048..07cc65596f89 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -216,7 +216,6 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, struct net_device *uplink_dev; struct mlx5e_priv *out_priv; struct mlx5_eswitch *esw; - bool is_uplink_rep; int *ifindexes; int if_count; int err; @@ -231,10 +230,9 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, parse_state->ifindexes[if_count] = out_dev->ifindex; parse_state->if_count++; - is_uplink_rep = mlx5e_eswitch_uplink_rep(out_dev); - err = mlx5_lag_do_mirred(priv->mdev, out_dev); - if (err) - return err; + + if (mlx5_lag_mpesw_do_mirred(priv->mdev, out_dev, extack)) + return -EOPNOTSUPP; out_dev = get_fdb_out_dev(uplink_dev, out_dev); if (!out_dev) @@ -275,13 +273,6 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, esw_attr->dests[esw_attr->out_count].rep = rpriv->rep; esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev; - /* If output device is bond master then rules are not explicit - * so we don't attempt to count them. - */ - if (is_uplink_rep && MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) && - MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up)) - attr->lag.count = true; - esw_attr->out_count++; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c index b86ac604d0c2..2e0d88b513aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/vlan.c @@ -44,19 +44,17 @@ parse_tc_vlan_action(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, vlan_idx)) { + NL_SET_ERR_MSG_MOD(extack, "firmware vlan actions is not supported"); + return -EOPNOTSUPP; + } + switch (act->id) { case FLOW_ACTION_VLAN_POP: - if (vlan_idx) { - if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, - MLX5_FS_VLAN_DEPTH)) { - NL_SET_ERR_MSG_MOD(extack, "vlan pop action is not supported"); - return -EOPNOTSUPP; - } - + if (vlan_idx) *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP_2; - } else { + else *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; - } break; case FLOW_ACTION_VLAN_PUSH: attr->vlan_vid[vlan_idx] = act->vlan.vid; @@ -65,25 +63,10 @@ parse_tc_vlan_action(struct mlx5e_priv *priv, if (!attr->vlan_proto[vlan_idx]) attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q); - if (vlan_idx) { - if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, - MLX5_FS_VLAN_DEPTH)) { - NL_SET_ERR_MSG_MOD(extack, - "vlan push action is not supported for vlan depth > 1"); - return -EOPNOTSUPP; - } - + if (vlan_idx) *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; - } else { - if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) && - (act->vlan.proto != htons(ETH_P_8021Q) || - act->vlan.prio)) { - NL_SET_ERR_MSG_MOD(extack, "vlan push action is not supported"); - return -EOPNOTSUPP; - } - + else *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; - } break; case FLOW_ACTION_VLAN_POP_ETH: parse_state->eth_pop = true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c new file mode 100644 index 000000000000..f71766dca660 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include <linux/rhashtable.h> +#include <net/flow_offload.h> +#include "en/tc_priv.h" +#include "act_stats.h" +#include "en/fs.h" + +struct mlx5e_tc_act_stats_handle { + struct rhashtable ht; + spinlock_t ht_lock; /* protects hashtable */ +}; + +struct mlx5e_tc_act_stats { + unsigned long tc_act_cookie; + + struct mlx5_fc *counter; + u64 lastpackets; + u64 lastbytes; + + struct rhash_head hash; + struct rcu_head rcu_head; +}; + +static const struct rhashtable_params act_counters_ht_params = { + .head_offset = offsetof(struct mlx5e_tc_act_stats, hash), + .key_offset = 0, + .key_len = offsetof(struct mlx5e_tc_act_stats, counter), + .automatic_shrinking = true, +}; + +struct mlx5e_tc_act_stats_handle * +mlx5e_tc_act_stats_create(void) +{ + struct mlx5e_tc_act_stats_handle *handle; + int err; + + handle = kvzalloc(sizeof(*handle), GFP_KERNEL); + if (IS_ERR(handle)) + return ERR_PTR(-ENOMEM); + + err = rhashtable_init(&handle->ht, &act_counters_ht_params); + if (err) + goto err; + + spin_lock_init(&handle->ht_lock); + return handle; +err: + kvfree(handle); + return ERR_PTR(err); +} + +void mlx5e_tc_act_stats_free(struct mlx5e_tc_act_stats_handle *handle) +{ + rhashtable_destroy(&handle->ht); + kvfree(handle); +} + +static int +mlx5e_tc_act_stats_add(struct mlx5e_tc_act_stats_handle *handle, + unsigned long act_cookie, + struct mlx5_fc *counter) +{ + struct mlx5e_tc_act_stats *act_stats, *old_act_stats; + struct rhashtable *ht = &handle->ht; + int err = 0; + + act_stats = kvzalloc(sizeof(*act_stats), GFP_KERNEL); + if (!act_stats) + return -ENOMEM; + + act_stats->tc_act_cookie = act_cookie; + act_stats->counter = counter; + + rcu_read_lock(); + old_act_stats = rhashtable_lookup_get_insert_fast(ht, + &act_stats->hash, + act_counters_ht_params); + if (IS_ERR(old_act_stats)) { + err = PTR_ERR(old_act_stats); + goto err_hash_insert; + } else if (old_act_stats) { + err = -EEXIST; + goto err_hash_insert; + } + rcu_read_unlock(); + + return 0; + +err_hash_insert: + rcu_read_unlock(); + kvfree(act_stats); + return err; +} + +void +mlx5e_tc_act_stats_del_flow(struct mlx5e_tc_act_stats_handle *handle, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_flow_attr *attr; + struct mlx5e_tc_act_stats *act_stats; + int i; + + if (!flow_flag_test(flow, USE_ACT_STATS)) + return; + + list_for_each_entry(attr, &flow->attrs, list) { + for (i = 0; i < attr->tc_act_cookies_count; i++) { + struct rhashtable *ht = &handle->ht; + + spin_lock(&handle->ht_lock); + act_stats = rhashtable_lookup_fast(ht, + &attr->tc_act_cookies[i], + act_counters_ht_params); + if (act_stats && + rhashtable_remove_fast(ht, &act_stats->hash, + act_counters_ht_params) == 0) + kvfree_rcu(act_stats, rcu_head); + + spin_unlock(&handle->ht_lock); + } + } +} + +int +mlx5e_tc_act_stats_add_flow(struct mlx5e_tc_act_stats_handle *handle, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_fc *curr_counter = NULL; + unsigned long last_cookie = 0; + struct mlx5_flow_attr *attr; + int err; + int i; + + if (!flow_flag_test(flow, USE_ACT_STATS)) + return 0; + + list_for_each_entry(attr, &flow->attrs, list) { + if (attr->counter) + curr_counter = attr->counter; + + for (i = 0; i < attr->tc_act_cookies_count; i++) { + /* jump over identical ids (e.g. pedit)*/ + if (last_cookie == attr->tc_act_cookies[i]) + continue; + + err = mlx5e_tc_act_stats_add(handle, attr->tc_act_cookies[i], curr_counter); + if (err) + goto out_err; + last_cookie = attr->tc_act_cookies[i]; + } + } + + return 0; +out_err: + mlx5e_tc_act_stats_del_flow(handle, flow); + return err; +} + +int +mlx5e_tc_act_stats_fill_stats(struct mlx5e_tc_act_stats_handle *handle, + struct flow_offload_action *fl_act) +{ + struct rhashtable *ht = &handle->ht; + struct mlx5e_tc_act_stats *item; + struct mlx5e_tc_act_stats key; + u64 pkts, bytes, lastused; + int err = 0; + + key.tc_act_cookie = fl_act->cookie; + + rcu_read_lock(); + item = rhashtable_lookup(ht, &key, act_counters_ht_params); + if (!item) { + rcu_read_unlock(); + err = -ENOENT; + goto err_out; + } + + mlx5_fc_query_cached_raw(item->counter, + &bytes, &pkts, &lastused); + + flow_stats_update(&fl_act->stats, + bytes - item->lastbytes, + pkts - item->lastpackets, + 0, lastused, FLOW_ACTION_HW_STATS_DELAYED); + + item->lastpackets = pkts; + item->lastbytes = bytes; + rcu_read_unlock(); + + return 0; + +err_out: + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.h new file mode 100644 index 000000000000..002292c2567c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_EN_ACT_STATS_H__ +#define __MLX5_EN_ACT_STATS_H__ + +#include <net/flow_offload.h> +#include "en/tc_priv.h" + +struct mlx5e_tc_act_stats_handle; + +struct mlx5e_tc_act_stats_handle *mlx5e_tc_act_stats_create(void); +void mlx5e_tc_act_stats_free(struct mlx5e_tc_act_stats_handle *handle); + +int +mlx5e_tc_act_stats_add_flow(struct mlx5e_tc_act_stats_handle *handle, + struct mlx5e_tc_flow *flow); + +void +mlx5e_tc_act_stats_del_flow(struct mlx5e_tc_act_stats_handle *handle, + struct mlx5e_tc_flow *flow); + +int +mlx5e_tc_act_stats_fill_stats(struct mlx5e_tc_act_stats_handle *handle, + struct flow_offload_action *fl_act); + +#endif /* __MLX5_EN_ACT_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c index 78af8a3175bf..8218c892b161 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c @@ -28,7 +28,7 @@ struct mlx5e_flow_meter_aso_obj { int base_id; int total_meters; - unsigned long meters_map[0]; /* must be at the end of this struct */ + unsigned long meters_map[]; /* must be at the end of this struct */ }; struct mlx5e_flow_meters { @@ -204,13 +204,15 @@ mlx5e_flow_meter_create_aso_obj(struct mlx5e_flow_meters *flow_meters, int *obj_ u32 in[MLX5_ST_SZ_DW(create_flow_meter_aso_obj_in)] = {}; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; struct mlx5_core_dev *mdev = flow_meters->mdev; - void *obj; + void *obj, *param; int err; MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO); - MLX5_SET(general_obj_in_cmd_hdr, in, log_obj_range, flow_meters->log_granularity); + param = MLX5_ADDR_OF(general_obj_in_cmd_hdr, in, op_param); + MLX5_SET(general_obj_create_param, param, log_obj_range, + flow_meters->log_granularity); obj = MLX5_ADDR_OF(create_flow_meter_aso_obj_in, in, flow_meter_aso_obj); MLX5_SET(flow_meter_aso_obj, obj, meter_aso_access_pd, flow_meters->pdn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c index f2c2c752bd1c..558a776359af 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/sample.c @@ -237,7 +237,7 @@ sample_modify_hdr_get(struct mlx5_core_dev *mdev, u32 obj_id, int err; err = mlx5e_tc_match_to_reg_set(mdev, mod_acts, MLX5_FLOW_NAMESPACE_FDB, - CHAIN_TO_REG, obj_id); + MAPPED_OBJ_TO_REG, obj_id); if (err) goto err_set_regc0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 313df8232db7..314983bc6f08 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -35,6 +35,7 @@ #define MLX5_CT_STATE_REPLY_BIT BIT(4) #define MLX5_CT_STATE_RELATED_BIT BIT(5) #define MLX5_CT_STATE_INVALID_BIT BIT(6) +#define MLX5_CT_STATE_NEW_BIT BIT(7) #define MLX5_CT_LABELS_BITS MLX5_REG_MAPPING_MBITS(LABELS_TO_REG) #define MLX5_CT_LABELS_MASK MLX5_REG_MAPPING_MASK(LABELS_TO_REG) @@ -59,6 +60,7 @@ struct mlx5_tc_ct_debugfs { struct mlx5_tc_ct_priv { struct mlx5_core_dev *dev; + struct mlx5e_priv *priv; const struct net_device *netdev; struct mod_hdr_tbl *mod_hdr_tbl; struct xarray tuple_ids; @@ -85,7 +87,6 @@ struct mlx5_ct_flow { struct mlx5_flow_attr *pre_ct_attr; struct mlx5_flow_handle *pre_ct_rule; struct mlx5_ct_ft *ft; - u32 chain_mapping; }; struct mlx5_ct_zone_rule { @@ -721,12 +722,14 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS); DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr); struct flow_action_entry *meta; + enum ip_conntrack_info ctinfo; u16 ct_state = 0; int err; meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule); if (!meta) return -EOPNOTSUPP; + ctinfo = meta->ct_metadata.cookie & NFCT_INFOMASK; err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels, &attr->ct_attr.ct_labels_id); @@ -742,7 +745,8 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv, ct_state |= MLX5_CT_STATE_NAT_BIT; } - ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT; + ct_state |= MLX5_CT_STATE_TRK_BIT; + ct_state |= ctinfo == IP_CT_NEW ? MLX5_CT_STATE_NEW_BIT : MLX5_CT_STATE_ESTABLISHED_BIT; ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT; err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts, ct_state, @@ -871,6 +875,68 @@ err_attr: return err; } +static int +mlx5_tc_ct_entry_replace_rule(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry, + bool nat, u8 zone_restore_id) +{ + struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat]; + struct mlx5_flow_attr *attr = zone_rule->attr, *old_attr; + struct mlx5e_mod_hdr_handle *mh; + struct mlx5_ct_fs_rule *rule; + struct mlx5_flow_spec *spec; + int err; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + old_attr = mlx5_alloc_flow_attr(ct_priv->ns_type); + if (!old_attr) { + err = -ENOMEM; + goto err_attr; + } + *old_attr = *attr; + + err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule, &mh, zone_restore_id, + nat, mlx5_tc_ct_entry_has_nat(entry)); + if (err) { + ct_dbg("Failed to create ct entry mod hdr"); + goto err_mod_hdr; + } + + mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule); + mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK); + + rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + ct_dbg("Failed to add replacement ct entry rule, nat: %d", nat); + goto err_rule; + } + + ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule); + zone_rule->rule = rule; + mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, old_attr, zone_rule->mh); + zone_rule->mh = mh; + + kfree(old_attr); + kvfree(spec); + ct_dbg("Replaced ct entry rule in zone %d", entry->tuple.zone); + + return 0; + +err_rule: + mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, mh); + mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); +err_mod_hdr: + kfree(old_attr); +err_attr: + kvfree(spec); + return err; +} + static bool mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry) { @@ -1066,6 +1132,52 @@ err_orig: } static int +mlx5_tc_ct_entry_replace_rules(struct mlx5_tc_ct_priv *ct_priv, + struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry, + u8 zone_restore_id) +{ + int err; + + err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, false, + zone_restore_id); + if (err) + return err; + + err = mlx5_tc_ct_entry_replace_rule(ct_priv, flow_rule, entry, true, + zone_restore_id); + if (err) + mlx5_tc_ct_entry_del_rule(ct_priv, entry, false); + return err; +} + +static int +mlx5_tc_ct_block_flow_offload_replace(struct mlx5_ct_ft *ft, struct flow_rule *flow_rule, + struct mlx5_ct_entry *entry, unsigned long cookie) +{ + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; + int err; + + err = mlx5_tc_ct_entry_replace_rules(ct_priv, flow_rule, entry, ft->zone_restore_id); + if (!err) + return 0; + + /* If failed to update the entry, then look it up again under ht_lock + * protection and properly delete it. + */ + spin_lock_bh(&ct_priv->ht_lock); + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); + if (entry) { + rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params); + spin_unlock_bh(&ct_priv->ht_lock); + mlx5_tc_ct_entry_put(entry); + } else { + spin_unlock_bh(&ct_priv->ht_lock); + } + return err; +} + +static int mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, struct flow_cls_offload *flow) { @@ -1083,9 +1195,17 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, spin_lock_bh(&ct_priv->ht_lock); entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); if (entry && refcount_inc_not_zero(&entry->refcnt)) { + if (entry->restore_cookie == meta_action->ct_metadata.cookie) { + spin_unlock_bh(&ct_priv->ht_lock); + mlx5_tc_ct_entry_put(entry); + return -EEXIST; + } + entry->restore_cookie = meta_action->ct_metadata.cookie; spin_unlock_bh(&ct_priv->ht_lock); + + err = mlx5_tc_ct_block_flow_offload_replace(ft, flow_rule, entry, cookie); mlx5_tc_ct_entry_put(entry); - return -EEXIST; + return err; } spin_unlock_bh(&ct_priv->ht_lock); @@ -1323,7 +1443,7 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr, struct netlink_ext_ack *extack) { - bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv; + bool trk, est, untrk, unnew, unest, new, rpl, unrpl, rel, unrel, inv, uninv; struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector_key_ct *mask, *key; u32 ctstate = 0, ctstate_mask = 0; @@ -1369,15 +1489,18 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED; inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID; untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED; + unnew = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_NEW; unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED; unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY; unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED; uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID; ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0; + ctstate |= new ? MLX5_CT_STATE_NEW_BIT : 0; ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0; ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0; + ctstate_mask |= (unnew || new) ? MLX5_CT_STATE_NEW_BIT : 0; ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0; ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0; ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0; @@ -1395,12 +1518,6 @@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv, return -EOPNOTSUPP; } - if (new) { - NL_SET_ERR_MSG_MOD(extack, - "matching on ct_state +new isn't supported"); - return -EOPNOTSUPP; - } - if (mask->ct_zone) mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, key->ct_zone, MLX5_CT_ZONE_MASK); @@ -1441,6 +1558,7 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, attr->ct_attr.zone = act->ct.zone; attr->ct_attr.ct_action = act->ct.action; attr->ct_attr.nf_ft = act->ct.flow_table; + attr->ct_attr.act_miss_cookie = act->miss_cookie; return 0; } @@ -1778,7 +1896,7 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) * + ft prio (tc chain) + * + original match + * +---------------------+ - * | set chain miss mapping + * | set act_miss_cookie mapping * | set fte_id * | set tunnel_id * | do decap @@ -1823,7 +1941,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_flow_attr *pre_ct_attr; struct mlx5_modify_hdr *mod_hdr; struct mlx5_ct_flow *ct_flow; - int chain_mapping = 0, err; + int act_miss_mapping = 0, err; struct mlx5_ct_ft *ft; u16 zone; @@ -1858,22 +1976,18 @@ __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv, pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - /* Write chain miss tag for miss in ct table as we - * don't go though all prios of this chain as normal tc rules - * miss. - */ - err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain, - &chain_mapping); + err = mlx5e_tc_action_miss_mapping_get(ct_priv->priv, attr, attr->ct_attr.act_miss_cookie, + &act_miss_mapping); if (err) { - ct_dbg("Failed to get chain register mapping for chain"); - goto err_get_chain; + ct_dbg("Failed to get register mapping for act miss"); + goto err_get_act_miss; } - ct_flow->chain_mapping = chain_mapping; + attr->ct_attr.act_miss_mapping = act_miss_mapping; err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type, - CHAIN_TO_REG, chain_mapping); + MAPPED_OBJ_TO_REG, act_miss_mapping); if (err) { - ct_dbg("Failed to set chain register mapping"); + ct_dbg("Failed to set act miss register mapping"); goto err_mapping; } @@ -1937,8 +2051,8 @@ err_insert_orig: mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); err_mapping: mlx5e_mod_hdr_dealloc(pre_mod_acts); - mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); -err_get_chain: + mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, act_miss_mapping); +err_get_act_miss: kfree(ct_flow->pre_ct_attr); err_alloc_pre: mlx5_tc_ct_del_ft_cb(ct_priv, ft); @@ -1977,7 +2091,7 @@ __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv, mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr); mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr); - mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping); + mlx5e_tc_action_miss_mapping_put(ct_priv->priv, attr, attr->ct_attr.act_miss_mapping); mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft); kfree(ct_flow->pre_ct_attr); @@ -2074,13 +2188,6 @@ mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv, const char *err_msg = NULL; int err = 0; -#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - /* cannot restore chain ID on HW miss */ - - err_msg = "tc skb extension missing"; - err = -EOPNOTSUPP; - goto out_err; -#endif if (IS_ERR_OR_NULL(post_act)) { /* Ignore_flow_level support isn't supported by default for VFs and so post_act * won't be supported. Skip showing error msg. @@ -2157,6 +2264,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, } spin_lock_init(&ct_priv->ht_lock); + ct_priv->priv = priv; ct_priv->ns_type = ns_type; ct_priv->chains = chains; ct_priv->netdev = priv->netdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 5bbd6b92840f..5c5ddaa83055 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -28,6 +28,8 @@ struct mlx5_ct_attr { struct mlx5_ct_flow *ct_flow; struct nf_flowtable *nf_ft; u32 ct_labels_id; + u32 act_miss_mapping; + u64 act_miss_cookie; }; #define zone_to_reg_ct {\ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index 2b7fd1c0e643..451fd4342a5a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -30,6 +30,7 @@ enum { MLX5E_TC_FLOW_FLAG_TUN_RX = MLX5E_TC_FLOW_BASE + 9, MLX5E_TC_FLOW_FLAG_FAILED = MLX5E_TC_FLOW_BASE + 10, MLX5E_TC_FLOW_FLAG_SAMPLE = MLX5E_TC_FLOW_BASE + 11, + MLX5E_TC_FLOW_FLAG_USE_ACT_STATS = MLX5E_TC_FLOW_BASE + 12, }; struct mlx5e_tc_flow_parse_attr { @@ -95,8 +96,6 @@ struct mlx5e_tc_flow { */ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; struct mlx5e_tc_flow *peer_flow; - struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */ - struct mlx5e_mod_hdr_handle *slow_mh; /* attached mod header instance for slow path */ struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */ struct list_head hairpin; /* flows sharing the same hairpin */ struct list_head peer; /* flows with peer flow */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index e6f64d890fb3..00a04fdd756f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -93,11 +93,11 @@ static int get_route_and_out_devs(struct mlx5e_priv *priv, else return -EOPNOTSUPP; - if (!(mlx5e_eswitch_rep(*out_dev) && - mlx5e_is_uplink_rep(netdev_priv(*out_dev)))) + if (!mlx5e_eswitch_uplink_rep(*out_dev)) return -EOPNOTSUPP; - if (mlx5e_eswitch_uplink_rep(priv->netdev) && *out_dev != priv->netdev) + if (mlx5e_eswitch_uplink_rep(priv->netdev) && *out_dev != priv->netdev && + !mlx5_lag_is_mpesw(priv->mdev)) return -EOPNOTSUPP; return 0; @@ -745,8 +745,6 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, if (err) goto out; - esw_attr->rx_tun_attr->vni = MLX5_GET(fte_match_param, spec->match_value, - misc_parameters.vxlan_vni); esw_attr->rx_tun_attr->decap_vport = vport_num; } else if (netif_is_ovs_master(attr.route_dev) && mlx5e_tc_int_port_supported(esw)) { int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 2aaf8ab857b8..780224fd67a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -1349,7 +1349,8 @@ static void mlx5e_invalidate_encap(struct mlx5e_priv *priv, mlx5e_tc_unoffload_from_slow_path(esw, flow); else mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr); - mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr); + + mlx5e_tc_detach_mod_hdr(priv, flow, attr); attr->modify_hdr = NULL; esw_attr->dests[flow->tmp_entry_index].flags &= @@ -1405,7 +1406,7 @@ static void mlx5e_reoffload_encap(struct mlx5e_priv *priv, continue; } - err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr); + err = mlx5e_tc_attach_mod_hdr(priv, flow, attr); if (err) { mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 853f312cd757..c067d2efab51 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -73,6 +73,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); void mlx5e_free_rx_descs(struct mlx5e_rq *rq); void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq); +static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) +{ + return config->rx_filter == HWTSTAMP_FILTER_ALL; +} + /* TX */ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); @@ -315,7 +320,6 @@ mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) } } -void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more); void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq); static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session, u8 max_sq_mpw_wqebbs) @@ -445,7 +449,7 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, static inline u16 mlx5e_stop_room_for_wqe(struct mlx5_core_dev *mdev, u16 wqe_size) { - WARN_ON_ONCE(PAGE_SIZE / MLX5_SEND_WQE_BB < mlx5e_get_max_sq_wqebbs(mdev)); + WARN_ON_ONCE(PAGE_SIZE / MLX5_SEND_WQE_BB < (u16)mlx5e_get_max_sq_wqebbs(mdev)); /* A WQE must not cross the page boundary, hence two conditions: * 1. Its size must not exceed the page size. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 20507ef2f956..bcd6370de440 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -57,8 +57,9 @@ int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) static inline bool mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, - struct page *page, struct xdp_buff *xdp) + struct xdp_buff *xdp) { + struct page *page = virt_to_page(xdp->data); struct skb_shared_info *sinfo = NULL; struct mlx5e_xmit_data xdptxd; struct mlx5e_xdp_info xdpi; @@ -156,10 +157,39 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq, return true; } +static int mlx5e_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) +{ + const struct mlx5e_xdp_buff *_ctx = (void *)ctx; + + if (unlikely(!mlx5e_rx_hw_stamp(_ctx->rq->tstamp))) + return -EOPNOTSUPP; + + *timestamp = mlx5e_cqe_ts_to_ns(_ctx->rq->ptp_cyc2time, + _ctx->rq->clock, get_cqe_ts(_ctx->cqe)); + return 0; +} + +static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash) +{ + const struct mlx5e_xdp_buff *_ctx = (void *)ctx; + + if (unlikely(!(_ctx->xdp.rxq->dev->features & NETIF_F_RXHASH))) + return -EOPNOTSUPP; + + *hash = be32_to_cpu(_ctx->cqe->rss_hash_result); + return 0; +} + +const struct xdp_metadata_ops mlx5e_xdp_metadata_ops = { + .xmo_rx_timestamp = mlx5e_xdp_rx_timestamp, + .xmo_rx_hash = mlx5e_xdp_rx_hash, +}; + /* returns true if packet was consumed by xdp */ -bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page, - struct bpf_prog *prog, struct xdp_buff *xdp) +bool mlx5e_xdp_handle(struct mlx5e_rq *rq, + struct bpf_prog *prog, struct mlx5e_xdp_buff *mxbuf) { + struct xdp_buff *xdp = &mxbuf->xdp; u32 act; int err; @@ -168,7 +198,7 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page, case XDP_PASS: return false; case XDP_TX: - if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, page, xdp))) + if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, xdp))) goto xdp_abort; __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */ return true; @@ -180,7 +210,7 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page, __set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); __set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags); if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL) - mlx5e_page_dma_unmap(rq, page); + mlx5e_page_dma_unmap(rq, virt_to_page(xdp->data)); rq->stats->xdp_redirect++; return true; default: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index bc2d9034af5b..10bcfa6f88c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -44,10 +44,16 @@ (MLX5E_XDP_INLINE_WQE_MAX_DS_CNT * MLX5_SEND_WQE_DS - \ sizeof(struct mlx5_wqe_inline_seg)) +struct mlx5e_xdp_buff { + struct xdp_buff xdp; + struct mlx5_cqe64 *cqe; + struct mlx5e_rq *rq; +}; + struct mlx5e_xsk_param; int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); -bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page, - struct bpf_prog *prog, struct xdp_buff *xdp); +bool mlx5e_xdp_handle(struct mlx5e_rq *rq, + struct bpf_prog *prog, struct mlx5e_xdp_buff *mlctx); void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq); bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); @@ -56,6 +62,8 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq); int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags); +extern const struct xdp_metadata_ops mlx5e_xdp_metadata_ops; + INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, struct skb_shared_info *sinfo, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c index c91b54d9ff27..fab787600459 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c @@ -8,6 +8,14 @@ /* RX data path */ +static struct mlx5e_xdp_buff *xsk_buff_to_mxbuf(struct xdp_buff *xdp) +{ + /* mlx5e_xdp_buff shares its layout with xdp_buff_xsk + * and private mlx5e_xdp_buff fields fall into xdp_buff_xsk->cb + */ + return (struct mlx5e_xdp_buff *)xdp; +} + int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix); @@ -22,6 +30,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) goto err; BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk)); + XSK_CHECK_PRIV_TYPE(struct mlx5e_xdp_buff); batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units, rq->mpwqe.pages_per_wqe); @@ -43,25 +52,30 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) { for (i = 0; i < batch; i++) { + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk); dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); umr_wqe->inline_mtts[i] = (struct mlx5_mtt) { .ptag = cpu_to_be64(addr | MLX5_EN_WR), }; + mxbuf->rq = rq; } } else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) { for (i = 0; i < batch; i++) { + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk); dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); umr_wqe->inline_ksms[i] = (struct mlx5_ksm) { .key = rq->mkey_be, .va = cpu_to_be64(addr), }; + mxbuf->rq = rq; } } else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) { u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2); for (i = 0; i < batch; i++) { + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk); dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) { @@ -80,6 +94,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) .key = rq->mkey_be, .va = cpu_to_be64(rq->wqe_overflow.addr), }; + mxbuf->rq = rq; } } else { __be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) - @@ -87,6 +102,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) __be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size); for (i = 0; i < batch; i++) { + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk); dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk); umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) { @@ -99,6 +115,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) .va = cpu_to_be64(rq->wqe_overflow.addr), .bcount = pad_size, }; + mxbuf->rq = rq; } } @@ -229,11 +246,12 @@ static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_b struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, u32 page_idx) { - struct xdp_buff *xdp = wi->alloc_units[page_idx].xsk; + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[page_idx].xsk); struct bpf_prog *prog; /* Check packet size. Note LRO doesn't use linear SKB */ @@ -249,9 +267,11 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, */ WARN_ON_ONCE(head_offset); - xsk_buff_set_size(xdp, cqe_bcnt); - xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool); - net_prefetch(xdp->data); + /* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */ + mxbuf->cqe = cqe; + xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt); + xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool); + net_prefetch(mxbuf->xdp.data); /* Possible flows: * - XDP_REDIRECT to XSKMAP: @@ -269,7 +289,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, */ prog = rcu_dereference(rq->xdp_prog); - if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp))) { + if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) { if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))) __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ return NULL; /* page/packet was consumed by XDP */ @@ -278,14 +298,15 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, /* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the * frame. On SKB allocation failure, NULL is returned. */ - return mlx5e_xsk_construct_skb(rq, xdp); + return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp); } struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, + struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { - struct xdp_buff *xdp = wi->au->xsk; + struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->au->xsk); struct bpf_prog *prog; /* wi->offset is not used in this function, because xdp->data and the @@ -295,17 +316,19 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, */ WARN_ON_ONCE(wi->offset); - xsk_buff_set_size(xdp, cqe_bcnt); - xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool); - net_prefetch(xdp->data); + /* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */ + mxbuf->cqe = cqe; + xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt); + xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool); + net_prefetch(mxbuf->xdp.data); prog = rcu_dereference(rq->xdp_prog); - if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp))) + if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) return NULL; /* page/packet was consumed by XDP */ /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse * will be handled by mlx5e_free_rx_wqe. * On SKB allocation failure, NULL is returned. */ - return mlx5e_xsk_construct_skb(rq, xdp); + return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h index 087c943bd8e9..cefc0ef6105d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -13,11 +13,13 @@ int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk); int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk); struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, + struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, u32 page_idx); struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, + struct mlx5_cqe64 *cqe, u32 cqe_bcnt); #endif /* __MLX5_EN_XSK_RX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index ff03c43833bb..81a567e17264 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -7,6 +7,18 @@ #include "en/health.h" #include <net/xdp_sock_drv.h> +static int mlx5e_legacy_rq_validate_xsk(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + struct mlx5e_xsk_param *xsk) +{ + if (!mlx5e_rx_is_linear_skb(mdev, params, xsk)) { + mlx5_core_err(mdev, "Legacy RQ linear mode for XSK can't be activated with current params\n"); + return -EINVAL; + } + + return 0; +} + /* The limitation of 2048 can be altered, but shouldn't go beyond the minimal * stride size of striding RQ. */ @@ -17,8 +29,11 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params, struct mlx5_core_dev *mdev) { /* AF_XDP doesn't support frames larger than PAGE_SIZE. */ - if (xsk->chunk_size > PAGE_SIZE || xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE) + if (xsk->chunk_size > PAGE_SIZE || xsk->chunk_size < MLX5E_MIN_XSK_CHUNK_SIZE) { + mlx5_core_err(mdev, "XSK chunk size %u out of bounds [%u, %lu]\n", xsk->chunk_size, + MLX5E_MIN_XSK_CHUNK_SIZE, PAGE_SIZE); return false; + } /* frag_sz is different for regular and XSK RQs, so ensure that linear * SKB mode is possible. @@ -27,7 +42,7 @@ bool mlx5e_validate_xsk_param(struct mlx5e_params *params, case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: return !mlx5e_mpwrq_validate_xsk(mdev, params, xsk); default: /* MLX5_WQ_TYPE_CYCLIC */ - return mlx5e_rx_is_linear_skb(mdev, params, xsk); + return !mlx5e_legacy_rq_validate_xsk(mdev, params, xsk); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 07187028f0d3..c964644ee866 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -124,7 +124,7 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev, mlx5e_udp_gso_handle_tx_skb(skb); #ifdef CONFIG_MLX5_EN_TLS - /* May send SKBs and WQEs. */ + /* May send WQEs. */ if (mlx5e_ktls_skb_offloaded(skb)) if (unlikely(!mlx5e_ktls_handle_tx_skb(dev, sq, skb, &state->tls))) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index d7c020f72401..88a5aed9d678 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -365,7 +365,7 @@ void mlx5e_accel_fs_tcp_destroy(struct mlx5e_flow_steering *fs) for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) accel_fs_tcp_destroy_table(fs, i); - kvfree(accel_tcp); + kfree(accel_tcp); mlx5e_fs_set_accel_tcp(fs, NULL); } @@ -377,7 +377,7 @@ int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) if (!MLX5_CAP_FLOWTABLE_NIC_RX(mlx5e_fs_get_mdev(fs), ft_field_support.outer_ip_version)) return -EOPNOTSUPP; - accel_tcp = kvzalloc(sizeof(*accel_tcp), GFP_KERNEL); + accel_tcp = kzalloc(sizeof(*accel_tcp), GFP_KERNEL); if (!accel_tcp) return -ENOMEM; mlx5e_fs_set_accel_tcp(fs, accel_tcp); @@ -397,7 +397,7 @@ int mlx5e_accel_fs_tcp_create(struct mlx5e_flow_steering *fs) err_destroy_tables: while (--i >= 0) accel_fs_tcp_destroy_table(fs, i); - kvfree(accel_tcp); + kfree(accel_tcp); mlx5e_fs_set_accel_tcp(fs, NULL); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index bb9023957f74..7b0d3de0ec6c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -158,95 +158,103 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, attrs->family = x->props.family; attrs->type = x->xso.type; attrs->reqid = x->props.reqid; + attrs->upspec.dport = ntohs(x->sel.dport); + attrs->upspec.dport_mask = ntohs(x->sel.dport_mask); + attrs->upspec.sport = ntohs(x->sel.sport); + attrs->upspec.sport_mask = ntohs(x->sel.sport_mask); + attrs->upspec.proto = x->sel.proto; mlx5e_ipsec_init_limits(sa_entry, attrs); } -static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x) +static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, + struct xfrm_state *x, + struct netlink_ext_ack *extack) { - struct net_device *netdev = x->xso.real_dev; - struct mlx5e_priv *priv; - - priv = netdev_priv(netdev); - if (x->props.aalgo != SADB_AALG_NONE) { - netdev_info(netdev, "Cannot offload authenticated xfrm states\n"); + NL_SET_ERR_MSG_MOD(extack, "Cannot offload authenticated xfrm states"); return -EINVAL; } if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) { - netdev_info(netdev, "Only AES-GCM-ICV16 xfrm state may be offloaded\n"); + NL_SET_ERR_MSG_MOD(extack, "Only AES-GCM-ICV16 xfrm state may be offloaded"); return -EINVAL; } if (x->props.calgo != SADB_X_CALG_NONE) { - netdev_info(netdev, "Cannot offload compressed xfrm states\n"); + NL_SET_ERR_MSG_MOD(extack, "Cannot offload compressed xfrm states"); return -EINVAL; } if (x->props.flags & XFRM_STATE_ESN && - !(mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_ESN)) { - netdev_info(netdev, "Cannot offload ESN xfrm states\n"); + !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESN)) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload ESN xfrm states"); return -EINVAL; } if (x->props.family != AF_INET && x->props.family != AF_INET6) { - netdev_info(netdev, "Only IPv4/6 xfrm states may be offloaded\n"); + NL_SET_ERR_MSG_MOD(extack, "Only IPv4/6 xfrm states may be offloaded"); return -EINVAL; } if (x->id.proto != IPPROTO_ESP) { - netdev_info(netdev, "Only ESP xfrm state may be offloaded\n"); + NL_SET_ERR_MSG_MOD(extack, "Only ESP xfrm state may be offloaded"); return -EINVAL; } if (x->encap) { - netdev_info(netdev, "Encapsulated xfrm state may not be offloaded\n"); + NL_SET_ERR_MSG_MOD(extack, "Encapsulated xfrm state may not be offloaded"); return -EINVAL; } if (!x->aead) { - netdev_info(netdev, "Cannot offload xfrm states without aead\n"); + NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without aead"); return -EINVAL; } if (x->aead->alg_icv_len != 128) { - netdev_info(netdev, "Cannot offload xfrm states with AEAD ICV length other than 128bit\n"); + NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD ICV length other than 128bit"); return -EINVAL; } if ((x->aead->alg_key_len != 128 + 32) && (x->aead->alg_key_len != 256 + 32)) { - netdev_info(netdev, "Cannot offload xfrm states with AEAD key length other than 128/256 bit\n"); + NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD key length other than 128/256 bit"); return -EINVAL; } if (x->tfcpad) { - netdev_info(netdev, "Cannot offload xfrm states with tfc padding\n"); + NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with tfc padding"); return -EINVAL; } if (!x->geniv) { - netdev_info(netdev, "Cannot offload xfrm states without geniv\n"); + NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without geniv"); return -EINVAL; } if (strcmp(x->geniv, "seqiv")) { - netdev_info(netdev, "Cannot offload xfrm states with geniv other than seqiv\n"); + NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with geniv other than seqiv"); return -EINVAL; } + + if (x->sel.proto != IPPROTO_IP && + (x->sel.proto != IPPROTO_UDP || x->xso.dir != XFRM_DEV_OFFLOAD_OUT)) { + NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than UDP, and only Tx direction"); + return -EINVAL; + } + switch (x->xso.type) { case XFRM_DEV_OFFLOAD_CRYPTO: - if (!(mlx5_ipsec_device_caps(priv->mdev) & - MLX5_IPSEC_CAP_CRYPTO)) { - netdev_info(netdev, "Crypto offload is not supported\n"); + if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) { + NL_SET_ERR_MSG_MOD(extack, "Crypto offload is not supported"); return -EINVAL; } if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) { - netdev_info(netdev, "Only transport and tunnel xfrm states may be offloaded\n"); + NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded"); return -EINVAL; } break; case XFRM_DEV_OFFLOAD_PACKET: - if (!(mlx5_ipsec_device_caps(priv->mdev) & + if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)) { - netdev_info(netdev, "Packet offload is not supported\n"); + NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported"); return -EINVAL; } if (x->props.mode != XFRM_MODE_TRANSPORT) { - netdev_info(netdev, "Only transport xfrm states may be offloaded in packet mode\n"); + NL_SET_ERR_MSG_MOD(extack, "Only transport xfrm states may be offloaded in packet mode"); return -EINVAL; } @@ -254,35 +262,30 @@ static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x) x->replay_esn->replay_window != 64 && x->replay_esn->replay_window != 128 && x->replay_esn->replay_window != 256) { - netdev_info(netdev, - "Unsupported replay window size %u\n", - x->replay_esn->replay_window); + NL_SET_ERR_MSG_MOD(extack, "Unsupported replay window size"); return -EINVAL; } if (!x->props.reqid) { - netdev_info(netdev, "Cannot offload without reqid\n"); + NL_SET_ERR_MSG_MOD(extack, "Cannot offload without reqid"); return -EINVAL; } if (x->lft.hard_byte_limit != XFRM_INF || x->lft.soft_byte_limit != XFRM_INF) { - netdev_info(netdev, - "Device doesn't support limits in bytes\n"); + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support limits in bytes"); return -EINVAL; } if (x->lft.soft_packet_limit >= x->lft.hard_packet_limit && x->lft.hard_packet_limit != XFRM_INF) { /* XFRM stack doesn't prevent such configuration :(. */ - netdev_info(netdev, - "Hard packet limit must be greater than soft one\n"); + NL_SET_ERR_MSG_MOD(extack, "Hard packet limit must be greater than soft one"); return -EINVAL; } break; default: - netdev_info(netdev, "Unsupported xfrm offload type %d\n", - x->xso.type); + NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type"); return -EINVAL; } return 0; @@ -298,7 +301,8 @@ static void _update_xfrm_state(struct work_struct *work) mlx5_accel_esp_modify_xfrm(sa_entry, &modify_work->attrs); } -static int mlx5e_xfrm_add_state(struct xfrm_state *x) +static int mlx5e_xfrm_add_state(struct xfrm_state *x, + struct netlink_ext_ack *extack) { struct mlx5e_ipsec_sa_entry *sa_entry = NULL; struct net_device *netdev = x->xso.real_dev; @@ -311,15 +315,13 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x) return -EOPNOTSUPP; ipsec = priv->ipsec; - err = mlx5e_xfrm_validate_state(x); + err = mlx5e_xfrm_validate_state(priv->mdev, x, extack); if (err) return err; sa_entry = kzalloc(sizeof(*sa_entry), GFP_KERNEL); - if (!sa_entry) { - err = -ENOMEM; - goto out; - } + if (!sa_entry) + return -ENOMEM; sa_entry->x = x; sa_entry->ipsec = ipsec; @@ -360,7 +362,7 @@ err_hw_ctx: mlx5_ipsec_free_sa_ctx(sa_entry); err_xfrm: kfree(sa_entry); -out: + NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy"); return err; } @@ -497,34 +499,39 @@ static void mlx5e_xfrm_update_curlft(struct xfrm_state *x) mlx5e_ipsec_aso_update_curlft(sa_entry, &x->curlft.packets); } -static int mlx5e_xfrm_validate_policy(struct xfrm_policy *x) +static int mlx5e_xfrm_validate_policy(struct xfrm_policy *x, + struct netlink_ext_ack *extack) { - struct net_device *netdev = x->xdo.real_dev; - if (x->type != XFRM_POLICY_TYPE_MAIN) { - netdev_info(netdev, "Cannot offload non-main policy types\n"); + NL_SET_ERR_MSG_MOD(extack, "Cannot offload non-main policy types"); return -EINVAL; } /* Please pay attention that we support only one template */ if (x->xfrm_nr > 1) { - netdev_info(netdev, "Cannot offload more than one template\n"); + NL_SET_ERR_MSG_MOD(extack, "Cannot offload more than one template"); return -EINVAL; } if (x->xdo.dir != XFRM_DEV_OFFLOAD_IN && x->xdo.dir != XFRM_DEV_OFFLOAD_OUT) { - netdev_info(netdev, "Cannot offload forward policy\n"); + NL_SET_ERR_MSG_MOD(extack, "Cannot offload forward policy"); return -EINVAL; } if (!x->xfrm_vec[0].reqid) { - netdev_info(netdev, "Cannot offload policy without reqid\n"); + NL_SET_ERR_MSG_MOD(extack, "Cannot offload policy without reqid"); return -EINVAL; } if (x->xdo.type != XFRM_DEV_OFFLOAD_PACKET) { - netdev_info(netdev, "Unsupported xfrm offload type\n"); + NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type"); + return -EINVAL; + } + + if (x->selector.proto != IPPROTO_IP && + (x->selector.proto != IPPROTO_UDP || x->xdo.dir != XFRM_DEV_OFFLOAD_OUT)) { + NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than UDP, and only Tx direction"); return -EINVAL; } @@ -548,9 +555,15 @@ mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry, attrs->action = x->action; attrs->type = XFRM_DEV_OFFLOAD_PACKET; attrs->reqid = x->xfrm_vec[0].reqid; + attrs->upspec.dport = ntohs(sel->dport); + attrs->upspec.dport_mask = ntohs(sel->dport_mask); + attrs->upspec.sport = ntohs(sel->sport); + attrs->upspec.sport_mask = ntohs(sel->sport_mask); + attrs->upspec.proto = sel->proto; } -static int mlx5e_xfrm_add_policy(struct xfrm_policy *x) +static int mlx5e_xfrm_add_policy(struct xfrm_policy *x, + struct netlink_ext_ack *extack) { struct net_device *netdev = x->xdo.real_dev; struct mlx5e_ipsec_pol_entry *pol_entry; @@ -558,10 +571,12 @@ static int mlx5e_xfrm_add_policy(struct xfrm_policy *x) int err; priv = netdev_priv(netdev); - if (!priv->ipsec) + if (!priv->ipsec) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPsec packet offload"); return -EOPNOTSUPP; + } - err = mlx5e_xfrm_validate_policy(x); + err = mlx5e_xfrm_validate_policy(x, extack); if (err) return err; @@ -582,6 +597,7 @@ static int mlx5e_xfrm_add_policy(struct xfrm_policy *x) err_fs: kfree(pol_entry); + NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy"); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 8bed9c361075..12f044330639 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -52,6 +52,14 @@ struct aes_gcm_keymat { u32 aes_key[256 / 32]; }; +struct upspec { + u16 dport; + u16 dport_mask; + u16 sport; + u16 sport_mask; + u8 proto; +}; + struct mlx5_accel_esp_xfrm_attrs { u32 esn; u32 spi; @@ -68,6 +76,7 @@ struct mlx5_accel_esp_xfrm_attrs { __be32 a6[4]; } daddr; + struct upspec upspec; u8 dir : 2; u8 esn_overlap : 1; u8 esn_trigger : 1; @@ -84,6 +93,7 @@ enum mlx5_ipsec_cap { MLX5_IPSEC_CAP_CRYPTO = 1 << 0, MLX5_IPSEC_CAP_ESN = 1 << 1, MLX5_IPSEC_CAP_PACKET_OFFLOAD = 1 << 2, + MLX5_IPSEC_CAP_ROCE = 1 << 3, }; struct mlx5e_priv; @@ -119,7 +129,7 @@ struct mlx5e_ipsec_work { }; struct mlx5e_ipsec_aso { - u8 ctx[MLX5_ST_SZ_BYTES(ipsec_aso)]; + u8 __aligned(64) ctx[MLX5_ST_SZ_BYTES(ipsec_aso)]; dma_addr_t dma_addr; struct mlx5_aso *aso; /* Protect ASO WQ access, as it is global to whole IPsec */ @@ -138,6 +148,7 @@ struct mlx5e_ipsec { struct mlx5e_ipsec_tx *tx; struct mlx5e_ipsec_aso *aso; struct notifier_block nb; + struct mlx5_ipsec_fs *roce; }; struct mlx5e_ipsec_esn_state { @@ -181,6 +192,7 @@ struct mlx5_accel_pol_xfrm_attrs { __be32 a6[4]; } daddr; + struct upspec upspec; u8 family; u8 action; u8 type : 2; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 9f19f4b59a70..9871ba1b25ff 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -6,6 +6,7 @@ #include "en/fs.h" #include "ipsec.h" #include "fs_core.h" +#include "lib/ipsec_fs_roce.h" #define NUM_IPSEC_FTE BIT(15) @@ -166,7 +167,8 @@ out: return err; } -static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx) +static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, u32 family) { mlx5_del_flow_rules(rx->pol.rule); mlx5_destroy_flow_group(rx->pol.group); @@ -179,6 +181,8 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_rx *rx) mlx5_del_flow_rules(rx->status.rule); mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr); mlx5_destroy_flow_table(rx->ft.status); + + mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family); } static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, @@ -186,18 +190,35 @@ static int rx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, { struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(ipsec->fs, false); struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(ipsec->fs, false); + struct mlx5_flow_destination default_dest; struct mlx5_flow_destination dest[2]; struct mlx5_flow_table *ft; int err; + default_dest = mlx5_ttc_get_default_dest(ttc, family2tt(family)); + err = mlx5_ipsec_fs_roce_rx_create(mdev, ipsec->roce, ns, &default_dest, + family, MLX5E_ACCEL_FS_ESP_FT_ROCE_LEVEL, + MLX5E_NIC_PRIO); + if (err) + return err; + ft = ipsec_ft_create(ns, MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL, MLX5E_NIC_PRIO, 1); - if (IS_ERR(ft)) - return PTR_ERR(ft); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_fs_ft_status; + } rx->ft.status = ft; - dest[0] = mlx5_ttc_get_default_dest(ttc, family2tt(family)); + ft = mlx5_ipsec_fs_roce_ft_get(ipsec->roce, family); + if (ft) { + dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[0].ft = ft; + } else { + dest[0] = default_dest; + } + dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[1].counter_id = mlx5_fc_id(rx->fc->cnt); err = ipsec_status_rule(mdev, rx, dest); @@ -245,6 +266,8 @@ err_fs_ft: mlx5_modify_header_dealloc(mdev, rx->status.modify_hdr); err_add: mlx5_destroy_flow_table(rx->ft.status); +err_fs_ft_status: + mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family); return err; } @@ -304,14 +327,15 @@ static void rx_ft_put(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, mlx5_ttc_fwd_default_dest(ttc, family2tt(family)); /* remove FT */ - rx_destroy(mdev, rx); + rx_destroy(mdev, ipsec, rx, family); out: mutex_unlock(&rx->ft.mutex); } /* IPsec TX flow steering */ -static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx) +static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx, + struct mlx5_ipsec_fs *roce) { struct mlx5_flow_destination dest = {}; struct mlx5_flow_table *ft; @@ -334,8 +358,15 @@ static int tx_create(struct mlx5_core_dev *mdev, struct mlx5e_ipsec_tx *tx) err = ipsec_miss_create(mdev, tx->ft.pol, &tx->pol, &dest); if (err) goto err_pol_miss; + + err = mlx5_ipsec_fs_roce_tx_create(mdev, roce, tx->ft.pol); + if (err) + goto err_roce; return 0; +err_roce: + mlx5_del_flow_rules(tx->pol.rule); + mlx5_destroy_flow_group(tx->pol.group); err_pol_miss: mlx5_destroy_flow_table(tx->ft.pol); err_pol_ft: @@ -353,9 +384,10 @@ static struct mlx5e_ipsec_tx *tx_ft_get(struct mlx5_core_dev *mdev, if (tx->ft.refcnt) goto skip; - err = tx_create(mdev, tx); + err = tx_create(mdev, tx, ipsec->roce); if (err) goto out; + skip: tx->ft.refcnt++; out: @@ -374,6 +406,7 @@ static void tx_ft_put(struct mlx5e_ipsec *ipsec) if (tx->ft.refcnt) goto out; + mlx5_ipsec_fs_roce_tx_destroy(ipsec->roce); mlx5_del_flow_rules(tx->pol.rule); mlx5_destroy_flow_group(tx->pol.group); mlx5_destroy_flow_table(tx->ft.pol); @@ -467,6 +500,27 @@ static void setup_fte_reg_c0(struct mlx5_flow_spec *spec, u32 reqid) misc_parameters_2.metadata_reg_c_0, reqid); } +static void setup_fte_upper_proto_match(struct mlx5_flow_spec *spec, struct upspec *upspec) +{ + if (upspec->proto != IPPROTO_UDP) + return; + + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, spec->match_criteria, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, ip_protocol, upspec->proto); + if (upspec->dport) { + MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, udp_dport, + upspec->dport_mask); + MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, udp_dport, upspec->dport); + } + + if (upspec->sport) { + MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, udp_dport, + upspec->sport_mask); + MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, udp_dport, upspec->sport); + } +} + static int setup_modify_header(struct mlx5_core_dev *mdev, u32 val, u8 dir, struct mlx5_flow_act *flow_act) { @@ -654,6 +708,7 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); setup_fte_no_frags(spec); + setup_fte_upper_proto_match(spec, &attrs->upspec); switch (attrs->type) { case XFRM_DEV_OFFLOAD_CRYPTO: @@ -728,6 +783,7 @@ static int tx_add_policy(struct mlx5e_ipsec_pol_entry *pol_entry) setup_fte_addr6(spec, attrs->saddr.a6, attrs->daddr.a6); setup_fte_no_frags(spec); + setup_fte_upper_proto_match(spec, &attrs->upspec); err = setup_modify_header(mdev, attrs->reqid, XFRM_DEV_OFFLOAD_OUT, &flow_act); @@ -1008,6 +1064,9 @@ void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec) if (!ipsec->tx) return; + if (mlx5_ipsec_device_caps(ipsec->mdev) & MLX5_IPSEC_CAP_ROCE) + mlx5_ipsec_fs_roce_cleanup(ipsec->roce); + ipsec_fs_destroy_counters(ipsec); mutex_destroy(&ipsec->tx->ft.mutex); WARN_ON(ipsec->tx->ft.refcnt); @@ -1024,6 +1083,7 @@ void mlx5e_accel_ipsec_fs_cleanup(struct mlx5e_ipsec *ipsec) int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec) { + struct mlx5_core_dev *mdev = ipsec->mdev; struct mlx5_flow_namespace *ns; int err = -ENOMEM; @@ -1053,6 +1113,9 @@ int mlx5e_accel_ipsec_fs_init(struct mlx5e_ipsec *ipsec) mutex_init(&ipsec->rx_ipv6->ft.mutex); ipsec->tx->ns = ns; + if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ROCE) + ipsec->roce = mlx5_ipsec_fs_roce_init(mdev); + return 0; err_counters: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 2461462b7b99..5fa7a4c40429 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -4,7 +4,7 @@ #include "mlx5_core.h" #include "en.h" #include "ipsec.h" -#include "lib/mlx5.h" +#include "lib/crypto.h" enum { MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET, @@ -42,6 +42,11 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) MLX5_CAP_FLOWTABLE_NIC_RX(mdev, decap)) caps |= MLX5_IPSEC_CAP_PACKET_OFFLOAD; + if (mlx5_get_roce_state(mdev) && + MLX5_CAP_GEN_2(mdev, flow_table_type_2_type) & MLX5_FT_NIC_RX_2_NIC_RX_RDMA && + MLX5_CAP_GEN_2(mdev, flow_table_type_2_type) & MLX5_FT_NIC_TX_RDMA_2_NIC_TX) + caps |= MLX5_IPSEC_CAP_ROCE; + if (!caps) return 0; @@ -92,7 +97,6 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn, MLX5_SET(ipsec_aso, aso_ctx, remove_flow_pkt_cnt, lower_32_bits(attrs->hard_packet_limit)); MLX5_SET(ipsec_aso, aso_ctx, hard_lft_arm, 1); - MLX5_SET(ipsec_aso, aso_ctx, remove_flow_enable, 1); } if (attrs->soft_packet_limit != XFRM_INF) { @@ -329,8 +333,7 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work) if (attrs->soft_packet_limit != XFRM_INF) if (!MLX5_GET(ipsec_aso, aso->ctx, soft_lft_arm) || - !MLX5_GET(ipsec_aso, aso->ctx, hard_lft_arm) || - !MLX5_GET(ipsec_aso, aso->ctx, remove_flow_enable)) + !MLX5_GET(ipsec_aso, aso->ctx, hard_lft_arm)) xfrm_state_check_expire(sa_entry->x); unlock: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c index da2184c94203..cf704f106b7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -1,18 +1,19 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB // Copyright (c) 2019 Mellanox Technologies. +#include <linux/debugfs.h> #include "en.h" #include "lib/mlx5.h" +#include "lib/crypto.h" #include "en_accel/ktls.h" #include "en_accel/ktls_utils.h" #include "en_accel/fs_tcp.h" -int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, - struct tls_crypto_info *crypto_info, - u32 *p_key_id) +struct mlx5_crypto_dek *mlx5_ktls_create_key(struct mlx5_crypto_dek_pool *dek_pool, + struct tls_crypto_info *crypto_info) { + const void *key; u32 sz_bytes; - void *key; switch (crypto_info->cipher_type) { case TLS_CIPHER_AES_GCM_128: { @@ -32,17 +33,16 @@ int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, break; } default: - return -EINVAL; + return ERR_PTR(-EINVAL); } - return mlx5_create_encryption_key(mdev, key, sz_bytes, - MLX5_ACCEL_OBJ_TLS_KEY, - p_key_id); + return mlx5_crypto_dek_create(dek_pool, key, sz_bytes); } -void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id) +void mlx5_ktls_destroy_key(struct mlx5_crypto_dek_pool *dek_pool, + struct mlx5_crypto_dek *dek) { - mlx5_destroy_encryption_key(mdev, key_id); + mlx5_crypto_dek_destroy(dek_pool, dek); } static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk, @@ -177,8 +177,18 @@ void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv) destroy_workqueue(priv->tls->rx_wq); } +static void mlx5e_tls_debugfs_init(struct mlx5e_tls *tls, + struct dentry *dfs_root) +{ + if (IS_ERR_OR_NULL(dfs_root)) + return; + + tls->debugfs.dfs = debugfs_create_dir("tls", dfs_root); +} + int mlx5e_ktls_init(struct mlx5e_priv *priv) { + struct mlx5_crypto_dek_pool *dek_pool; struct mlx5e_tls *tls; if (!mlx5e_is_ktls_device(priv->mdev)) @@ -187,13 +197,32 @@ int mlx5e_ktls_init(struct mlx5e_priv *priv) tls = kzalloc(sizeof(*tls), GFP_KERNEL); if (!tls) return -ENOMEM; + tls->mdev = priv->mdev; + dek_pool = mlx5_crypto_dek_pool_create(priv->mdev, MLX5_ACCEL_OBJ_TLS_KEY); + if (IS_ERR(dek_pool)) { + kfree(tls); + return PTR_ERR(dek_pool); + } + tls->dek_pool = dek_pool; priv->tls = tls; + + mlx5e_tls_debugfs_init(tls, priv->dfs_root); + return 0; } void mlx5e_ktls_cleanup(struct mlx5e_priv *priv) { + struct mlx5e_tls *tls = priv->tls; + + if (!mlx5e_is_ktls_device(priv->mdev)) + return; + + debugfs_remove_recursive(tls->debugfs.dfs); + tls->debugfs.dfs = NULL; + + mlx5_crypto_dek_pool_destroy(tls->dek_pool); kfree(priv->tls); priv->tls = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index 1c35045e41fb..f11075e67658 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -4,15 +4,18 @@ #ifndef __MLX5E_KTLS_H__ #define __MLX5E_KTLS_H__ +#include <linux/debugfs.h> #include <linux/tls.h> #include <net/tls.h> #include "en.h" #ifdef CONFIG_MLX5_EN_TLS -int mlx5_ktls_create_key(struct mlx5_core_dev *mdev, - struct tls_crypto_info *crypto_info, - u32 *p_key_id); -void mlx5_ktls_destroy_key(struct mlx5_core_dev *mdev, u32 key_id); +#include "lib/crypto.h" + +struct mlx5_crypto_dek *mlx5_ktls_create_key(struct mlx5_crypto_dek_pool *dek_pool, + struct tls_crypto_info *crypto_info); +void mlx5_ktls_destroy_key(struct mlx5_crypto_dek_pool *dek_pool, + struct mlx5_crypto_dek *dek); static inline bool mlx5e_is_ktls_device(struct mlx5_core_dev *mdev) { @@ -72,10 +75,18 @@ struct mlx5e_tls_sw_stats { atomic64_t rx_tls_del; }; +struct mlx5e_tls_debugfs { + struct dentry *dfs; + struct dentry *dfs_tx; +}; + struct mlx5e_tls { + struct mlx5_core_dev *mdev; struct mlx5e_tls_sw_stats sw_stats; struct workqueue_struct *rx_wq; struct mlx5e_tls_tx_pool *tx_pool; + struct mlx5_crypto_dek_pool *dek_pool; + struct mlx5e_tls_debugfs debugfs; }; int mlx5e_ktls_init(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 3e54834747ce..4be770443b0c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -50,7 +50,7 @@ struct mlx5e_ktls_offload_context_rx { struct mlx5e_tls_sw_stats *sw_stats; struct completion add_ctx; struct mlx5e_tir tir; - u32 key_id; + struct mlx5_crypto_dek *dek; u32 rxq; DECLARE_BITMAP(flags, MLX5E_NUM_PRIV_RX_FLAGS); @@ -148,7 +148,8 @@ post_static_params(struct mlx5e_icosq *sq, wqe = MLX5E_TLS_FETCH_SET_STATIC_PARAMS_WQE(sq, pi); mlx5e_ktls_build_static_params(wqe, sq->pc, sq->sqn, &priv_rx->crypto_info, mlx5e_tir_get_tirn(&priv_rx->tir), - priv_rx->key_id, priv_rx->resync.seq, false, + mlx5_crypto_dek_get_id(priv_rx->dek), + priv_rx->resync.seq, false, TLS_OFFLOAD_CTX_DIR_RX); wi = (struct mlx5e_icosq_wqe_info) { .wqe_type = MLX5E_ICOSQ_WQE_UMR_TLS, @@ -610,20 +611,22 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, struct mlx5e_ktls_offload_context_rx *priv_rx; struct mlx5e_ktls_rx_resync_ctx *resync; struct tls_context *tls_ctx; - struct mlx5_core_dev *mdev; + struct mlx5_crypto_dek *dek; struct mlx5e_priv *priv; int rxq, err; tls_ctx = tls_get_ctx(sk); priv = netdev_priv(netdev); - mdev = priv->mdev; priv_rx = kzalloc(sizeof(*priv_rx), GFP_KERNEL); if (unlikely(!priv_rx)) return -ENOMEM; - err = mlx5_ktls_create_key(mdev, crypto_info, &priv_rx->key_id); - if (err) + dek = mlx5_ktls_create_key(priv->tls->dek_pool, crypto_info); + if (IS_ERR(dek)) { + err = PTR_ERR(dek); goto err_create_key; + } + priv_rx->dek = dek; INIT_LIST_HEAD(&priv_rx->list); spin_lock_init(&priv_rx->lock); @@ -673,7 +676,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, err_post_wqes: mlx5e_tir_destroy(&priv_rx->tir); err_create_tir: - mlx5_ktls_destroy_key(mdev, priv_rx->key_id); + mlx5_ktls_destroy_key(priv->tls->dek_pool, priv_rx->dek); err_create_key: kfree(priv_rx); return err; @@ -683,11 +686,9 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) { struct mlx5e_ktls_offload_context_rx *priv_rx; struct mlx5e_ktls_rx_resync_ctx *resync; - struct mlx5_core_dev *mdev; struct mlx5e_priv *priv; priv = netdev_priv(netdev); - mdev = priv->mdev; priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_ctx); set_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags); @@ -707,7 +708,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) mlx5e_accel_fs_del_sk(priv_rx->rule.rule); mlx5e_tir_destroy(&priv_rx->tir); - mlx5_ktls_destroy_key(mdev, priv_rx->key_id); + mlx5_ktls_destroy_key(priv->tls->dek_pool, priv_rx->dek); /* priv_rx should normally be freed here, but if there is an outstanding * GET_PSV, deallocation will be delayed until the CQE for GET_PSV is * processed. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index 78072bf93f3f..60b3e08a1028 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB // Copyright (c) 2019 Mellanox Technologies. +#include <linux/debugfs.h> #include "en_accel/ktls.h" #include "en_accel/ktls_txrx.h" #include "en_accel/ktls_utils.h" @@ -97,7 +98,7 @@ struct mlx5e_ktls_offload_context_tx { struct tls_offload_context_tx *tx_ctx; struct mlx5_core_dev *mdev; struct mlx5e_tls_sw_stats *sw_stats; - u32 key_id; + struct mlx5_crypto_dek *dek; u8 create_err : 1; }; @@ -456,6 +457,7 @@ int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, struct mlx5e_ktls_offload_context_tx *priv_tx; struct mlx5e_tls_tx_pool *pool; struct tls_context *tls_ctx; + struct mlx5_crypto_dek *dek; struct mlx5e_priv *priv; int err; @@ -467,9 +469,12 @@ int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, if (IS_ERR(priv_tx)) return PTR_ERR(priv_tx); - err = mlx5_ktls_create_key(pool->mdev, crypto_info, &priv_tx->key_id); - if (err) + dek = mlx5_ktls_create_key(priv->tls->dek_pool, crypto_info); + if (IS_ERR(dek)) { + err = PTR_ERR(dek); goto err_create_key; + } + priv_tx->dek = dek; priv_tx->expected_seq = start_offload_tcp_sn; switch (crypto_info->cipher_type) { @@ -511,7 +516,7 @@ void mlx5e_ktls_del_tx(struct net_device *netdev, struct tls_context *tls_ctx) pool = priv->tls->tx_pool; atomic64_inc(&priv_tx->sw_stats->tx_tls_del); - mlx5_ktls_destroy_key(priv_tx->mdev, priv_tx->key_id); + mlx5_ktls_destroy_key(priv->tls->dek_pool, priv_tx->dek); pool_push(pool, priv_tx); } @@ -550,8 +555,9 @@ post_static_params(struct mlx5e_txqsq *sq, pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs); wqe = MLX5E_TLS_FETCH_SET_STATIC_PARAMS_WQE(sq, pi); mlx5e_ktls_build_static_params(wqe, sq->pc, sq->sqn, &priv_tx->crypto_info, - priv_tx->tisn, priv_tx->key_id, 0, fence, - TLS_OFFLOAD_CTX_DIR_TX); + priv_tx->tisn, + mlx5_crypto_dek_get_id(priv_tx->dek), + 0, fence, TLS_OFFLOAD_CTX_DIR_TX); tx_fill_wi(sq, pi, num_wqebbs, 0, NULL); sq->pc += num_wqebbs; } @@ -886,8 +892,22 @@ err_out: return false; } +static void mlx5e_tls_tx_debugfs_init(struct mlx5e_tls *tls, + struct dentry *dfs_root) +{ + if (IS_ERR_OR_NULL(dfs_root)) + return; + + tls->debugfs.dfs_tx = debugfs_create_dir("tx", dfs_root); + + debugfs_create_size_t("pool_size", 0400, tls->debugfs.dfs_tx, + &tls->tx_pool->size); +} + int mlx5e_ktls_init_tx(struct mlx5e_priv *priv) { + struct mlx5e_tls *tls = priv->tls; + if (!mlx5e_is_ktls_tx(priv->mdev)) return 0; @@ -895,6 +915,8 @@ int mlx5e_ktls_init_tx(struct mlx5e_priv *priv) if (!priv->tls->tx_pool) return -ENOMEM; + mlx5e_tls_tx_debugfs_init(tls, tls->debugfs.dfs); + return 0; } @@ -903,6 +925,9 @@ void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv) if (!mlx5e_is_ktls_tx(priv->mdev)) return; + debugfs_remove_recursive(priv->tls->debugfs.dfs_tx); + priv->tls->debugfs.dfs_tx = NULL; + mlx5e_tls_tx_pool_cleanup(priv->tls->tx_pool); priv->tls->tx_pool = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 7f6b940830b3..08d0929e8260 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -7,7 +7,7 @@ #include "en.h" #include "lib/aso.h" -#include "lib/mlx5.h" +#include "lib/crypto.h" #include "en_accel/macsec.h" #include "en_accel/macsec_fs.h" diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 68f19324db93..4c9a3210600c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -31,6 +31,7 @@ */ #include "en.h" +#include "lib/crypto.h" /* mlx5e global resources should be placed in this file. * Global resources are common to all the netdevices created on the same nic. @@ -104,6 +105,13 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) INIT_LIST_HEAD(&res->td.tirs_list); mutex_init(&res->td.list_lock); + mdev->mlx5e_res.dek_priv = mlx5_crypto_dek_init(mdev); + if (IS_ERR(mdev->mlx5e_res.dek_priv)) { + mlx5_core_err(mdev, "crypto dek init failed, %ld\n", + PTR_ERR(mdev->mlx5e_res.dek_priv)); + mdev->mlx5e_res.dek_priv = NULL; + } + return 0; err_destroy_mkey: @@ -119,6 +127,8 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) { struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs; + mlx5_crypto_dek_cleanup(mdev->mlx5e_res.dek_priv); + mdev->mlx5e_res.dek_priv = NULL; mlx5_free_bfreg(mdev, &res->bfreg); mlx5_core_destroy_mkey(mdev, res->mkey); mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 1892ccb889b3..05796f8b1d7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include <linux/debugfs.h> #include <linux/list.h> #include <linux/ip.h> #include <linux/ipv6.h> @@ -67,6 +68,7 @@ struct mlx5e_flow_steering { struct mlx5e_fs_udp *udp; struct mlx5e_fs_any *any; struct mlx5e_ptp_fs *ptp_fs; + struct dentry *dfs_root; }; static int mlx5e_add_l2_flow_rule(struct mlx5e_flow_steering *fs, @@ -104,6 +106,11 @@ static inline int mlx5e_hash_l2(const u8 *addr) return addr[5]; } +struct dentry *mlx5e_fs_get_debugfs_root(struct mlx5e_flow_steering *fs) +{ + return fs->dfs_root; +} + static void mlx5e_add_l2_to_hash(struct hlist_head *hash, const u8 *addr) { struct mlx5e_l2_hash_node *hn; @@ -443,7 +450,7 @@ void mlx5e_enable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc) void mlx5e_disable_cvlan_filter(struct mlx5e_flow_steering *fs, bool promisc) { - if (fs->vlan->cvlan_filter_disabled) + if (!fs->vlan || fs->vlan->cvlan_filter_disabled) return; fs->vlan->cvlan_filter_disabled = true; @@ -1429,9 +1436,19 @@ static int mlx5e_fs_ethtool_alloc(struct mlx5e_flow_steering *fs) static void mlx5e_fs_ethtool_free(struct mlx5e_flow_steering *fs) { } #endif +static void mlx5e_fs_debugfs_init(struct mlx5e_flow_steering *fs, + struct dentry *dfs_root) +{ + if (IS_ERR_OR_NULL(dfs_root)) + return; + + fs->dfs_root = debugfs_create_dir("fs", dfs_root); +} + struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile, struct mlx5_core_dev *mdev, - bool state_destroy) + bool state_destroy, + struct dentry *dfs_root) { struct mlx5e_flow_steering *fs; int err; @@ -1458,6 +1475,8 @@ struct mlx5e_flow_steering *mlx5e_fs_init(const struct mlx5e_profile *profile, if (err) goto err_free_tc; + mlx5e_fs_debugfs_init(fs, dfs_root); + return fs; err_free_tc: mlx5e_fs_tc_free(fs); @@ -1471,6 +1490,7 @@ err: void mlx5e_fs_cleanup(struct mlx5e_flow_steering *fs) { + debugfs_remove_recursive(fs->dfs_root); mlx5e_fs_ethtool_free(fs); mlx5e_fs_tc_free(fs); mlx5e_fs_vlan_free(fs); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index abcc614b6191..53feb0529943 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -35,9 +35,11 @@ #include <net/vxlan.h> #include <net/geneve.h> #include <linux/bpf.h> +#include <linux/debugfs.h> #include <linux/if_bridge.h> #include <linux/filter.h> #include <net/page_pool.h> +#include <net/pkt_sched.h> #include <net/xdp_sock_drv.h> #include "eswitch.h" #include "en.h" @@ -179,17 +181,21 @@ static void mlx5e_disable_async_events(struct mlx5e_priv *priv) static int blocking_event(struct notifier_block *nb, unsigned long event, void *data) { struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, blocking_events_nb); + struct mlx5_devlink_trap_event_ctx *trap_event_ctx = data; int err; switch (event) { case MLX5_DRIVER_EVENT_TYPE_TRAP: - err = mlx5e_handle_trap_event(priv, data); + err = mlx5e_handle_trap_event(priv, trap_event_ctx->trap); + if (err) { + trap_event_ctx->err = err; + return NOTIFY_BAD; + } break; default: - netdev_warn(priv->netdev, "Sync event: Unknown event %ld\n", event); - err = -EINVAL; + return NOTIFY_DONE; } - return err; + return NOTIFY_OK; } static void mlx5e_enable_blocking_events(struct mlx5e_priv *priv) @@ -591,7 +597,8 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param rq->ix = c->ix; rq->channel = c; rq->mdev = mdev; - rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); + rq->hw_mtu = + MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN * !params->scatter_fcs_en; rq->xdpsq = &c->rq_xdpsq; rq->stats = &c->priv->channel_stats[c->ix]->rq; rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev); @@ -1014,35 +1021,6 @@ int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state) return mlx5e_rq_to_ready(rq, curr_state); } -static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable) -{ - struct mlx5_core_dev *mdev = rq->mdev; - - void *in; - void *rqc; - int inlen; - int err; - - inlen = MLX5_ST_SZ_BYTES(modify_rq_in); - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) - return -ENOMEM; - - rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); - - MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); - MLX5_SET64(modify_rq_in, in, modify_bitmask, - MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_SCATTER_FCS); - MLX5_SET(rqc, rqc, scatter_fcs, enable); - MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); - - err = mlx5_core_modify_rq(mdev, rq->rqn, in); - - kvfree(in); - - return err; -} - static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) { struct mlx5_core_dev *mdev = rq->mdev; @@ -1469,6 +1447,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->mkey_be = c->mkey_be; sq->netdev = c->netdev; sq->mdev = c->mdev; + sq->channel = c; sq->priv = c->priv; sq->ch_ix = c->ix; sq->txq_ix = txq_ix; @@ -2481,8 +2460,6 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c) mlx5e_activate_xsk(c); else mlx5e_activate_rq(&c->rq); - - mlx5e_trigger_napi_icosq(c); } static void mlx5e_deactivate_channel(struct mlx5e_channel *c) @@ -2574,13 +2551,19 @@ err_free: return err; } -static void mlx5e_activate_channels(struct mlx5e_channels *chs) +static void mlx5e_activate_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) { int i; for (i = 0; i < chs->num; i++) mlx5e_activate_channel(chs->c[i]); + if (priv->htb) + mlx5e_qos_activate_queues(priv); + + for (i = 0; i < chs->num; i++) + mlx5e_trigger_napi_icosq(chs->c[i]); + if (chs->ptp) mlx5e_ptp_activate_channel(chs->ptp); } @@ -2887,9 +2870,7 @@ out: void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) { mlx5e_build_txq_maps(priv); - mlx5e_activate_channels(&priv->channels); - if (priv->htb) - mlx5e_qos_activate_queues(priv); + mlx5e_activate_channels(priv, &priv->channels); mlx5e_xdp_tx_enable(priv); /* dev_watchdog() wants all TX queues to be started when the carrier is @@ -2997,32 +2978,37 @@ int mlx5e_safe_switch_params(struct mlx5e_priv *priv, mlx5e_fp_preactivate preactivate, void *context, bool reset) { - struct mlx5e_channels new_chs = {}; + struct mlx5e_channels *new_chs; int err; reset &= test_bit(MLX5E_STATE_OPENED, &priv->state); if (!reset) return mlx5e_switch_priv_params(priv, params, preactivate, context); - new_chs.params = *params; + new_chs = kzalloc(sizeof(*new_chs), GFP_KERNEL); + if (!new_chs) + return -ENOMEM; + new_chs->params = *params; - mlx5e_selq_prepare_params(&priv->selq, &new_chs.params); + mlx5e_selq_prepare_params(&priv->selq, &new_chs->params); - err = mlx5e_open_channels(priv, &new_chs); + err = mlx5e_open_channels(priv, new_chs); if (err) goto err_cancel_selq; - err = mlx5e_switch_priv_channels(priv, &new_chs, preactivate, context); + err = mlx5e_switch_priv_channels(priv, new_chs, preactivate, context); if (err) goto err_close; + kfree(new_chs); return 0; err_close: - mlx5e_close_channels(&new_chs); + mlx5e_close_channels(new_chs); err_cancel_selq: mlx5e_selq_cancel(&priv->selq); + kfree(new_chs); return err; } @@ -3314,20 +3300,6 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) mlx5e_destroy_tises(priv); } -static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable) -{ - int err = 0; - int i; - - for (i = 0; i < chs->num; i++) { - err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable); - if (err) - return err; - } - - return 0; -} - static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) { int err; @@ -3903,41 +3875,27 @@ static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable) return mlx5_set_ports_check(mdev, in, sizeof(in)); } +static int mlx5e_set_rx_port_ts_wrap(struct mlx5e_priv *priv, void *ctx) +{ + struct mlx5_core_dev *mdev = priv->mdev; + bool enable = *(bool *)ctx; + + return mlx5e_set_rx_port_ts(mdev, enable); +} + static int set_feature_rx_fcs(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_channels *chs = &priv->channels; - struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_params new_params; int err; mutex_lock(&priv->state_lock); - if (enable) { - err = mlx5e_set_rx_port_ts(mdev, false); - if (err) - goto out; - - chs->params.scatter_fcs_en = true; - err = mlx5e_modify_channels_scatter_fcs(chs, true); - if (err) { - chs->params.scatter_fcs_en = false; - mlx5e_set_rx_port_ts(mdev, true); - } - } else { - chs->params.scatter_fcs_en = false; - err = mlx5e_modify_channels_scatter_fcs(chs, false); - if (err) { - chs->params.scatter_fcs_en = true; - goto out; - } - err = mlx5e_set_rx_port_ts(mdev, true); - if (err) { - mlx5_core_warn(mdev, "Failed to set RX port timestamp %d\n", err); - err = 0; - } - } - -out: + new_params = chs->params; + new_params.scatter_fcs_en = enable; + err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_set_rx_port_ts_wrap, + &new_params.scatter_fcs_en, true); mutex_unlock(&priv->state_lock); return err; } @@ -4074,6 +4032,10 @@ static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev if (netdev->features & NETIF_F_GRO_HW) netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n"); + features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + if (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER) + netdev_warn(netdev, "Disabling HW_VLAN CTAG FILTERING, not supported in switchdev mode\n"); + return features; } @@ -4779,6 +4741,13 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) if (old_prog) bpf_prog_put(old_prog); + if (reset) { + if (prog) + xdp_features_set_redirect_target(netdev, true); + else + xdp_features_clear_redirect_target(netdev); + } + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) goto unlock; @@ -5056,6 +5025,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) SET_NETDEV_DEV(netdev, mdev->device); netdev->netdev_ops = &mlx5e_netdev_ops; + netdev->xdp_metadata_ops = &mlx5e_xdp_metadata_ops; mlx5e_dcbnl_build_netdev(netdev); @@ -5173,6 +5143,10 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->features |= NETIF_F_HIGHDMA; netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; + netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_XSK_ZEROCOPY | + NETDEV_XDP_ACT_RX_SG; + netdev->priv_flags |= IFF_UNICAST_FLT; netif_set_tso_max_size(netdev, GSO_MAX_SIZE); @@ -5233,7 +5207,8 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, mlx5e_timestamp_init(priv); fs = mlx5e_fs_init(priv->profile, mdev, - !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + !test_bit(MLX5E_STATE_DESTROYING, &priv->state), + priv->dfs_root); if (!fs) { err = -ENOMEM; mlx5_core_err(mdev, "FS initialization failed, %d\n", err); @@ -5874,7 +5849,8 @@ void mlx5e_destroy_netdev(struct mlx5e_priv *priv) static int mlx5e_resume(struct auxiliary_device *adev) { struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); - struct mlx5e_priv *priv = auxiliary_get_drvdata(adev); + struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev); + struct mlx5e_priv *priv = mlx5e_dev->priv; struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = edev->mdev; int err; @@ -5897,7 +5873,8 @@ static int mlx5e_resume(struct auxiliary_device *adev) static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state) { - struct mlx5e_priv *priv = auxiliary_get_drvdata(adev); + struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev); + struct mlx5e_priv *priv = mlx5e_dev->priv; struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; @@ -5915,35 +5892,46 @@ static int mlx5e_probe(struct auxiliary_device *adev, struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev); const struct mlx5e_profile *profile = &mlx5e_nic_profile; struct mlx5_core_dev *mdev = edev->mdev; + struct mlx5e_dev *mlx5e_dev; struct net_device *netdev; pm_message_t state = {}; struct mlx5e_priv *priv; int err; + mlx5e_dev = mlx5e_create_devlink(&adev->dev, mdev); + if (IS_ERR(mlx5e_dev)) + return PTR_ERR(mlx5e_dev); + auxiliary_set_drvdata(adev, mlx5e_dev); + + err = mlx5e_devlink_port_register(mlx5e_dev, mdev); + if (err) { + mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); + goto err_devlink_unregister; + } + netdev = mlx5e_create_netdev(mdev, profile); if (!netdev) { mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); - return -ENOMEM; + err = -ENOMEM; + goto err_devlink_port_unregister; } + SET_NETDEV_DEVLINK_PORT(netdev, &mlx5e_dev->dl_port); mlx5e_build_nic_netdev(netdev); priv = netdev_priv(netdev); - auxiliary_set_drvdata(adev, priv); + mlx5e_dev->priv = priv; priv->profile = profile; priv->ppriv = NULL; - err = mlx5e_devlink_port_register(priv); - if (err) { - mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); - goto err_destroy_netdev; - } + priv->dfs_root = debugfs_create_dir("nic", + mlx5_debugfs_get_dev_root(priv->mdev)); err = profile->init(mdev, netdev); if (err) { mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err); - goto err_devlink_cleanup; + goto err_destroy_netdev; } err = mlx5e_resume(adev); @@ -5952,7 +5940,6 @@ static int mlx5e_probe(struct auxiliary_device *adev, goto err_profile_cleanup; } - SET_NETDEV_DEVLINK_PORT(netdev, mlx5e_devlink_get_dl_port(priv)); err = register_netdev(netdev); if (err) { mlx5_core_err(mdev, "register_netdev failed, %d\n", err); @@ -5960,7 +5947,7 @@ static int mlx5e_probe(struct auxiliary_device *adev, } mlx5e_dcbnl_init_app(priv); - mlx5_uplink_netdev_set(mdev, netdev); + mlx5_core_uplink_netdev_set(mdev, netdev); mlx5e_params_print_info(mdev, &priv->channels.params); return 0; @@ -5968,24 +5955,31 @@ err_resume: mlx5e_suspend(adev, state); err_profile_cleanup: profile->cleanup(priv); -err_devlink_cleanup: - mlx5e_devlink_port_unregister(priv); err_destroy_netdev: + debugfs_remove_recursive(priv->dfs_root); mlx5e_destroy_netdev(priv); +err_devlink_port_unregister: + mlx5e_devlink_port_unregister(mlx5e_dev); +err_devlink_unregister: + mlx5e_destroy_devlink(mlx5e_dev); return err; } static void mlx5e_remove(struct auxiliary_device *adev) { - struct mlx5e_priv *priv = auxiliary_get_drvdata(adev); + struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev); + struct mlx5e_priv *priv = mlx5e_dev->priv; pm_message_t state = {}; + mlx5_core_uplink_netdev_set(priv->mdev, NULL); mlx5e_dcbnl_delete_app(priv); unregister_netdev(priv->netdev); mlx5e_suspend(adev, state); priv->profile->cleanup(priv); - mlx5e_devlink_port_unregister(priv); + debugfs_remove_recursive(priv->dfs_root); mlx5e_destroy_netdev(priv); + mlx5e_devlink_port_unregister(mlx5e_dev); + mlx5e_destroy_devlink(mlx5e_dev); } static const struct auxiliary_device_id mlx5e_id_table[] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 7d90e5b72854..9b9203443085 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -789,8 +789,10 @@ static int mlx5e_init_rep(struct mlx5_core_dev *mdev, { struct mlx5e_priv *priv = netdev_priv(netdev); - priv->fs = mlx5e_fs_init(priv->profile, mdev, - !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + priv->fs = + mlx5e_fs_init(priv->profile, mdev, + !test_bit(MLX5E_STATE_DESTROYING, &priv->state), + priv->dfs_root); if (!priv->fs) { netdev_err(priv->netdev, "FS allocation failed\n"); return -ENOMEM; @@ -808,7 +810,8 @@ static int mlx5e_init_ul_rep(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv = netdev_priv(netdev); priv->fs = mlx5e_fs_init(priv->profile, mdev, - !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + !test_bit(MLX5E_STATE_DESTROYING, &priv->state), + priv->dfs_root); if (!priv->fs) { netdev_err(priv->netdev, "FS allocation failed\n"); return -ENOMEM; @@ -1004,8 +1007,23 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) priv->rx_res = NULL; } +static void mlx5e_rep_mpesw_work(struct work_struct *work) +{ + struct mlx5_rep_uplink_priv *uplink_priv = + container_of(work, struct mlx5_rep_uplink_priv, + mpesw_work); + struct mlx5e_rep_priv *rpriv = + container_of(uplink_priv, struct mlx5e_rep_priv, + uplink_priv); + struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); + + rep_vport_rx_rule_destroy(priv); + mlx5e_create_rep_vport_rx_rule(priv); +} + static int mlx5e_init_ul_rep_rx(struct mlx5e_priv *priv) { + struct mlx5e_rep_priv *rpriv = priv->ppriv; int err; mlx5e_create_q_counters(priv); @@ -1015,12 +1033,17 @@ static int mlx5e_init_ul_rep_rx(struct mlx5e_priv *priv) mlx5e_tc_int_port_init_rep_rx(priv); + INIT_WORK(&rpriv->uplink_priv.mpesw_work, mlx5e_rep_mpesw_work); + out: return err; } static void mlx5e_cleanup_ul_rep_rx(struct mlx5e_priv *priv) { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + cancel_work_sync(&rpriv->uplink_priv.mpesw_work); mlx5e_tc_int_port_cleanup_rep_rx(priv); mlx5e_cleanup_rep_rx(priv); mlx5e_destroy_q_counters(priv); @@ -1129,6 +1152,19 @@ static int mlx5e_update_rep_rx(struct mlx5e_priv *priv) return 0; } +static int mlx5e_rep_event_mpesw(struct mlx5e_priv *priv) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + if (rep->vport != MLX5_VPORT_UPLINK) + return NOTIFY_DONE; + + queue_work(priv->wq, &rpriv->uplink_priv.mpesw_work); + + return NOTIFY_OK; +} + static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event, void *data) { struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb); @@ -1150,6 +1186,8 @@ static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event if (event == MLX5_DEV_EVENT_PORT_AFFINITY) return mlx5e_rep_tc_event_port_affinity(priv); + else if (event == MLX5_DEV_EVENT_MULTIPORT_ESW) + return mlx5e_rep_event_mpesw(priv); return NOTIFY_DONE; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index b4e691760da9..dcfad0bf0f45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -100,6 +100,11 @@ struct mlx5_rep_uplink_priv { struct mlx5e_tc_int_port_priv *int_port_priv; struct mlx5e_flow_meters *flow_meters; + + /* tc action stats */ + struct mlx5e_tc_act_stats_handle *action_stats_handle; + + struct work_struct mpesw_work; }; struct mlx5e_rep_priv { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3df455f6b168..3f7b63d6616b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -62,10 +62,12 @@ static struct sk_buff * mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, - u16 cqe_bcnt, u32 head_offset, u32 page_idx); + struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, + u32 page_idx); static struct sk_buff * mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, - u16 cqe_bcnt, u32 head_offset, u32 page_idx); + struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, + u32 page_idx); static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); @@ -76,11 +78,6 @@ const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic = { .handle_rx_cqe_mpwqe_shampo = mlx5e_handle_rx_cqe_mpwrq_shampo, }; -static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config) -{ - return config->rx_filter == HWTSTAMP_FILTER_ALL; -} - static inline void mlx5e_read_cqe_slot(struct mlx5_cqwq *wq, u32 cqcc, void *data) { @@ -1559,7 +1556,7 @@ struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va, u32 frag_size, u16 headroom, u32 cqe_bcnt, u32 metasize) { - struct sk_buff *skb = build_skb(va, frag_size); + struct sk_buff *skb = napi_build_skb(va, frag_size); if (unlikely(!skb)) { rq->stats->buff_alloc_err++; @@ -1575,16 +1572,19 @@ struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va, return skb; } -static void mlx5e_fill_xdp_buff(struct mlx5e_rq *rq, void *va, u16 headroom, - u32 len, struct xdp_buff *xdp) +static void mlx5e_fill_mxbuf(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, + void *va, u16 headroom, u32 len, + struct mlx5e_xdp_buff *mxbuf) { - xdp_init_buff(xdp, rq->buff.frame0_sz, &rq->xdp_rxq); - xdp_prepare_buff(xdp, va, headroom, len, true); + xdp_init_buff(&mxbuf->xdp, rq->buff.frame0_sz, &rq->xdp_rxq); + xdp_prepare_buff(&mxbuf->xdp, va, headroom, len, true); + mxbuf->cqe = cqe; + mxbuf->rq = rq; } static struct sk_buff * mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, - u32 cqe_bcnt) + struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { union mlx5e_alloc_unit *au = wi->au; u16 rx_headroom = rq->buff.headroom; @@ -1606,16 +1606,16 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, prog = rcu_dereference(rq->xdp_prog); if (prog) { - struct xdp_buff xdp; + struct mlx5e_xdp_buff mxbuf; net_prefetchw(va); /* xdp_frame data area */ - mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp); - if (mlx5e_xdp_handle(rq, au->page, prog, &xdp)) + mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf); + if (mlx5e_xdp_handle(rq, prog, &mxbuf)) return NULL; /* page/packet was consumed by XDP */ - rx_headroom = xdp.data - xdp.data_hard_start; - metasize = xdp.data - xdp.data_meta; - cqe_bcnt = xdp.data_end - xdp.data; + rx_headroom = mxbuf.xdp.data - mxbuf.xdp.data_hard_start; + metasize = mxbuf.xdp.data - mxbuf.xdp.data_meta; + cqe_bcnt = mxbuf.xdp.data_end - mxbuf.xdp.data; } frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize); @@ -1630,16 +1630,16 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, static struct sk_buff * mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi, - u32 cqe_bcnt) + struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0]; struct mlx5e_wqe_frag_info *head_wi = wi; union mlx5e_alloc_unit *au = wi->au; u16 rx_headroom = rq->buff.headroom; struct skb_shared_info *sinfo; + struct mlx5e_xdp_buff mxbuf; u32 frag_consumed_bytes; struct bpf_prog *prog; - struct xdp_buff xdp; struct sk_buff *skb; dma_addr_t addr; u32 truesize; @@ -1654,8 +1654,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi net_prefetchw(va); /* xdp_frame data area */ net_prefetch(va + rx_headroom); - mlx5e_fill_xdp_buff(rq, va, rx_headroom, frag_consumed_bytes, &xdp); - sinfo = xdp_get_shared_info_from_buff(&xdp); + mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, frag_consumed_bytes, &mxbuf); + sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp); truesize = 0; cqe_bcnt -= frag_consumed_bytes; @@ -1673,13 +1673,13 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi dma_sync_single_for_cpu(rq->pdev, addr + wi->offset, frag_consumed_bytes, rq->buff.map_dir); - if (!xdp_buff_has_frags(&xdp)) { + if (!xdp_buff_has_frags(&mxbuf.xdp)) { /* Init on the first fragment to avoid cold cache access * when possible. */ sinfo->nr_frags = 0; sinfo->xdp_frags_size = 0; - xdp_buff_set_frags_flag(&xdp); + xdp_buff_set_frags_flag(&mxbuf.xdp); } frag = &sinfo->frags[sinfo->nr_frags++]; @@ -1688,7 +1688,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi skb_frag_size_set(frag, frag_consumed_bytes); if (page_is_pfmemalloc(au->page)) - xdp_buff_set_frag_pfmemalloc(&xdp); + xdp_buff_set_frag_pfmemalloc(&mxbuf.xdp); sinfo->xdp_frags_size += frag_consumed_bytes; truesize += frag_info->frag_stride; @@ -1698,10 +1698,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi wi++; } - au = head_wi->au; - prog = rcu_dereference(rq->xdp_prog); - if (prog && mlx5e_xdp_handle(rq, au->page, prog, &xdp)) { + if (prog && mlx5e_xdp_handle(rq, prog, &mxbuf)) { if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { int i; @@ -1711,22 +1709,22 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi return NULL; /* page/packet was consumed by XDP */ } - skb = mlx5e_build_linear_skb(rq, xdp.data_hard_start, rq->buff.frame0_sz, - xdp.data - xdp.data_hard_start, - xdp.data_end - xdp.data, - xdp.data - xdp.data_meta); + skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start, rq->buff.frame0_sz, + mxbuf.xdp.data - mxbuf.xdp.data_hard_start, + mxbuf.xdp.data_end - mxbuf.xdp.data, + mxbuf.xdp.data - mxbuf.xdp.data_meta); if (unlikely(!skb)) return NULL; - page_ref_inc(au->page); + page_ref_inc(head_wi->au->page); - if (unlikely(xdp_buff_has_frags(&xdp))) { + if (xdp_buff_has_frags(&mxbuf.xdp)) { int i; /* sinfo->nr_frags is reset by build_skb, calculate again. */ xdp_update_skb_shared_info(skb, wi - head_wi - 1, sinfo->xdp_frags_size, truesize, - xdp_buff_is_frag_pfmemalloc(&xdp)); + xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp)); for (i = 0; i < sinfo->nr_frags; i++) { skb_frag_t *frag = &sinfo->frags[i]; @@ -1777,7 +1775,7 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) mlx5e_skb_from_cqe_linear, mlx5e_skb_from_cqe_nonlinear, mlx5e_xsk_skb_from_cqe_linear, - rq, wi, cqe_bcnt); + rq, wi, cqe, cqe_bcnt); if (!skb) { /* probably for XDP */ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { @@ -1792,7 +1790,7 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); if (mlx5e_cqe_regb_chain(cqe)) - if (!mlx5e_tc_update_skb(cqe, skb)) { + if (!mlx5e_tc_update_skb_nic(cqe, skb)) { dev_kfree_skb_any(skb); goto free_wqe; } @@ -1830,7 +1828,7 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, mlx5e_skb_from_cqe_linear, mlx5e_skb_from_cqe_nonlinear, - rq, wi, cqe_bcnt); + rq, wi, cqe, cqe_bcnt); if (!skb) { /* probably for XDP */ if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) { @@ -1889,7 +1887,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq, mlx5e_skb_from_cqe_mpwrq_linear, mlx5e_skb_from_cqe_mpwrq_nonlinear, - rq, wi, cqe_bcnt, head_offset, page_idx); + rq, wi, cqe, cqe_bcnt, head_offset, page_idx); if (!skb) goto mpwrq_cqe_out; @@ -1940,7 +1938,8 @@ mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq, static struct sk_buff * mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, - u16 cqe_bcnt, u32 head_offset, u32 page_idx) + struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, + u32 page_idx) { union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx]; u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt); @@ -1979,7 +1978,8 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w static struct sk_buff * mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, - u16 cqe_bcnt, u32 head_offset, u32 page_idx) + struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset, + u32 page_idx) { union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx]; u16 rx_headroom = rq->buff.headroom; @@ -2007,19 +2007,19 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, prog = rcu_dereference(rq->xdp_prog); if (prog) { - struct xdp_buff xdp; + struct mlx5e_xdp_buff mxbuf; net_prefetchw(va); /* xdp_frame data area */ - mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp); - if (mlx5e_xdp_handle(rq, au->page, prog, &xdp)) { + mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf); + if (mlx5e_xdp_handle(rq, prog, &mxbuf)) { if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) __set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */ return NULL; /* page/packet was consumed by XDP */ } - rx_headroom = xdp.data - xdp.data_hard_start; - metasize = xdp.data - xdp.data_meta; - cqe_bcnt = xdp.data_end - xdp.data; + rx_headroom = mxbuf.xdp.data - mxbuf.xdp.data_hard_start; + metasize = mxbuf.xdp.data - mxbuf.xdp.data_meta; + cqe_bcnt = mxbuf.xdp.data_end - mxbuf.xdp.data; } frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt); skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize); @@ -2174,8 +2174,8 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq if (likely(head_size)) *skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index); else - *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe_bcnt, data_offset, - page_idx); + *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe, cqe_bcnt, + data_offset, page_idx); if (unlikely(!*skb)) goto free_hd_entry; @@ -2249,14 +2249,15 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq mlx5e_skb_from_cqe_mpwrq_linear, mlx5e_skb_from_cqe_mpwrq_nonlinear, mlx5e_xsk_skb_from_cqe_mpwrq_linear, - rq, wi, cqe_bcnt, head_offset, page_idx); + rq, wi, cqe, cqe_bcnt, head_offset, + page_idx); if (!skb) goto mpwrq_cqe_out; mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); if (mlx5e_cqe_regb_chain(cqe)) - if (!mlx5e_tc_update_skb(cqe, skb)) { + if (!mlx5e_tc_update_skb_nic(cqe, skb)) { dev_kfree_skb_any(skb); goto mpwrq_cqe_out; } @@ -2494,7 +2495,7 @@ static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe, mlx5e_skb_from_cqe_linear, mlx5e_skb_from_cqe_nonlinear, - rq, wi, cqe_bcnt); + rq, wi, cqe, cqe_bcnt); if (!skb) goto wq_free_wqe; @@ -2567,10 +2568,8 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { - struct mlx5e_priv *priv = netdev_priv(rq->netdev); struct mlx5_wq_cyc *wq = &rq->wqe.wq; struct mlx5e_wqe_frag_info *wi; - struct devlink_port *dl_port; struct sk_buff *skb; u32 cqe_bcnt; u16 trap_id; @@ -2586,15 +2585,15 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe goto free_wqe; } - skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe_bcnt); + skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe, cqe_bcnt); if (!skb) goto free_wqe; mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); skb_push(skb, ETH_HLEN); - dl_port = mlx5e_devlink_get_dl_port(priv); - mlx5_devlink_trap_report(rq->mdev, trap_id, skb, dl_port); + mlx5_devlink_trap_report(rq->mdev, trap_id, skb, + rq->netdev->devlink_port); dev_kfree_skb_any(skb); free_wqe: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 243d5d7750be..e34d9b5fb504 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -43,8 +43,10 @@ #include <net/ipv6_stubs.h> #include <net/bareudp.h> #include <net/bonding.h> +#include <net/dst_metadata.h> #include "en.h" #include "en/tc/post_act.h" +#include "en/tc/act_stats.h" #include "en_rep.h" #include "en/rep/tc.h" #include "en/rep/neigh.h" @@ -71,6 +73,12 @@ #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18) +struct mlx5e_hairpin_params { + struct mlx5_core_dev *mdev; + u32 num_queues; + u32 queue_size; +}; + struct mlx5e_tc_table { /* Protects the dynamic assignment of the t parameter * which is the nic tc root table. @@ -93,10 +101,15 @@ struct mlx5e_tc_table { struct mlx5_tc_ct_priv *ct; struct mapping_ctx *mapping; + struct mlx5e_hairpin_params hairpin_params; + struct dentry *dfs_root; + + /* tc action stats */ + struct mlx5e_tc_act_stats_handle *action_stats_handle; }; struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { - [CHAIN_TO_REG] = { + [MAPPED_OBJ_TO_REG] = { .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, .moffset = 0, .mlen = 16, @@ -123,7 +136,7 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { * into reg_b that is passed to SW since we don't * jump between steering domains. */ - [NIC_CHAIN_TO_REG] = { + [NIC_MAPPED_OBJ_TO_REG] = { .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B, .moffset = 0, .mlen = 16, @@ -278,6 +291,24 @@ mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev, return err; } +static struct mlx5e_tc_act_stats_handle * +get_act_stats_handle(struct mlx5e_priv *priv) +{ + struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + + if (is_mdev_switchdev_mode(priv->mdev)) { + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + return uplink_priv->action_stats_handle; + } + + return tc->action_stats_handle; +} + struct mlx5e_tc_int_port_priv * mlx5e_get_int_port_priv(struct mlx5e_priv *priv) { @@ -639,36 +670,36 @@ get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) &tc->mod_hdr; } -static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - struct mlx5e_tc_flow_parse_attr *parse_attr) +int mlx5e_tc_attach_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr) { - struct mlx5_modify_hdr *modify_hdr; struct mlx5e_mod_hdr_handle *mh; mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow), mlx5e_get_flow_namespace(flow), - &parse_attr->mod_hdr_acts); + &attr->parse_attr->mod_hdr_acts); if (IS_ERR(mh)) return PTR_ERR(mh); - modify_hdr = mlx5e_mod_hdr_get(mh); - flow->attr->modify_hdr = modify_hdr; - flow->mh = mh; + WARN_ON(attr->modify_hdr); + attr->modify_hdr = mlx5e_mod_hdr_get(mh); + attr->mh = mh; return 0; } -static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow) +void mlx5e_tc_detach_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr) { /* flow wasn't fully initialized */ - if (!flow->mh) + if (!attr->mh) return; mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow), - flow->mh); - flow->mh = NULL; + attr->mh); + attr->mh = NULL; } static @@ -1017,6 +1048,136 @@ static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv, return 0; } +static int debugfs_hairpin_queues_set(void *data, u64 val) +{ + struct mlx5e_hairpin_params *hp = data; + + if (!val) { + mlx5_core_err(hp->mdev, + "Number of hairpin queues must be > 0\n"); + return -EINVAL; + } + + hp->num_queues = val; + + return 0; +} + +static int debugfs_hairpin_queues_get(void *data, u64 *val) +{ + struct mlx5e_hairpin_params *hp = data; + + *val = hp->num_queues; + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_hairpin_queues, debugfs_hairpin_queues_get, + debugfs_hairpin_queues_set, "%llu\n"); + +static int debugfs_hairpin_queue_size_set(void *data, u64 val) +{ + struct mlx5e_hairpin_params *hp = data; + + if (val > BIT(MLX5_CAP_GEN(hp->mdev, log_max_hairpin_num_packets))) { + mlx5_core_err(hp->mdev, + "Invalid hairpin queue size, must be <= %lu\n", + BIT(MLX5_CAP_GEN(hp->mdev, + log_max_hairpin_num_packets))); + return -EINVAL; + } + + hp->queue_size = roundup_pow_of_two(val); + + return 0; +} + +static int debugfs_hairpin_queue_size_get(void *data, u64 *val) +{ + struct mlx5e_hairpin_params *hp = data; + + *val = hp->queue_size; + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_hairpin_queue_size, + debugfs_hairpin_queue_size_get, + debugfs_hairpin_queue_size_set, "%llu\n"); + +static int debugfs_hairpin_num_active_get(void *data, u64 *val) +{ + struct mlx5e_tc_table *tc = data; + struct mlx5e_hairpin_entry *hpe; + u32 cnt = 0; + u32 bkt; + + mutex_lock(&tc->hairpin_tbl_lock); + hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist) + cnt++; + mutex_unlock(&tc->hairpin_tbl_lock); + + *val = cnt; + + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(fops_hairpin_num_active, + debugfs_hairpin_num_active_get, NULL, "%llu\n"); + +static int debugfs_hairpin_table_dump_show(struct seq_file *file, void *priv) + +{ + struct mlx5e_tc_table *tc = file->private; + struct mlx5e_hairpin_entry *hpe; + u32 bkt; + + mutex_lock(&tc->hairpin_tbl_lock); + hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist) + seq_printf(file, "Hairpin peer_vhca_id %u prio %u refcnt %u\n", + hpe->peer_vhca_id, hpe->prio, + refcount_read(&hpe->refcnt)); + mutex_unlock(&tc->hairpin_tbl_lock); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(debugfs_hairpin_table_dump); + +static void mlx5e_tc_debugfs_init(struct mlx5e_tc_table *tc, + struct dentry *dfs_root) +{ + if (IS_ERR_OR_NULL(dfs_root)) + return; + + tc->dfs_root = debugfs_create_dir("tc", dfs_root); + + debugfs_create_file("hairpin_num_queues", 0644, tc->dfs_root, + &tc->hairpin_params, &fops_hairpin_queues); + debugfs_create_file("hairpin_queue_size", 0644, tc->dfs_root, + &tc->hairpin_params, &fops_hairpin_queue_size); + debugfs_create_file("hairpin_num_active", 0444, tc->dfs_root, tc, + &fops_hairpin_num_active); + debugfs_create_file("hairpin_table_dump", 0444, tc->dfs_root, tc, + &debugfs_hairpin_table_dump_fops); +} + +static void +mlx5e_hairpin_params_init(struct mlx5e_hairpin_params *hairpin_params, + struct mlx5_core_dev *mdev) +{ + u64 link_speed64; + u32 link_speed; + + hairpin_params->mdev = mdev; + /* set hairpin pair per each 50Gbs share of the link */ + mlx5e_port_max_linkspeed(mdev, &link_speed); + link_speed = max_t(u32, link_speed, 50000); + link_speed64 = link_speed; + do_div(link_speed64, 50000); + hairpin_params->num_queues = link_speed64; + + hairpin_params->queue_size = + BIT(min_t(u32, 16 - MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), + MLX5_CAP_GEN(mdev, log_max_hairpin_num_packets))); +} + static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5e_tc_flow_parse_attr *parse_attr, @@ -1028,8 +1189,6 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, struct mlx5_core_dev *peer_mdev; struct mlx5e_hairpin_entry *hpe; struct mlx5e_hairpin *hp; - u64 link_speed64; - u32 link_speed; u8 match_prio; u16 peer_id; int err; @@ -1082,21 +1241,16 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, hash_hairpin_info(peer_id, match_prio)); mutex_unlock(&tc->hairpin_tbl_lock); - params.log_data_size = clamp_t(u8, 16, - MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz), - MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz)); - params.log_num_packets = params.log_data_size - - MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev); - params.log_num_packets = min_t(u8, params.log_num_packets, - MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets)); + params.log_num_packets = ilog2(tc->hairpin_params.queue_size); + params.log_data_size = + clamp_t(u32, + params.log_num_packets + + MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev), + MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz), + MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz)); params.q_counter = priv->q_counter; - /* set hairpin pair per each 50Gbs share of the link */ - mlx5e_port_max_linkspeed(priv->mdev, &link_speed); - link_speed = max_t(u32, link_speed, 50000); - link_speed64 = link_speed; - do_div(link_speed64, 50000); - params.num_channels = link_speed64; + params.num_channels = tc->hairpin_params.num_queues; hp = mlx5e_hairpin_create(priv, ¶ms, peer_ifindex); hpe->hp = hp; @@ -1301,7 +1455,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { - err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); + err = mlx5e_tc_attach_mod_hdr(priv, flow, attr); if (err) return err; } @@ -1361,7 +1515,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); - mlx5e_detach_mod_hdr(priv, flow); + mlx5e_tc_detach_mod_hdr(priv, flow, attr); } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) @@ -1451,7 +1605,7 @@ mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, goto err_get_chain; err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, - CHAIN_TO_REG, chain_mapping); + MAPPED_OBJ_TO_REG, chain_mapping); if (err) goto err_reg_set; @@ -1472,7 +1626,7 @@ skip_restore: goto err_offload; } - flow->slow_mh = mh; + flow->attr->slow_mh = mh; flow->chain_mapping = chain_mapping; flow_flag_set(flow, SLOW); @@ -1497,6 +1651,7 @@ err_get_chain: void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, struct mlx5e_tc_flow *flow) { + struct mlx5e_mod_hdr_handle *slow_mh = flow->attr->slow_mh; struct mlx5_flow_attr *slow_attr; slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); @@ -1509,16 +1664,16 @@ void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; slow_attr->esw_attr->split_count = 0; slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH; - if (flow->slow_mh) { + if (slow_mh) { slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - slow_attr->modify_hdr = mlx5e_mod_hdr_get(flow->slow_mh); + slow_attr->modify_hdr = mlx5e_mod_hdr_get(slow_mh); } mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); - if (flow->slow_mh) { - mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), flow->slow_mh); + if (slow_mh) { + mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), slow_mh); mlx5_chains_put_chain_mapping(esw_chains(esw), flow->chain_mapping); flow->chain_mapping = 0; - flow->slow_mh = NULL; + flow->attr->slow_mh = NULL; } flow_flag_clear(flow, SLOW); kfree(slow_attr); @@ -1629,26 +1784,6 @@ int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *ro return err; } -int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - struct mlx5_flow_attr *attr) -{ - struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &attr->parse_attr->mod_hdr_acts; - struct mlx5_modify_hdr *mod_hdr; - - mod_hdr = mlx5_modify_header_alloc(priv->mdev, - mlx5e_get_flow_namespace(flow), - mod_hdr_acts->num_actions, - mod_hdr_acts->actions); - if (IS_ERR(mod_hdr)) - return PTR_ERR(mod_hdr); - - WARN_ON(attr->modify_hdr); - attr->modify_hdr = mod_hdr; - - return 0; -} - static int set_encap_dests(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, @@ -1768,10 +1903,8 @@ verify_attr_actions(u32 actions, struct netlink_ext_ack *extack) static int post_process_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr, - bool is_post_act_attr, struct netlink_ext_ack *extack) { - struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; bool vf_tun; int err = 0; @@ -1783,34 +1916,22 @@ post_process_attr(struct mlx5e_tc_flow *flow, if (err) goto err_out; - if (mlx5e_is_eswitch_flow(flow)) { - err = mlx5_eswitch_add_vlan_action(esw, attr); + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr); if (err) goto err_out; } - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { - if (vf_tun || is_post_act_attr) { - err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr); - if (err) - goto err_out; - } else { - err = mlx5e_attach_mod_hdr(flow->priv, flow, attr->parse_attr); - if (err) - goto err_out; - } - } - if (attr->branch_true && attr->branch_true->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { - err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr->branch_true); + err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr->branch_true); if (err) goto err_out; } if (attr->branch_false && attr->branch_false->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { - err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr->branch_false); + err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr->branch_false); if (err) goto err_out; } @@ -1924,7 +2045,11 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, esw_attr->int_port = int_port; } - err = post_process_attr(flow, attr, false, extack); + err = post_process_attr(flow, attr, extack); + if (err) + goto err_out; + + err = mlx5e_tc_act_stats_add_flow(get_act_stats_handle(priv), flow); if (err) goto err_out; @@ -1998,8 +2123,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (mlx5_flow_has_geneve_opt(flow)) mlx5_geneve_tlv_option_del(priv->mdev->geneve); - mlx5_eswitch_del_vlan_action(esw, attr); - if (flow->decap_route) mlx5e_detach_decap_route(priv, flow); @@ -2009,10 +2132,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); - if (vf_tun && attr->modify_hdr) - mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr); - else - mlx5e_detach_mod_hdr(priv, flow); + mlx5e_tc_detach_mod_hdr(priv, flow, attr); } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) @@ -2027,13 +2147,12 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (flow_flag_test(flow, L3_TO_L2_DECAP)) mlx5e_detach_decap(priv, flow); + mlx5e_tc_act_stats_del_flow(get_act_stats_handle(priv), flow); + free_flow_post_acts(flow); free_branch_attr(flow, attr->branch_true); free_branch_attr(flow, attr->branch_false); - if (flow->attr->lag.count) - mlx5_lag_del_mpesw_rule(esw->dev); - kvfree(attr->esw_attr->rx_tun_attr); kvfree(attr->parse_attr); kfree(flow->attr); @@ -2492,13 +2611,13 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, err = mlx5e_tc_set_attr_rx_tun(flow, spec); if (err) return err; - } else if (tunnel && tunnel->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + } else if (tunnel) { struct mlx5_flow_spec *tmp_spec; tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL); if (!tmp_spec) { - NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for vxlan tmp spec"); - netdev_warn(priv->netdev, "Failed to allocate memory for vxlan tmp spec"); + NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for tunnel tmp spec"); + netdev_warn(priv->netdev, "Failed to allocate memory for tunnel tmp spec"); return -ENOMEM; } memcpy(tmp_spec, spec, sizeof(*tmp_spec)); @@ -3560,7 +3679,6 @@ out_ok: static bool actions_match_supported_fdb(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { @@ -3609,7 +3727,7 @@ actions_match_supported(struct mlx5e_priv *priv, return false; if (mlx5e_is_eswitch_flow(flow) && - !actions_match_supported_fdb(priv, parse_attr, flow, extack)) + !actions_match_supported_fdb(priv, flow, extack)) return false; return true; @@ -3692,10 +3810,13 @@ mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr, INIT_LIST_HEAD(&attr2->list); parse_attr->filter_dev = attr->parse_attr->filter_dev; attr2->action = 0; + attr2->counter = NULL; + attr->tc_act_cookies_count = 0; attr2->flags = 0; attr2->parse_attr = parse_attr; attr2->dest_chain = 0; attr2->dest_ft = NULL; + attr2->act_id_restore_rule = NULL; if (ns_type == MLX5_FLOW_NAMESPACE_FDB) { attr2->esw_attr->out_count = 0; @@ -3831,7 +3952,7 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) if (err) goto out_free; - err = post_process_attr(flow, attr, true, extack); + err = post_process_attr(flow, attr, extack); if (err) goto out_free; @@ -3991,6 +4112,11 @@ parse_branch_ctrl(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act, jump_state->jumping_attr = attr->branch_false; jump_state->jump_count = jump_count; + + /* branching action requires its own counter */ + attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + flow_flag_set(flow, USE_ACT_STATS); + return 0; err_branch_false: @@ -4051,6 +4177,8 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, goto out_free; parse_state->actions |= attr->action; + if (!tc_act->stats_action) + attr->tc_act_cookies[attr->tc_act_cookies_count++] = act->cookie; /* Split attr for multi table act if not the last act. */ if (jump_state.jump_target || @@ -4184,12 +4312,7 @@ static bool is_lag_dev(struct mlx5e_priv *priv, static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev) { - if (same_hw_reps(priv, out_dev) && - MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) && - MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up)) - return true; - - return false; + return same_hw_reps(priv, out_dev) && mlx5_lag_is_mpesw(priv->mdev); } bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv, @@ -4360,6 +4483,9 @@ static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow) (is_rep_ingress || act_is_encap)) return true; + if (mlx5_lag_is_mpesw(esw_attr->in_mdev)) + return true; + return false; } @@ -4398,8 +4524,7 @@ mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); - if (attr->modify_hdr) - mlx5_modify_header_dealloc(flow->priv->mdev, attr->modify_hdr); + mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr); } } @@ -4492,7 +4617,6 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_core_dev *in_mdev) { struct flow_rule *rule = flow_cls_offload_flow_rule(f); - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; @@ -4521,33 +4645,21 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; - /* always set IP version for indirect table handling */ - flow->attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true); - err = parse_tc_fdb_actions(priv, &rule->action, flow, extack); if (err) goto err_free; - if (flow->attr->lag.count) { - err = mlx5_lag_add_mpesw_rule(esw->dev); - if (err) - goto err_free; - } - err = mlx5e_tc_add_fdb_flow(priv, flow, extack); complete_all(&flow->init_done); if (err) { if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev))) - goto err_lag; + goto err_free; add_unready_flow(flow); } return flow; -err_lag: - if (flow->attr->lag.count) - mlx5_lag_del_mpesw_rule(esw->dev); err_free: mlx5e_flow_put(priv, flow); out: @@ -4579,8 +4691,10 @@ static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, * So packets redirected to uplink use the same mdev of the * original flow and packets redirected from uplink use the * peer mdev. + * In multiport eswitch it's a special case that we need to + * keep the original mdev. */ - if (attr->in_rep->vport == MLX5_VPORT_UPLINK) + if (attr->in_rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(priv->mdev)) in_mdev = peer_priv->mdev; else in_mdev = priv->mdev; @@ -4842,6 +4956,12 @@ errout: return err; } +int mlx5e_tc_fill_action_stats(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act) +{ + return mlx5e_tc_act_stats_fill_stats(get_act_stats_handle(priv), fl_act); +} + int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, struct flow_cls_offload *f, unsigned long flags) { @@ -4868,11 +4988,15 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, } if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) { - counter = mlx5e_tc_get_counter(flow); - if (!counter) - goto errout; + if (flow_flag_test(flow, USE_ACT_STATS)) { + f->use_act_stats = true; + } else { + counter = mlx5e_tc_get_counter(flow); + if (!counter) + goto errout; - mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + } } /* Under multipath it's possible for one rule to be currently @@ -4888,14 +5012,18 @@ int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, u64 packets2; u64 lastuse2; - counter = mlx5e_tc_get_counter(flow->peer_flow); - if (!counter) - goto no_peer_counter; - mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2); - - bytes += bytes2; - packets += packets2; - lastuse = max_t(u64, lastuse, lastuse2); + if (flow_flag_test(flow, USE_ACT_STATS)) { + f->use_act_stats = true; + } else { + counter = mlx5e_tc_get_counter(flow->peer_flow); + if (!counter) + goto no_peer_counter; + mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2); + + bytes += bytes2; + packets += packets2; + lastuse = max_t(u64, lastuse, lastuse2); + } } no_peer_counter: @@ -5220,6 +5348,8 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr, MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act); + mlx5e_hairpin_params_init(&tc->hairpin_params, dev); + tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; err = register_netdevice_notifier_dev_net(priv->netdev, &tc->netdevice_nb, @@ -5230,8 +5360,18 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) goto err_reg; } + mlx5e_tc_debugfs_init(tc, mlx5e_fs_get_debugfs_root(priv->fs)); + + tc->action_stats_handle = mlx5e_tc_act_stats_create(); + if (IS_ERR(tc->action_stats_handle)) + goto err_act_stats; + return 0; +err_act_stats: + unregister_netdevice_notifier_dev_net(priv->netdev, + &tc->netdevice_nb, + &tc->netdevice_nn); err_reg: mlx5_tc_ct_clean(tc->ct); mlx5e_tc_post_act_destroy(tc->post_act); @@ -5258,6 +5398,8 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) { struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs); + debugfs_remove_recursive(tc->dfs_root); + if (tc->netdevice_nb.notifier_call) unregister_netdevice_notifier_dev_net(priv->netdev, &tc->netdevice_nb, @@ -5279,6 +5421,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) mapping_destroy(tc->mapping); mlx5_chains_destroy(tc->chains); mlx5e_tc_nic_destroy_miss_table(priv); + mlx5e_tc_act_stats_free(tc->action_stats_handle); } int mlx5e_tc_ht_init(struct rhashtable *tc_ht) @@ -5355,8 +5498,14 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) goto err_register_fib_notifier; } + uplink_priv->action_stats_handle = mlx5e_tc_act_stats_create(); + if (IS_ERR(uplink_priv->action_stats_handle)) + goto err_action_counter; + return 0; +err_action_counter: + mlx5e_tc_tun_cleanup(uplink_priv->encap); err_register_fib_notifier: mapping_destroy(uplink_priv->tunnel_enc_opts_mapping); err_enc_opts_mapping: @@ -5383,6 +5532,7 @@ void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv) mlx5_tc_ct_clean(uplink_priv->ct_priv); mlx5e_flow_meters_cleanup(uplink_priv->flow_meters); mlx5e_tc_post_act_destroy(uplink_priv->post_act); + mlx5e_tc_act_stats_free(uplink_priv->action_stats_handle); } int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) @@ -5456,48 +5606,268 @@ int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, } } -bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, - struct sk_buff *skb) +static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5e_tc_update_priv *tc_priv, + u32 tunnel_id) { -#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) - u32 chain = 0, chain_tag, reg_b, zone_restore_id; - struct mlx5e_priv *priv = netdev_priv(skb->dev); - struct mlx5_mapped_obj mapped_obj; - struct tc_skb_ext *tc_skb_ext; - struct mlx5e_tc_table *tc; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct tunnel_match_enc_opts enc_opts = {}; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + struct metadata_dst *tun_dst; + struct tunnel_match_key key; + u32 tun_id, enc_opts_id; + struct net_device *dev; int err; - reg_b = be32_to_cpu(cqe->ft_metadata); - tc = mlx5e_fs_get_tc(priv->fs); - chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; + enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK; + tun_id = tunnel_id >> ENC_OPTS_BITS; - err = mapping_find(tc->mapping, chain_tag, &mapped_obj); + if (!tun_id) + return true; + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key); if (err) { netdev_dbg(priv->netdev, - "Couldn't find chain for chain tag: %d, err: %d\n", - chain_tag, err); + "Couldn't find tunnel for tun_id: %d, err: %d\n", + tun_id, err); return false; } - if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) { - chain = mapped_obj.chain; - tc_skb_ext = tc_skb_ext_alloc(skb); - if (WARN_ON(!tc_skb_ext)) + if (enc_opts_id) { + err = mapping_find(uplink_priv->tunnel_enc_opts_mapping, + enc_opts_id, &enc_opts); + if (err) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n", + enc_opts_id, err); return false; + } + } + + switch (key.enc_control.addr_type) { + case FLOW_DISSECTOR_KEY_IPV4_ADDRS: + tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst, + key.enc_ip.tos, key.enc_ip.ttl, + key.enc_tp.dst, TUNNEL_KEY, + key32_to_tunnel_id(key.enc_key_id.keyid), + enc_opts.key.len); + break; + case FLOW_DISSECTOR_KEY_IPV6_ADDRS: + tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst, + key.enc_ip.tos, key.enc_ip.ttl, + key.enc_tp.dst, 0, TUNNEL_KEY, + key32_to_tunnel_id(key.enc_key_id.keyid), + enc_opts.key.len); + break; + default: + netdev_dbg(priv->netdev, + "Couldn't restore tunnel, unsupported addr_type: %d\n", + key.enc_control.addr_type); + return false; + } + + if (!tun_dst) { + netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n"); + return false; + } + + tun_dst->u.tun_info.key.tp_src = key.enc_tp.src; + + if (enc_opts.key.len) + ip_tunnel_info_opts_set(&tun_dst->u.tun_info, + enc_opts.key.data, + enc_opts.key.len, + enc_opts.key.dst_opt_type); + + skb_dst_set(skb, (struct dst_entry *)tun_dst); + dev = dev_get_by_index(&init_net, key.filter_ifindex); + if (!dev) { + netdev_dbg(priv->netdev, + "Couldn't find tunnel device with ifindex: %d\n", + key.filter_ifindex); + return false; + } - tc_skb_ext->chain = chain; + /* Set fwd_dev so we do dev_put() after datapath */ + tc_priv->fwd_dev = dev; - zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) & - ESW_ZONE_ID_MASK; + skb->dev = dev; - if (!mlx5e_tc_ct_restore_flow(tc->ct, skb, - zone_restore_id)) + return true; +} + +static bool mlx5e_tc_restore_skb_tc_meta(struct sk_buff *skb, struct mlx5_tc_ct_priv *ct_priv, + struct mlx5_mapped_obj *mapped_obj, u32 zone_restore_id, + u32 tunnel_id, struct mlx5e_tc_update_priv *tc_priv) +{ + struct mlx5e_priv *priv = netdev_priv(skb->dev); + struct tc_skb_ext *tc_skb_ext; + u64 act_miss_cookie; + u32 chain; + + chain = mapped_obj->type == MLX5_MAPPED_OBJ_CHAIN ? mapped_obj->chain : 0; + act_miss_cookie = mapped_obj->type == MLX5_MAPPED_OBJ_ACT_MISS ? + mapped_obj->act_miss_cookie : 0; + if (chain || act_miss_cookie) { + if (!mlx5e_tc_ct_restore_flow(ct_priv, skb, zone_restore_id)) return false; - } else { + + tc_skb_ext = tc_skb_ext_alloc(skb); + if (!tc_skb_ext) { + WARN_ON(1); + return false; + } + + if (act_miss_cookie) { + tc_skb_ext->act_miss_cookie = act_miss_cookie; + tc_skb_ext->act_miss = 1; + } else { + tc_skb_ext->chain = chain; + } + } + + if (tc_priv) + return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id); + + return true; +} + +static void mlx5e_tc_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5_mapped_obj *mapped_obj, + struct mlx5e_tc_update_priv *tc_priv) +{ + if (!mlx5e_tc_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) { + netdev_dbg(priv->netdev, + "Failed to restore tunnel info for sampled packet\n"); + return; + } + mlx5e_tc_sample_skb(skb, mapped_obj); +} + +static bool mlx5e_tc_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5_mapped_obj *mapped_obj, + struct mlx5e_tc_update_priv *tc_priv, + u32 tunnel_id) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *uplink_rpriv; + bool forward_tx = false; + + /* Tunnel restore takes precedence over int port restore */ + if (tunnel_id) + return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id); + + uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &uplink_rpriv->uplink_priv; + + if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb, + mapped_obj->int_port_metadata, &forward_tx)) { + /* Set fwd_dev for future dev_put */ + tc_priv->fwd_dev = skb->dev; + tc_priv->forward_tx = forward_tx; + + return true; + } + + return false; +} + +bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb, + struct mapping_ctx *mapping_ctx, u32 mapped_obj_id, + struct mlx5_tc_ct_priv *ct_priv, + u32 zone_restore_id, u32 tunnel_id, + struct mlx5e_tc_update_priv *tc_priv) +{ + struct mlx5e_priv *priv = netdev_priv(skb->dev); + struct mlx5_mapped_obj mapped_obj; + int err; + + err = mapping_find(mapping_ctx, mapped_obj_id, &mapped_obj); + if (err) { + netdev_dbg(skb->dev, + "Couldn't find mapped object for mapped_obj_id: %d, err: %d\n", + mapped_obj_id, err); + return false; + } + + switch (mapped_obj.type) { + case MLX5_MAPPED_OBJ_CHAIN: + case MLX5_MAPPED_OBJ_ACT_MISS: + return mlx5e_tc_restore_skb_tc_meta(skb, ct_priv, &mapped_obj, zone_restore_id, + tunnel_id, tc_priv); + case MLX5_MAPPED_OBJ_SAMPLE: + mlx5e_tc_restore_skb_sample(priv, skb, &mapped_obj, tc_priv); + tc_priv->skb_done = true; + return true; + case MLX5_MAPPED_OBJ_INT_PORT_METADATA: + return mlx5e_tc_restore_skb_int_port(priv, skb, &mapped_obj, tc_priv, tunnel_id); + default: netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type); return false; } -#endif /* CONFIG_NET_TC_SKB_EXT */ - return true; + return false; +} + +bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb) +{ + struct mlx5e_priv *priv = netdev_priv(skb->dev); + u32 mapped_obj_id, reg_b, zone_restore_id; + struct mlx5_tc_ct_priv *ct_priv; + struct mapping_ctx *mapping_ctx; + struct mlx5e_tc_table *tc; + + reg_b = be32_to_cpu(cqe->ft_metadata); + tc = mlx5e_fs_get_tc(priv->fs); + mapped_obj_id = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; + zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) & + ESW_ZONE_ID_MASK; + ct_priv = tc->ct; + mapping_ctx = tc->mapping; + + return mlx5e_tc_update_skb(cqe, skb, mapping_ctx, mapped_obj_id, ct_priv, zone_restore_id, + 0, NULL); +} + +int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, + u64 act_miss_cookie, u32 *act_miss_mapping) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_mapped_obj mapped_obj = {}; + struct mapping_ctx *ctx; + int err; + + ctx = esw->offloads.reg_c0_obj_pool; + + mapped_obj.type = MLX5_MAPPED_OBJ_ACT_MISS; + mapped_obj.act_miss_cookie = act_miss_cookie; + err = mapping_add(ctx, &mapped_obj, act_miss_mapping); + if (err) + return err; + + attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping); + if (IS_ERR(attr->act_id_restore_rule)) + goto err_rule; + + return 0; + +err_rule: + mapping_remove(ctx, *act_miss_mapping); + return err; +} + +void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, + u32 act_miss_mapping) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mapping_ctx *ctx; + + ctx = esw->offloads.reg_c0_obj_pool; + mlx5_del_flow_rules(attr->act_id_restore_rule); + mapping_remove(ctx, act_miss_mapping); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 50af70ef22f3..adb39e30f90f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -59,6 +59,8 @@ int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags); struct mlx5e_tc_update_priv { struct net_device *fwd_dev; + bool skb_done; + bool forward_tx; }; struct mlx5_nic_flow_attr { @@ -69,35 +71,33 @@ struct mlx5_nic_flow_attr { struct mlx5_flow_attr { u32 action; + unsigned long tc_act_cookies[TCA_ACT_MAX_PRIO]; struct mlx5_fc *counter; struct mlx5_modify_hdr *modify_hdr; + struct mlx5e_mod_hdr_handle *mh; /* attached mod header instance */ + struct mlx5e_mod_hdr_handle *slow_mh; /* attached mod header instance for slow path */ struct mlx5_ct_attr ct_attr; struct mlx5e_sample_attr sample_attr; struct mlx5e_meter_attr meter_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; u32 chain; u16 prio; + u16 tc_act_cookies_count; u32 dest_chain; struct mlx5_flow_table *ft; struct mlx5_flow_table *dest_ft; u8 inner_match_level; u8 outer_match_level; - u8 ip_version; u8 tun_ip_version; int tunnel_id; /* mapped tunnel id */ u32 flags; u32 exe_aso_type; struct list_head list; struct mlx5e_post_act_handle *post_act_handle; - struct { - /* Indicate whether the parsed flow should be counted for lag mode decision - * making - */ - bool count; - } lag; struct mlx5_flow_attr *branch_true; struct mlx5_flow_attr *branch_false; struct mlx5_flow_attr *jumping_attr; + struct mlx5_flow_handle *act_id_restore_rule; /* keep this union last */ union { DECLARE_FLEX_ARRAY(struct mlx5_esw_flow_attr, esw_attr); @@ -134,7 +134,6 @@ struct mlx5_rx_tun_attr { __be32 v4; struct in6_addr v6; } dst_ip; /* Valid if decap_vport is not zero */ - u32 vni; }; #define MLX5E_TC_TABLE_CHAIN_TAG_BITS 16 @@ -197,6 +196,8 @@ int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, struct flow_cls_offload *f, unsigned long flags); +int mlx5e_tc_fill_action_stats(struct mlx5e_priv *priv, + struct flow_offload_action *fl_act); int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, struct tc_cls_matchall_offload *f); @@ -227,7 +228,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); void mlx5e_tc_reoffload_flows_work(struct work_struct *work); enum mlx5e_tc_attr_to_reg { - CHAIN_TO_REG, + MAPPED_OBJ_TO_REG, VPORT_TO_REG, TUNNEL_TO_REG, CTSTATE_TO_REG, @@ -236,7 +237,7 @@ enum mlx5e_tc_attr_to_reg { MARK_TO_REG, LABELS_TO_REG, FTEID_TO_REG, - NIC_CHAIN_TO_REG, + NIC_MAPPED_OBJ_TO_REG, NIC_ZONE_RESTORE_TO_REG, PACKET_COLOR_TO_REG, }; @@ -285,9 +286,13 @@ int mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev, enum mlx5e_tc_attr_to_reg type, u32 data); -int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, - struct mlx5_flow_attr *attr); +int mlx5e_tc_attach_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr); + +void mlx5e_tc_detach_mod_hdr(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr); void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev, struct flow_match_basic *match, bool outer, @@ -366,7 +371,6 @@ struct mlx5e_tc_table *mlx5e_tc_table_alloc(void); void mlx5e_tc_table_free(struct mlx5e_tc_table *tc); static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) { -#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) u32 chain, reg_b; reg_b = be32_to_cpu(cqe->ft_metadata); @@ -377,20 +381,29 @@ static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) chain = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK; if (chain) return true; -#endif return false; } -bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb); +bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb); +bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb, + struct mapping_ctx *mapping_ctx, u32 mapped_obj_id, + struct mlx5_tc_ct_priv *ct_priv, + u32 zone_restore_id, u32 tunnel_id, + struct mlx5e_tc_update_priv *tc_priv); #else /* CONFIG_MLX5_CLS_ACT */ static inline struct mlx5e_tc_table *mlx5e_tc_table_alloc(void) { return NULL; } static inline void mlx5e_tc_table_free(struct mlx5e_tc_table *tc) {} static inline bool mlx5e_cqe_regb_chain(struct mlx5_cqe64 *cqe) { return false; } static inline bool -mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb) +mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb) { return true; } #endif +int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, + u64 act_miss_cookie, u32 *act_miss_mapping); +void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr, + u32 act_miss_mapping); + #endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index f7897ddb29c5..df5e780e8e6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -720,21 +720,6 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more) -{ - struct mlx5e_tx_wqe_attr wqe_attr; - struct mlx5e_tx_attr attr; - struct mlx5e_tx_wqe *wqe; - u16 pi; - - mlx5e_sq_xmit_prepare(sq, skb, NULL, &attr); - mlx5e_sq_calc_wqe_attr(skb, &attr, &wqe_attr); - pi = mlx5e_txqsq_get_next_pi(sq, wqe_attr.num_wqebbs); - wqe = MLX5E_TX_FETCH_WQE(sq, pi); - mlx5e_txwqe_build_eseg_csum(sq, skb, NULL, &wqe->eth); - mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, xmit_more); -} - static void mlx5e_tx_wi_dma_unmap(struct mlx5e_txqsq *sq, struct mlx5e_tx_wqe_info *wi, u32 *dma_fifo_cc) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 8f7580fec193..38b32e98f3bd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -629,9 +629,9 @@ static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) union devlink_param_value val; int err; - err = devlink_param_driverinit_value_get(devlink, - DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, - &val); + err = devl_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, + &val); if (!err) return val.vu32; mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err); @@ -817,9 +817,12 @@ static void comp_irqs_release(struct mlx5_core_dev *dev) static int comp_irqs_request(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table; + const struct cpumask *prev = cpu_none_mask; + const struct cpumask *mask; int ncomp_eqs = table->num_comp_eqs; u16 *cpus; int ret; + int cpu; int i; ncomp_eqs = table->num_comp_eqs; @@ -838,8 +841,19 @@ static int comp_irqs_request(struct mlx5_core_dev *dev) ret = -ENOMEM; goto free_irqs; } - for (i = 0; i < ncomp_eqs; i++) - cpus[i] = cpumask_local_spread(i, dev->priv.numa_node); + + i = 0; + rcu_read_lock(); + for_each_numa_hop_mask(mask, dev->priv.numa_node) { + for_each_cpu_andnot(cpu, mask, prev) { + cpus[i] = cpu; + if (++i == ncomp_eqs) + goto spread_done; + } + prev = mask; + } +spread_done: + rcu_read_unlock(); ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs); kfree(cpus); if (ret < 0) @@ -874,9 +888,9 @@ static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev) union devlink_param_value val; int err; - err = devlink_param_driverinit_value_get(devlink, - DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, - &val); + err = devl_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, + &val); if (!err) return val.vu32; mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err); @@ -946,11 +960,11 @@ static int vector2eqnirqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn) { struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_eq_comp *eq, *n; + struct mlx5_eq_comp *eq; int err = -ENOENT; int i = 0; - list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + list_for_each_entry(eq, &table->comp_eqs_list, list) { if (i++ == vector) { if (irqn) *irqn = eq->core.irqn; @@ -985,10 +999,10 @@ struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { struct mlx5_eq_table *table = dev->priv.eq_table; - struct mlx5_eq_comp *eq, *n; + struct mlx5_eq_comp *eq; int i = 0; - list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + list_for_each_entry(eq, &table->comp_eqs_list, list) { if (i++ == vector) break; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c index a994e71e05c1..d55775627a47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c @@ -356,8 +356,8 @@ void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, } /* Caller must hold rtnl_lock */ -int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num, - u32 metadata) +int mlx5_esw_acl_ingress_vport_metadata_update(struct mlx5_eswitch *esw, u16 vport_num, + u32 metadata) { struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num); int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h index 11d3d3978848..c9f8469e9a47 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h @@ -24,8 +24,8 @@ static inline bool mlx5_esw_acl_egress_fwd2vport_supported(struct mlx5_eswitch * /* Eswitch acl ingress external APIs */ int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport); -int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num, - u32 metadata); +int mlx5_esw_acl_ingress_vport_metadata_update(struct mlx5_eswitch *esw, u16 vport_num, + u32 metadata); void mlx5_esw_acl_ingress_vport_drop_rule_destroy(struct mlx5_eswitch *esw, u16 vport_num); int mlx5_esw_acl_ingress_vport_drop_rule_create(struct mlx5_eswitch *esw, u16 vport_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index b176648d1343..3cdcb0e0b20f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -1715,7 +1715,7 @@ void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 struct mlx5_esw_bridge *bridge; port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); - if (!port || port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER) + if (!port) return; bridge = port->bridge; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c index c9a91158e99c..9959e9fd15a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c @@ -16,18 +16,12 @@ #include "lib/fs_chains.h" #include "en/mod_hdr.h" -#define MLX5_ESW_INDIR_TABLE_SIZE 128 -#define MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX (MLX5_ESW_INDIR_TABLE_SIZE - 2) +#define MLX5_ESW_INDIR_TABLE_SIZE 2 +#define MLX5_ESW_INDIR_TABLE_RECIRC_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 2) #define MLX5_ESW_INDIR_TABLE_FWD_IDX (MLX5_ESW_INDIR_TABLE_SIZE - 1) struct mlx5_esw_indir_table_rule { - struct list_head list; struct mlx5_flow_handle *handle; - union { - __be32 v4; - struct in6_addr v6; - } dst_ip; - u32 vni; struct mlx5_modify_hdr *mh; refcount_t refcnt; }; @@ -38,12 +32,10 @@ struct mlx5_esw_indir_table_entry { struct mlx5_flow_group *recirc_grp; struct mlx5_flow_group *fwd_grp; struct mlx5_flow_handle *fwd_rule; - struct list_head recirc_rules; - int recirc_cnt; + struct mlx5_esw_indir_table_rule *recirc_rule; int fwd_ref; u16 vport; - u8 ip_version; }; struct mlx5_esw_indir_table { @@ -89,7 +81,6 @@ mlx5_esw_indir_table_needed(struct mlx5_eswitch *esw, return esw_attr->in_rep->vport == MLX5_VPORT_UPLINK && vf_sf_vport && esw->dev == dest_mdev && - attr->ip_version && attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE; } @@ -101,27 +92,8 @@ mlx5_esw_indir_table_decap_vport(struct mlx5_flow_attr *attr) return esw_attr->rx_tun_attr ? esw_attr->rx_tun_attr->decap_vport : 0; } -static struct mlx5_esw_indir_table_rule * -mlx5_esw_indir_table_rule_lookup(struct mlx5_esw_indir_table_entry *e, - struct mlx5_esw_flow_attr *attr) -{ - struct mlx5_esw_indir_table_rule *rule; - - list_for_each_entry(rule, &e->recirc_rules, list) - if (rule->vni == attr->rx_tun_attr->vni && - !memcmp(&rule->dst_ip, &attr->rx_tun_attr->dst_ip, - sizeof(attr->rx_tun_attr->dst_ip))) - goto found; - return NULL; - -found: - refcount_inc(&rule->refcnt); - return rule; -} - static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, - struct mlx5_flow_spec *spec, struct mlx5_esw_indir_table_entry *e) { struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; @@ -130,73 +102,18 @@ static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw, struct mlx5_flow_destination dest = {}; struct mlx5_esw_indir_table_rule *rule; struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_spec *rule_spec; struct mlx5_flow_handle *handle; int err = 0; u32 data; - rule = mlx5_esw_indir_table_rule_lookup(e, esw_attr); - if (rule) + if (e->recirc_rule) { + refcount_inc(&e->recirc_rule->refcnt); return 0; - - if (e->recirc_cnt == MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX) - return -EINVAL; - - rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); - if (!rule_spec) - return -ENOMEM; - - rule = kzalloc(sizeof(*rule), GFP_KERNEL); - if (!rule) { - err = -ENOMEM; - goto out; - } - - rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | - MLX5_MATCH_MISC_PARAMETERS | - MLX5_MATCH_MISC_PARAMETERS_2; - if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version)) { - MLX5_SET(fte_match_param, rule_spec->match_criteria, - outer_headers.ip_version, 0xf); - MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ip_version, - attr->ip_version); - } else if (attr->ip_version) { - MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, - outer_headers.ethertype); - MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.ethertype, - (attr->ip_version == 4 ? ETH_P_IP : ETH_P_IPV6)); - } else { - err = -EOPNOTSUPP; - goto err_ethertype; } - if (attr->ip_version == 4) { - MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, - outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); - MLX5_SET(fte_match_param, rule_spec->match_value, - outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(esw_attr->rx_tun_attr->dst_ip.v4)); - } else if (attr->ip_version == 6) { - int len = sizeof(struct in6_addr); - - memset(MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria, - outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - 0xff, len); - memcpy(MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, - outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &esw_attr->rx_tun_attr->dst_ip.v6, len); - } - - MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, - misc_parameters.vxlan_vni); - MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters.vxlan_vni, - MLX5_GET(fte_match_param, spec->match_value, misc_parameters.vxlan_vni)); - - MLX5_SET(fte_match_param, rule_spec->match_criteria, - misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); - MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0, - mlx5_eswitch_get_vport_metadata_for_match(esw_attr->in_mdev->priv.eswitch, - MLX5_VPORT_UPLINK)); + rule = kzalloc(sizeof(*rule), GFP_KERNEL); + if (!rule) + return -ENOMEM; /* Modify flow source to recirculate packet */ data = mlx5_eswitch_get_vport_metadata_for_set(esw, esw_attr->rx_tun_attr->decap_vport); @@ -219,13 +136,14 @@ static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw, flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; flow_act.flags = FLOW_ACT_IGNORE_FLOW_LEVEL | FLOW_ACT_NO_APPEND; + flow_act.fg = e->recirc_grp; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = mlx5_chains_get_table(chains, 0, 1, 0); if (IS_ERR(dest.ft)) { err = PTR_ERR(dest.ft); goto err_table; } - handle = mlx5_add_flow_rules(e->ft, rule_spec, &flow_act, &dest, 1); + handle = mlx5_add_flow_rules(e->ft, NULL, &flow_act, &dest, 1); if (IS_ERR(handle)) { err = PTR_ERR(handle); goto err_handle; @@ -233,14 +151,10 @@ static int mlx5_esw_indir_table_rule_get(struct mlx5_eswitch *esw, mlx5e_mod_hdr_dealloc(&mod_acts); rule->handle = handle; - rule->vni = esw_attr->rx_tun_attr->vni; rule->mh = flow_act.modify_hdr; - memcpy(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip, - sizeof(esw_attr->rx_tun_attr->dst_ip)); refcount_set(&rule->refcnt, 1); - list_add(&rule->list, &e->recirc_rules); - e->recirc_cnt++; - goto out; + e->recirc_rule = rule; + return 0; err_handle: mlx5_chains_put_table(chains, 0, 1, 0); @@ -250,89 +164,44 @@ err_mod_hdr_alloc: err_mod_hdr_regc1: mlx5e_mod_hdr_dealloc(&mod_acts); err_mod_hdr_regc0: -err_ethertype: kfree(rule); -out: - kvfree(rule_spec); return err; } static void mlx5_esw_indir_table_rule_put(struct mlx5_eswitch *esw, - struct mlx5_flow_attr *attr, struct mlx5_esw_indir_table_entry *e) { - struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; + struct mlx5_esw_indir_table_rule *rule = e->recirc_rule; struct mlx5_fs_chains *chains = esw_chains(esw); - struct mlx5_esw_indir_table_rule *rule; - list_for_each_entry(rule, &e->recirc_rules, list) - if (rule->vni == esw_attr->rx_tun_attr->vni && - !memcmp(&rule->dst_ip, &esw_attr->rx_tun_attr->dst_ip, - sizeof(esw_attr->rx_tun_attr->dst_ip))) - goto found; - - return; + if (!rule) + return; -found: if (!refcount_dec_and_test(&rule->refcnt)) return; mlx5_del_flow_rules(rule->handle); mlx5_chains_put_table(chains, 0, 1, 0); mlx5_modify_header_dealloc(esw->dev, rule->mh); - list_del(&rule->list); kfree(rule); - e->recirc_cnt--; + e->recirc_rule = NULL; } -static int mlx5_create_indir_recirc_group(struct mlx5_eswitch *esw, - struct mlx5_flow_attr *attr, - struct mlx5_flow_spec *spec, - struct mlx5_esw_indir_table_entry *e) +static int mlx5_create_indir_recirc_group(struct mlx5_esw_indir_table_entry *e) { int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - u32 *in, *match; + u32 *in; in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; - MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS | - MLX5_MATCH_MISC_PARAMETERS | MLX5_MATCH_MISC_PARAMETERS_2); - match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); - - if (MLX5_CAP_FLOWTABLE_NIC_RX(esw->dev, ft_field_support.outer_ip_version)) - MLX5_SET(fte_match_param, match, outer_headers.ip_version, 0xf); - else - MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.ethertype); - - if (attr->ip_version == 4) { - MLX5_SET_TO_ONES(fte_match_param, match, - outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); - } else if (attr->ip_version == 6) { - memset(MLX5_ADDR_OF(fte_match_param, match, - outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - 0xff, sizeof(struct in6_addr)); - } else { - err = -EOPNOTSUPP; - goto out; - } - - MLX5_SET_TO_ONES(fte_match_param, match, misc_parameters.vxlan_vni); - MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0, - mlx5_eswitch_get_vport_metadata_mask()); MLX5_SET(create_flow_group_in, in, start_flow_index, 0); - MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_RECIRC_IDX_MAX); + MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_RECIRC_IDX); e->recirc_grp = mlx5_create_flow_group(e->ft, in); - if (IS_ERR(e->recirc_grp)) { + if (IS_ERR(e->recirc_grp)) err = PTR_ERR(e->recirc_grp); - goto out; - } - INIT_LIST_HEAD(&e->recirc_rules); - e->recirc_cnt = 0; - -out: kvfree(in); return err; } @@ -343,19 +212,12 @@ static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw, int err = 0, inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = {}; - struct mlx5_flow_spec *spec; u32 *in; in = kvzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) { - kvfree(in); - return -ENOMEM; - } - /* Hold one entry */ MLX5_SET(create_flow_group_in, in, start_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX); MLX5_SET(create_flow_group_in, in, end_flow_index, MLX5_ESW_INDIR_TABLE_FWD_IDX); @@ -366,25 +228,25 @@ static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw, } flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_act.fg = e->fwd_grp; dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport.num = e->vport; dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; - e->fwd_rule = mlx5_add_flow_rules(e->ft, spec, &flow_act, &dest, 1); + e->fwd_rule = mlx5_add_flow_rules(e->ft, NULL, &flow_act, &dest, 1); if (IS_ERR(e->fwd_rule)) { mlx5_destroy_flow_group(e->fwd_grp); err = PTR_ERR(e->fwd_rule); } err_out: - kvfree(spec); kvfree(in); return err; } static struct mlx5_esw_indir_table_entry * mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, - struct mlx5_flow_spec *spec, u16 vport, bool decap) + u16 vport, bool decap) { struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_namespace *root_ns; @@ -412,15 +274,14 @@ mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_att } e->ft = ft; e->vport = vport; - e->ip_version = attr->ip_version; e->fwd_ref = !decap; - err = mlx5_create_indir_recirc_group(esw, attr, spec, e); + err = mlx5_create_indir_recirc_group(e); if (err) goto recirc_grp_err; if (decap) { - err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e); + err = mlx5_esw_indir_table_rule_get(esw, attr, e); if (err) goto recirc_rule_err; } @@ -430,13 +291,13 @@ mlx5_esw_indir_table_entry_create(struct mlx5_eswitch *esw, struct mlx5_flow_att goto fwd_grp_err; hash_add(esw->fdb_table.offloads.indir->table, &e->hlist, - vport << 16 | attr->ip_version); + vport << 16); return e; fwd_grp_err: if (decap) - mlx5_esw_indir_table_rule_put(esw, attr, e); + mlx5_esw_indir_table_rule_put(esw, e); recirc_rule_err: mlx5_destroy_flow_group(e->recirc_grp); recirc_grp_err: @@ -447,13 +308,13 @@ tbl_err: } static struct mlx5_esw_indir_table_entry * -mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport, u8 ip_version) +mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport) { struct mlx5_esw_indir_table_entry *e; - u32 key = vport << 16 | ip_version; + u32 key = vport << 16; hash_for_each_possible(esw->fdb_table.offloads.indir->table, e, hlist, key) - if (e->vport == vport && e->ip_version == ip_version) + if (e->vport == vport) return e; return NULL; @@ -461,24 +322,23 @@ mlx5_esw_indir_table_entry_lookup(struct mlx5_eswitch *esw, u16 vport, u8 ip_ver struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, - struct mlx5_flow_spec *spec, u16 vport, bool decap) { struct mlx5_esw_indir_table_entry *e; int err; mutex_lock(&esw->fdb_table.offloads.indir->lock); - e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version); + e = mlx5_esw_indir_table_entry_lookup(esw, vport); if (e) { if (!decap) { e->fwd_ref++; } else { - err = mlx5_esw_indir_table_rule_get(esw, attr, spec, e); + err = mlx5_esw_indir_table_rule_get(esw, attr, e); if (err) goto out_err; } } else { - e = mlx5_esw_indir_table_entry_create(esw, attr, spec, vport, decap); + e = mlx5_esw_indir_table_entry_create(esw, attr, vport, decap); if (IS_ERR(e)) { err = PTR_ERR(e); esw_warn(esw->dev, "Failed to create indirection table, err %d.\n", err); @@ -494,22 +354,21 @@ out_err: } void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw, - struct mlx5_flow_attr *attr, u16 vport, bool decap) { struct mlx5_esw_indir_table_entry *e; mutex_lock(&esw->fdb_table.offloads.indir->lock); - e = mlx5_esw_indir_table_entry_lookup(esw, vport, attr->ip_version); + e = mlx5_esw_indir_table_entry_lookup(esw, vport); if (!e) goto out; if (!decap) e->fwd_ref--; else - mlx5_esw_indir_table_rule_put(esw, attr, e); + mlx5_esw_indir_table_rule_put(esw, e); - if (e->fwd_ref || e->recirc_cnt) + if (e->fwd_ref || e->recirc_rule) goto out; hash_del(&e->hlist); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h index 21d56b49d14b..036f5b3a341b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.h @@ -13,10 +13,8 @@ mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir); struct mlx5_flow_table *mlx5_esw_indir_table_get(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, - struct mlx5_flow_spec *spec, u16 vport, bool decap); void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw, - struct mlx5_flow_attr *attr, u16 vport, bool decap); bool @@ -44,7 +42,6 @@ mlx5_esw_indir_table_destroy(struct mlx5_esw_indir_table *indir) static inline struct mlx5_flow_table * mlx5_esw_indir_table_get(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, - struct mlx5_flow_spec *spec, u16 vport, bool decap) { return ERR_PTR(-EOPNOTSUPP); @@ -52,7 +49,6 @@ mlx5_esw_indir_table_get(struct mlx5_eswitch *esw, static inline void mlx5_esw_indir_table_put(struct mlx5_eswitch *esw, - struct mlx5_flow_attr *attr, u16 vport, bool decap) { } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 9daf55e90367..0f052513fefa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1190,9 +1190,9 @@ static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw) union devlink_param_value val; int err; - err = devlink_param_driverinit_value_get(devlink, - MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, - &val); + err = devl_param_driverinit_value_get(devlink, + MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM, + &val); if (!err) { esw->params.large_group_num = val.vu32; } else { @@ -1250,7 +1250,7 @@ static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw) if (err) return err; } else { - esw_warn(dev, "engress ACL is not supported by FW\n"); + esw_warn(dev, "egress ACL is not supported by FW\n"); } if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { @@ -1406,9 +1406,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); if (clear_vf) mlx5_eswitch_clear_vf_vports_info(esw); - /* If disabling sriov in switchdev mode, free meta rules here - * because it depends on num_vfs. - */ + if (esw->mode == MLX5_ESWITCH_OFFLOADS) { struct devlink *devlink = priv_to_devlink(esw->dev); @@ -1489,7 +1487,7 @@ int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 * void *hca_caps; int err; - if (!mlx5_core_is_ecpf(dev)) { + if (!mlx5_core_is_ecpf(dev) || mlx5_core_is_management_pf(dev)) { *max_sfs = 0; return 0; } @@ -1642,7 +1640,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) if (err) goto abort; - err = esw_offloads_init_reps(esw); + err = esw_offloads_init(esw); if (err) goto reps_err; @@ -1708,7 +1706,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) mlx5e_mod_hdr_tbl_destroy(&esw->offloads.mod_hdr); mutex_destroy(&esw->offloads.encap_tbl_lock); mutex_destroy(&esw->offloads.decap_tbl_lock); - esw_offloads_cleanup_reps(esw); + esw_offloads_cleanup(esw); mlx5_esw_vports_cleanup(esw); kfree(esw); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 92644fbb5081..19e9a77c4633 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -52,12 +52,14 @@ enum mlx5_mapped_obj_type { MLX5_MAPPED_OBJ_CHAIN, MLX5_MAPPED_OBJ_SAMPLE, MLX5_MAPPED_OBJ_INT_PORT_METADATA, + MLX5_MAPPED_OBJ_ACT_MISS, }; struct mlx5_mapped_obj { enum mlx5_mapped_obj_type type; union { u32 chain; + u64 act_miss_cookie; struct { u32 group_id; u32 rate; @@ -222,7 +224,6 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_handle **send_to_vport_meta_rules; struct mlx5_flow_handle *miss_rule_uni; struct mlx5_flow_handle *miss_rule_multi; - int vlan_push_pop_refcount; struct mlx5_fs_chains *esw_chains_priv; struct { @@ -346,8 +347,8 @@ struct mlx5_eswitch { void esw_offloads_disable(struct mlx5_eswitch *esw); int esw_offloads_enable(struct mlx5_eswitch *esw); -void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw); -int esw_offloads_init_reps(struct mlx5_eswitch *esw); +void esw_offloads_cleanup(struct mlx5_eswitch *esw); +int esw_offloads_init(struct mlx5_eswitch *esw); struct mlx5_flow_handle * mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num); @@ -520,10 +521,6 @@ int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable, struct netlink_ext_ack *extack); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); -int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, - struct mlx5_flow_attr *attr); -int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, - struct mlx5_flow_attr *attr); int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, u16 vport, u16 vlan, u8 qos, u8 set_flags); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index c981fa77f439..2a98375a0abf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -179,15 +179,14 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, static int esw_setup_decap_indir(struct mlx5_eswitch *esw, - struct mlx5_flow_attr *attr, - struct mlx5_flow_spec *spec) + struct mlx5_flow_attr *attr) { struct mlx5_flow_table *ft; if (!(attr->flags & MLX5_ATTR_FLAG_SRC_REWRITE)) return -EOPNOTSUPP; - ft = mlx5_esw_indir_table_get(esw, attr, spec, + ft = mlx5_esw_indir_table_get(esw, attr, mlx5_esw_indir_table_decap_vport(attr), true); return PTR_ERR_OR_ZERO(ft); } @@ -197,7 +196,7 @@ esw_cleanup_decap_indir(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) { if (mlx5_esw_indir_table_decap_vport(attr)) - mlx5_esw_indir_table_put(esw, attr, + mlx5_esw_indir_table_put(esw, mlx5_esw_indir_table_decap_vport(attr), true); } @@ -235,7 +234,6 @@ esw_setup_ft_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, - struct mlx5_flow_spec *spec, int i) { flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; @@ -243,7 +241,7 @@ esw_setup_ft_dest(struct mlx5_flow_destination *dest, dest[i].ft = attr->dest_ft; if (mlx5_esw_indir_table_decap_vport(attr)) - return esw_setup_decap_indir(esw, attr, spec); + return esw_setup_decap_indir(esw, attr); return 0; } @@ -298,7 +296,7 @@ static void esw_put_dest_tables_loop(struct mlx5_eswitch *esw, struct mlx5_flow_ mlx5_chains_put_table(chains, 0, 1, 0); else if (mlx5_esw_indir_table_needed(esw, attr, esw_attr->dests[i].rep->vport, esw_attr->dests[i].mdev)) - mlx5_esw_indir_table_put(esw, attr, esw_attr->dests[i].rep->vport, + mlx5_esw_indir_table_put(esw, esw_attr->dests[i].rep->vport, false); } @@ -384,7 +382,6 @@ esw_setup_indir_table(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr, - struct mlx5_flow_spec *spec, bool ignore_flow_lvl, int *i) { @@ -399,7 +396,7 @@ esw_setup_indir_table(struct mlx5_flow_destination *dest, flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; dest[*i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[*i].ft = mlx5_esw_indir_table_get(esw, attr, spec, + dest[*i].ft = mlx5_esw_indir_table_get(esw, attr, esw_attr->dests[j].rep->vport, false); if (IS_ERR(dest[*i].ft)) { err = PTR_ERR(dest[*i].ft); @@ -408,7 +405,7 @@ esw_setup_indir_table(struct mlx5_flow_destination *dest, } if (mlx5_esw_indir_table_decap_vport(attr)) { - err = esw_setup_decap_indir(esw, attr, spec); + err = esw_setup_decap_indir(esw, attr); if (err) goto err_indir_tbl_get; } @@ -446,7 +443,7 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; if (dest[dest_idx].vport.num == MLX5_VPORT_UPLINK && - mlx5_lag_mpesw_is_activated(esw->dev)) + mlx5_lag_is_mpesw(esw->dev)) dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK; } if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) { @@ -511,14 +508,14 @@ esw_setup_dests(struct mlx5_flow_destination *dest, err = esw_setup_mtu_dest(dest, &attr->meter_attr, *i); (*i)++; } else if (esw_is_indir_table(esw, attr)) { - err = esw_setup_indir_table(dest, flow_act, esw, attr, spec, true, i); + err = esw_setup_indir_table(dest, flow_act, esw, attr, true, i); } else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) { err = esw_setup_chain_src_port_rewrite(dest, flow_act, esw, chains, attr, i); } else { *i = esw_setup_vport_dests(dest, flow_act, esw, esw_attr, *i); if (attr->dest_ft) { - err = esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i); + err = esw_setup_ft_dest(dest, flow_act, esw, attr, *i); (*i)++; } else if (attr->dest_chain) { err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, @@ -582,16 +579,16 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (esw->mode != MLX5_ESWITCH_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); + if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) + return ERR_PTR(-EOPNOTSUPP); + dest = kcalloc(MLX5_MAX_FLOW_FWD_VPORTS + 1, sizeof(*dest), GFP_KERNEL); if (!dest) return ERR_PTR(-ENOMEM); flow_act.action = attr->action; - /* if per flow vlan pop/push is emulated, don't set that into the firmware */ - if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) - flow_act.action &= ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | - MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); - else if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { + + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH) { flow_act.vlan[0].ethtype = ntohs(esw_attr->vlan_proto[0]); flow_act.vlan[0].vid = esw_attr->vlan_vid[0]; flow_act.vlan[0].prio = esw_attr->vlan_prio[0]; @@ -727,7 +724,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; for (i = 0; i < esw_attr->split_count; i++) { if (esw_is_indir_table(esw, attr)) - err = esw_setup_indir_table(dest, &flow_act, esw, attr, spec, false, &i); + err = esw_setup_indir_table(dest, &flow_act, esw, attr, false, &i); else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) err = esw_setup_chain_src_port_rewrite(dest, &flow_act, esw, chains, attr, &i); @@ -832,204 +829,6 @@ mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, __mlx5_eswitch_del_rule(esw, rule, attr, true); } -static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) -{ - struct mlx5_eswitch_rep *rep; - unsigned long i; - int err = 0; - - esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none"); - mlx5_esw_for_each_host_func_vport(esw, i, rep, esw->esw_funcs.num_vfs) { - if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED) - continue; - - err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); - if (err) - goto out; - } - -out: - return err; -} - -static struct mlx5_eswitch_rep * -esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop) -{ - struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL; - - in_rep = attr->in_rep; - out_rep = attr->dests[0].rep; - - if (push) - vport = in_rep; - else if (pop) - vport = out_rep; - else - vport = in_rep; - - return vport; -} - -static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr, - bool push, bool pop, bool fwd) -{ - struct mlx5_eswitch_rep *in_rep, *out_rep; - - if ((push || pop) && !fwd) - goto out_notsupp; - - in_rep = attr->in_rep; - out_rep = attr->dests[0].rep; - - if (push && in_rep->vport == MLX5_VPORT_UPLINK) - goto out_notsupp; - - if (pop && out_rep->vport == MLX5_VPORT_UPLINK) - goto out_notsupp; - - /* vport has vlan push configured, can't offload VF --> wire rules w.o it */ - if (!push && !pop && fwd) - if (in_rep->vlan && out_rep->vport == MLX5_VPORT_UPLINK) - goto out_notsupp; - - /* protects against (1) setting rules with different vlans to push and - * (2) setting rules w.o vlans (attr->vlan = 0) && w. vlans to push (!= 0) - */ - if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan_vid[0])) - goto out_notsupp; - - return 0; - -out_notsupp: - return -EOPNOTSUPP; -} - -int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, - struct mlx5_flow_attr *attr) -{ - struct offloads_fdb *offloads = &esw->fdb_table.offloads; - struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; - struct mlx5_eswitch_rep *vport = NULL; - bool push, pop, fwd; - int err = 0; - - /* nop if we're on the vlan push/pop non emulation mode */ - if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) - return 0; - - push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); - pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); - fwd = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && - !attr->dest_chain); - - mutex_lock(&esw->state_lock); - - err = esw_add_vlan_action_check(esw_attr, push, pop, fwd); - if (err) - goto unlock; - - attr->flags &= ~MLX5_ATTR_FLAG_VLAN_HANDLED; - - vport = esw_vlan_action_get_vport(esw_attr, push, pop); - - if (!push && !pop && fwd) { - /* tracks VF --> wire rules without vlan push action */ - if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) { - vport->vlan_refcount++; - attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED; - } - - goto unlock; - } - - if (!push && !pop) - goto unlock; - - if (!(offloads->vlan_push_pop_refcount)) { - /* it's the 1st vlan rule, apply global vlan pop policy */ - err = esw_set_global_vlan_pop(esw, SET_VLAN_STRIP); - if (err) - goto out; - } - offloads->vlan_push_pop_refcount++; - - if (push) { - if (vport->vlan_refcount) - goto skip_set_push; - - err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, esw_attr->vlan_vid[0], - 0, SET_VLAN_INSERT | SET_VLAN_STRIP); - if (err) - goto out; - vport->vlan = esw_attr->vlan_vid[0]; -skip_set_push: - vport->vlan_refcount++; - } -out: - if (!err) - attr->flags |= MLX5_ATTR_FLAG_VLAN_HANDLED; -unlock: - mutex_unlock(&esw->state_lock); - return err; -} - -int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, - struct mlx5_flow_attr *attr) -{ - struct offloads_fdb *offloads = &esw->fdb_table.offloads; - struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; - struct mlx5_eswitch_rep *vport = NULL; - bool push, pop, fwd; - int err = 0; - - /* nop if we're on the vlan push/pop non emulation mode */ - if (mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) - return 0; - - if (!(attr->flags & MLX5_ATTR_FLAG_VLAN_HANDLED)) - return 0; - - push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); - pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); - fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); - - mutex_lock(&esw->state_lock); - - vport = esw_vlan_action_get_vport(esw_attr, push, pop); - - if (!push && !pop && fwd) { - /* tracks VF --> wire rules without vlan push action */ - if (esw_attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) - vport->vlan_refcount--; - - goto out; - } - - if (push) { - vport->vlan_refcount--; - if (vport->vlan_refcount) - goto skip_unset_push; - - vport->vlan = 0; - err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, - 0, 0, SET_VLAN_STRIP); - if (err) - goto out; - } - -skip_unset_push: - offloads->vlan_push_pop_refcount--; - if (offloads->vlan_push_pop_refcount) - goto out; - - /* no more vlan rules, stop global vlan pop policy */ - err = esw_set_global_vlan_pop(esw, 0); - -out: - mutex_unlock(&esw->state_lock); - return err; -} - struct mlx5_flow_handle * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, struct mlx5_eswitch *from_esw, @@ -2406,7 +2205,7 @@ static void mlx5_esw_offloads_rep_cleanup(struct mlx5_eswitch *esw, kfree(rep); } -void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw) +static void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw) { struct mlx5_eswitch_rep *rep; unsigned long i; @@ -2416,7 +2215,7 @@ void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw) xa_destroy(&esw->offloads.vport_reps); } -int esw_offloads_init_reps(struct mlx5_eswitch *esw) +static int esw_offloads_init_reps(struct mlx5_eswitch *esw) { struct mlx5_vport *vport; unsigned long i; @@ -2436,6 +2235,94 @@ err: return err; } +static int esw_port_metadata_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + int err = 0; + + down_write(&esw->mode_lock); + if (mlx5_esw_is_fdb_created(esw)) { + err = -EBUSY; + goto done; + } + if (!mlx5_esw_vport_match_metadata_supported(esw)) { + err = -EOPNOTSUPP; + goto done; + } + if (ctx->val.vbool) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + else + esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA; +done: + up_write(&esw->mode_lock); + return err; +} + +static int esw_port_metadata_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + ctx->val.vbool = mlx5_eswitch_vport_match_metadata_enabled(dev->priv.eswitch); + return 0; +} + +static int esw_port_metadata_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u8 esw_mode; + + esw_mode = mlx5_eswitch_mode(dev); + if (esw_mode == MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_MOD(extack, + "E-Switch must either disabled or non switchdev mode"); + return -EBUSY; + } + return 0; +} + +static const struct devlink_param esw_devlink_params[] = { + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA, + "esw_port_metadata", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + esw_port_metadata_get, + esw_port_metadata_set, + esw_port_metadata_validate), +}; + +int esw_offloads_init(struct mlx5_eswitch *esw) +{ + int err; + + err = esw_offloads_init_reps(esw); + if (err) + return err; + + err = devl_params_register(priv_to_devlink(esw->dev), + esw_devlink_params, + ARRAY_SIZE(esw_devlink_params)); + if (err) + goto err_params; + + return 0; + +err_params: + esw_offloads_cleanup_reps(esw); + return err; +} + +void esw_offloads_cleanup(struct mlx5_eswitch *esw) +{ + devl_params_unregister(priv_to_devlink(esw->dev), + esw_devlink_params, + ARRAY_SIZE(esw_devlink_params)); + esw_offloads_cleanup_reps(esw); +} + static void __esw_offloads_unload_rep(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u8 rep_type) { @@ -3575,9 +3462,9 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) if (IS_ERR(esw)) return PTR_ERR(esw); - down_write(&esw->mode_lock); + down_read(&esw->mode_lock); err = esw_mode_to_devlink(esw->mode, mode); - up_write(&esw->mode_lock); + up_read(&esw->mode_lock); return err; } @@ -3675,9 +3562,9 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) if (IS_ERR(esw)) return PTR_ERR(esw); - down_write(&esw->mode_lock); + down_read(&esw->mode_lock); err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); - up_write(&esw->mode_lock); + up_read(&esw->mode_lock); return err; } @@ -3749,9 +3636,9 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, if (IS_ERR(esw)) return PTR_ERR(esw); - down_write(&esw->mode_lock); + down_read(&esw->mode_lock); *encap = esw->offloads.encap; - up_write(&esw->mode_lock); + up_read(&esw->mode_lock); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/events.c b/drivers/net/ethernet/mellanox/mlx5/core/events.c index 9459e56ee90a..718cf09c28ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/events.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/events.c @@ -424,6 +424,7 @@ int mlx5_blocking_notifier_register(struct mlx5_core_dev *dev, struct notifier_b return blocking_notifier_chain_register(&events->sw_nh, nb); } +EXPORT_SYMBOL(mlx5_blocking_notifier_register); int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb) { @@ -431,6 +432,7 @@ int mlx5_blocking_notifier_unregister(struct mlx5_core_dev *dev, struct notifier return blocking_notifier_chain_unregister(&events->sw_nh, nb); } +EXPORT_SYMBOL(mlx5_blocking_notifier_unregister); int mlx5_blocking_notifier_call_chain(struct mlx5_core_dev *dev, unsigned int event, void *data) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 32d4c967469c..144e59480686 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -272,8 +272,6 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, unsigned int size; int err; - if (ft_attr->max_fte != POOL_NEXT_SIZE) - size = roundup_pow_of_two(ft_attr->max_fte); size = mlx5_ft_pool_get_avail_sz(dev, ft->type, ft_attr->max_fte); if (!size) return -ENOSPC; @@ -412,11 +410,6 @@ static int mlx5_cmd_create_flow_group(struct mlx5_flow_root_namespace *ns, MLX5_CMD_OP_CREATE_FLOW_GROUP); MLX5_SET(create_flow_group_in, in, table_type, ft->type); MLX5_SET(create_flow_group_in, in, table_id, ft->id); - if (ft->vport) { - MLX5_SET(create_flow_group_in, in, vport_number, ft->vport); - MLX5_SET(create_flow_group_in, in, other_vport, 1); - } - MLX5_SET(create_flow_group_in, in, vport_number, ft->vport); MLX5_SET(create_flow_group_in, in, other_vport, !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); @@ -653,6 +646,12 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, id = dst->dest_attr.sampler_id; ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_FLOW_SAMPLER; break; + case MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE: + MLX5_SET(dest_format_struct, in_dests, + destination_table_type, dst->dest_attr.ft->type); + id = dst->dest_attr.ft->id; + ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TABLE_TYPE; + break; default: id = dst->dest_attr.tir_num; ifc_type = MLX5_IFC_FLOW_DESTINATION_TYPE_TIR; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 5a85d8c1e797..731acbe22dc7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -34,12 +34,14 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/vport.h> #include <linux/mlx5/eswitch.h> +#include <net/devlink.h> #include "mlx5_core.h" #include "fs_core.h" #include "fs_cmd.h" #include "fs_ft_pool.h" #include "diag/fs_tracepoint.h" +#include "devlink.h" #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) @@ -111,8 +113,10 @@ #define ETHTOOL_PRIO_NUM_LEVELS 1 #define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) -/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy */ -#define KERNEL_NIC_PRIO_NUM_LEVELS 8 +/* Promiscuous, Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy, + * IPsec RoCE policy + */ +#define KERNEL_NIC_PRIO_NUM_LEVELS 9 #define KERNEL_NIC_NUM_PRIOS 1 /* One more level for tc */ #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) @@ -219,19 +223,30 @@ static struct init_tree_node egress_root_fs = { }; enum { + RDMA_RX_IPSEC_PRIO, RDMA_RX_COUNTERS_PRIO, RDMA_RX_BYPASS_PRIO, RDMA_RX_KERNEL_PRIO, }; +#define RDMA_RX_IPSEC_NUM_PRIOS 1 +#define RDMA_RX_IPSEC_NUM_LEVELS 2 +#define RDMA_RX_IPSEC_MIN_LEVEL (RDMA_RX_IPSEC_NUM_LEVELS) + #define RDMA_RX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_REGULAR_PRIOS #define RDMA_RX_KERNEL_MIN_LEVEL (RDMA_RX_BYPASS_MIN_LEVEL + 1) #define RDMA_RX_COUNTERS_MIN_LEVEL (RDMA_RX_KERNEL_MIN_LEVEL + 2) static struct init_tree_node rdma_rx_root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 3, + .ar_size = 4, .children = (struct init_tree_node[]) { + [RDMA_RX_IPSEC_PRIO] = + ADD_PRIO(0, RDMA_RX_IPSEC_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(RDMA_RX_IPSEC_NUM_PRIOS, + RDMA_RX_IPSEC_NUM_LEVELS))), [RDMA_RX_COUNTERS_PRIO] = ADD_PRIO(0, RDMA_RX_COUNTERS_MIN_LEVEL, 0, FS_CHAINING_CAPS, @@ -254,15 +269,20 @@ static struct init_tree_node rdma_rx_root_fs = { enum { RDMA_TX_COUNTERS_PRIO, + RDMA_TX_IPSEC_PRIO, RDMA_TX_BYPASS_PRIO, }; #define RDMA_TX_BYPASS_MIN_LEVEL MLX5_BY_PASS_NUM_PRIOS #define RDMA_TX_COUNTERS_MIN_LEVEL (RDMA_TX_BYPASS_MIN_LEVEL + 1) +#define RDMA_TX_IPSEC_NUM_PRIOS 1 +#define RDMA_TX_IPSEC_PRIO_NUM_LEVELS 1 +#define RDMA_TX_IPSEC_MIN_LEVEL (RDMA_TX_COUNTERS_MIN_LEVEL + RDMA_TX_IPSEC_NUM_PRIOS) + static struct init_tree_node rdma_tx_root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 2, + .ar_size = 3, .children = (struct init_tree_node[]) { [RDMA_TX_COUNTERS_PRIO] = ADD_PRIO(0, RDMA_TX_COUNTERS_MIN_LEVEL, 0, @@ -270,6 +290,13 @@ static struct init_tree_node rdma_tx_root_fs = { ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, ADD_MULTIPLE_PRIO(MLX5_RDMA_TX_NUM_COUNTERS_PRIOS, RDMA_TX_COUNTERS_PRIO_NUM_LEVELS))), + [RDMA_TX_IPSEC_PRIO] = + ADD_PRIO(0, RDMA_TX_IPSEC_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(MLX5_FLOW_TABLE_MISS_ACTION_DEF, + ADD_MULTIPLE_PRIO(RDMA_TX_IPSEC_NUM_PRIOS, + RDMA_TX_IPSEC_PRIO_NUM_LEVELS))), + [RDMA_TX_BYPASS_PRIO] = ADD_PRIO(0, RDMA_TX_BYPASS_MIN_LEVEL, 0, FS_CHAINING_CAPS_RDMA_TX, @@ -449,7 +476,8 @@ static bool is_fwd_dest_type(enum mlx5_flow_destination_type type) type == MLX5_FLOW_DESTINATION_TYPE_VPORT || type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER || type == MLX5_FLOW_DESTINATION_TYPE_TIR || - type == MLX5_FLOW_DESTINATION_TYPE_RANGE; + type == MLX5_FLOW_DESTINATION_TYPE_RANGE || + type == MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; } static bool check_valid_spec(const struct mlx5_flow_spec *spec) @@ -1774,7 +1802,6 @@ static int build_match_list(struct match_list *match_head, { struct rhlist_head *tmp, *list; struct mlx5_flow_group *g; - int err = 0; rcu_read_lock(); INIT_LIST_HEAD(&match_head->list); @@ -1800,7 +1827,7 @@ static int build_match_list(struct match_list *match_head, list_add_tail(&curr_match->list, &match_head->list); } rcu_read_unlock(); - return err; + return 0; } static u64 matched_fgs_get_version(struct list_head *match_head) @@ -2367,6 +2394,14 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, root_ns = steering->rdma_tx_root_ns; prio = RDMA_TX_COUNTERS_PRIO; break; + case MLX5_FLOW_NAMESPACE_RDMA_RX_IPSEC: + root_ns = steering->rdma_rx_root_ns; + prio = RDMA_RX_IPSEC_PRIO; + break; + case MLX5_FLOW_NAMESPACE_RDMA_TX_IPSEC: + root_ns = steering->rdma_tx_root_ns; + prio = RDMA_TX_IPSEC_PRIO; + break; default: /* Must be NIC RX */ WARN_ON(!is_nic_rx_ns(type)); root_ns = steering->root_ns; @@ -3143,6 +3178,78 @@ cleanup: return err; } +static int mlx5_fs_mode_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + char *value = val.vstr; + int err = 0; + + if (!strcmp(value, "dmfs")) { + return 0; + } else if (!strcmp(value, "smfs")) { + u8 eswitch_mode; + bool smfs_cap; + + eswitch_mode = mlx5_eswitch_mode(dev); + smfs_cap = mlx5_fs_dr_is_supported(dev); + + if (!smfs_cap) { + err = -EOPNOTSUPP; + NL_SET_ERR_MSG_MOD(extack, + "Software managed steering is not supported by current device"); + } + + else if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_MOD(extack, + "Software managed steering is not supported when eswitch offloads enabled."); + err = -EOPNOTSUPP; + } + } else { + NL_SET_ERR_MSG_MOD(extack, + "Bad parameter: supported values are [\"dmfs\", \"smfs\"]"); + err = -EINVAL; + } + + return err; +} + +static int mlx5_fs_mode_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + enum mlx5_flow_steering_mode mode; + + if (!strcmp(ctx->val.vstr, "smfs")) + mode = MLX5_FLOW_STEERING_MODE_SMFS; + else + mode = MLX5_FLOW_STEERING_MODE_DMFS; + dev->priv.steering->mode = mode; + + return 0; +} + +static int mlx5_fs_mode_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) + strcpy(ctx->val.vstr, "smfs"); + else + strcpy(ctx->val.vstr, "dmfs"); + return 0; +} + +static const struct devlink_param mlx5_fs_params[] = { + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE, + "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_fs_mode_get, mlx5_fs_mode_set, + mlx5_fs_mode_validate), +}; + void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering = dev->priv.steering; @@ -3155,12 +3262,20 @@ void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev) cleanup_root_ns(steering->rdma_rx_root_ns); cleanup_root_ns(steering->rdma_tx_root_ns); cleanup_root_ns(steering->egress_root_ns); + + devl_params_unregister(priv_to_devlink(dev), mlx5_fs_params, + ARRAY_SIZE(mlx5_fs_params)); } int mlx5_fs_core_init(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering = dev->priv.steering; - int err = 0; + int err; + + err = devl_params_register(priv_to_devlink(dev), mlx5_fs_params, + ARRAY_SIZE(mlx5_fs_params)); + if (err) + return err; if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && (MLX5_CAP_GEN(dev, nic_flow_table))) || diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index b406e0367af6..17fe30a4c06c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -504,6 +504,16 @@ void mlx5_fc_query_cached(struct mlx5_fc *counter, counter->lastpackets = c.packets; } +void mlx5_fc_query_cached_raw(struct mlx5_fc *counter, + u64 *bytes, u64 *packets, u64 *lastuse) +{ + struct mlx5_fc_cache c = counter->cache; + + *bytes = c.bytes; + *packets = c.packets; + *lastuse = c.lastuse; +} + void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, struct delayed_work *dwork, unsigned long delay) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index f34e758a2f1f..7bb7be01225a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -267,6 +267,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, crypto)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_CRYPTO); + if (err) + return err; + } + if (MLX5_CAP_GEN(dev, shampo)) { err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_SHAMPO); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 1e46f9afa40e..4c2dad9d7cfb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */ +#include <devlink.h> + #include "fw_reset.h" #include "diag/fw_tracer.h" #include "lib/tout.h" @@ -28,21 +30,32 @@ struct mlx5_fw_reset { int ret; }; -void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable) +static int mlx5_fw_reset_enable_remote_dev_reset_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) { - struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_fw_reset *fw_reset; - if (enable) + fw_reset = dev->priv.fw_reset; + + if (ctx->val.vbool) clear_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags); else set_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags); + return 0; } -bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev) +static int mlx5_fw_reset_enable_remote_dev_reset_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) { - struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_fw_reset *fw_reset; + + fw_reset = dev->priv.fw_reset; - return !test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags); + ctx->val.vbool = !test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, + &fw_reset->reset_flags); + return 0; } static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level, @@ -150,11 +163,11 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { complete(&fw_reset->done); } else { - mlx5_unload_one(dev); + mlx5_unload_one(dev, false); if (mlx5_health_wait_pci_up(dev)) mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); else - mlx5_load_one(dev, false); + mlx5_load_one(dev); devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); @@ -358,6 +371,7 @@ static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev) mlx5_core_err(dev, "PCI link not ready (0x%04x) after %llu ms\n", reg16, mlx5_tout_ms(dev, PCI_TOGGLE)); err = -ETIMEDOUT; + goto restore; } do { @@ -484,7 +498,7 @@ int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev) } err = fw_reset->ret; if (test_and_clear_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags)) { - mlx5_unload_one_devl_locked(dev); + mlx5_unload_one_devl_locked(dev, false); mlx5_load_one_devl_locked(dev, false); } out: @@ -517,9 +531,16 @@ void mlx5_drain_fw_reset(struct mlx5_core_dev *dev) cancel_work_sync(&fw_reset->reset_abort_work); } +static const struct devlink_param mlx5_fw_reset_devlink_params[] = { + DEVLINK_PARAM_GENERIC(ENABLE_REMOTE_DEV_RESET, BIT(DEVLINK_PARAM_CMODE_RUNTIME), + mlx5_fw_reset_enable_remote_dev_reset_get, + mlx5_fw_reset_enable_remote_dev_reset_set, NULL), +}; + int mlx5_fw_reset_init(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = kzalloc(sizeof(*fw_reset), GFP_KERNEL); + int err; if (!fw_reset) return -ENOMEM; @@ -532,6 +553,15 @@ int mlx5_fw_reset_init(struct mlx5_core_dev *dev) fw_reset->dev = dev; dev->priv.fw_reset = fw_reset; + err = devl_params_register(priv_to_devlink(dev), + mlx5_fw_reset_devlink_params, + ARRAY_SIZE(mlx5_fw_reset_devlink_params)); + if (err) { + destroy_workqueue(fw_reset->wq); + kfree(fw_reset); + return err; + } + INIT_WORK(&fw_reset->fw_live_patch_work, mlx5_fw_live_patch_event); INIT_WORK(&fw_reset->reset_request_work, mlx5_sync_reset_request_event); INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work); @@ -546,6 +576,9 @@ void mlx5_fw_reset_cleanup(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + devl_params_unregister(priv_to_devlink(dev), + mlx5_fw_reset_devlink_params, + ARRAY_SIZE(mlx5_fw_reset_devlink_params)); destroy_workqueue(fw_reset->wq); kfree(dev->priv.fw_reset); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h index dc141c7e641a..c57465595f7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h @@ -6,8 +6,6 @@ #include "mlx5_core.h" -void mlx5_fw_reset_enable_remote_dev_reset_set(struct mlx5_core_dev *dev, bool enable); -bool mlx5_fw_reset_enable_remote_dev_reset_get(struct mlx5_core_dev *dev); int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type); int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, struct netlink_ext_ack *extack); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 879555ba847d..f9438d4e43ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -62,7 +62,7 @@ enum { }; enum { - MLX5_DROP_NEW_HEALTH_WORK, + MLX5_DROP_HEALTH_WORK, }; enum { @@ -675,7 +675,7 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) devlink = priv_to_devlink(dev); mutex_lock(&dev->intf_state_mutex); - if (test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) { + if (test_bit(MLX5_DROP_HEALTH_WORK, &health->flags)) { mlx5_core_err(dev, "health works are not permitted at this stage\n"); mutex_unlock(&dev->intf_state_mutex); return; @@ -699,7 +699,7 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) * requests from the kernel. */ mlx5_core_err(dev, "Driver is in error state. Unloading\n"); - mlx5_unload_one(dev); + mlx5_unload_one(dev, false); } } @@ -771,14 +771,8 @@ static unsigned long get_next_poll_jiffies(struct mlx5_core_dev *dev) void mlx5_trigger_health_work(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - unsigned long flags; - spin_lock_irqsave(&health->wq_lock, flags); - if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) - queue_work(health->wq, &health->fatal_report_work); - else - mlx5_core_err(dev, "new health works are not permitted at this stage\n"); - spin_unlock_irqrestore(&health->wq_lock, flags); + queue_work(health->wq, &health->fatal_report_work); } #define MLX5_MSEC_PER_HOUR (MSEC_PER_SEC * 60 * 60) @@ -858,7 +852,7 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) timer_setup(&health->timer, poll_health, 0); health->fatal_error = MLX5_SENSOR_NO_ERR; - clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); + clear_bit(MLX5_DROP_HEALTH_WORK, &health->flags); health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; @@ -869,13 +863,9 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) { struct mlx5_core_health *health = &dev->priv.health; - unsigned long flags; - if (disable_health) { - spin_lock_irqsave(&health->wq_lock, flags); - set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); - spin_unlock_irqrestore(&health->wq_lock, flags); - } + if (disable_health) + set_bit(MLX5_DROP_HEALTH_WORK, &health->flags); del_timer_sync(&health->timer); } @@ -891,11 +881,8 @@ void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev) void mlx5_drain_health_wq(struct mlx5_core_dev *dev) { struct mlx5_core_health *health = &dev->priv.health; - unsigned long flags; - spin_lock_irqsave(&health->wq_lock, flags); - set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); - spin_unlock_irqrestore(&health->wq_lock, flags); + set_bit(MLX5_DROP_HEALTH_WORK, &health->flags); cancel_delayed_work_sync(&health->update_fw_log_ts_work); cancel_work_sync(&health->report_work); cancel_work_sync(&health->fatal_report_work); @@ -928,7 +915,6 @@ int mlx5_health_init(struct mlx5_core_dev *dev) kfree(name); if (!health->wq) goto out_err; - spin_lock_init(&health->wq_lock); INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work); INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work); INIT_DELAYED_WORK(&health->update_fw_log_ts_work, mlx5_health_log_ts_update); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index eff92dc0927c..779d92b762d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -172,6 +172,7 @@ enum mlx5_ptys_rate { MLX5_PTYS_RATE_EDR = 1 << 5, MLX5_PTYS_RATE_HDR = 1 << 6, MLX5_PTYS_RATE_NDR = 1 << 7, + MLX5_PTYS_RATE_XDR = 1 << 8, }; static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate) @@ -185,20 +186,21 @@ static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate) case MLX5_PTYS_RATE_EDR: return 25000; case MLX5_PTYS_RATE_HDR: return 50000; case MLX5_PTYS_RATE_NDR: return 100000; + case MLX5_PTYS_RATE_XDR: return 200000; default: return -1; } } -static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper) +static u32 mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper) { int rate, width; rate = mlx5_ptys_rate_enum_to_int(ib_proto_oper); if (rate < 0) - return -EINVAL; + return SPEED_UNKNOWN; width = mlx5_ptys_width_enum_to_int(ib_link_width_oper); if (width < 0) - return -EINVAL; + return SPEED_UNKNOWN; return rate * width; } @@ -221,16 +223,13 @@ static int mlx5i_get_link_ksettings(struct net_device *netdev, ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); speed = mlx5i_get_speed_settings(ib_link_width_oper, ib_proto_oper); - if (speed < 0) - return -EINVAL; + link_ksettings->base.speed = speed; + link_ksettings->base.duplex = speed == SPEED_UNKNOWN ? DUPLEX_UNKNOWN : DUPLEX_FULL; - link_ksettings->base.duplex = DUPLEX_FULL; link_ksettings->base.port = PORT_OTHER; link_ksettings->base.autoneg = AUTONEG_DISABLE; - link_ksettings->base.speed = speed; - return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 911cf4d23964..c2a4f86bc890 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -412,7 +412,8 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) int err; priv->fs = mlx5e_fs_init(priv->profile, mdev, - !test_bit(MLX5E_STATE_DESTROYING, &priv->state)); + !test_bit(MLX5E_STATE_DESTROYING, &priv->state), + priv->dfs_root); if (!priv->fs) { netdev_err(priv->netdev, "FS allocation failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c index b8feaf0f5c4c..f4b777d4e108 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/debugfs.c @@ -22,7 +22,7 @@ static int type_show(struct seq_file *file, void *priv) struct mlx5_lag *ldev; char *mode = NULL; - ldev = dev->priv.lag; + ldev = mlx5_lag_dev(dev); mutex_lock(&ldev->lock); if (__mlx5_lag_is_active(ldev)) mode = get_str_mode_type(ldev); @@ -41,7 +41,7 @@ static int port_sel_mode_show(struct seq_file *file, void *priv) int ret = 0; char *mode; - ldev = dev->priv.lag; + ldev = mlx5_lag_dev(dev); mutex_lock(&ldev->lock); if (__mlx5_lag_is_active(ldev)) mode = mlx5_get_str_port_sel_mode(ldev->mode, ldev->mode_flags); @@ -61,7 +61,7 @@ static int state_show(struct seq_file *file, void *priv) struct mlx5_lag *ldev; bool active; - ldev = dev->priv.lag; + ldev = mlx5_lag_dev(dev); mutex_lock(&ldev->lock); active = __mlx5_lag_is_active(ldev); mutex_unlock(&ldev->lock); @@ -77,7 +77,7 @@ static int flags_show(struct seq_file *file, void *priv) bool shared_fdb; bool lag_active; - ldev = dev->priv.lag; + ldev = mlx5_lag_dev(dev); mutex_lock(&ldev->lock); lag_active = __mlx5_lag_is_active(ldev); if (!lag_active) @@ -108,7 +108,7 @@ static int mapping_show(struct seq_file *file, void *priv) int num_ports; int i; - ldev = dev->priv.lag; + ldev = mlx5_lag_dev(dev); mutex_lock(&ldev->lock); lag_active = __mlx5_lag_is_active(ldev); if (lag_active) { @@ -142,7 +142,7 @@ static int members_show(struct seq_file *file, void *priv) struct mlx5_lag *ldev; int i; - ldev = dev->priv.lag; + ldev = mlx5_lag_dev(dev); mutex_lock(&ldev->lock); for (i = 0; i < ldev->ports; i++) { if (!ldev->pf[i].dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index ad32b80e8501..5d331b940f4d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -230,7 +230,6 @@ static void mlx5_ldev_free(struct kref *ref) mlx5_lag_mp_cleanup(ldev); cancel_delayed_work_sync(&ldev->bond_work); destroy_workqueue(ldev->wq); - mlx5_lag_mpesw_cleanup(ldev); mutex_destroy(&ldev->lock); kfree(ldev); } @@ -276,7 +275,6 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) mlx5_core_err(dev, "Failed to init multipath lag err=%d\n", err); - mlx5_lag_mpesw_init(ldev); ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports); ldev->buckets = 1; @@ -646,7 +644,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, return 0; } -static int mlx5_deactivate_lag(struct mlx5_lag *ldev) +int mlx5_deactivate_lag(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; @@ -688,7 +686,7 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev) } #define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2 -static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) +bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) { #ifdef CONFIG_MLX5_ESWITCH struct mlx5_core_dev *dev; @@ -723,7 +721,7 @@ static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) return true; } -static void mlx5_lag_add_devices(struct mlx5_lag *ldev) +void mlx5_lag_add_devices(struct mlx5_lag *ldev) { int i; @@ -740,7 +738,7 @@ static void mlx5_lag_add_devices(struct mlx5_lag *ldev) } } -static void mlx5_lag_remove_devices(struct mlx5_lag *ldev) +void mlx5_lag_remove_devices(struct mlx5_lag *ldev) { int i; @@ -1187,7 +1185,7 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) tmp_dev = mlx5_get_next_phys_dev_lag(dev); if (tmp_dev) - ldev = tmp_dev->priv.lag; + ldev = mlx5_lag_dev(tmp_dev); if (!ldev) { ldev = mlx5_lag_dev_alloc(dev); @@ -1386,8 +1384,7 @@ bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev) spin_lock_irqsave(&lag_lock, flags); ldev = mlx5_lag_dev(dev); - res = ldev && __mlx5_lag_is_sriov(ldev) && - test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); + res = ldev && test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags); spin_unlock_irqrestore(&lag_lock, flags); return res; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index f30ac2de639f..bc1f1dd3e283 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -50,19 +50,6 @@ struct lag_tracker { enum netdev_lag_hash hash_type; }; -enum mpesw_op { - MLX5_MPESW_OP_ENABLE, - MLX5_MPESW_OP_DISABLE, -}; - -struct mlx5_mpesw_work_st { - struct work_struct work; - struct mlx5_lag *lag; - enum mpesw_op op; - struct completion comp; - int result; -}; - /* LAG data of a ConnectX card. * It serves both its phys functions. */ @@ -115,6 +102,7 @@ mlx5_lag_is_ready(struct mlx5_lag *ldev) return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags); } +bool mlx5_lag_check_prereq(struct mlx5_lag *ldev); void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker); int mlx5_activate_lag(struct mlx5_lag *ldev, @@ -124,8 +112,6 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, struct net_device *ndev); bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev); -void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev); -int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev); char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags); void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, @@ -134,5 +120,8 @@ void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports, void mlx5_ldev_add_debugfs(struct mlx5_core_dev *dev); void mlx5_ldev_remove_debugfs(struct dentry *dbg); void mlx5_disable_lag(struct mlx5_lag *ldev); +void mlx5_lag_remove_devices(struct mlx5_lag *ldev); +int mlx5_deactivate_lag(struct mlx5_lag *ldev); +void mlx5_lag_add_devices(struct mlx5_lag *ldev); #endif /* __MLX5_LAG_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index d9fcb9ed726f..d85a8dfc153d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -28,13 +28,9 @@ static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev) bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev) { - struct mlx5_lag *ldev; - bool res; - - ldev = mlx5_lag_dev(dev); - res = ldev && __mlx5_lag_is_multipath(ldev); + struct mlx5_lag *ldev = mlx5_lag_dev(dev); - return res; + return ldev && __mlx5_lag_is_multipath(ldev); } /** diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c index c17e8f1ec914..0c0ef600f643 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c @@ -5,39 +5,121 @@ #include <net/nexthop.h> #include "lag/lag.h" #include "eswitch.h" +#include "esw/acl/ofld.h" #include "lib/mlx5.h" -static int add_mpesw_rule(struct mlx5_lag *ldev) +static void mlx5_mpesw_metadata_cleanup(struct mlx5_lag *ldev) { - struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev; - int err; + struct mlx5_core_dev *dev; + struct mlx5_eswitch *esw; + u32 pf_metadata; + int i; + + for (i = 0; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; + esw = dev->priv.eswitch; + pf_metadata = ldev->lag_mpesw.pf_metadata[i]; + if (!pf_metadata) + continue; + mlx5_esw_acl_ingress_vport_metadata_update(esw, MLX5_VPORT_UPLINK, 0); + mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_MULTIPORT_ESW, + (void *)0); + mlx5_esw_match_metadata_free(esw, pf_metadata); + ldev->lag_mpesw.pf_metadata[i] = 0; + } +} - if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1) - return 0; +static int mlx5_mpesw_metadata_set(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev; + struct mlx5_eswitch *esw; + u32 pf_metadata; + int i, err; + + for (i = 0; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; + esw = dev->priv.eswitch; + pf_metadata = mlx5_esw_match_metadata_alloc(esw); + if (!pf_metadata) { + err = -ENOSPC; + goto err_metadata; + } + + ldev->lag_mpesw.pf_metadata[i] = pf_metadata; + err = mlx5_esw_acl_ingress_vport_metadata_update(esw, MLX5_VPORT_UPLINK, + pf_metadata); + if (err) + goto err_metadata; + } - if (ldev->mode != MLX5_LAG_MODE_NONE) { - err = -EINVAL; - goto out_err; + for (i = 0; i < ldev->ports; i++) { + dev = ldev->pf[i].dev; + mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_MULTIPORT_ESW, + (void *)0); } - err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false); + return 0; + +err_metadata: + mlx5_mpesw_metadata_cleanup(ldev); + return err; +} + +static int enable_mpesw(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + int err; + + if (ldev->mode != MLX5_LAG_MODE_NONE) + return -EINVAL; + + if (mlx5_eswitch_mode(dev0) != MLX5_ESWITCH_OFFLOADS || + !MLX5_CAP_PORT_SELECTION(dev0, port_select_flow_table) || + !MLX5_CAP_GEN(dev0, create_lag_when_not_master_up) || + !mlx5_lag_check_prereq(ldev)) + return -EOPNOTSUPP; + + err = mlx5_mpesw_metadata_set(ldev); + if (err) + return err; + + mlx5_lag_remove_devices(ldev); + + err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, true); if (err) { - mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err); - goto out_err; + mlx5_core_warn(dev0, "Failed to create LAG in MPESW mode (%d)\n", err); + goto err_add_devices; } + dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + err = mlx5_eswitch_reload_reps(dev0->priv.eswitch); + if (!err) + err = mlx5_eswitch_reload_reps(dev1->priv.eswitch); + if (err) + goto err_rescan_drivers; + return 0; -out_err: - atomic_dec(&ldev->lag_mpesw.mpesw_rule_count); +err_rescan_drivers: + dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + mlx5_deactivate_lag(ldev); +err_add_devices: + mlx5_lag_add_devices(ldev); + mlx5_eswitch_reload_reps(dev0->priv.eswitch); + mlx5_eswitch_reload_reps(dev1->priv.eswitch); + mlx5_mpesw_metadata_cleanup(ldev); return err; } -static void del_mpesw_rule(struct mlx5_lag *ldev) +static void disable_mpesw(struct mlx5_lag *ldev) { - if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) && - ldev->mode == MLX5_LAG_MODE_MPESW) + if (ldev->mode == MLX5_LAG_MODE_MPESW) { + mlx5_mpesw_metadata_cleanup(ldev); mlx5_disable_lag(ldev); + } } static void mlx5_mpesw_work(struct work_struct *work) @@ -45,20 +127,27 @@ static void mlx5_mpesw_work(struct work_struct *work) struct mlx5_mpesw_work_st *mpesww = container_of(work, struct mlx5_mpesw_work_st, work); struct mlx5_lag *ldev = mpesww->lag; + mlx5_dev_list_lock(); mutex_lock(&ldev->lock); + if (ldev->mode_changes_in_progress) { + mpesww->result = -EAGAIN; + goto unlock; + } + if (mpesww->op == MLX5_MPESW_OP_ENABLE) - mpesww->result = add_mpesw_rule(ldev); + mpesww->result = enable_mpesw(ldev); else if (mpesww->op == MLX5_MPESW_OP_DISABLE) - del_mpesw_rule(ldev); + disable_mpesw(ldev); +unlock: mutex_unlock(&ldev->lock); - + mlx5_dev_list_unlock(); complete(&mpesww->comp); } static int mlx5_lag_mpesw_queue_work(struct mlx5_core_dev *dev, enum mpesw_op op) { - struct mlx5_lag *ldev = dev->priv.lag; + struct mlx5_lag *ldev = mlx5_lag_dev(dev); struct mlx5_mpesw_work_st *work; int err = 0; @@ -86,43 +175,36 @@ out: return err; } -void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev) +void mlx5_lag_mpesw_disable(struct mlx5_core_dev *dev) { mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_DISABLE); } -int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev) +int mlx5_lag_mpesw_enable(struct mlx5_core_dev *dev) { return mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_ENABLE); } -int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev) +int mlx5_lag_mpesw_do_mirred(struct mlx5_core_dev *mdev, + struct net_device *out_dev, + struct netlink_ext_ack *extack) { - struct mlx5_lag *ldev = mdev->priv.lag; + struct mlx5_lag *ldev = mlx5_lag_dev(mdev); if (!netif_is_bond_master(out_dev) || !ldev) return 0; - if (ldev->mode == MLX5_LAG_MODE_MPESW) - return -EOPNOTSUPP; - - return 0; -} - -bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev) -{ - bool ret; + if (ldev->mode != MLX5_LAG_MODE_MPESW) + return 0; - ret = dev->priv.lag && dev->priv.lag->mode == MLX5_LAG_MODE_MPESW; - return ret; + NL_SET_ERR_MSG_MOD(extack, "can't forward to bond in mpesw mode"); + return -EOPNOTSUPP; } -void mlx5_lag_mpesw_init(struct mlx5_lag *ldev) +bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev) { - atomic_set(&ldev->lag_mpesw.mpesw_rule_count, 0); -} + struct mlx5_lag *ldev = mlx5_lag_dev(dev); -void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev) -{ - WARN_ON(atomic_read(&ldev->lag_mpesw.mpesw_rule_count)); + return ldev && ldev->mode == MLX5_LAG_MODE_MPESW; } +EXPORT_SYMBOL(mlx5_lag_is_mpesw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h index 88e8daffcf92..02520f27a033 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h @@ -9,17 +9,27 @@ struct lag_mpesw { struct work_struct mpesw_work; - atomic_t mpesw_rule_count; + u32 pf_metadata[MLX5_MAX_PORTS]; }; -int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev); -bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev); -#if IS_ENABLED(CONFIG_MLX5_ESWITCH) -void mlx5_lag_mpesw_init(struct mlx5_lag *ldev); -void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev); -#else -static inline void mlx5_lag_mpesw_init(struct mlx5_lag *ldev) {} -static inline void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev) {} -#endif +enum mpesw_op { + MLX5_MPESW_OP_ENABLE, + MLX5_MPESW_OP_DISABLE, +}; + +struct mlx5_mpesw_work_st { + struct work_struct work; + struct mlx5_lag *lag; + enum mpesw_op op; + struct completion comp; + int result; +}; + +int mlx5_lag_mpesw_do_mirred(struct mlx5_core_dev *mdev, + struct net_device *out_dev, + struct netlink_ext_ack *extack); +bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev); +void mlx5_lag_mpesw_disable(struct mlx5_core_dev *dev); +int mlx5_lag_mpesw_enable(struct mlx5_core_dev *dev); #endif /* __MLX5_LAG_MPESW_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 69318b143268..4c9a40211059 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -69,6 +69,13 @@ enum { MLX5_MTPPS_FS_OUT_PULSE_DURATION_NS = BIT(0xa), }; +enum { + MLX5_MTUTC_OPERATION_ADJUST_TIME_MIN = S16_MIN, + MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX = S16_MAX, + MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MIN = -200000, + MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX = 200000, +}; + static bool mlx5_real_time_mode(struct mlx5_core_dev *mdev) { return (mlx5_is_real_time_rq(mdev) || mlx5_is_real_time_sq(mdev)); @@ -86,6 +93,22 @@ static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev) return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify); } +static bool mlx5_is_mtutc_time_adj_cap(struct mlx5_core_dev *mdev, s64 delta) +{ + s64 min = MLX5_MTUTC_OPERATION_ADJUST_TIME_MIN; + s64 max = MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX; + + if (MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range)) { + min = MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MIN; + max = MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX; + } + + if (delta < min || delta > max) + return false; + + return true; +} + static int mlx5_set_mtutc(struct mlx5_core_dev *dev, u32 *mtutc, u32 size) { u32 out[MLX5_ST_SZ_DW(mtutc_reg)] = {}; @@ -288,8 +311,8 @@ static int mlx5_ptp_adjtime_real_time(struct mlx5_core_dev *mdev, s64 delta) if (!mlx5_modify_mtutc_allowed(mdev)) return 0; - /* HW time adjustment range is s16. If out of range, settime instead */ - if (delta < S16_MIN || delta > S16_MAX) { + /* HW time adjustment range is checked. If out of range, settime instead */ + if (!mlx5_is_mtutc_time_adj_cap(mdev, delta)) { struct timespec64 ts; s64 ns; @@ -326,7 +349,20 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) return 0; } -static int mlx5_ptp_adjfreq_real_time(struct mlx5_core_dev *mdev, s32 freq) +static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; + + mdev = container_of(clock, struct mlx5_core_dev, clock); + + if (!mlx5_is_mtutc_time_adj_cap(mdev, delta)) + return -ERANGE; + + return mlx5_ptp_adjtime(ptp, delta); +} + +static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_ppm) { u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; @@ -334,7 +370,15 @@ static int mlx5_ptp_adjfreq_real_time(struct mlx5_core_dev *mdev, s32 freq) return 0; MLX5_SET(mtutc_reg, in, operation, MLX5_MTUTC_OPERATION_ADJUST_FREQ_UTC); - MLX5_SET(mtutc_reg, in, freq_adjustment, freq); + + if (MLX5_CAP_MCAM_FEATURE(mdev, mtutc_freq_adj_units)) { + MLX5_SET(mtutc_reg, in, freq_adj_units, + MLX5_MTUTC_FREQ_ADJ_UNITS_SCALED_PPM); + MLX5_SET(mtutc_reg, in, freq_adjustment, scaled_ppm); + } else { + MLX5_SET(mtutc_reg, in, freq_adj_units, MLX5_MTUTC_FREQ_ADJ_UNITS_PPB); + MLX5_SET(mtutc_reg, in, freq_adjustment, scaled_ppm_to_ppb(scaled_ppm)); + } return mlx5_set_mtutc(mdev, in, sizeof(in)); } @@ -349,7 +393,8 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) int err; mdev = container_of(clock, struct mlx5_core_dev, clock); - err = mlx5_ptp_adjfreq_real_time(mdev, scaled_ppm_to_ppb(scaled_ppm)); + + err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm); if (err) return err; @@ -688,6 +733,7 @@ static const struct ptp_clock_info mlx5_ptp_clock_info = { .n_pins = 0, .pps = 0, .adjfine = mlx5_ptp_adjfine, + .adjphase = mlx5_ptp_adjphase, .adjtime = mlx5_ptp_adjtime, .gettimex64 = mlx5_ptp_gettimex, .settime64 = mlx5_ptp_settime, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c index e995f8378df7..3a94b8f8031e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.c @@ -2,53 +2,253 @@ // Copyright (c) 2019 Mellanox Technologies. #include "mlx5_core.h" -#include "lib/mlx5.h" +#include "lib/crypto.h" -int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, - void *key, u32 sz_bytes, - u32 key_type, u32 *p_key_id) -{ - u32 in[MLX5_ST_SZ_DW(create_encryption_key_in)] = {}; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; - u32 sz_bits = sz_bytes * BITS_PER_BYTE; - u8 general_obj_key_size; - u64 general_obj_types; - void *obj, *key_p; - int err; +#define MLX5_CRYPTO_DEK_POOLS_NUM (MLX5_ACCEL_OBJ_TYPE_KEY_NUM - 1) +#define type2idx(type) ((type) - 1) - obj = MLX5_ADDR_OF(create_encryption_key_in, in, encryption_key_object); - key_p = MLX5_ADDR_OF(encryption_key_obj, obj, key); +#define MLX5_CRYPTO_DEK_POOL_SYNC_THRESH 128 - general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); - if (!(general_obj_types & - MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY)) - return -EINVAL; +/* calculate the num of DEKs, which are freed by any user + * (for example, TLS) after last revalidation in a pool or a bulk. + */ +#define MLX5_CRYPTO_DEK_CALC_FREED(a) \ + ({ typeof(a) _a = (a); \ + _a->num_deks - _a->avail_deks - _a->in_use_deks; }) + +#define MLX5_CRYPTO_DEK_POOL_CALC_FREED(pool) MLX5_CRYPTO_DEK_CALC_FREED(pool) +#define MLX5_CRYPTO_DEK_BULK_CALC_FREED(bulk) MLX5_CRYPTO_DEK_CALC_FREED(bulk) + +#define MLX5_CRYPTO_DEK_BULK_IDLE(bulk) \ + ({ typeof(bulk) _bulk = (bulk); \ + _bulk->avail_deks == _bulk->num_deks; }) + +enum { + MLX5_CRYPTO_DEK_ALL_TYPE = BIT(0), +}; + +struct mlx5_crypto_dek_pool { + struct mlx5_core_dev *mdev; + u32 key_purpose; + int num_deks; /* the total number of keys in this pool */ + int avail_deks; /* the number of available keys in this pool */ + int in_use_deks; /* the number of being used keys in this pool */ + struct mutex lock; /* protect the following lists, and the bulks */ + struct list_head partial_list; /* some of keys are available */ + struct list_head full_list; /* no available keys */ + struct list_head avail_list; /* all keys are available to use */ + + /* No in-used keys, and all need to be synced. + * These bulks will be put to avail list after sync. + */ + struct list_head sync_list; + + bool syncing; + struct list_head wait_for_free; + struct work_struct sync_work; + + spinlock_t destroy_lock; /* protect destroy_list */ + struct list_head destroy_list; + struct work_struct destroy_work; +}; + +struct mlx5_crypto_dek_bulk { + struct mlx5_core_dev *mdev; + int base_obj_id; + int avail_start; /* the bit to start search */ + int num_deks; /* the total number of keys in a bulk */ + int avail_deks; /* the number of keys available, with need_sync bit 0 */ + int in_use_deks; /* the number of keys being used, with in_use bit 1 */ + struct list_head entry; + + /* 0: not being used by any user, 1: otherwise */ + unsigned long *in_use; + + /* The bits are set when they are used, and reset after crypto_sync + * is executed. So, the value 0 means the key is newly created, or not + * used after sync, and 1 means it is in use, or freed but not synced + */ + unsigned long *need_sync; +}; + +struct mlx5_crypto_dek_priv { + struct mlx5_core_dev *mdev; + int log_dek_obj_range; +}; + +struct mlx5_crypto_dek { + struct mlx5_crypto_dek_bulk *bulk; + struct list_head entry; + u32 obj_id; +}; + +u32 mlx5_crypto_dek_get_id(struct mlx5_crypto_dek *dek) +{ + return dek->obj_id; +} + +static int mlx5_crypto_dek_get_key_sz(struct mlx5_core_dev *mdev, + u32 sz_bytes, u8 *key_sz_p) +{ + u32 sz_bits = sz_bytes * BITS_PER_BYTE; switch (sz_bits) { case 128: - general_obj_key_size = - MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128; - key_p += sz_bytes; + *key_sz_p = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128; break; case 256: - general_obj_key_size = - MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256; + *key_sz_p = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256; break; default: + mlx5_core_err(mdev, "Crypto offload error, invalid key size (%u bits)\n", + sz_bits); return -EINVAL; } - memcpy(key_p, key, sz_bytes); + return 0; +} + +static int mlx5_crypto_dek_fill_key(struct mlx5_core_dev *mdev, u8 *key_obj, + const void *key, u32 sz_bytes) +{ + void *dst; + u8 key_sz; + int err; + + err = mlx5_crypto_dek_get_key_sz(mdev, sz_bytes, &key_sz); + if (err) + return err; + + MLX5_SET(encryption_key_obj, key_obj, key_size, key_sz); + + if (sz_bytes == 16) + /* For key size of 128b the MSBs are reserved. */ + dst = MLX5_ADDR_OF(encryption_key_obj, key_obj, key[1]); + else + dst = MLX5_ADDR_OF(encryption_key_obj, key_obj, key); + + memcpy(dst, key, sz_bytes); + + return 0; +} + +static int mlx5_crypto_cmd_sync_crypto(struct mlx5_core_dev *mdev, + int crypto_type) +{ + u32 in[MLX5_ST_SZ_DW(sync_crypto_in)] = {}; + int err; + + mlx5_core_dbg(mdev, + "Execute SYNC_CRYPTO command with crypto_type(0x%x)\n", + crypto_type); + + MLX5_SET(sync_crypto_in, in, opcode, MLX5_CMD_OP_SYNC_CRYPTO); + MLX5_SET(sync_crypto_in, in, crypto_type, crypto_type); + + err = mlx5_cmd_exec_in(mdev, sync_crypto, in); + if (err) + mlx5_core_err(mdev, + "Failed to exec sync crypto, type=%d, err=%d\n", + crypto_type, err); + + return err; +} + +static int mlx5_crypto_create_dek_bulk(struct mlx5_core_dev *mdev, + u32 key_purpose, int log_obj_range, + u32 *obj_id) +{ + u32 in[MLX5_ST_SZ_DW(create_encryption_key_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + void *obj, *param; + int err; - MLX5_SET(encryption_key_obj, obj, key_size, general_obj_key_size); - MLX5_SET(encryption_key_obj, obj, key_type, key_type); MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY); + param = MLX5_ADDR_OF(general_obj_in_cmd_hdr, in, op_param); + MLX5_SET(general_obj_create_param, param, log_obj_range, log_obj_range); + + obj = MLX5_ADDR_OF(create_encryption_key_in, in, encryption_key_object); + MLX5_SET(encryption_key_obj, obj, key_purpose, key_purpose); MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.hw_objs.pdn); err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + mlx5_core_dbg(mdev, "DEK objects created, bulk=%d, obj_id=%d\n", + 1 << log_obj_range, *obj_id); + + return 0; +} + +static int mlx5_crypto_modify_dek_key(struct mlx5_core_dev *mdev, + const void *key, u32 sz_bytes, u32 key_purpose, + u32 obj_id, u32 obj_offset) +{ + u32 in[MLX5_ST_SZ_DW(modify_encryption_key_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + void *obj, *param; + int err; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id); + + param = MLX5_ADDR_OF(general_obj_in_cmd_hdr, in, op_param); + MLX5_SET(general_obj_query_param, param, obj_offset, obj_offset); + + obj = MLX5_ADDR_OF(modify_encryption_key_in, in, encryption_key_object); + MLX5_SET64(encryption_key_obj, obj, modify_field_select, 1); + MLX5_SET(encryption_key_obj, obj, key_purpose, key_purpose); + MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.hw_objs.pdn); + + err = mlx5_crypto_dek_fill_key(mdev, obj, key, sz_bytes); + if (err) + return err; + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + + /* avoid leaking key on the stack */ + memzero_explicit(in, sizeof(in)); + + return err; +} + +static int mlx5_crypto_create_dek_key(struct mlx5_core_dev *mdev, + const void *key, u32 sz_bytes, + u32 key_purpose, u32 *p_key_id) +{ + u32 in[MLX5_ST_SZ_DW(create_encryption_key_in)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + u64 general_obj_types; + void *obj; + int err; + + general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); + if (!(general_obj_types & + MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY)) + return -EINVAL; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, + MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY); + + obj = MLX5_ADDR_OF(create_encryption_key_in, in, encryption_key_object); + MLX5_SET(encryption_key_obj, obj, key_purpose, key_purpose); + MLX5_SET(encryption_key_obj, obj, pd, mdev->mlx5e_res.hw_objs.pdn); + + err = mlx5_crypto_dek_fill_key(mdev, obj, key, sz_bytes); + if (err) + return err; + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); if (!err) *p_key_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); @@ -58,7 +258,7 @@ int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, return err; } -void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id) +static void mlx5_crypto_destroy_dek_key(struct mlx5_core_dev *mdev, u32 key_id) { u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; @@ -71,3 +271,504 @@ void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id) mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); } + +int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, + const void *key, u32 sz_bytes, + u32 key_type, u32 *p_key_id) +{ + return mlx5_crypto_create_dek_key(mdev, key, sz_bytes, key_type, p_key_id); +} + +void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id) +{ + mlx5_crypto_destroy_dek_key(mdev, key_id); +} + +static struct mlx5_crypto_dek_bulk * +mlx5_crypto_dek_bulk_create(struct mlx5_crypto_dek_pool *pool) +{ + struct mlx5_crypto_dek_priv *dek_priv = pool->mdev->mlx5e_res.dek_priv; + struct mlx5_core_dev *mdev = pool->mdev; + struct mlx5_crypto_dek_bulk *bulk; + int num_deks, base_obj_id; + int err; + + bulk = kzalloc(sizeof(*bulk), GFP_KERNEL); + if (!bulk) + return ERR_PTR(-ENOMEM); + + num_deks = 1 << dek_priv->log_dek_obj_range; + bulk->need_sync = bitmap_zalloc(num_deks, GFP_KERNEL); + if (!bulk->need_sync) { + err = -ENOMEM; + goto err_out; + } + + bulk->in_use = bitmap_zalloc(num_deks, GFP_KERNEL); + if (!bulk->in_use) { + err = -ENOMEM; + goto err_out; + } + + err = mlx5_crypto_create_dek_bulk(mdev, pool->key_purpose, + dek_priv->log_dek_obj_range, + &base_obj_id); + if (err) + goto err_out; + + bulk->base_obj_id = base_obj_id; + bulk->num_deks = num_deks; + bulk->avail_deks = num_deks; + bulk->mdev = mdev; + + return bulk; + +err_out: + bitmap_free(bulk->in_use); + bitmap_free(bulk->need_sync); + kfree(bulk); + return ERR_PTR(err); +} + +static struct mlx5_crypto_dek_bulk * +mlx5_crypto_dek_pool_add_bulk(struct mlx5_crypto_dek_pool *pool) +{ + struct mlx5_crypto_dek_bulk *bulk; + + bulk = mlx5_crypto_dek_bulk_create(pool); + if (IS_ERR(bulk)) + return bulk; + + pool->avail_deks += bulk->num_deks; + pool->num_deks += bulk->num_deks; + list_add(&bulk->entry, &pool->partial_list); + + return bulk; +} + +static void mlx5_crypto_dek_bulk_free(struct mlx5_crypto_dek_bulk *bulk) +{ + mlx5_crypto_destroy_dek_key(bulk->mdev, bulk->base_obj_id); + bitmap_free(bulk->need_sync); + bitmap_free(bulk->in_use); + kfree(bulk); +} + +static void mlx5_crypto_dek_pool_remove_bulk(struct mlx5_crypto_dek_pool *pool, + struct mlx5_crypto_dek_bulk *bulk, + bool delay) +{ + pool->num_deks -= bulk->num_deks; + pool->avail_deks -= bulk->avail_deks; + pool->in_use_deks -= bulk->in_use_deks; + list_del(&bulk->entry); + if (!delay) + mlx5_crypto_dek_bulk_free(bulk); +} + +static struct mlx5_crypto_dek_bulk * +mlx5_crypto_dek_pool_pop(struct mlx5_crypto_dek_pool *pool, u32 *obj_offset) +{ + struct mlx5_crypto_dek_bulk *bulk; + int pos; + + mutex_lock(&pool->lock); + bulk = list_first_entry_or_null(&pool->partial_list, + struct mlx5_crypto_dek_bulk, entry); + + if (bulk) { + pos = find_next_zero_bit(bulk->need_sync, bulk->num_deks, + bulk->avail_start); + if (pos == bulk->num_deks) { + mlx5_core_err(pool->mdev, "Wrong DEK bulk avail_start.\n"); + pos = find_first_zero_bit(bulk->need_sync, bulk->num_deks); + } + WARN_ON(pos == bulk->num_deks); + } else { + bulk = list_first_entry_or_null(&pool->avail_list, + struct mlx5_crypto_dek_bulk, + entry); + if (bulk) { + list_move(&bulk->entry, &pool->partial_list); + } else { + bulk = mlx5_crypto_dek_pool_add_bulk(pool); + if (IS_ERR(bulk)) + goto out; + } + pos = 0; + } + + *obj_offset = pos; + bitmap_set(bulk->need_sync, pos, 1); + bitmap_set(bulk->in_use, pos, 1); + bulk->in_use_deks++; + bulk->avail_deks--; + if (!bulk->avail_deks) { + list_move(&bulk->entry, &pool->full_list); + bulk->avail_start = bulk->num_deks; + } else { + bulk->avail_start = pos + 1; + } + pool->avail_deks--; + pool->in_use_deks++; + +out: + mutex_unlock(&pool->lock); + return bulk; +} + +static bool mlx5_crypto_dek_need_sync(struct mlx5_crypto_dek_pool *pool) +{ + return !pool->syncing && + MLX5_CRYPTO_DEK_POOL_CALC_FREED(pool) > MLX5_CRYPTO_DEK_POOL_SYNC_THRESH; +} + +static int mlx5_crypto_dek_free_locked(struct mlx5_crypto_dek_pool *pool, + struct mlx5_crypto_dek *dek) +{ + struct mlx5_crypto_dek_bulk *bulk = dek->bulk; + int obj_offset; + bool old_val; + int err = 0; + + obj_offset = dek->obj_id - bulk->base_obj_id; + old_val = test_and_clear_bit(obj_offset, bulk->in_use); + WARN_ON_ONCE(!old_val); + if (!old_val) { + err = -ENOENT; + goto out_free; + } + pool->in_use_deks--; + bulk->in_use_deks--; + if (!bulk->avail_deks && !bulk->in_use_deks) + list_move(&bulk->entry, &pool->sync_list); + + if (mlx5_crypto_dek_need_sync(pool) && schedule_work(&pool->sync_work)) + pool->syncing = true; + +out_free: + kfree(dek); + return err; +} + +static int mlx5_crypto_dek_pool_push(struct mlx5_crypto_dek_pool *pool, + struct mlx5_crypto_dek *dek) +{ + int err = 0; + + mutex_lock(&pool->lock); + if (pool->syncing) + list_add(&dek->entry, &pool->wait_for_free); + else + err = mlx5_crypto_dek_free_locked(pool, dek); + mutex_unlock(&pool->lock); + + return err; +} + +/* Update the bits for a bulk while sync, and avail_next for search. + * As the combinations of (need_sync, in_use) of one DEK are + * - (0,0) means the key is ready for use, + * - (1,1) means the key is currently being used by a user, + * - (1,0) means the key is freed, and waiting for being synced, + * - (0,1) is invalid state. + * the number of revalidated DEKs can be calculated by + * hweight_long(need_sync XOR in_use), and the need_sync bits can be reset + * by simply copying from in_use bits. + */ +static void mlx5_crypto_dek_bulk_reset_synced(struct mlx5_crypto_dek_pool *pool, + struct mlx5_crypto_dek_bulk *bulk) +{ + unsigned long *need_sync = bulk->need_sync; + unsigned long *in_use = bulk->in_use; + int i, freed, reused, avail_next; + bool first = true; + + freed = MLX5_CRYPTO_DEK_BULK_CALC_FREED(bulk); + + for (i = 0; freed && i < BITS_TO_LONGS(bulk->num_deks); + i++, need_sync++, in_use++) { + reused = hweight_long((*need_sync) ^ (*in_use)); + if (!reused) + continue; + + bulk->avail_deks += reused; + pool->avail_deks += reused; + *need_sync = *in_use; + if (first) { + avail_next = i * BITS_PER_TYPE(long); + if (bulk->avail_start > avail_next) + bulk->avail_start = avail_next; + first = false; + } + + freed -= reused; + } +} + +/* Return true if the bulk is reused, false if destroyed with delay */ +static bool mlx5_crypto_dek_bulk_handle_avail(struct mlx5_crypto_dek_pool *pool, + struct mlx5_crypto_dek_bulk *bulk, + struct list_head *destroy_list) +{ + if (list_empty(&pool->avail_list)) { + list_move(&bulk->entry, &pool->avail_list); + return true; + } + + mlx5_crypto_dek_pool_remove_bulk(pool, bulk, true); + list_add(&bulk->entry, destroy_list); + return false; +} + +static void mlx5_crypto_dek_pool_splice_destroy_list(struct mlx5_crypto_dek_pool *pool, + struct list_head *list, + struct list_head *head) +{ + spin_lock(&pool->destroy_lock); + list_splice_init(list, head); + spin_unlock(&pool->destroy_lock); +} + +static void mlx5_crypto_dek_pool_free_wait_keys(struct mlx5_crypto_dek_pool *pool) +{ + struct mlx5_crypto_dek *dek, *next; + + list_for_each_entry_safe(dek, next, &pool->wait_for_free, entry) { + list_del(&dek->entry); + mlx5_crypto_dek_free_locked(pool, dek); + } +} + +/* For all the bulks in each list, reset the bits while sync. + * Move them to different lists according to the number of available DEKs. + * Destrory all the idle bulks, except one for quick service. + * And free DEKs in the waiting list at the end of this func. + */ +static void mlx5_crypto_dek_pool_reset_synced(struct mlx5_crypto_dek_pool *pool) +{ + struct mlx5_crypto_dek_bulk *bulk, *tmp; + LIST_HEAD(destroy_list); + + list_for_each_entry_safe(bulk, tmp, &pool->partial_list, entry) { + mlx5_crypto_dek_bulk_reset_synced(pool, bulk); + if (MLX5_CRYPTO_DEK_BULK_IDLE(bulk)) + mlx5_crypto_dek_bulk_handle_avail(pool, bulk, &destroy_list); + } + + list_for_each_entry_safe(bulk, tmp, &pool->full_list, entry) { + mlx5_crypto_dek_bulk_reset_synced(pool, bulk); + + if (!bulk->avail_deks) + continue; + + if (MLX5_CRYPTO_DEK_BULK_IDLE(bulk)) + mlx5_crypto_dek_bulk_handle_avail(pool, bulk, &destroy_list); + else + list_move(&bulk->entry, &pool->partial_list); + } + + list_for_each_entry_safe(bulk, tmp, &pool->sync_list, entry) { + bulk->avail_deks = bulk->num_deks; + pool->avail_deks += bulk->num_deks; + if (mlx5_crypto_dek_bulk_handle_avail(pool, bulk, &destroy_list)) { + memset(bulk->need_sync, 0, BITS_TO_BYTES(bulk->num_deks)); + bulk->avail_start = 0; + } + } + + mlx5_crypto_dek_pool_free_wait_keys(pool); + + if (!list_empty(&destroy_list)) { + mlx5_crypto_dek_pool_splice_destroy_list(pool, &destroy_list, + &pool->destroy_list); + schedule_work(&pool->destroy_work); + } +} + +static void mlx5_crypto_dek_sync_work_fn(struct work_struct *work) +{ + struct mlx5_crypto_dek_pool *pool = + container_of(work, struct mlx5_crypto_dek_pool, sync_work); + int err; + + err = mlx5_crypto_cmd_sync_crypto(pool->mdev, BIT(pool->key_purpose)); + mutex_lock(&pool->lock); + if (!err) + mlx5_crypto_dek_pool_reset_synced(pool); + pool->syncing = false; + mutex_unlock(&pool->lock); +} + +struct mlx5_crypto_dek *mlx5_crypto_dek_create(struct mlx5_crypto_dek_pool *dek_pool, + const void *key, u32 sz_bytes) +{ + struct mlx5_crypto_dek_priv *dek_priv = dek_pool->mdev->mlx5e_res.dek_priv; + struct mlx5_core_dev *mdev = dek_pool->mdev; + u32 key_purpose = dek_pool->key_purpose; + struct mlx5_crypto_dek_bulk *bulk; + struct mlx5_crypto_dek *dek; + int obj_offset; + int err; + + dek = kzalloc(sizeof(*dek), GFP_KERNEL); + if (!dek) + return ERR_PTR(-ENOMEM); + + if (!dek_priv) { + err = mlx5_crypto_create_dek_key(mdev, key, sz_bytes, + key_purpose, &dek->obj_id); + goto out; + } + + bulk = mlx5_crypto_dek_pool_pop(dek_pool, &obj_offset); + if (IS_ERR(bulk)) { + err = PTR_ERR(bulk); + goto out; + } + + dek->bulk = bulk; + dek->obj_id = bulk->base_obj_id + obj_offset; + err = mlx5_crypto_modify_dek_key(mdev, key, sz_bytes, key_purpose, + bulk->base_obj_id, obj_offset); + if (err) { + mlx5_crypto_dek_pool_push(dek_pool, dek); + return ERR_PTR(err); + } + +out: + if (err) { + kfree(dek); + return ERR_PTR(err); + } + + return dek; +} + +void mlx5_crypto_dek_destroy(struct mlx5_crypto_dek_pool *dek_pool, + struct mlx5_crypto_dek *dek) +{ + struct mlx5_crypto_dek_priv *dek_priv = dek_pool->mdev->mlx5e_res.dek_priv; + struct mlx5_core_dev *mdev = dek_pool->mdev; + + if (!dek_priv) { + mlx5_crypto_destroy_dek_key(mdev, dek->obj_id); + kfree(dek); + } else { + mlx5_crypto_dek_pool_push(dek_pool, dek); + } +} + +static void mlx5_crypto_dek_free_destroy_list(struct list_head *destroy_list) +{ + struct mlx5_crypto_dek_bulk *bulk, *tmp; + + list_for_each_entry_safe(bulk, tmp, destroy_list, entry) + mlx5_crypto_dek_bulk_free(bulk); +} + +static void mlx5_crypto_dek_destroy_work_fn(struct work_struct *work) +{ + struct mlx5_crypto_dek_pool *pool = + container_of(work, struct mlx5_crypto_dek_pool, destroy_work); + LIST_HEAD(destroy_list); + + mlx5_crypto_dek_pool_splice_destroy_list(pool, &pool->destroy_list, + &destroy_list); + mlx5_crypto_dek_free_destroy_list(&destroy_list); +} + +struct mlx5_crypto_dek_pool * +mlx5_crypto_dek_pool_create(struct mlx5_core_dev *mdev, int key_purpose) +{ + struct mlx5_crypto_dek_pool *pool; + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return ERR_PTR(-ENOMEM); + + pool->mdev = mdev; + pool->key_purpose = key_purpose; + + mutex_init(&pool->lock); + INIT_LIST_HEAD(&pool->avail_list); + INIT_LIST_HEAD(&pool->partial_list); + INIT_LIST_HEAD(&pool->full_list); + INIT_LIST_HEAD(&pool->sync_list); + INIT_LIST_HEAD(&pool->wait_for_free); + INIT_WORK(&pool->sync_work, mlx5_crypto_dek_sync_work_fn); + spin_lock_init(&pool->destroy_lock); + INIT_LIST_HEAD(&pool->destroy_list); + INIT_WORK(&pool->destroy_work, mlx5_crypto_dek_destroy_work_fn); + + return pool; +} + +void mlx5_crypto_dek_pool_destroy(struct mlx5_crypto_dek_pool *pool) +{ + struct mlx5_crypto_dek_bulk *bulk, *tmp; + + cancel_work_sync(&pool->sync_work); + cancel_work_sync(&pool->destroy_work); + + mlx5_crypto_dek_pool_free_wait_keys(pool); + + list_for_each_entry_safe(bulk, tmp, &pool->avail_list, entry) + mlx5_crypto_dek_pool_remove_bulk(pool, bulk, false); + + list_for_each_entry_safe(bulk, tmp, &pool->full_list, entry) + mlx5_crypto_dek_pool_remove_bulk(pool, bulk, false); + + list_for_each_entry_safe(bulk, tmp, &pool->sync_list, entry) + mlx5_crypto_dek_pool_remove_bulk(pool, bulk, false); + + list_for_each_entry_safe(bulk, tmp, &pool->partial_list, entry) + mlx5_crypto_dek_pool_remove_bulk(pool, bulk, false); + + mlx5_crypto_dek_free_destroy_list(&pool->destroy_list); + + mutex_destroy(&pool->lock); + + kfree(pool); +} + +void mlx5_crypto_dek_cleanup(struct mlx5_crypto_dek_priv *dek_priv) +{ + if (!dek_priv) + return; + + kfree(dek_priv); +} + +struct mlx5_crypto_dek_priv *mlx5_crypto_dek_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_crypto_dek_priv *dek_priv; + int err; + + if (!MLX5_CAP_CRYPTO(mdev, log_dek_max_alloc)) + return NULL; + + dek_priv = kzalloc(sizeof(*dek_priv), GFP_KERNEL); + if (!dek_priv) + return ERR_PTR(-ENOMEM); + + dek_priv->mdev = mdev; + dek_priv->log_dek_obj_range = min_t(int, 12, + MLX5_CAP_CRYPTO(mdev, log_dek_max_alloc)); + + /* sync all types of objects */ + err = mlx5_crypto_cmd_sync_crypto(mdev, MLX5_CRYPTO_DEK_ALL_TYPE); + if (err) + goto err_sync_crypto; + + mlx5_core_dbg(mdev, "Crypto DEK enabled, %d deks per alloc (max %d), total %d\n", + 1 << dek_priv->log_dek_obj_range, + 1 << MLX5_CAP_CRYPTO(mdev, log_dek_max_alloc), + 1 << MLX5_CAP_CRYPTO(mdev, log_max_num_deks)); + + return dek_priv; + +err_sync_crypto: + kfree(dek_priv); + return ERR_PTR(err); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.h new file mode 100644 index 000000000000..c819c047bb9c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_LIB_CRYPTO_H__ +#define __MLX5_LIB_CRYPTO_H__ + +enum { + MLX5_ACCEL_OBJ_TLS_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_TLS, + MLX5_ACCEL_OBJ_IPSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_IPSEC, + MLX5_ACCEL_OBJ_MACSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_MACSEC, + MLX5_ACCEL_OBJ_TYPE_KEY_NUM, +}; + +int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, + const void *key, u32 sz_bytes, + u32 key_type, u32 *p_key_id); + +void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id); + +struct mlx5_crypto_dek_pool; +struct mlx5_crypto_dek; + +struct mlx5_crypto_dek_pool *mlx5_crypto_dek_pool_create(struct mlx5_core_dev *mdev, + int key_purpose); +void mlx5_crypto_dek_pool_destroy(struct mlx5_crypto_dek_pool *pool); +struct mlx5_crypto_dek *mlx5_crypto_dek_create(struct mlx5_crypto_dek_pool *dek_pool, + const void *key, u32 sz_bytes); +void mlx5_crypto_dek_destroy(struct mlx5_crypto_dek_pool *dek_pool, + struct mlx5_crypto_dek *dek); +u32 mlx5_crypto_dek_get_id(struct mlx5_crypto_dek *dek); + +struct mlx5_crypto_dek_priv *mlx5_crypto_dek_init(struct mlx5_core_dev *mdev); +void mlx5_crypto_dek_cleanup(struct mlx5_crypto_dek_priv *dek_priv); +#endif /* __MLX5_LIB_CRYPTO_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index df58cba37930..81ed91fee59b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -214,7 +214,7 @@ create_chain_restore(struct fs_chain *chain) struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch; u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; struct mlx5_fs_chains *chains = chain->chains; - enum mlx5e_tc_attr_to_reg chain_to_reg; + enum mlx5e_tc_attr_to_reg mapped_obj_to_reg; struct mlx5_modify_hdr *mod_hdr; u32 index; int err; @@ -242,7 +242,7 @@ create_chain_restore(struct fs_chain *chain) chain->id = index; if (chains->ns == MLX5_FLOW_NAMESPACE_FDB) { - chain_to_reg = CHAIN_TO_REG; + mapped_obj_to_reg = MAPPED_OBJ_TO_REG; chain->restore_rule = esw_add_restore_rule(esw, chain->id); if (IS_ERR(chain->restore_rule)) { err = PTR_ERR(chain->restore_rule); @@ -253,7 +253,7 @@ create_chain_restore(struct fs_chain *chain) * since we write the metadata to reg_b * that is passed to SW directly. */ - chain_to_reg = NIC_CHAIN_TO_REG; + mapped_obj_to_reg = NIC_MAPPED_OBJ_TO_REG; } else { err = -EINVAL; goto err_rule; @@ -261,12 +261,12 @@ create_chain_restore(struct fs_chain *chain) MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); MLX5_SET(set_action_in, modact, field, - mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mfield); + mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].mfield); MLX5_SET(set_action_in, modact, offset, - mlx5e_tc_attr_to_reg_mappings[chain_to_reg].moffset); + mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].moffset); MLX5_SET(set_action_in, modact, length, - mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen == 32 ? - 0 : mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen); + mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].mlen == 32 ? + 0 : mlx5e_tc_attr_to_reg_mappings[mapped_obj_to_reg].mlen); MLX5_SET(set_action_in, modact, data, chain->id); mod_hdr = mlx5_modify_header_alloc(chains->dev, chains->ns, 1, modact); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c new file mode 100644 index 000000000000..2c53589b765d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c @@ -0,0 +1,368 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "fs_core.h" +#include "lib/ipsec_fs_roce.h" +#include "mlx5_core.h" + +struct mlx5_ipsec_miss { + struct mlx5_flow_group *group; + struct mlx5_flow_handle *rule; +}; + +struct mlx5_ipsec_rx_roce { + struct mlx5_flow_group *g; + struct mlx5_flow_table *ft; + struct mlx5_flow_handle *rule; + struct mlx5_ipsec_miss roce_miss; + + struct mlx5_flow_table *ft_rdma; + struct mlx5_flow_namespace *ns_rdma; +}; + +struct mlx5_ipsec_tx_roce { + struct mlx5_flow_group *g; + struct mlx5_flow_table *ft; + struct mlx5_flow_handle *rule; + struct mlx5_flow_namespace *ns; +}; + +struct mlx5_ipsec_fs { + struct mlx5_ipsec_rx_roce ipv4_rx; + struct mlx5_ipsec_rx_roce ipv6_rx; + struct mlx5_ipsec_tx_roce tx; +}; + +static void ipsec_fs_roce_setup_udp_dport(struct mlx5_flow_spec *spec, + u16 dport) +{ + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, IPPROTO_UDP); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.udp_dport); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.udp_dport, dport); +} + +static int +ipsec_fs_roce_rx_rule_setup(struct mlx5_core_dev *mdev, + struct mlx5_flow_destination *default_dst, + struct mlx5_ipsec_rx_roce *roce) +{ + struct mlx5_flow_destination dst = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + ipsec_fs_roce_setup_udp_dport(spec, ROCE_V2_UDP_DPORT); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dst.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; + dst.ft = roce->ft_rdma; + rule = mlx5_add_flow_rules(roce->ft, spec, &flow_act, &dst, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Fail to add RX RoCE IPsec rule err=%d\n", + err); + goto fail_add_rule; + } + + roce->rule = rule; + + memset(spec, 0, sizeof(*spec)); + rule = mlx5_add_flow_rules(roce->ft, spec, &flow_act, default_dst, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Fail to add RX RoCE IPsec miss rule err=%d\n", + err); + goto fail_add_default_rule; + } + + roce->roce_miss.rule = rule; + + kvfree(spec); + return 0; + +fail_add_default_rule: + mlx5_del_flow_rules(roce->rule); +fail_add_rule: + kvfree(spec); + return err; +} + +static int ipsec_fs_roce_tx_rule_setup(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_tx_roce *roce, + struct mlx5_flow_table *pol_ft) +{ + struct mlx5_flow_destination dst = {}; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + int err = 0; + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dst.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; + dst.ft = pol_ft; + rule = mlx5_add_flow_rules(roce->ft, NULL, &flow_act, &dst, + 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "Fail to add TX RoCE IPsec rule err=%d\n", + err); + goto out; + } + roce->rule = rule; + +out: + return err; +} + +void mlx5_ipsec_fs_roce_tx_destroy(struct mlx5_ipsec_fs *ipsec_roce) +{ + struct mlx5_ipsec_tx_roce *tx_roce; + + if (!ipsec_roce) + return; + + tx_roce = &ipsec_roce->tx; + + mlx5_del_flow_rules(tx_roce->rule); + mlx5_destroy_flow_group(tx_roce->g); + mlx5_destroy_flow_table(tx_roce->ft); +} + +#define MLX5_TX_ROCE_GROUP_SIZE BIT(0) + +int mlx5_ipsec_fs_roce_tx_create(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_fs *ipsec_roce, + struct mlx5_flow_table *pol_ft) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_ipsec_tx_roce *roce; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + int ix = 0; + int err; + u32 *in; + + if (!ipsec_roce) + return 0; + + roce = &ipsec_roce->tx; + + in = kvzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in), GFP_KERNEL); + if (!in) + return -ENOMEM; + + ft_attr.max_fte = 1; + ft = mlx5_create_flow_table(roce->ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "Fail to create RoCE IPsec tx ft err=%d\n", err); + return err; + } + + roce->ft = ft; + + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TX_ROCE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + g = mlx5_create_flow_group(ft, in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, "Fail to create RoCE IPsec tx group err=%d\n", err); + goto fail; + } + roce->g = g; + + err = ipsec_fs_roce_tx_rule_setup(mdev, roce, pol_ft); + if (err) { + mlx5_core_err(mdev, "Fail to create RoCE IPsec tx rules err=%d\n", err); + goto rule_fail; + } + + return 0; + +rule_fail: + mlx5_destroy_flow_group(roce->g); +fail: + mlx5_destroy_flow_table(ft); + return err; +} + +struct mlx5_flow_table *mlx5_ipsec_fs_roce_ft_get(struct mlx5_ipsec_fs *ipsec_roce, u32 family) +{ + struct mlx5_ipsec_rx_roce *rx_roce; + + if (!ipsec_roce) + return NULL; + + rx_roce = (family == AF_INET) ? &ipsec_roce->ipv4_rx : + &ipsec_roce->ipv6_rx; + + return rx_roce->ft; +} + +void mlx5_ipsec_fs_roce_rx_destroy(struct mlx5_ipsec_fs *ipsec_roce, u32 family) +{ + struct mlx5_ipsec_rx_roce *rx_roce; + + if (!ipsec_roce) + return; + + rx_roce = (family == AF_INET) ? &ipsec_roce->ipv4_rx : + &ipsec_roce->ipv6_rx; + + mlx5_del_flow_rules(rx_roce->roce_miss.rule); + mlx5_del_flow_rules(rx_roce->rule); + mlx5_destroy_flow_table(rx_roce->ft_rdma); + mlx5_destroy_flow_group(rx_roce->roce_miss.group); + mlx5_destroy_flow_group(rx_roce->g); + mlx5_destroy_flow_table(rx_roce->ft); +} + +#define MLX5_RX_ROCE_GROUP_SIZE BIT(0) + +int mlx5_ipsec_fs_roce_rx_create(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_fs *ipsec_roce, + struct mlx5_flow_namespace *ns, + struct mlx5_flow_destination *default_dst, + u32 family, u32 level, u32 prio) +{ + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_ipsec_rx_roce *roce; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *g; + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + if (!ipsec_roce) + return 0; + + roce = (family == AF_INET) ? &ipsec_roce->ipv4_rx : + &ipsec_roce->ipv6_rx; + + ft_attr.max_fte = 2; + ft_attr.level = level; + ft_attr.prio = prio; + ft = mlx5_create_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx ft at nic err=%d\n", err); + return err; + } + + roce->ft = ft; + + in = kvzalloc(MLX5_ST_SZ_BYTES(create_flow_group_in), GFP_KERNEL); + if (!in) { + err = -ENOMEM; + goto fail_nomem; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport); + + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_RX_ROCE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + g = mlx5_create_flow_group(ft, in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx group at nic err=%d\n", err); + goto fail_group; + } + roce->g = g; + + memset(in, 0, MLX5_ST_SZ_BYTES(create_flow_group_in)); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_RX_ROCE_GROUP_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + g = mlx5_create_flow_group(ft, in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx miss group at nic err=%d\n", err); + goto fail_mgroup; + } + roce->roce_miss.group = g; + + memset(&ft_attr, 0, sizeof(ft_attr)); + if (family == AF_INET) + ft_attr.level = 1; + ft = mlx5_create_flow_table(roce->ns_rdma, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx ft at rdma err=%d\n", err); + goto fail_rdma_table; + } + + roce->ft_rdma = ft; + + err = ipsec_fs_roce_rx_rule_setup(mdev, default_dst, roce); + if (err) { + mlx5_core_err(mdev, "Fail to create RoCE IPsec rx rules err=%d\n", err); + goto fail_setup_rule; + } + + kvfree(in); + return 0; + +fail_setup_rule: + mlx5_destroy_flow_table(roce->ft_rdma); +fail_rdma_table: + mlx5_destroy_flow_group(roce->roce_miss.group); +fail_mgroup: + mlx5_destroy_flow_group(roce->g); +fail_group: + kvfree(in); +fail_nomem: + mlx5_destroy_flow_table(roce->ft); + return err; +} + +void mlx5_ipsec_fs_roce_cleanup(struct mlx5_ipsec_fs *ipsec_roce) +{ + kfree(ipsec_roce); +} + +struct mlx5_ipsec_fs *mlx5_ipsec_fs_roce_init(struct mlx5_core_dev *mdev) +{ + struct mlx5_ipsec_fs *roce_ipsec; + struct mlx5_flow_namespace *ns; + + ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_RDMA_RX_IPSEC); + if (!ns) { + mlx5_core_err(mdev, "Failed to get RoCE rx ns\n"); + return NULL; + } + + roce_ipsec = kzalloc(sizeof(*roce_ipsec), GFP_KERNEL); + if (!roce_ipsec) + return NULL; + + roce_ipsec->ipv4_rx.ns_rdma = ns; + roce_ipsec->ipv6_rx.ns_rdma = ns; + + ns = mlx5_get_flow_namespace(mdev, MLX5_FLOW_NAMESPACE_RDMA_TX_IPSEC); + if (!ns) { + mlx5_core_err(mdev, "Failed to get RoCE tx ns\n"); + goto err_tx; + } + + roce_ipsec->tx.ns = ns; + + return roce_ipsec; + +err_tx: + kfree(roce_ipsec); + return NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.h new file mode 100644 index 000000000000..9712d705fe48 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_LIB_IPSEC_H__ +#define __MLX5_LIB_IPSEC_H__ + +struct mlx5_ipsec_fs; + +struct mlx5_flow_table * +mlx5_ipsec_fs_roce_ft_get(struct mlx5_ipsec_fs *ipsec_roce, u32 family); +void mlx5_ipsec_fs_roce_rx_destroy(struct mlx5_ipsec_fs *ipsec_roce, + u32 family); +int mlx5_ipsec_fs_roce_rx_create(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_fs *ipsec_roce, + struct mlx5_flow_namespace *ns, + struct mlx5_flow_destination *default_dst, + u32 family, u32 level, u32 prio); +void mlx5_ipsec_fs_roce_tx_destroy(struct mlx5_ipsec_fs *ipsec_roce); +int mlx5_ipsec_fs_roce_tx_create(struct mlx5_core_dev *mdev, + struct mlx5_ipsec_fs *ipsec_roce, + struct mlx5_flow_table *pol_ft); +void mlx5_ipsec_fs_roce_cleanup(struct mlx5_ipsec_fs *ipsec_roce); +struct mlx5_ipsec_fs *mlx5_ipsec_fs_roce_init(struct mlx5_core_dev *mdev); + +#endif /* __MLX5_LIB_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index 032adb21ad4b..ccf12f7db6f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -79,28 +79,11 @@ struct mlx5_pme_stats { void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats); int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data); -/* Crypto */ -enum { - MLX5_ACCEL_OBJ_TLS_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_TLS, - MLX5_ACCEL_OBJ_IPSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_IPSEC, - MLX5_ACCEL_OBJ_MACSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_TYPE_MACSEC, -}; - -int mlx5_create_encryption_key(struct mlx5_core_dev *mdev, - void *key, u32 sz_bytes, - u32 key_type, u32 *p_key_id); -void mlx5_destroy_encryption_key(struct mlx5_core_dev *mdev, u32 key_id); - static inline struct net *mlx5_core_net(struct mlx5_core_dev *dev) { return devlink_net(priv_to_devlink(dev)); } -static inline void mlx5_uplink_netdev_set(struct mlx5_core_dev *mdev, struct net_device *netdev) -{ - mdev->mlx5e_res.uplink_netdev = netdev; -} - static inline struct net_device *mlx5_uplink_netdev_get(struct mlx5_core_dev *mdev) { return mdev->mlx5e_res.uplink_netdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 3d5f2a4b1fed..540840e80493 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -336,6 +336,24 @@ static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size) } } +void mlx5_core_uplink_netdev_set(struct mlx5_core_dev *dev, struct net_device *netdev) +{ + mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); + dev->mlx5e_res.uplink_netdev = netdev; + mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_UPLINK_NETDEV, + netdev); + mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); +} + +void mlx5_core_uplink_netdev_event_replay(struct mlx5_core_dev *dev) +{ + mutex_lock(&dev->mlx5e_res.uplink_netdev_lock); + mlx5_blocking_notifier_call_chain(dev, MLX5_DRIVER_EVENT_UPLINK_NETDEV, + dev->mlx5e_res.uplink_netdev); + mutex_unlock(&dev->mlx5e_res.uplink_netdev_lock); +} +EXPORT_SYMBOL(mlx5_core_uplink_netdev_event_replay); + static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type, enum mlx5_cap_mode cap_mode) @@ -484,9 +502,9 @@ static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev) union devlink_param_value val; int err; - err = devlink_param_driverinit_value_get(devlink, - DEVLINK_PARAM_GENERIC_ID_MAX_MACS, - &val); + err = devl_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + &val); if (!err) return val.vu32; mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err); @@ -499,9 +517,9 @@ bool mlx5_is_roce_on(struct mlx5_core_dev *dev) union devlink_param_value val; int err; - err = devlink_param_driverinit_value_get(devlink, - DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, - &val); + err = devl_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + &val); if (!err) return val.vbool; @@ -1390,9 +1408,9 @@ int mlx5_init_one(struct mlx5_core_dev *dev) set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); - err = mlx5_devlink_register(priv_to_devlink(dev)); + err = mlx5_devlink_params_register(priv_to_devlink(dev)); if (err) - goto err_devlink_reg; + goto err_devlink_params_reg; err = mlx5_register_device(dev); if (err) @@ -1403,8 +1421,8 @@ int mlx5_init_one(struct mlx5_core_dev *dev) return 0; err_register: - mlx5_devlink_unregister(priv_to_devlink(dev)); -err_devlink_reg: + mlx5_devlink_params_unregister(priv_to_devlink(dev)); +err_devlink_params_reg: clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); mlx5_unload(dev); err_load: @@ -1426,7 +1444,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev) mutex_lock(&dev->intf_state_mutex); mlx5_unregister_device(dev); - mlx5_devlink_unregister(priv_to_devlink(dev)); + mlx5_devlink_params_unregister(priv_to_devlink(dev)); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { mlx5_core_warn(dev, "%s: interface is down, NOP\n", @@ -1491,23 +1509,23 @@ out: return err; } -int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery) +int mlx5_load_one(struct mlx5_core_dev *dev) { struct devlink *devlink = priv_to_devlink(dev); int ret; devl_lock(devlink); - ret = mlx5_load_one_devl_locked(dev, recovery); + ret = mlx5_load_one_devl_locked(dev, false); devl_unlock(devlink); return ret; } -void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev) +void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend) { devl_assert_locked(priv_to_devlink(dev)); mutex_lock(&dev->intf_state_mutex); - mlx5_detach_device(dev); + mlx5_detach_device(dev, suspend); if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) { mlx5_core_warn(dev, "%s: interface is down, NOP\n", @@ -1522,12 +1540,12 @@ out: mutex_unlock(&dev->intf_state_mutex); } -void mlx5_unload_one(struct mlx5_core_dev *dev) +void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend) { struct devlink *devlink = priv_to_devlink(dev); devl_lock(devlink); - mlx5_unload_one_devl_locked(dev); + mlx5_unload_one_devl_locked(dev, suspend); devl_unlock(devlink); } @@ -1555,6 +1573,7 @@ static const int types[] = { MLX5_CAP_DEV_SHAMPO, MLX5_CAP_MACSEC, MLX5_CAP_ADV_VIRTUALIZATION, + MLX5_CAP_CRYPTO, }; static void mlx5_hca_caps_free(struct mlx5_core_dev *dev) @@ -1608,6 +1627,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) lockdep_register_key(&dev->lock_key); mutex_init(&dev->intf_state_mutex); lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key); + mutex_init(&dev->mlx5e_res.uplink_netdev_lock); mutex_init(&priv->bfregs.reg_head.lock); mutex_init(&priv->bfregs.wc_head.lock); @@ -1696,6 +1716,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) mutex_destroy(&priv->alloc_mutex); mutex_destroy(&priv->bfregs.wc_head.lock); mutex_destroy(&priv->bfregs.reg_head.lock); + mutex_destroy(&dev->mlx5e_res.uplink_netdev_lock); mutex_destroy(&dev->intf_state_mutex); lockdep_unregister_key(&dev->lock_key); } @@ -1809,7 +1830,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, mlx5_enter_error_state(dev, false); mlx5_error_sw_reset(dev); - mlx5_unload_one(dev); + mlx5_unload_one(dev, true); mlx5_drain_health_wq(dev); mlx5_pci_disable_device(dev); @@ -1891,8 +1912,7 @@ static void mlx5_pci_resume(struct pci_dev *pdev) mlx5_pci_trace(dev, "Enter, loading driver..\n"); - err = mlx5_load_one(dev, false); - + err = mlx5_load_one(dev); if (!err) devlink_health_reporter_state_update(dev->priv.health.fw_fatal_reporter, DEVLINK_HEALTH_REPORTER_STATE_HEALTHY); @@ -1966,7 +1986,7 @@ static void shutdown(struct pci_dev *pdev) set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); err = mlx5_try_fast_unload(dev); if (err) - mlx5_unload_one(dev); + mlx5_unload_one(dev, false); mlx5_pci_disable_device(dev); } @@ -1974,7 +1994,7 @@ static int mlx5_suspend(struct pci_dev *pdev, pm_message_t state) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - mlx5_unload_one(dev); + mlx5_unload_one(dev, true); return 0; } @@ -1983,7 +2003,7 @@ static int mlx5_resume(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - return mlx5_load_one(dev, false); + return mlx5_load_one(dev); } static const struct pci_device_id mlx5_core_pci_table[] = { @@ -2017,7 +2037,7 @@ MODULE_DEVICE_TABLE(pci, mlx5_core_pci_table); void mlx5_disable_device(struct mlx5_core_dev *dev) { mlx5_error_sw_reset(dev); - mlx5_unload_one_devl_locked(dev); + mlx5_unload_one_devl_locked(dev, false); } int mlx5_recover_device(struct mlx5_core_dev *dev) @@ -2110,7 +2130,7 @@ static int __init mlx5_init(void) mlx5_core_verify_params(); mlx5_register_debugfs(); - err = pci_register_driver(&mlx5_core_driver); + err = mlx5e_init(); if (err) goto err_debug; @@ -2118,16 +2138,16 @@ static int __init mlx5_init(void) if (err) goto err_sf; - err = mlx5e_init(); + err = pci_register_driver(&mlx5_core_driver); if (err) - goto err_en; + goto err_pci; return 0; -err_en: +err_pci: mlx5_sf_driver_unregister(); err_sf: - pci_unregister_driver(&mlx5_core_driver); + mlx5e_cleanup(); err_debug: mlx5_unregister_debugfs(); return err; @@ -2135,9 +2155,9 @@ err_debug: static void __exit mlx5_cleanup(void) { - mlx5e_cleanup(); - mlx5_sf_driver_unregister(); pci_unregister_driver(&mlx5_core_driver); + mlx5_sf_driver_unregister(); + mlx5e_cleanup(); mlx5_unregister_debugfs(); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 029305a8b80a..be0785f83083 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -236,7 +236,7 @@ void mlx5_adev_cleanup(struct mlx5_core_dev *dev); int mlx5_adev_init(struct mlx5_core_dev *dev); int mlx5_attach_device(struct mlx5_core_dev *dev); -void mlx5_detach_device(struct mlx5_core_dev *dev); +void mlx5_detach_device(struct mlx5_core_dev *dev, bool suspend); int mlx5_register_device(struct mlx5_core_dev *dev); void mlx5_unregister_device(struct mlx5_core_dev *dev); struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev); @@ -319,9 +319,9 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx); void mlx5_mdev_uninit(struct mlx5_core_dev *dev); int mlx5_init_one(struct mlx5_core_dev *dev); void mlx5_uninit_one(struct mlx5_core_dev *dev); -void mlx5_unload_one(struct mlx5_core_dev *dev); -void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev); -int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery); +void mlx5_unload_one(struct mlx5_core_dev *dev, bool suspend); +void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev, bool suspend); +int mlx5_load_one(struct mlx5_core_dev *dev); int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery); int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 function_id, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 60596357bfc7..64d4e7125e9b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -74,6 +74,14 @@ static u32 get_function(u16 func_id, bool ec_function) return (u32)func_id | (ec_function << 16); } +static u16 func_id_to_type(struct mlx5_core_dev *dev, u16 func_id, bool ec_function) +{ + if (!func_id) + return mlx5_core_is_ecpf(dev) && !ec_function ? MLX5_HOST_PF : MLX5_PF; + + return func_id <= mlx5_core_max_vfs(dev) ? MLX5_VF : MLX5_SF; +} + static struct rb_root *page_root_per_function(struct mlx5_core_dev *dev, u32 function) { struct rb_root *root; @@ -211,7 +219,8 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u32 function) n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask)); if (n >= MLX5_NUM_4K_IN_PAGE) { - mlx5_core_warn(dev, "alloc 4k bug\n"); + mlx5_core_warn(dev, "alloc 4k bug: fw page = 0x%llx, n = %u, bitmask: %lu, max num of 4K pages: %d\n", + fp->addr, n, fp->bitmask, MLX5_NUM_4K_IN_PAGE); return -ENOENT; } clear_bit(n, &fp->bitmask); @@ -332,6 +341,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; int inlen = MLX5_ST_SZ_BYTES(manage_pages_in); int notify_fail = event; + u16 func_type; u64 addr; int err; u32 *in; @@ -383,11 +393,9 @@ retry: goto out_dropped; } + func_type = func_id_to_type(dev, func_id, ec_function); + dev->priv.page_counters[func_type] += npages; dev->priv.fw_pages += npages; - if (func_id) - dev->priv.vfs_pages += npages; - else if (mlx5_core_is_ecpf(dev) && !ec_function) - dev->priv.host_pf_pages += npages; mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x, err %d\n", npages, ec_function, func_id, err); @@ -414,6 +422,7 @@ static void release_all_pages(struct mlx5_core_dev *dev, u16 func_id, struct rb_root *root; struct rb_node *p; int npages = 0; + u16 func_type; root = xa_load(&dev->priv.page_root_xa, function); if (WARN_ON_ONCE(!root)) @@ -428,11 +437,9 @@ static void release_all_pages(struct mlx5_core_dev *dev, u16 func_id, free_fwp(dev, fwp, fwp->free_count); } + func_type = func_id_to_type(dev, func_id, ec_function); + dev->priv.page_counters[func_type] -= npages; dev->priv.fw_pages -= npages; - if (func_id) - dev->priv.vfs_pages -= npages; - else if (mlx5_core_is_ecpf(dev) && !ec_function) - dev->priv.host_pf_pages -= npages; mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x\n", npages, ec_function, func_id); @@ -498,6 +505,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, int outlen = MLX5_ST_SZ_BYTES(manage_pages_out); u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {}; int num_claimed; + u16 func_type; u32 *out; int err; int i; @@ -549,11 +557,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, if (nclaimed) *nclaimed = num_claimed; + func_type = func_id_to_type(dev, func_id, ec_function); + dev->priv.page_counters[func_type] -= num_claimed; dev->priv.fw_pages -= num_claimed; - if (func_id) - dev->priv.vfs_pages -= num_claimed; - else if (mlx5_core_is_ecpf(dev) && !ec_function) - dev->priv.host_pf_pages -= num_claimed; out_free: kvfree(out); @@ -706,12 +712,12 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) WARN(dev->priv.fw_pages, "FW pages counter is %d after reclaiming all pages\n", dev->priv.fw_pages); - WARN(dev->priv.vfs_pages, + WARN(dev->priv.page_counters[MLX5_VF], "VFs FW pages counter is %d after reclaiming all pages\n", - dev->priv.vfs_pages); - WARN(dev->priv.host_pf_pages, + dev->priv.page_counters[MLX5_VF]); + WARN(dev->priv.page_counters[MLX5_HOST_PF], "External host PF FW pages counter is %d after reclaiming all pages\n", - dev->priv.host_pf_pages); + dev->priv.page_counters[MLX5_HOST_PF]); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index 7b4783ce213e..a7377619ba6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -74,7 +74,7 @@ static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev) { struct mlx5_sf_dev *sf_dev = container_of(adev, struct mlx5_sf_dev, adev); - mlx5_unload_one(sf_dev->mdev); + mlx5_unload_one(sf_dev->mdev, false); } static const struct auxiliary_device_id mlx5_sf_dev_id_table[] = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index c0e6c487c63c..3008e9ce2bbf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -147,7 +147,7 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf) mlx5_eswitch_disable_sriov(dev->priv.eswitch, clear_vf); - if (mlx5_wait_for_pages(dev, &dev->priv.vfs_pages)) + if (mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_VF])) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index b851141e03de..042ca0349124 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -1138,12 +1138,14 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule, rule->flow_source)) return 0; + mlx5dr_domain_nic_lock(nic_dmn); + ret = mlx5dr_matcher_select_builders(matcher, nic_matcher, dr_rule_get_ipv(¶m->outer), dr_rule_get_ipv(¶m->inner)); if (ret) - return ret; + goto err_unlock; hw_ste_arr_is_opt = nic_matcher->num_of_builders <= DR_RULE_MAX_STES_OPTIMIZED; if (likely(hw_ste_arr_is_opt)) { @@ -1152,12 +1154,12 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule, hw_ste_arr = kzalloc((nic_matcher->num_of_builders + DR_ACTION_MAX_STES) * DR_STE_SIZE, GFP_KERNEL); - if (!hw_ste_arr) - return -ENOMEM; + if (!hw_ste_arr) { + ret = -ENOMEM; + goto err_unlock; + } } - mlx5dr_domain_nic_lock(nic_dmn); - ret = mlx5dr_matcher_add_to_tbl_nic(dmn, nic_matcher); if (ret) goto free_hw_ste; @@ -1223,7 +1225,10 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule, mlx5dr_domain_nic_unlock(nic_dmn); - goto out; + if (unlikely(!hw_ste_arr_is_opt)) + kfree(hw_ste_arr); + + return 0; free_rule: dr_rule_clean_rule_members(rule, nic_rule); @@ -1238,12 +1243,12 @@ remove_from_nic_tbl: mlx5dr_matcher_remove_from_tbl_nic(dmn, nic_matcher); free_hw_ste: - mlx5dr_domain_nic_unlock(nic_dmn); - -out: - if (unlikely(!hw_ste_arr_is_opt)) + if (!hw_ste_arr_is_opt) kfree(hw_ste_arr); +err_unlock: + mlx5dr_domain_nic_unlock(nic_dmn); + return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index a4476cb4c3b3..fd2d31cdbcf9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -724,7 +724,6 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, struct mlx5dr_action *action) { struct postsend_info send_info = {}; - int ret; send_info.write.addr = (uintptr_t)action->rewrite->data; send_info.write.length = action->rewrite->num_of_actions * @@ -734,9 +733,7 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn, mlx5dr_icm_pool_get_chunk_mr_addr(action->rewrite->chunk); send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(action->rewrite->chunk); - ret = dr_postsend_icm_data(dmn, &send_info); - - return ret; + return dr_postsend_icm_data(dmn, &send_info); } static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev, |