From e53a9d26cf80565cfb7172fc52a0dfac73613a0f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Mon, 28 Oct 2019 23:35:30 +0000 Subject: IB/mlx5: Introduce and use mlx5_core_is_vf() Instead of deciding whether a given device is a virtual function based on whether or not it is a PF, use the already defined MLX5_COREDEV_VF by introducing a helper API, mlx5_core_is_vf(). This makes it possible to clearly identify PF, VF and non-virtual functions. Signed-off-by: Parav Pandit Reviewed-by: Vu Pham Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3e80f03a387f..7b4801e96feb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1121,6 +1121,11 @@ static inline bool mlx5_core_is_pf(const struct mlx5_core_dev *dev) return dev->coredev_type == MLX5_COREDEV_PF; } +static inline bool mlx5_core_is_vf(const struct mlx5_core_dev *dev) +{ + return dev->coredev_type == MLX5_COREDEV_VF; +} + static inline bool mlx5_core_is_ecpf(struct mlx5_core_dev *dev) { return dev->caps.embedded_cpu; -- cgit v1.2.3 From 6c7295e13ffd5623b02f1adc1442f1d8a3d52424 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Fri, 8 Nov 2019 23:45:20 +0000 Subject: devlink: Add new "enable_roce" generic device param New device parameter to enable/disable handling of RoCE traffic in the device. 
Signed-off-by: Michael Guralnik Acked-by: Jiri Pirko Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- Documentation/networking/devlink-params.txt | 4 ++++ include/net/devlink.h | 4 ++++ net/core/devlink.c | 5 +++++ 3 files changed, 13 insertions(+) (limited to 'include') diff --git a/Documentation/networking/devlink-params.txt b/Documentation/networking/devlink-params.txt index ddba3e9b55b1..04e234e9acc9 100644 --- a/Documentation/networking/devlink-params.txt +++ b/Documentation/networking/devlink-params.txt @@ -65,3 +65,7 @@ reset_dev_on_drv_probe [DEVICE, GENERIC] Reset only if device firmware can be found in the filesystem. Type: u8 + +enable_roce [DEVICE, GENERIC] + Enable handling of RoCE traffic in the device. + Type: Boolean diff --git a/include/net/devlink.h b/include/net/devlink.h index 23e4b65ec9df..39fb4d957838 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -400,6 +400,7 @@ enum devlink_param_generic_id { DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, /* add new param generic ids above here*/ __DEVLINK_PARAM_GENERIC_ID_MAX, @@ -434,6 +435,9 @@ enum devlink_param_generic_id { "reset_dev_on_drv_probe" #define DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE DEVLINK_PARAM_TYPE_U8 +#define DEVLINK_PARAM_GENERIC_ENABLE_ROCE_NAME "enable_roce" +#define DEVLINK_PARAM_GENERIC_ENABLE_ROCE_TYPE DEVLINK_PARAM_TYPE_BOOL + #define DEVLINK_PARAM_GENERIC(_id, _cmodes, _get, _set, _validate) \ { \ .id = DEVLINK_PARAM_GENERIC_ID_##_id, \ diff --git a/net/core/devlink.c b/net/core/devlink.c index f80151eeaf51..0fbcd44aa64f 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -2884,6 +2884,11 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_NAME, .type = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE, }, + { + .id = 
DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + .name = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_NAME, + .type = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_TYPE, + }, }; static int devlink_param_generic_verify(const struct devlink_param *param) -- cgit v1.2.3 From cc9defcbb8fae52810f7795b039223edae51ef95 Mon Sep 17 00:00:00 2001 From: Michael Guralnik Date: Fri, 8 Nov 2019 23:45:24 +0000 Subject: net/mlx5: Handle "enable_roce" devlink param Register "enable_roce" param, default value is RoCE enabled. Current configuration is stored on mlx5_core_dev and exposed to user through the cmode runtime devlink param. Changing configuration requires changing the cmode driverinit devlink param and calling devlink reload. Signed-off-by: Michael Guralnik Acked-by: Jiri Pirko Signed-off-by: Saeed Mahameed --- .../networking/device_drivers/mellanox/mlx5.rst | 21 +++++++++++++++++++++ Documentation/networking/devlink-params-mlx5.txt | 5 +++++ drivers/net/ethernet/mellanox/mlx5/core/devlink.c | 22 ++++++++++++++++++++++ include/linux/mlx5/driver.h | 11 +++++++++++ 4 files changed, 59 insertions(+) (limited to 'include') diff --git a/Documentation/networking/device_drivers/mellanox/mlx5.rst b/Documentation/networking/device_drivers/mellanox/mlx5.rst index d071c6b49e1f..7599dceba9f1 100644 --- a/Documentation/networking/device_drivers/mellanox/mlx5.rst +++ b/Documentation/networking/device_drivers/mellanox/mlx5.rst @@ -154,6 +154,27 @@ User command examples: values: cmode runtime value smfs +enable_roce: RoCE enablement state +---------------------------------- +RoCE enablement state controls driver support for RoCE traffic. +When RoCE is disabled, there is no gid table, only raw ethernet QPs are supported and traffic on the well known UDP RoCE port is handled as raw ethernet traffic. + +To change RoCE enablement state a user must change the driverinit cmode value and run devlink reload. 
+ +User command examples: + +- Disable RoCE:: + + $ devlink dev param set pci/0000:06:00.0 name enable_roce value false cmode driverinit + $ devlink dev reload pci/0000:06:00.0 + +- Read RoCE enablement state:: + + $ devlink dev param show pci/0000:06:00.0 name enable_roce + pci/0000:06:00.0: + name enable_roce type generic + values: + cmode driverinit value true Devlink health reporters ======================== diff --git a/Documentation/networking/devlink-params-mlx5.txt b/Documentation/networking/devlink-params-mlx5.txt index 8c0b82d655dc..5071467118bd 100644 --- a/Documentation/networking/devlink-params-mlx5.txt +++ b/Documentation/networking/devlink-params-mlx5.txt @@ -10,3 +10,8 @@ flow_steering_mode [DEVICE, DRIVER-SPECIFIC] without firmware intervention. Type: String Configuration mode: runtime + +enable_roce [DEVICE, GENERIC] + Enable handling of RoCE traffic in the device. + Enabled by default. + Configuration mode: driverinit diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 381925c90d94..b2c26388edb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -177,12 +177,29 @@ enum mlx5_devlink_param_id { MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, }; +static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + bool new_state = val.vbool; + + if (new_state && !MLX5_CAP_GEN(dev, roce)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE"); + return -EOPNOTSUPP; + } + + return 0; +} + static const struct devlink_param mlx5_devlink_params[] = { DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, "flow_steering_mode", DEVLINK_PARAM_TYPE_STRING, BIT(DEVLINK_PARAM_CMODE_RUNTIME), mlx5_devlink_fs_mode_get, mlx5_devlink_fs_mode_set, mlx5_devlink_fs_mode_validate), + 
DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, mlx5_devlink_enable_roce_validate), }; static void mlx5_devlink_set_params_init_values(struct devlink *devlink) @@ -197,6 +214,11 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink) devlink_param_driverinit_value_set(devlink, MLX5_DEVLINK_PARAM_FLOW_STEERING_MODE, value); + + value.vbool = MLX5_CAP_GEN(dev, roce); + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + value); } int mlx5_devlink_register(struct devlink *devlink, struct device *dev) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 7b4801e96feb..1884513aac90 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1191,4 +1191,15 @@ enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; +static inline bool mlx5_is_roce_enabled(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + + devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + &val); + return val.vbool; +} + #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From 975b992fdd4b38028d7c1dcf38286d6e7991c1b2 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 12 Nov 2019 00:34:29 +0100 Subject: net/mlx5: Add new chain for netfilter flow table offload Netfilter tables (nftables) implements a software datapath that comes after tc ingress datapath. The datapath supports offloading such rules via the flow table offload API. This API is currently only used by NFT and it doesn't provide the global priority in regards to tc offload, so we assume offloading such rules must come after tc. It does provide a flow table priority parameter, so we need to provide some supported priority range. For that, split fastpath prio to two, flow table offload and tc offload, with one dedicated priority chain for flow table offload. 
Next patch will re-use the multi chain API to access this chain by allowing access to this chain by the fdb_sub_namespace. Signed-off-by: Paul Blakey Reviewed-by: Mark Bloch Acked-by: Pablo Neira Ayuso Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.h | 9 +++++++-- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 9 ++++++--- include/linux/mlx5/fs.h | 3 ++- 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 8c9d8dc85861..2b563700c664 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -44,7 +44,12 @@ #include "lib/mpfs.h" #define FDB_TC_MAX_CHAIN 3 -#define FDB_TC_SLOW_PATH_CHAIN (FDB_TC_MAX_CHAIN + 1) +#define FDB_FT_CHAIN (FDB_TC_MAX_CHAIN + 1) +#define FDB_TC_SLOW_PATH_CHAIN (FDB_FT_CHAIN + 1) + +/* The index of the last real chain (FT) + 1 as chain zero is valid as well */ +#define FDB_NUM_CHAINS (FDB_FT_CHAIN + 1) + #define FDB_TC_MAX_PRIO 16 #define FDB_TC_LEVELS_PER_PRIO 2 @@ -173,7 +178,7 @@ struct mlx5_eswitch_fdb { struct { struct mlx5_flow_table *fdb; u32 num_rules; - } fdb_prio[FDB_TC_MAX_CHAIN + 1][FDB_TC_MAX_PRIO + 1][FDB_TC_LEVELS_PER_PRIO]; + } fdb_prio[FDB_NUM_CHAINS][FDB_TC_MAX_PRIO + 1][FDB_TC_LEVELS_PER_PRIO]; /* Protects fdb_prio table */ struct mutex fdb_prio_lock; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 4aa6990a38b3..84e90b21e148 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2624,16 +2624,19 @@ static int create_fdb_chains(struct mlx5_flow_steering *steering, static int create_fdb_fast_path(struct mlx5_flow_steering *steering) { - const int total_chains = FDB_TC_MAX_CHAIN + 1; int err; - steering->fdb_sub_ns = kcalloc(total_chains, + 
steering->fdb_sub_ns = kcalloc(FDB_NUM_CHAINS, sizeof(*steering->fdb_sub_ns), GFP_KERNEL); if (!steering->fdb_sub_ns) return -ENOMEM; - err = create_fdb_chains(steering, FDB_FAST_PATH, FDB_TC_MAX_CHAIN + 1); + err = create_fdb_chains(steering, FDB_TC_OFFLOAD, FDB_TC_MAX_CHAIN + 1); + if (err) + return err; + + err = create_fdb_chains(steering, FDB_FT_OFFLOAD, 1); if (err) return err; diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 724d276ea133..4e5b84e66822 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -80,7 +80,8 @@ enum mlx5_flow_namespace_type { enum { FDB_BYPASS_PATH, - FDB_FAST_PATH, + FDB_TC_OFFLOAD, + FDB_FT_OFFLOAD, FDB_SLOW_PATH, }; -- cgit v1.2.3 From 30aad41721e087babcf27c5192474724d555936c Mon Sep 17 00:00:00 2001 From: Danit Goldberg Date: Wed, 6 Nov 2019 15:30:07 +0200 Subject: net/core: Add support for getting VF GUIDs Introduce a new ndo: ndo_get_vf_guid, to get from the net device the port and node GUID. New applications can choose to use this interface to show GUIDs with iproute2 with commands such as: - ip link show ib4 ib4: mtu 4092 qdisc noop state DOWN mode DEFAULT group default qlen 256 link/infiniband 00:00:0a:2d:fe:80:00:00:00:00:00:00:ec:0d:9a:03:00:44:36:8d brd 00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff vf 0 link/infiniband 00:00:0a:2d:fe:80:00:00:00:00:00:00:ec:0d:9a:03:00:44:36:8d brd 00:ff:ff:ff:ff:12:40:1b:ff:ff:00:00:00:00:00:00:ff:ff:ff:ff, spoof checking off, NODE_GUID 22:44:33:00:33:11:00:33, PORT_GUID 10:21:33:12:00:11:22:10, link-state disable, trust off, query_rss off Signed-off-by: Danit Goldberg Acked-by: David Ahern Signed-off-by: Leon Romanovsky --- include/linux/netdevice.h | 4 ++++ net/core/rtnetlink.c | 14 ++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9eda1c31d1f7..379338239e49 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1316,6 
+1316,10 @@ struct net_device_ops { struct nlattr *port[]); int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb); + int (*ndo_get_vf_guid)(struct net_device *dev, + int vf, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid); int (*ndo_set_vf_guid)(struct net_device *dev, int vf, u64 guid, int guid_type); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1ee6460f8275..93791dad3e31 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1204,6 +1204,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, struct ifla_vf_mac vf_mac; struct ifla_vf_broadcast vf_broadcast; struct ifla_vf_info ivi; + struct ifla_vf_guid node_guid; + struct ifla_vf_guid port_guid; memset(&ivi, 0, sizeof(ivi)); @@ -1270,6 +1272,18 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, nla_put(skb, IFLA_VF_TRUST, sizeof(vf_trust), &vf_trust)) goto nla_put_vf_failure; + + memset(&node_guid, 0, sizeof(node_guid)); + memset(&port_guid, 0, sizeof(port_guid)); + if (dev->netdev_ops->ndo_get_vf_guid && + !dev->netdev_ops->ndo_get_vf_guid(dev, vfs_num, &node_guid, + &port_guid)) { + if (nla_put(skb, IFLA_VF_IB_NODE_GUID, sizeof(node_guid), + &node_guid) || + nla_put(skb, IFLA_VF_IB_PORT_GUID, sizeof(port_guid), + &port_guid)) + goto nla_put_vf_failure; + } vfvlanlist = nla_nest_start_noflag(skb, IFLA_VF_VLAN_LIST); if (!vfvlanlist) goto nla_put_vf_failure; -- cgit v1.2.3 From bfcb3c5d14854f001881dc3f5cc29bf186598d9f Mon Sep 17 00:00:00 2001 From: Danit Goldberg Date: Wed, 6 Nov 2019 15:08:32 +0200 Subject: IB/core: Add interfaces to get VF node and port GUIDs Provide ability to get node and port GUIDs of VFs to be symmetrical to already existing set option. 
Signed-off-by: Danit Goldberg Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/device.c | 1 + drivers/infiniband/core/verbs.c | 10 ++++++++++ include/rdma/ib_verbs.h | 6 ++++++ 3 files changed, 17 insertions(+) (limited to 'include') diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 99c4a55545cf..38fadbec054d 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2614,6 +2614,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) SET_DEVICE_OP(dev_ops, get_port_immutable); SET_DEVICE_OP(dev_ops, get_vector_affinity); SET_DEVICE_OP(dev_ops, get_vf_config); + SET_DEVICE_OP(dev_ops, get_vf_guid); SET_DEVICE_OP(dev_ops, get_vf_stats); SET_DEVICE_OP(dev_ops, init_port); SET_DEVICE_OP(dev_ops, invalidate_range); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index f974b6854224..7d96351c8d8c 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2458,6 +2458,16 @@ int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, } EXPORT_SYMBOL(ib_set_vf_guid); +int ib_get_vf_guid(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid) +{ + if (!device->ops.get_vf_guid) + return -EOPNOTSUPP; + + return device->ops.get_vf_guid(device, vf, port, node_guid, port_guid); +} +EXPORT_SYMBOL(ib_get_vf_guid); /** * ib_map_mr_sg_pi() - Map the dma mapped SG lists for PI (protection * information) and set an appropriate memory region for registration. 
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6a47ba85c54c..ec7d1a1f8f31 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2448,6 +2448,9 @@ struct ib_device_ops { struct ifla_vf_info *ivf); int (*get_vf_stats)(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats); + int (*get_vf_guid)(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid); int (*set_vf_guid)(struct ib_device *device, int vf, u8 port, u64 guid, int type); struct ib_wq *(*create_wq)(struct ib_pd *pd, @@ -3303,6 +3306,9 @@ int ib_get_vf_config(struct ib_device *device, int vf, u8 port, struct ifla_vf_info *info); int ib_get_vf_stats(struct ib_device *device, int vf, u8 port, struct ifla_vf_stats *stats); +int ib_get_vf_guid(struct ib_device *device, int vf, u8 port, + struct ifla_vf_guid *node_guid, + struct ifla_vf_guid *port_guid); int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid, int type); -- cgit v1.2.3