summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/ethernet/mellanox')
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/alloc.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_main.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/icm.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cq.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h47
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.c208
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/port.h12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_common.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c181
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c56
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c269
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c254
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rx.c56
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c60
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c675
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch.h58
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c484
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c89
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c195
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h18
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c201
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fw.c50
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/health.c25
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c188
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c34
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h24
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qp.c81
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/srq.c32
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/vport.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.c120
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/Makefile3
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci_hw.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h603
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/resources.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c145
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h106
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c488
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c225
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c982
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h49
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c249
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c138
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h15
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c573
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/trap.h2
57 files changed, 5884 insertions, 1255 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c
index 4bdf25059542..deef5a998985 100644
--- a/drivers/net/ethernet/mellanox/mlx4/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c
@@ -614,7 +614,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
int i;
buf->direct.buf = NULL;
- buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
+ buf->nbufs = DIV_ROUND_UP(size, PAGE_SIZE);
buf->npages = buf->nbufs;
buf->page_shift = PAGE_SHIFT;
buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list),
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index d25e16d2c319..109472d6b61f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -167,8 +167,13 @@ static void mlx4_en_get_profile(struct mlx4_en_dev *mdev)
params->prof[i].rx_ppp = pfcrx;
params->prof[i].tx_pause = !(pfcrx || pfctx);
params->prof[i].tx_ppp = pfctx;
- params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE;
- params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE;
+ if (mlx4_low_memory_profile()) {
+ params->prof[i].tx_ring_size = MLX4_EN_MIN_TX_SIZE;
+ params->prof[i].rx_ring_size = MLX4_EN_MIN_RX_SIZE;
+ } else {
+ params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE;
+ params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE;
+ }
params->prof[i].num_up = MLX4_EN_NUM_UP_LOW;
params->prof[i].num_tx_rings_p_up = params->max_num_tx_rings_p_up;
params->prof[i].tx_ring_num[TX] = params->max_num_tx_rings_p_up *
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index 7262c6310650..4b4351141b94 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -406,7 +406,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size;
if (WARN_ON(!obj_per_chunk))
return -EINVAL;
- num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk;
+ num_icm = DIV_ROUND_UP(nobj, obj_per_chunk);
table->icm = kvcalloc(num_icm, sizeof(*table->icm), GFP_KERNEL);
if (!table->icm)
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index c3228b89df46..485d856546c6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -72,7 +72,7 @@
#define MLX4_EN_PAGE_SIZE (1 << MLX4_EN_PAGE_SHIFT)
#define DEF_RX_RINGS 16
#define MAX_RX_RINGS 128
-#define MIN_RX_RINGS 4
+#define MIN_RX_RINGS 1
#define LOG_TXBB_SIZE 6
#define TXBB_SIZE BIT(LOG_TXBB_SIZE)
#define HEADROOM (2048 / TXBB_SIZE + 1)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index a53736c26c0c..a5a0823e5ada 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -308,10 +308,11 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
- case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
+ case MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT:
case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT:
case MLX5_CMD_OP_FPGA_DESTROY_QP:
case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_DEALLOC_MEMIC:
return MLX5_CMD_STAT_OK;
case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -426,7 +427,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
- case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+ case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT:
case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
case MLX5_CMD_OP_FPGA_CREATE_QP:
case MLX5_CMD_OP_FPGA_MODIFY_QP:
@@ -435,6 +436,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_ALLOC_MEMIC:
*status = MLX5_DRIVER_STATUS_ABORTED;
*synd = MLX5_DRIVER_SYND;
return -EIO;
@@ -599,8 +601,8 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER);
MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER);
MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
- MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER);
- MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
+ MLX5_COMMAND_STR_CASE(ALLOC_PACKET_REFORMAT_CONTEXT);
+ MLX5_COMMAND_STR_CASE(DEALLOC_PACKET_REFORMAT_CONTEXT);
MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT);
MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT);
MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP);
@@ -617,6 +619,8 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT);
MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT);
MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT);
+ MLX5_COMMAND_STR_CASE(ALLOC_MEMIC);
+ MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC);
default: return "unknown command opcode";
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index a4179122a279..4b85abb5c9f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -109,6 +109,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
cq->cons_index = 0;
cq->arm_sn = 0;
cq->eq = eq;
+ cq->uid = MLX5_GET(create_cq_in, in, uid);
refcount_set(&cq->refcount, 1);
init_completion(&cq->free);
if (!cq->comp)
@@ -144,6 +145,7 @@ err_cmd:
memset(dout, 0, sizeof(dout));
MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
MLX5_SET(destroy_cq_in, din, cqn, cq->cqn);
+ MLX5_SET(destroy_cq_in, din, uid, cq->uid);
mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
return err;
}
@@ -165,6 +167,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
+ MLX5_SET(destroy_cq_in, in, uid, cq->uid);
err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (err)
return err;
@@ -196,6 +199,7 @@ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0};
MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ);
+ MLX5_SET(modify_cq_in, in, uid, cq->uid);
return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
}
EXPORT_SYMBOL(mlx5_core_modify_cq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
index 0240aee9189e..d027ce00c8ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h
@@ -133,7 +133,7 @@ TRACE_EVENT(mlx5_fs_del_fg,
{MLX5_FLOW_CONTEXT_ACTION_DROP, "DROP"},\
{MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, "FWD"},\
{MLX5_FLOW_CONTEXT_ACTION_COUNT, "CNT"},\
- {MLX5_FLOW_CONTEXT_ACTION_ENCAP, "ENCAP"},\
+ {MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT, "REFORMAT"},\
{MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\
{MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\
{MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH, "VLAN_PUSH"},\
@@ -252,10 +252,10 @@ TRACE_EVENT(mlx5_fs_add_rule,
memcpy(__entry->destination,
&rule->dest_attr,
sizeof(__entry->destination));
- if (rule->dest_attr.type & MLX5_FLOW_DESTINATION_TYPE_COUNTER &&
- rule->dest_attr.counter)
+ if (rule->dest_attr.type &
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER)
__entry->counter_id =
- rule->dest_attr.counter->id;
+ rule->dest_attr.counter_id;
),
TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n",
__entry->rule, __entry->fte, __entry->index,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 0f189f873859..d7fbd5b6ac95 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -173,6 +173,7 @@ static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
}
}
+/* Use this function to get max num channels (rxqs/txqs) only to create netdev */
static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
{
return is_kdump_kernel() ?
@@ -181,6 +182,13 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
MLX5E_MAX_NUM_CHANNELS);
}
+/* Use this function to get max num channels after netdev was created */
+static inline int mlx5e_get_netdev_max_channels(struct net_device *netdev)
+{
+ return min_t(unsigned int, netdev->num_rx_queues,
+ netdev->num_tx_queues);
+}
+
struct mlx5e_tx_wqe {
struct mlx5_wqe_ctrl_seg ctrl;
struct mlx5_wqe_eth_seg eth;
@@ -205,18 +213,12 @@ struct mlx5e_umr_wqe {
extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
-static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = {
- "rx_cqe_moder",
- "tx_cqe_moder",
- "rx_cqe_compress",
- "rx_striding_rq",
-};
-
enum mlx5e_priv_flag {
MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0),
MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1),
MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2),
MLX5E_PFLAG_RX_STRIDING_RQ = (1 << 3),
+ MLX5E_PFLAG_RX_NO_CSUM_COMPLETE = (1 << 4),
};
#define MLX5E_SET_PFLAG(params, pflag, enable) \
@@ -298,6 +300,7 @@ struct mlx5e_dcbx_dp {
enum {
MLX5E_RQ_STATE_ENABLED,
MLX5E_RQ_STATE_AM,
+ MLX5E_RQ_STATE_NO_CSUM_COMPLETE,
};
struct mlx5e_cq {
@@ -678,7 +681,7 @@ struct mlx5e_priv {
struct work_struct update_carrier_work;
struct work_struct set_rx_mode_work;
struct work_struct tx_timeout_work;
- struct delayed_work update_stats_work;
+ struct work_struct update_stats_work;
struct mlx5_core_dev *mdev;
struct net_device *netdev;
@@ -703,7 +706,7 @@ struct mlx5e_priv {
};
struct mlx5e_profile {
- void (*init)(struct mlx5_core_dev *mdev,
+ int (*init)(struct mlx5_core_dev *mdev,
struct net_device *netdev,
const struct mlx5e_profile *profile, void *ppriv);
void (*cleanup)(struct mlx5e_priv *priv);
@@ -715,7 +718,6 @@ struct mlx5e_profile {
void (*disable)(struct mlx5e_priv *priv);
void (*update_stats)(struct mlx5e_priv *priv);
void (*update_carrier)(struct mlx5e_priv *priv);
- int (*max_nch)(struct mlx5_core_dev *mdev);
struct {
mlx5e_fp_handle_rx_cqe handle_rx_cqe;
mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe;
@@ -906,10 +908,16 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb);
/* common netdev helpers */
+void mlx5e_create_q_counters(struct mlx5e_priv *priv);
+void mlx5e_destroy_q_counters(struct mlx5e_priv *priv);
+int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
+ struct mlx5e_rq *drop_rq);
+void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq);
+
int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv);
-int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv);
-void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv);
+int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
+void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv);
@@ -925,8 +933,8 @@ int mlx5e_create_tises(struct mlx5e_priv *priv);
void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv);
int mlx5e_close(struct net_device *netdev);
int mlx5e_open(struct net_device *netdev);
-void mlx5e_update_stats_work(struct work_struct *work);
+void mlx5e_queue_update_stats(struct mlx5e_priv *priv);
int mlx5e_bits_invert(unsigned long a, int size);
typedef int (*change_hw_mtu_cb)(struct mlx5e_priv *priv);
@@ -953,21 +961,32 @@ int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv,
struct ethtool_coalesce *coal);
int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv,
struct ethtool_coalesce *coal);
+u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv);
+u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv);
int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv,
struct ethtool_ts_info *info);
int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv,
struct ethtool_flash *flash);
/* mlx5e generic netdev management API */
+int mlx5e_netdev_init(struct net_device *netdev,
+ struct mlx5e_priv *priv,
+ struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv);
+void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv);
struct net_device*
mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile,
- void *ppriv);
+ int nch, void *ppriv);
int mlx5e_attach_netdev(struct mlx5e_priv *priv);
void mlx5e_detach_netdev(struct mlx5e_priv *priv);
void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
u16 max_channels, u16 mtu);
+void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params);
+void mlx5e_build_rss_params(struct mlx5e_params *params);
u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev);
void mlx5e_rx_dim_work(struct work_struct *work);
void mlx5e_tx_dim_work(struct work_struct *work);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
index 24e3b564964f..023dc4bccd28 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c
@@ -235,3 +235,211 @@ out:
kfree(out);
return err;
}
+
+static u32 fec_supported_speeds[] = {
+ 10000,
+ 40000,
+ 25000,
+ 50000,
+ 56000,
+ 100000
+};
+
+#define MLX5E_FEC_SUPPORTED_SPEEDS ARRAY_SIZE(fec_supported_speeds)
+
+/* get/set FEC admin field for a given speed */
+static int mlx5e_fec_admin_field(u32 *pplm,
+ u8 *fec_policy,
+ bool write,
+ u32 speed)
+{
+ switch (speed) {
+ case 10000:
+ case 40000:
+ if (!write)
+ *fec_policy = MLX5_GET(pplm_reg, pplm,
+ fec_override_cap_10g_40g);
+ else
+ MLX5_SET(pplm_reg, pplm,
+ fec_override_admin_10g_40g, *fec_policy);
+ break;
+ case 25000:
+ if (!write)
+ *fec_policy = MLX5_GET(pplm_reg, pplm,
+ fec_override_admin_25g);
+ else
+ MLX5_SET(pplm_reg, pplm,
+ fec_override_admin_25g, *fec_policy);
+ break;
+ case 50000:
+ if (!write)
+ *fec_policy = MLX5_GET(pplm_reg, pplm,
+ fec_override_admin_50g);
+ else
+ MLX5_SET(pplm_reg, pplm,
+ fec_override_admin_50g, *fec_policy);
+ break;
+ case 56000:
+ if (!write)
+ *fec_policy = MLX5_GET(pplm_reg, pplm,
+ fec_override_admin_56g);
+ else
+ MLX5_SET(pplm_reg, pplm,
+ fec_override_admin_56g, *fec_policy);
+ break;
+ case 100000:
+ if (!write)
+ *fec_policy = MLX5_GET(pplm_reg, pplm,
+ fec_override_admin_100g);
+ else
+ MLX5_SET(pplm_reg, pplm,
+ fec_override_admin_100g, *fec_policy);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* returns FEC capabilities for a given speed */
+static int mlx5e_get_fec_cap_field(u32 *pplm,
+ u8 *fec_cap,
+ u32 speed)
+{
+ switch (speed) {
+ case 10000:
+ case 40000:
+ *fec_cap = MLX5_GET(pplm_reg, pplm,
+ fec_override_admin_10g_40g);
+ break;
+ case 25000:
+ *fec_cap = MLX5_GET(pplm_reg, pplm,
+ fec_override_cap_25g);
+ break;
+ case 50000:
+ *fec_cap = MLX5_GET(pplm_reg, pplm,
+ fec_override_cap_50g);
+ break;
+ case 56000:
+ *fec_cap = MLX5_GET(pplm_reg, pplm,
+ fec_override_cap_56g);
+ break;
+ case 100000:
+ *fec_cap = MLX5_GET(pplm_reg, pplm,
+ fec_override_cap_100g);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps)
+{
+ u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(pplm_reg);
+ u32 current_fec_speed;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, pcam_reg))
+ return -EOPNOTSUPP;
+
+ if (!MLX5_CAP_PCAM_REG(dev, pplm))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(pplm_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
+ if (err)
+ return err;
+
+ err = mlx5e_port_linkspeed(dev, &current_fec_speed);
+ if (err)
+ return err;
+
+ return mlx5e_get_fec_cap_field(out, fec_caps, current_fec_speed);
+}
+
+int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active,
+ u8 *fec_configured_mode)
+{
+ u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(pplm_reg);
+ u32 link_speed;
+ int err;
+
+ if (!MLX5_CAP_GEN(dev, pcam_reg))
+ return -EOPNOTSUPP;
+
+ if (!MLX5_CAP_PCAM_REG(dev, pplm))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(pplm_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
+ if (err)
+ return err;
+
+ *fec_mode_active = MLX5_GET(pplm_reg, out, fec_mode_active);
+
+ if (!fec_configured_mode)
+ return 0;
+
+ err = mlx5e_port_linkspeed(dev, &link_speed);
+ if (err)
+ return err;
+
+ return mlx5e_fec_admin_field(out, fec_configured_mode, 0, link_speed);
+}
+
+int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy)
+{
+ bool fec_mode_not_supp_in_speed = false;
+ u8 no_fec_policy = BIT(MLX5E_FEC_NOFEC);
+ u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(pplm_reg);
+ u32 current_fec_speed;
+ u8 fec_caps = 0;
+ int err;
+ int i;
+
+ if (!MLX5_CAP_GEN(dev, pcam_reg))
+ return -EOPNOTSUPP;
+
+ if (!MLX5_CAP_PCAM_REG(dev, pplm))
+ return -EOPNOTSUPP;
+
+ MLX5_SET(pplm_reg, in, local_port, 1);
+ err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0);
+ if (err)
+ return err;
+
+ err = mlx5e_port_linkspeed(dev, &current_fec_speed);
+ if (err)
+ return err;
+
+ memset(in, 0, sz);
+ MLX5_SET(pplm_reg, in, local_port, 1);
+ for (i = 0; i < MLX5E_FEC_SUPPORTED_SPEEDS && !!fec_policy; i++) {
+ mlx5e_get_fec_cap_field(out, &fec_caps, fec_supported_speeds[i]);
+ /* policy supported for link speed */
+ if (!!(fec_caps & fec_policy)) {
+ mlx5e_fec_admin_field(in, &fec_policy, 1,
+ fec_supported_speeds[i]);
+ } else {
+ if (fec_supported_speeds[i] == current_fec_speed)
+ return -EOPNOTSUPP;
+ mlx5e_fec_admin_field(in, &no_fec_policy, 1,
+ fec_supported_speeds[i]);
+ fec_mode_not_supp_in_speed = true;
+ }
+ }
+
+ if (fec_mode_not_supp_in_speed)
+ mlx5_core_dbg(dev,
+ "FEC policy 0x%x is not supported for some speeds",
+ fec_policy);
+
+ return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 1);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
index f8cbd8194179..cd2160b8c9bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h
@@ -45,4 +45,16 @@ int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out);
int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in);
int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer);
int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer);
+
+int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps);
+int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active,
+ u8 *fec_configured_mode);
+int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy);
+
+enum {
+ MLX5E_FEC_NOFEC,
+ MLX5E_FEC_FIRECODE,
+ MLX5E_FEC_RS_528_514,
+};
+
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index 45cdde694d20..8657e0f26995 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -543,8 +543,11 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
- netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n",
- __func__, arfs_rule->filter_id, arfs_rule->rxq, err);
+ priv->channel_stats[arfs_rule->rxq].rq.arfs_err++;
+ mlx5e_dbg(HW, priv,
+ "%s: add rule(filter id=%d, rq idx=%d, ip proto=0x%x) failed,err=%d\n",
+ __func__, arfs_rule->filter_id, arfs_rule->rxq,
+ tuple->ip_proto, err);
}
out:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index db3278cc052b..3078491cc0d0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -153,7 +153,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
if (enable_uc_lb)
MLX5_SET(modify_tir_in, in, ctx.self_lb_block,
- MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+ MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST);
MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 98dd3e0ada72..3e770abfd802 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -135,6 +135,14 @@ void mlx5e_build_ptys2ethtool_map(void)
ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT);
}
+static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = {
+ "rx_cqe_moder",
+ "tx_cqe_moder",
+ "rx_cqe_compress",
+ "rx_striding_rq",
+ "rx_no_csum_complete",
+};
+
int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset)
{
int i, num_stats = 0;
@@ -311,7 +319,7 @@ static int mlx5e_set_ringparam(struct net_device *dev,
void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv,
struct ethtool_channels *ch)
{
- ch->max_combined = priv->profile->max_nch(priv->mdev);
+ ch->max_combined = mlx5e_get_netdev_max_channels(priv->netdev);
ch->combined_count = priv->channels.params.num_channels;
}
@@ -539,6 +547,70 @@ static void ptys2ethtool_adver_link(unsigned long *advertising_modes,
__ETHTOOL_LINK_MODE_MASK_NBITS);
}
+static const u32 pplm_fec_2_ethtool[] = {
+ [MLX5E_FEC_NOFEC] = ETHTOOL_FEC_OFF,
+ [MLX5E_FEC_FIRECODE] = ETHTOOL_FEC_BASER,
+ [MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS,
+};
+
+static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size)
+{
+ int mode = 0;
+
+ if (!fec_mode)
+ return ETHTOOL_FEC_AUTO;
+
+ mode = find_first_bit(&fec_mode, size);
+
+ if (mode < ARRAY_SIZE(pplm_fec_2_ethtool))
+ return pplm_fec_2_ethtool[mode];
+
+ return 0;
+}
+
+/* we use ETHTOOL_FEC_* offset and apply it to ETHTOOL_LINK_MODE_FEC_*_BIT */
+static u32 ethtool_fec2ethtool_caps(u_long ethtool_fec_code)
+{
+ u32 offset;
+
+ offset = find_first_bit(&ethtool_fec_code, sizeof(u32));
+ offset -= ETHTOOL_FEC_OFF_BIT;
+ offset += ETHTOOL_LINK_MODE_FEC_NONE_BIT;
+
+ return offset;
+}
+
+static int get_fec_supported_advertised(struct mlx5_core_dev *dev,
+ struct ethtool_link_ksettings *link_ksettings)
+{
+ u_long fec_caps = 0;
+ u32 active_fec = 0;
+ u32 offset;
+ u32 bitn;
+ int err;
+
+ err = mlx5e_get_fec_caps(dev, (u8 *)&fec_caps);
+ if (err)
+ return (err == -EOPNOTSUPP) ? 0 : err;
+
+ err = mlx5e_get_fec_mode(dev, &active_fec, NULL);
+ if (err)
+ return err;
+
+ for_each_set_bit(bitn, &fec_caps, ARRAY_SIZE(pplm_fec_2_ethtool)) {
+ u_long ethtool_bitmask = pplm_fec_2_ethtool[bitn];
+
+ offset = ethtool_fec2ethtool_caps(ethtool_bitmask);
+ __set_bit(offset, link_ksettings->link_modes.supported);
+ }
+
+ active_fec = pplm2ethtool_fec(active_fec, sizeof(u32) * BITS_PER_BYTE);
+ offset = ethtool_fec2ethtool_caps(active_fec);
+ __set_bit(offset, link_ksettings->link_modes.advertising);
+
+ return 0;
+}
+
static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings *link_ksettings,
u32 eth_proto_cap,
u8 connector_type)
@@ -734,7 +806,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
if (err) {
netdev_err(netdev, "%s: query port ptys failed: %d\n",
__func__, err);
- goto err_query_ptys;
+ goto err_query_regs;
}
eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability);
@@ -770,11 +842,17 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev,
AUTONEG_ENABLE;
ethtool_link_ksettings_add_link_mode(link_ksettings, supported,
Autoneg);
+
+ err = get_fec_supported_advertised(mdev, link_ksettings);
+ if (err)
+ netdev_dbg(netdev, "%s: FEC caps query failed: %d\n",
+ __func__, err);
+
if (!an_disable_admin)
ethtool_link_ksettings_add_link_mode(link_ksettings,
advertising, Autoneg);
-err_query_ptys:
+err_query_regs:
return err;
}
@@ -852,18 +930,30 @@ out:
return err;
}
+u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv)
+{
+ return sizeof(priv->channels.params.toeplitz_hash_key);
+}
+
static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- return sizeof(priv->channels.params.toeplitz_hash_key);
+ return mlx5e_ethtool_get_rxfh_key_size(priv);
}
-static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev)
+u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv)
{
return MLX5E_INDIR_RQT_SIZE;
}
+static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_rxfh_indir_size(priv);
+}
+
static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
u8 *hfunc)
{
@@ -1257,6 +1347,58 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
return mlx5_set_port_wol(mdev, mlx5_wol_mode);
}
+static int mlx5e_get_fecparam(struct net_device *netdev,
+ struct ethtool_fecparam *fecparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 fec_configured = 0;
+ u32 fec_active = 0;
+ int err;
+
+ err = mlx5e_get_fec_mode(mdev, &fec_active, &fec_configured);
+
+ if (err)
+ return err;
+
+ fecparam->active_fec = pplm2ethtool_fec((u_long)fec_active,
+ sizeof(u32) * BITS_PER_BYTE);
+
+ if (!fecparam->active_fec)
+ return -EOPNOTSUPP;
+
+ fecparam->fec = pplm2ethtool_fec((u_long)fec_configured,
+ sizeof(u8) * BITS_PER_BYTE);
+
+ return 0;
+}
+
+static int mlx5e_set_fecparam(struct net_device *netdev,
+ struct ethtool_fecparam *fecparam)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5_core_dev *mdev = priv->mdev;
+ u8 fec_policy = 0;
+ int mode;
+ int err;
+
+ for (mode = 0; mode < ARRAY_SIZE(pplm_fec_2_ethtool); mode++) {
+ if (!(pplm_fec_2_ethtool[mode] & fecparam->fec))
+ continue;
+ fec_policy |= (1 << mode);
+ break;
+ }
+
+ err = mlx5e_set_fec_mode(mdev, fec_policy);
+
+ if (err)
+ return err;
+
+ mlx5_toggle_port_link(mdev);
+
+ return 0;
+}
+
static u32 mlx5e_get_msglevel(struct net_device *dev)
{
return ((struct mlx5e_priv *)netdev_priv(dev))->msglevel;
@@ -1512,6 +1654,27 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
return 0;
}
+static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+ struct mlx5e_channels *channels = &priv->channels;
+ struct mlx5e_channel *c;
+ int i;
+
+ if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+ return 0;
+
+ for (i = 0; i < channels->num; i++) {
+ c = channels->c[i];
+ if (enable)
+ __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
+ else
+ __clear_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
+ }
+
+ return 0;
+}
+
static int mlx5e_handle_pflag(struct net_device *netdev,
u32 wanted_flags,
enum mlx5e_priv_flag flag,
@@ -1563,6 +1726,12 @@ static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags)
err = mlx5e_handle_pflag(netdev, pflags,
MLX5E_PFLAG_RX_STRIDING_RQ,
set_pflag_rx_striding_rq);
+ if (err)
+ goto out;
+
+ err = mlx5e_handle_pflag(netdev, pflags,
+ MLX5E_PFLAG_RX_NO_CSUM_COMPLETE,
+ set_pflag_rx_no_csum_complete);
out:
mutex_unlock(&priv->state_lock);
@@ -1652,4 +1821,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
.self_test = mlx5e_self_test,
.get_msglevel = mlx5e_get_msglevel,
.set_msglevel = mlx5e_set_msglevel,
+ .get_fecparam = mlx5e_get_fecparam,
+ .set_fecparam = mlx5e_set_fecparam,
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 41cde926cdab..c18dcebe1462 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -131,14 +131,14 @@ set_ip4(void *headers_c, void *headers_v, __be32 ip4src_m,
if (ip4src_m) {
memcpy(MLX5E_FTE_ADDR_OF(headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4),
&ip4src_v, sizeof(ip4src_v));
- memset(MLX5E_FTE_ADDR_OF(headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4),
- 0xff, sizeof(ip4src_m));
+ memcpy(MLX5E_FTE_ADDR_OF(headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4),
+ &ip4src_m, sizeof(ip4src_m));
}
if (ip4dst_m) {
memcpy(MLX5E_FTE_ADDR_OF(headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
&ip4dst_v, sizeof(ip4dst_v));
- memset(MLX5E_FTE_ADDR_OF(headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
- 0xff, sizeof(ip4dst_m));
+ memcpy(MLX5E_FTE_ADDR_OF(headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
+ &ip4dst_m, sizeof(ip4dst_m));
}
MLX5E_FTE_SET(headers_c, ethertype, 0xffff);
@@ -173,11 +173,11 @@ set_tcp(void *headers_c, void *headers_v, __be16 psrc_m, __be16 psrc_v,
__be16 pdst_m, __be16 pdst_v)
{
if (psrc_m) {
- MLX5E_FTE_SET(headers_c, tcp_sport, 0xffff);
+ MLX5E_FTE_SET(headers_c, tcp_sport, ntohs(psrc_m));
MLX5E_FTE_SET(headers_v, tcp_sport, ntohs(psrc_v));
}
if (pdst_m) {
- MLX5E_FTE_SET(headers_c, tcp_dport, 0xffff);
+ MLX5E_FTE_SET(headers_c, tcp_dport, ntohs(pdst_m));
MLX5E_FTE_SET(headers_v, tcp_dport, ntohs(pdst_v));
}
@@ -190,12 +190,12 @@ set_udp(void *headers_c, void *headers_v, __be16 psrc_m, __be16 psrc_v,
__be16 pdst_m, __be16 pdst_v)
{
if (psrc_m) {
- MLX5E_FTE_SET(headers_c, udp_sport, 0xffff);
+ MLX5E_FTE_SET(headers_c, udp_sport, ntohs(psrc_m));
MLX5E_FTE_SET(headers_v, udp_sport, ntohs(psrc_v));
}
if (pdst_m) {
- MLX5E_FTE_SET(headers_c, udp_dport, 0xffff);
+ MLX5E_FTE_SET(headers_c, udp_dport, ntohs(pdst_m));
MLX5E_FTE_SET(headers_v, udp_dport, ntohs(pdst_v));
}
@@ -508,26 +508,14 @@ static int validate_tcpudp4(struct ethtool_rx_flow_spec *fs)
if (l4_mask->tos)
return -EINVAL;
- if (l4_mask->ip4src) {
- if (!all_ones(l4_mask->ip4src))
- return -EINVAL;
+ if (l4_mask->ip4src)
ntuples++;
- }
- if (l4_mask->ip4dst) {
- if (!all_ones(l4_mask->ip4dst))
- return -EINVAL;
+ if (l4_mask->ip4dst)
ntuples++;
- }
- if (l4_mask->psrc) {
- if (!all_ones(l4_mask->psrc))
- return -EINVAL;
+ if (l4_mask->psrc)
ntuples++;
- }
- if (l4_mask->pdst) {
- if (!all_ones(l4_mask->pdst))
- return -EINVAL;
+ if (l4_mask->pdst)
ntuples++;
- }
/* Flow is TCP/UDP */
return ++ntuples;
}
@@ -540,16 +528,10 @@ static int validate_ip4(struct ethtool_rx_flow_spec *fs)
if (l3_mask->l4_4_bytes || l3_mask->tos ||
fs->h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4)
return -EINVAL;
- if (l3_mask->ip4src) {
- if (!all_ones(l3_mask->ip4src))
- return -EINVAL;
+ if (l3_mask->ip4src)
ntuples++;
- }
- if (l3_mask->ip4dst) {
- if (!all_ones(l3_mask->ip4dst))
- return -EINVAL;
+ if (l3_mask->ip4dst)
ntuples++;
- }
if (l3_mask->proto)
ntuples++;
/* Flow is IPv4 */
@@ -588,16 +570,10 @@ static int validate_tcpudp6(struct ethtool_rx_flow_spec *fs)
if (!ipv6_addr_any((struct in6_addr *)l4_mask->ip6dst))
ntuples++;
- if (l4_mask->psrc) {
- if (!all_ones(l4_mask->psrc))
- return -EINVAL;
+ if (l4_mask->psrc)
ntuples++;
- }
- if (l4_mask->pdst) {
- if (!all_ones(l4_mask->pdst))
- return -EINVAL;
+ if (l4_mask->pdst)
ntuples++;
- }
/* Flow is TCP/UDP */
return ++ntuples;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index f291d1bf1558..1243edbedc9e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -272,10 +272,9 @@ static void mlx5e_update_ndo_stats(struct mlx5e_priv *priv)
mlx5e_stats_grps[i].update_stats(priv);
}
-void mlx5e_update_stats_work(struct work_struct *work)
+static void mlx5e_update_stats_work(struct work_struct *work)
{
- struct delayed_work *dwork = to_delayed_work(work);
- struct mlx5e_priv *priv = container_of(dwork, struct mlx5e_priv,
+ struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
update_stats_work);
mutex_lock(&priv->state_lock);
@@ -283,6 +282,17 @@ void mlx5e_update_stats_work(struct work_struct *work)
mutex_unlock(&priv->state_lock);
}
+void mlx5e_queue_update_stats(struct mlx5e_priv *priv)
+{
+ if (!priv->profile->update_stats)
+ return;
+
+ if (unlikely(test_bit(MLX5E_STATE_DESTROYING, &priv->state)))
+ return;
+
+ queue_work(priv->wq, &priv->update_stats_work);
+}
+
static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv,
enum mlx5_dev_event event, unsigned long param)
{
@@ -929,6 +939,9 @@ static int mlx5e_open_rq(struct mlx5e_channel *c,
if (params->rx_dim_enabled)
__set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
+ if (params->pflags & MLX5E_PFLAG_RX_NO_CSUM_COMPLETE)
+ __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
+
return 0;
err_destroy_rq:
@@ -1786,7 +1799,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c,
struct mlx5e_channel_param *cparam)
{
struct mlx5e_priv *priv = c->priv;
- int err, tc, max_nch = priv->profile->max_nch(priv->mdev);
+ int err, tc, max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
for (tc = 0; tc < params->num_tc; tc++) {
int txq_ix = c->ix + tc * max_nch;
@@ -2426,7 +2439,7 @@ int mlx5e_create_direct_rqts(struct mlx5e_priv *priv)
int err;
int ix;
- for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) {
+ for (ix = 0; ix < mlx5e_get_netdev_max_channels(priv->netdev); ix++) {
rqt = &priv->direct_tir[ix].rqt;
err = mlx5e_create_rqt(priv, 1 /*size */, rqt);
if (err)
@@ -2447,7 +2460,7 @@ void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv)
{
int i;
- for (i = 0; i < priv->profile->max_nch(priv->mdev); i++)
+ for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++)
mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt);
}
@@ -2541,7 +2554,7 @@ static void mlx5e_redirect_rqts(struct mlx5e_priv *priv,
mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp);
}
- for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) {
+ for (ix = 0; ix < mlx5e_get_netdev_max_channels(priv->netdev); ix++) {
struct mlx5e_redirect_rqt_param direct_rrp = {
.is_rss = false,
{
@@ -2742,7 +2755,7 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
goto free_in;
}
- for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) {
+ for (ix = 0; ix < mlx5e_get_netdev_max_channels(priv->netdev); ix++) {
err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn,
in, inlen);
if (err)
@@ -2842,7 +2855,7 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev)
static void mlx5e_build_tc2txq_maps(struct mlx5e_priv *priv)
{
- int max_nch = priv->profile->max_nch(priv->mdev);
+ int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
int i, tc;
for (i = 0; i < max_nch; i++)
@@ -2954,9 +2967,7 @@ int mlx5e_open_locked(struct net_device *netdev)
if (priv->profile->update_carrier)
priv->profile->update_carrier(priv);
- if (priv->profile->update_stats)
- queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
-
+ mlx5e_queue_update_stats(priv);
return 0;
err_clear_state_opened_flag:
@@ -3049,8 +3060,8 @@ static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev,
return mlx5e_alloc_cq_common(mdev, param, cq);
}
-static int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
- struct mlx5e_rq *drop_rq)
+int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
+ struct mlx5e_rq *drop_rq)
{
struct mlx5_core_dev *mdev = priv->mdev;
struct mlx5e_cq_param cq_param = {};
@@ -3094,7 +3105,7 @@ err_free_cq:
return err;
}
-static void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq)
+void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq)
{
mlx5e_destroy_rq(drop_rq);
mlx5e_free_rq(drop_rq);
@@ -3175,7 +3186,7 @@ static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *t
MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
}
-int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv)
+int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
{
struct mlx5e_tir *tir;
void *tirc;
@@ -3202,7 +3213,7 @@ int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv)
}
}
- if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
goto out;
for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) {
@@ -3236,7 +3247,7 @@ err_destroy_inner_tirs:
int mlx5e_create_direct_tirs(struct mlx5e_priv *priv)
{
- int nch = priv->profile->max_nch(priv->mdev);
+ int nch = mlx5e_get_netdev_max_channels(priv->netdev);
struct mlx5e_tir *tir;
void *tirc;
int inlen;
@@ -3273,14 +3284,14 @@ err_destroy_ch_tirs:
return err;
}
-void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv)
+void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
{
int i;
for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]);
- if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
+ if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev))
return;
for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++)
@@ -3289,7 +3300,7 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv)
void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv)
{
- int nch = priv->profile->max_nch(priv->mdev);
+ int nch = mlx5e_get_netdev_max_channels(priv->netdev);
int i;
for (i = 0; i < nch; i++)
@@ -3381,9 +3392,6 @@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
{
struct mlx5e_priv *priv = cb_priv;
- if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
- return -EOPNOTSUPP;
-
switch (type) {
case TC_SETUP_CLSFLOWER:
return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS);
@@ -3438,7 +3446,7 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
struct mlx5e_pport_stats *pstats = &priv->stats.pport;
/* update HW stats in background for next time */
- queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
+ mlx5e_queue_update_stats(priv);
if (mlx5e_is_uplink_rep(priv)) {
stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
@@ -4480,6 +4488,31 @@ static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeo
return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
}
+void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
+ struct mlx5e_params *params)
+{
+ /* Prefer Striding RQ, unless any of the following holds:
+ * - Striding RQ configuration is not possible/supported.
+ * - Slow PCI heuristic.
+ * - Legacy RQ would use linear SKB while Striding RQ would use non-linear.
+ */
+ if (!slow_pci_heuristic(mdev) &&
+ mlx5e_striding_rq_possible(mdev, params) &&
+ (mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ||
+ !mlx5e_rx_is_linear_skb(mdev, params)))
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
+ mlx5e_set_rq_type(mdev, params);
+ mlx5e_init_rq_type_params(mdev, params);
+}
+
+void mlx5e_build_rss_params(struct mlx5e_params *params)
+{
+ params->rss_hfunc = ETH_RSS_HASH_XOR;
+ netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key));
+ mlx5e_build_default_indir_rqt(params->indirection_rqt,
+ MLX5E_INDIR_RQT_SIZE, params->num_channels);
+}
+
void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
u16 max_channels, u16 mtu)
@@ -4503,20 +4536,10 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
params->rx_cqe_compress_def = slow_pci_heuristic(mdev);
MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
+ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, false);
/* RQ */
- /* Prefer Striding RQ, unless any of the following holds:
- * - Striding RQ configuration is not possible/supported.
- * - Slow PCI heuristic.
- * - Legacy RQ would use linear SKB while Striding RQ would use non-linear.
- */
- if (!slow_pci_heuristic(mdev) &&
- mlx5e_striding_rq_possible(mdev, params) &&
- (mlx5e_rx_mpwqe_is_linear_skb(mdev, params) ||
- !mlx5e_rx_is_linear_skb(mdev, params)))
- MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
- mlx5e_set_rq_type(mdev, params);
- mlx5e_init_rq_type_params(mdev, params);
+ mlx5e_build_rq_params(mdev, params);
/* HW LRO */
@@ -4539,37 +4562,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev);
/* RSS */
- params->rss_hfunc = ETH_RSS_HASH_XOR;
- netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key));
- mlx5e_build_default_indir_rqt(params->indirection_rqt,
- MLX5E_INDIR_RQT_SIZE, max_channels);
-}
-
-static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
- struct net_device *netdev,
- const struct mlx5e_profile *profile,
- void *ppriv)
-{
- struct mlx5e_priv *priv = netdev_priv(netdev);
-
- priv->mdev = mdev;
- priv->netdev = netdev;
- priv->profile = profile;
- priv->ppriv = ppriv;
- priv->msglevel = MLX5E_MSG_LEVEL;
- priv->max_opened_tc = 1;
-
- mlx5e_build_nic_params(mdev, &priv->channels.params,
- profile->max_nch(mdev), netdev->mtu);
-
- mutex_init(&priv->state_lock);
-
- INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
- INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
- INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
- INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
-
- mlx5e_timestamp_init(priv);
+ mlx5e_build_rss_params(params);
}
static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
@@ -4707,7 +4700,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
mlx5e_tls_build_netdev(priv);
}
-static void mlx5e_create_q_counters(struct mlx5e_priv *priv)
+void mlx5e_create_q_counters(struct mlx5e_priv *priv)
{
struct mlx5_core_dev *mdev = priv->mdev;
int err;
@@ -4725,7 +4718,7 @@ static void mlx5e_create_q_counters(struct mlx5e_priv *priv)
}
}
-static void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
+void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
{
if (priv->q_counter)
mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter);
@@ -4734,15 +4727,23 @@ static void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
mlx5_core_dealloc_q_counter(priv->mdev, priv->drop_rq_q_counter);
}
-static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
- struct net_device *netdev,
- const struct mlx5e_profile *profile,
- void *ppriv)
+static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
int err;
- mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv);
+ err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv);
+ if (err)
+ return err;
+
+ mlx5e_build_nic_params(mdev, &priv->channels.params,
+ mlx5e_get_netdev_max_channels(netdev), netdev->mtu);
+
+ mlx5e_timestamp_init(priv);
+
err = mlx5e_ipsec_init(priv);
if (err)
mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err);
@@ -4751,12 +4752,15 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
mlx5e_build_nic_netdev(netdev);
mlx5e_build_tc2txq_maps(priv);
+
+ return 0;
}
static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
{
mlx5e_tls_cleanup(priv);
mlx5e_ipsec_cleanup(priv);
+ mlx5e_netdev_cleanup(priv->netdev, priv);
}
static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
@@ -4764,15 +4768,23 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
struct mlx5_core_dev *mdev = priv->mdev;
int err;
+ mlx5e_create_q_counters(priv);
+
+ err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
+ if (err) {
+ mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
+ goto err_destroy_q_counters;
+ }
+
err = mlx5e_create_indirect_rqt(priv);
if (err)
- return err;
+ goto err_close_drop_rq;
err = mlx5e_create_direct_rqts(priv);
if (err)
goto err_destroy_indirect_rqts;
- err = mlx5e_create_indirect_tirs(priv);
+ err = mlx5e_create_indirect_tirs(priv, true);
if (err)
goto err_destroy_direct_rqts;
@@ -4797,11 +4809,15 @@ err_destroy_flow_steering:
err_destroy_direct_tirs:
mlx5e_destroy_direct_tirs(priv);
err_destroy_indirect_tirs:
- mlx5e_destroy_indirect_tirs(priv);
+ mlx5e_destroy_indirect_tirs(priv, true);
err_destroy_direct_rqts:
mlx5e_destroy_direct_rqts(priv);
err_destroy_indirect_rqts:
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+err_close_drop_rq:
+ mlx5e_close_drop_rq(&priv->drop_rq);
+err_destroy_q_counters:
+ mlx5e_destroy_q_counters(priv);
return err;
}
@@ -4810,9 +4826,11 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
mlx5e_tc_nic_cleanup(priv);
mlx5e_destroy_flow_steering(priv);
mlx5e_destroy_direct_tirs(priv);
- mlx5e_destroy_indirect_tirs(priv);
+ mlx5e_destroy_indirect_tirs(priv, true);
mlx5e_destroy_direct_rqts(priv);
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+ mlx5e_close_drop_rq(&priv->drop_rq);
+ mlx5e_destroy_q_counters(priv);
}
static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
@@ -4905,7 +4923,6 @@ static const struct mlx5e_profile mlx5e_nic_profile = {
.enable = mlx5e_nic_enable,
.disable = mlx5e_nic_disable,
.update_stats = mlx5e_update_ndo_stats,
- .max_nch = mlx5e_get_max_num_channels,
.update_carrier = mlx5e_update_carrier,
.rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe,
.rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
@@ -4914,13 +4931,53 @@ static const struct mlx5e_profile mlx5e_nic_profile = {
/* mlx5e generic netdev management API (move to en_common.c) */
+/* mlx5e_netdev_init/cleanup must be called from profile->init/cleanup callbacks */
+int mlx5e_netdev_init(struct net_device *netdev,
+ struct mlx5e_priv *priv,
+ struct mlx5_core_dev *mdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
+{
+ /* priv init */
+ priv->mdev = mdev;
+ priv->netdev = netdev;
+ priv->profile = profile;
+ priv->ppriv = ppriv;
+ priv->msglevel = MLX5E_MSG_LEVEL;
+ priv->max_opened_tc = 1;
+
+ mutex_init(&priv->state_lock);
+ INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
+ INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
+ INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
+ INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
+
+ priv->wq = create_singlethread_workqueue("mlx5e");
+ if (!priv->wq)
+ return -ENOMEM;
+
+ /* netdev init */
+ netif_carrier_off(netdev);
+
+#ifdef CONFIG_MLX5_EN_ARFS
+ netdev->rx_cpu_rmap = mdev->rmap;
+#endif
+
+ return 0;
+}
+
+void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv)
+{
+ destroy_workqueue(priv->wq);
+}
+
struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
const struct mlx5e_profile *profile,
+ int nch,
void *ppriv)
{
- int nch = profile->max_nch(mdev);
struct net_device *netdev;
- struct mlx5e_priv *priv;
+ int err;
netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv),
nch * profile->max_tc,
@@ -4930,25 +4987,15 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
return NULL;
}
-#ifdef CONFIG_MLX5_EN_ARFS
- netdev->rx_cpu_rmap = mdev->rmap;
-#endif
-
- profile->init(mdev, netdev, profile, ppriv);
-
- netif_carrier_off(netdev);
-
- priv = netdev_priv(netdev);
-
- priv->wq = create_singlethread_workqueue("mlx5e");
- if (!priv->wq)
- goto err_cleanup_nic;
+ err = profile->init(mdev, netdev, profile, ppriv);
+ if (err) {
+ mlx5_core_err(mdev, "failed to init mlx5e profile %d\n", err);
+ goto err_free_netdev;
+ }
return netdev;
-err_cleanup_nic:
- if (profile->cleanup)
- profile->cleanup(priv);
+err_free_netdev:
free_netdev(netdev);
return NULL;
@@ -4956,7 +5003,6 @@ err_cleanup_nic:
int mlx5e_attach_netdev(struct mlx5e_priv *priv)
{
- struct mlx5_core_dev *mdev = priv->mdev;
const struct mlx5e_profile *profile;
int err;
@@ -4967,28 +5013,16 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv)
if (err)
goto out;
- mlx5e_create_q_counters(priv);
-
- err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
- if (err) {
- mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
- goto err_destroy_q_counters;
- }
-
err = profile->init_rx(priv);
if (err)
- goto err_close_drop_rq;
+ goto err_cleanup_tx;
if (profile->enable)
profile->enable(priv);
return 0;
-err_close_drop_rq:
- mlx5e_close_drop_rq(&priv->drop_rq);
-
-err_destroy_q_counters:
- mlx5e_destroy_q_counters(priv);
+err_cleanup_tx:
profile->cleanup_tx(priv);
out:
@@ -5006,10 +5040,8 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv)
flush_workqueue(priv->wq);
profile->cleanup_rx(priv);
- mlx5e_close_drop_rq(&priv->drop_rq);
- mlx5e_destroy_q_counters(priv);
profile->cleanup_tx(priv);
- cancel_delayed_work_sync(&priv->update_stats_work);
+ cancel_work_sync(&priv->update_stats_work);
}
void mlx5e_destroy_netdev(struct mlx5e_priv *priv)
@@ -5017,7 +5049,6 @@ void mlx5e_destroy_netdev(struct mlx5e_priv *priv)
const struct mlx5e_profile *profile = priv->profile;
struct net_device *netdev = priv->netdev;
- destroy_workqueue(priv->wq);
if (profile->cleanup)
profile->cleanup(priv);
free_netdev(netdev);
@@ -5066,6 +5097,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
void *rpriv = NULL;
void *priv;
int err;
+ int nch;
err = mlx5e_check_required_hca_cap(mdev);
if (err)
@@ -5081,7 +5113,8 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
}
#endif
- netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, rpriv);
+ nch = mlx5e_get_max_num_channels(mdev);
+ netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, nch, rpriv);
if (!netdev) {
mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
goto err_free_rpriv;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index c9cc9747d21d..c3c657548824 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -46,8 +46,6 @@
#define MLX5E_REP_PARAMS_LOG_SQ_SIZE \
max(0x6, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)
-#define MLX5E_REP_PARAMS_LOG_RQ_SIZE \
- max(0x6, MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)
static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
@@ -182,12 +180,108 @@ static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset)
}
}
+static void mlx5e_rep_get_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_ringparam(priv, param);
+}
+
+static int mlx5e_rep_set_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *param)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ return mlx5e_ethtool_set_ringparam(priv, param);
+}
+
+static int mlx5e_replace_rep_vport_rx_rule(struct mlx5e_priv *priv,
+ struct mlx5_flow_destination *dest)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5e_rep_priv *rpriv = priv->ppriv;
+ struct mlx5_eswitch_rep *rep = rpriv->rep;
+ struct mlx5_flow_handle *flow_rule;
+
+ flow_rule = mlx5_eswitch_create_vport_rx_rule(esw,
+ rep->vport,
+ dest);
+ if (IS_ERR(flow_rule))
+ return PTR_ERR(flow_rule);
+
+ mlx5_del_flow_rules(rpriv->vport_rx_rule);
+ rpriv->vport_rx_rule = flow_rule;
+ return 0;
+}
+
+static void mlx5e_rep_get_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+
+ mlx5e_ethtool_get_channels(priv, ch);
+}
+
+static int mlx5e_rep_set_channels(struct net_device *dev,
+ struct ethtool_channels *ch)
+{
+ struct mlx5e_priv *priv = netdev_priv(dev);
+ u16 curr_channels_amount = priv->channels.params.num_channels;
+ u32 new_channels_amount = ch->combined_count;
+ struct mlx5_flow_destination new_dest;
+ int err = 0;
+
+ err = mlx5e_ethtool_set_channels(priv, ch);
+ if (err)
+ return err;
+
+ if (curr_channels_amount == 1 && new_channels_amount > 1) {
+ new_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ new_dest.ft = priv->fs.ttc.ft.t;
+ } else if (new_channels_amount == 1 && curr_channels_amount > 1) {
+ new_dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ new_dest.tir_num = priv->direct_tir[0].tirn;
+ } else {
+ return 0;
+ }
+
+ err = mlx5e_replace_rep_vport_rx_rule(priv, &new_dest);
+ if (err) {
+ netdev_warn(priv->netdev, "Failed to update vport rx rule, when going from (%d) channels to (%d) channels\n",
+ curr_channels_amount, new_channels_amount);
+ return err;
+ }
+
+ return 0;
+}
+
+static u32 mlx5e_rep_get_rxfh_key_size(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_rxfh_key_size(priv);
+}
+
+static u32 mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev)
+{
+ struct mlx5e_priv *priv = netdev_priv(netdev);
+
+ return mlx5e_ethtool_get_rxfh_indir_size(priv);
+}
+
static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
.get_drvinfo = mlx5e_rep_get_drvinfo,
.get_link = ethtool_op_get_link,
.get_strings = mlx5e_rep_get_strings,
.get_sset_count = mlx5e_rep_get_sset_count,
.get_ethtool_stats = mlx5e_rep_get_ethtool_stats,
+ .get_ringparam = mlx5e_rep_get_ringparam,
+ .set_ringparam = mlx5e_rep_set_ringparam,
+ .get_channels = mlx5e_rep_get_channels,
+ .set_channels = mlx5e_rep_set_channels,
+ .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size,
+ .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size,
};
int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
@@ -759,9 +853,6 @@ static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data,
{
struct mlx5e_priv *priv = cb_priv;
- if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
- return -EOPNOTSUPP;
-
switch (type) {
case TC_SETUP_CLSFLOWER:
return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS);
@@ -775,9 +866,6 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
{
struct mlx5e_priv *priv = cb_priv;
- if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data))
- return -EOPNOTSUPP;
-
switch (type) {
case TC_SETUP_CLSFLOWER:
return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS);
@@ -898,8 +986,7 @@ mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
struct mlx5e_priv *priv = netdev_priv(dev);
/* update HW stats in background for next time */
- queue_delayed_work(priv->wq, &priv->update_stats_work, 0);
-
+ mlx5e_queue_update_stats(priv);
memcpy(stats, &priv->stats.vf_vport, sizeof(*stats));
}
@@ -934,16 +1021,20 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev,
params->hard_mtu = MLX5E_ETH_HARD_MTU;
params->sw_mtu = mtu;
params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE;
- params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC;
- params->log_rq_mtu_frames = MLX5E_REP_PARAMS_LOG_RQ_SIZE;
+ /* RQ */
+ mlx5e_build_rq_params(mdev, params);
+
+ /* CQ moderation params */
params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
params->num_tc = 1;
- params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
+
+ /* RSS */
+ mlx5e_build_rss_params(params);
}
static void mlx5e_build_rep_netdev(struct net_device *netdev)
@@ -963,6 +1054,16 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL;
netdev->hw_features |= NETIF_F_HW_TC;
+ netdev->hw_features |= NETIF_F_SG;
+ netdev->hw_features |= NETIF_F_IP_CSUM;
+ netdev->hw_features |= NETIF_F_IPV6_CSUM;
+ netdev->hw_features |= NETIF_F_GRO;
+ netdev->hw_features |= NETIF_F_TSO;
+ netdev->hw_features |= NETIF_F_TSO6;
+ netdev->hw_features |= NETIF_F_RXCSUM;
+
+ netdev->features |= netdev->hw_features;
+
eth_hw_addr_random(netdev);
netdev->min_mtu = ETH_MIN_MTU;
@@ -970,63 +1071,127 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
}
-static void mlx5e_init_rep(struct mlx5_core_dev *mdev,
- struct net_device *netdev,
- const struct mlx5e_profile *profile,
- void *ppriv)
+static int mlx5e_init_rep(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
+ int err;
- priv->mdev = mdev;
- priv->netdev = netdev;
- priv->profile = profile;
- priv->ppriv = ppriv;
-
- mutex_init(&priv->state_lock);
+ err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv);
+ if (err)
+ return err;
- INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
- priv->channels.params.num_channels = profile->max_nch(mdev);
+ priv->channels.params.num_channels =
+ mlx5e_get_netdev_max_channels(netdev);
mlx5e_build_rep_params(mdev, &priv->channels.params, netdev->mtu);
mlx5e_build_rep_netdev(netdev);
mlx5e_timestamp_init(priv);
+
+ return 0;
}
-static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
+static void mlx5e_cleanup_rep(struct mlx5e_priv *priv)
+{
+ mlx5e_netdev_cleanup(priv->netdev, priv);
+}
+
+static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv)
+{
+ struct ttc_params ttc_params = {};
+ int tt, err;
+
+ priv->fs.ns = mlx5_get_flow_namespace(priv->mdev,
+ MLX5_FLOW_NAMESPACE_KERNEL);
+
+ /* The inner_ttc in the ttc params is intentionally not set */
+ ttc_params.any_tt_tirn = priv->direct_tir[0].tirn;
+ mlx5e_set_ttc_ft_params(&ttc_params);
+ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
+ ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn;
+
+ err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc);
+ if (err) {
+ netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n", err);
+ return err;
+ }
+ return 0;
+}
+
+static int mlx5e_create_rep_vport_rx_rule(struct mlx5e_priv *priv)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5_flow_handle *flow_rule;
+ struct mlx5_flow_destination dest;
+
+ dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
+ dest.tir_num = priv->direct_tir[0].tirn;
+ flow_rule = mlx5_eswitch_create_vport_rx_rule(esw,
+ rep->vport,
+ &dest);
+ if (IS_ERR(flow_rule))
+ return PTR_ERR(flow_rule);
+ rpriv->vport_rx_rule = flow_rule;
+ return 0;
+}
+
+static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
+{
+ struct mlx5_core_dev *mdev = priv->mdev;
int err;
mlx5e_init_l2_addr(priv);
+ err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
+ if (err) {
+ mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
+ return err;
+ }
+
+ err = mlx5e_create_indirect_rqt(priv);
+ if (err)
+ goto err_close_drop_rq;
+
err = mlx5e_create_direct_rqts(priv);
if (err)
- return err;
+ goto err_destroy_indirect_rqts;
- err = mlx5e_create_direct_tirs(priv);
+ err = mlx5e_create_indirect_tirs(priv, false);
if (err)
goto err_destroy_direct_rqts;
- flow_rule = mlx5_eswitch_create_vport_rx_rule(esw,
- rep->vport,
- priv->direct_tir[0].tirn);
- if (IS_ERR(flow_rule)) {
- err = PTR_ERR(flow_rule);
+ err = mlx5e_create_direct_tirs(priv);
+ if (err)
+ goto err_destroy_indirect_tirs;
+
+ err = mlx5e_create_rep_ttc_table(priv);
+ if (err)
goto err_destroy_direct_tirs;
- }
- rpriv->vport_rx_rule = flow_rule;
+
+ err = mlx5e_create_rep_vport_rx_rule(priv);
+ if (err)
+ goto err_destroy_ttc_table;
return 0;
+err_destroy_ttc_table:
+ mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
err_destroy_direct_tirs:
mlx5e_destroy_direct_tirs(priv);
+err_destroy_indirect_tirs:
+ mlx5e_destroy_indirect_tirs(priv, false);
err_destroy_direct_rqts:
mlx5e_destroy_direct_rqts(priv);
+err_destroy_indirect_rqts:
+ mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+err_close_drop_rq:
+ mlx5e_close_drop_rq(&priv->drop_rq);
return err;
}
@@ -1035,8 +1200,12 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
struct mlx5e_rep_priv *rpriv = priv->ppriv;
mlx5_del_flow_rules(rpriv->vport_rx_rule);
+ mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
mlx5e_destroy_direct_tirs(priv);
+ mlx5e_destroy_indirect_tirs(priv, false);
mlx5e_destroy_direct_rqts(priv);
+ mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+ mlx5e_close_drop_rq(&priv->drop_rq);
}
static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
@@ -1051,23 +1220,17 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
return 0;
}
-static int mlx5e_get_rep_max_num_channels(struct mlx5_core_dev *mdev)
-{
-#define MLX5E_PORT_REPRESENTOR_NCH 1
- return MLX5E_PORT_REPRESENTOR_NCH;
-}
-
static const struct mlx5e_profile mlx5e_rep_profile = {
.init = mlx5e_init_rep,
+ .cleanup = mlx5e_cleanup_rep,
.init_rx = mlx5e_init_rep_rx,
.cleanup_rx = mlx5e_cleanup_rep_rx,
.init_tx = mlx5e_init_rep_tx,
.cleanup_tx = mlx5e_cleanup_nic_tx,
.update_stats = mlx5e_rep_update_hw_counters,
- .max_nch = mlx5e_get_rep_max_num_channels,
.update_carrier = NULL,
.rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep,
- .rx_handlers.handle_rx_cqe_mpwqe = NULL /* Not supported */,
+ .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq,
.max_tc = 1,
};
@@ -1127,13 +1290,14 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
struct mlx5e_rep_priv *rpriv;
struct net_device *netdev;
struct mlx5e_priv *upriv;
- int err;
+ int nch, err;
rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
if (!rpriv)
return -ENOMEM;
- netdev = mlx5e_create_netdev(dev, &mlx5e_rep_profile, rpriv);
+ nch = mlx5e_get_max_num_channels(dev);
+ netdev = mlx5e_create_netdev(dev, &mlx5e_rep_profile, nch, rpriv);
if (!netdev) {
pr_warn("Failed to create representor netdev for vport %d\n",
rep->vport);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 00172dee5339..2f7fb8de6967 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -37,6 +37,7 @@
#include <net/busy_poll.h>
#include <net/ip6_checksum.h>
#include <net/page_pool.h>
+#include <net/inet_ecn.h>
#include "en.h"
#include "en_tc.h"
#include "eswitch.h"
@@ -688,12 +689,29 @@ static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe,
skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht);
}
-static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth)
+static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth,
+ __be16 *proto)
{
- __be16 ethertype = ((struct ethhdr *)skb->data)->h_proto;
+ *proto = ((struct ethhdr *)skb->data)->h_proto;
+ *proto = __vlan_get_protocol(skb, *proto, network_depth);
+ return (*proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6));
+}
+
+static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb)
+{
+ int network_depth = 0;
+ __be16 proto;
+ void *ip;
+ int rc;
- ethertype = __vlan_get_protocol(skb, ethertype, network_depth);
- return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6));
+ if (unlikely(!is_last_ethertype_ip(skb, &network_depth, &proto)))
+ return;
+
+ ip = skb->data + network_depth;
+ rc = ((proto == htons(ETH_P_IP)) ? IP_ECN_set_ce((struct iphdr *)ip) :
+ IP6_ECN_set_ce(skb, (struct ipv6hdr *)ip));
+
+ rq->stats->ecn_mark += !!rc;
}
static __be32 mlx5e_get_fcs(struct sk_buff *skb)
@@ -735,6 +753,14 @@ static __be32 mlx5e_get_fcs(struct sk_buff *skb)
return fcs_bytes;
}
+static u8 get_ip_proto(struct sk_buff *skb, __be16 proto)
+{
+ void *ip_p = skb->data + sizeof(struct ethhdr);
+
+ return (proto == htons(ETH_P_IP)) ? ((struct iphdr *)ip_p)->protocol :
+ ((struct ipv6hdr *)ip_p)->nexthdr;
+}
+
static inline void mlx5e_handle_csum(struct net_device *netdev,
struct mlx5_cqe64 *cqe,
struct mlx5e_rq *rq,
@@ -743,6 +769,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
{
struct mlx5e_rq_stats *stats = rq->stats;
int network_depth = 0;
+ __be16 proto;
if (unlikely(!(netdev->features & NETIF_F_RXCSUM)))
goto csum_none;
@@ -753,7 +780,13 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
return;
}
- if (likely(is_last_ethertype_ip(skb, &network_depth))) {
+ if (unlikely(test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state)))
+ goto csum_unnecessary;
+
+ if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) {
+ if (unlikely(get_ip_proto(skb, proto) == IPPROTO_SCTP))
+ goto csum_unnecessary;
+
skb->ip_summed = CHECKSUM_COMPLETE;
skb->csum = csum_unfold((__force __sum16)cqe->check_sum);
if (network_depth > ETH_HLEN)
@@ -771,8 +804,10 @@ static inline void mlx5e_handle_csum(struct net_device *netdev,
return;
}
+csum_unnecessary:
if (likely((cqe->hds_ip_ext & CQE_L3_OK) &&
- (cqe->hds_ip_ext & CQE_L4_OK))) {
+ ((cqe->hds_ip_ext & CQE_L4_OK) ||
+ (get_cqe_l4_hdr_type(cqe) == CQE_L4_HDR_TYPE_NONE)))) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
if (cqe_is_tunneled(cqe)) {
skb->csum_level = 1;
@@ -788,6 +823,8 @@ csum_none:
stats->csum_none++;
}
+#define MLX5E_CE_BIT_MASK 0x80
+
static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
u32 cqe_bcnt,
struct mlx5e_rq *rq,
@@ -832,6 +869,10 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK;
mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg);
+ /* checking CE bit in cqe - MSB in ml_path field */
+ if (unlikely(cqe->ml_path & MLX5E_CE_BIT_MASK))
+ mlx5e_enable_ecn(rq, skb);
+
skb->protocol = eth_type_trans(skb, netdev);
}
@@ -1228,8 +1269,8 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
u32 cqe_bcnt,
struct sk_buff *skb)
{
- struct mlx5e_rq_stats *stats = rq->stats;
struct hwtstamp_config *tstamp;
+ struct mlx5e_rq_stats *stats;
struct net_device *netdev;
struct mlx5e_priv *priv;
char *pseudo_header;
@@ -1252,6 +1293,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
priv = mlx5i_epriv(netdev);
tstamp = &priv->tstamp;
+ stats = &priv->channel_stats[rq->ix].rq;
g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 6839481f7697..1e55b9c27ffc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -53,6 +53,7 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_ecn_mark) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) },
@@ -92,6 +93,7 @@ static const struct counter_desc sw_stats_desc[] = {
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) },
+ { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_events) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) },
{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) },
@@ -131,7 +133,7 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
memset(s, 0, sizeof(*s));
- for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) {
+ for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) {
struct mlx5e_channel_stats *channel_stats =
&priv->channel_stats[i];
struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats->xdpsq;
@@ -144,6 +146,7 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->rx_bytes += rq_stats->bytes;
s->rx_lro_packets += rq_stats->lro_packets;
s->rx_lro_bytes += rq_stats->lro_bytes;
+ s->rx_ecn_mark += rq_stats->ecn_mark;
s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets;
s->rx_csum_none += rq_stats->csum_none;
s->rx_csum_complete += rq_stats->csum_complete;
@@ -168,6 +171,7 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->rx_cache_busy += rq_stats->cache_busy;
s->rx_cache_waive += rq_stats->cache_waive;
s->rx_congst_umr += rq_stats->congst_umr;
+ s->rx_arfs_err += rq_stats->arfs_err;
s->ch_events += ch_stats->events;
s->ch_poll += ch_stats->poll;
s->ch_arm += ch_stats->arm;
@@ -610,46 +614,82 @@ static const struct counter_desc pport_phy_statistical_stats_desc[] = {
{ "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) },
};
-#define NUM_PPORT_PHY_STATISTICAL_COUNTERS ARRAY_SIZE(pport_phy_statistical_stats_desc)
+static const struct counter_desc
+pport_phy_statistical_err_lanes_stats_desc[] = {
+ { "rx_err_lane_0_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane0) },
+ { "rx_err_lane_1_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane1) },
+ { "rx_err_lane_2_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane2) },
+ { "rx_err_lane_3_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane3) },
+};
+
+#define NUM_PPORT_PHY_STATISTICAL_COUNTERS \
+ ARRAY_SIZE(pport_phy_statistical_stats_desc)
+#define NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS \
+ ARRAY_SIZE(pport_phy_statistical_err_lanes_stats_desc)
static int mlx5e_grp_phy_get_num_stats(struct mlx5e_priv *priv)
{
+ struct mlx5_core_dev *mdev = priv->mdev;
+ int num_stats;
+
/* "1" for link_down_events special counter */
- return MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group) ?
- NUM_PPORT_PHY_STATISTICAL_COUNTERS + 1 : 1;
+ num_stats = 1;
+
+ num_stats += MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group) ?
+ NUM_PPORT_PHY_STATISTICAL_COUNTERS : 0;
+
+ num_stats += MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters) ?
+ NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS : 0;
+
+ return num_stats;
}
static int mlx5e_grp_phy_fill_strings(struct mlx5e_priv *priv, u8 *data,
int idx)
{
+ struct mlx5_core_dev *mdev = priv->mdev;
int i;
strcpy(data + (idx++) * ETH_GSTRING_LEN, "link_down_events_phy");
- if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group))
+ if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group))
return idx;
for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++)
strcpy(data + (idx++) * ETH_GSTRING_LEN,
pport_phy_statistical_stats_desc[i].format);
+
+ if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters))
+ for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++)
+ strcpy(data + (idx++) * ETH_GSTRING_LEN,
+ pport_phy_statistical_err_lanes_stats_desc[i].format);
+
return idx;
}
static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx)
{
+ struct mlx5_core_dev *mdev = priv->mdev;
int i;
/* link_down_events_phy has special handling since it is not stored in __be64 format */
data[idx++] = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters,
counter_set.phys_layer_cntrs.link_down_events);
- if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group))
+ if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group))
return idx;
for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++)
data[idx++] =
MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters,
pport_phy_statistical_stats_desc, i);
+
+ if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters))
+ for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++)
+ data[idx++] =
+ MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters,
+ pport_phy_statistical_err_lanes_stats_desc,
+ i);
return idx;
}
@@ -1144,6 +1184,7 @@ static const struct counter_desc rq_stats_desc[] = {
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_redirect) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, ecn_mark) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) },
@@ -1158,6 +1199,7 @@ static const struct counter_desc rq_stats_desc[] = {
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) },
{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) },
+ { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) },
};
static const struct counter_desc sq_stats_desc[] = {
@@ -1211,7 +1253,7 @@ static const struct counter_desc ch_stats_desc[] = {
static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv)
{
- int max_nch = priv->profile->max_nch(priv->mdev);
+ int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
return (NUM_RQ_STATS * max_nch) +
(NUM_CH_STATS * max_nch) +
@@ -1223,7 +1265,7 @@ static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv)
static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
int idx)
{
- int max_nch = priv->profile->max_nch(priv->mdev);
+ int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
int i, j, tc;
for (i = 0; i < max_nch; i++)
@@ -1258,7 +1300,7 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data,
static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data,
int idx)
{
- int max_nch = priv->profile->max_nch(priv->mdev);
+ int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
int i, j, tc;
for (i = 0; i < max_nch; i++)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index a4c035aedd46..77f74ce11280 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -66,6 +66,7 @@ struct mlx5e_sw_stats {
u64 tx_nop;
u64 rx_lro_packets;
u64 rx_lro_bytes;
+ u64 rx_ecn_mark;
u64 rx_removed_vlan_packets;
u64 rx_csum_unnecessary;
u64 rx_csum_none;
@@ -105,6 +106,7 @@ struct mlx5e_sw_stats {
u64 rx_cache_busy;
u64 rx_cache_waive;
u64 rx_congst_umr;
+ u64 rx_arfs_err;
u64 ch_events;
u64 ch_poll;
u64 ch_arm;
@@ -184,6 +186,7 @@ struct mlx5e_rq_stats {
u64 csum_none;
u64 lro_packets;
u64 lro_bytes;
+ u64 ecn_mark;
u64 removed_vlan_packets;
u64 xdp_drop;
u64 xdp_redirect;
@@ -200,6 +203,7 @@ struct mlx5e_rq_stats {
u64 cache_busy;
u64 cache_waive;
u64 congst_umr;
+ u64 arfs_err;
};
struct mlx5e_sq_stats {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 85796727093e..608025ca5c04 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -61,6 +61,7 @@ struct mlx5_nic_flow_attr {
u32 hairpin_tirn;
u8 match_level;
struct mlx5_flow_table *hairpin_ft;
+ struct mlx5_fc *counter;
};
#define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1)
@@ -73,6 +74,7 @@ enum {
MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2),
MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3),
MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4),
+ MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 5),
};
#define MLX5E_TC_MAX_SPLITS 1
@@ -81,7 +83,7 @@ struct mlx5e_tc_flow {
struct rhash_head node;
struct mlx5e_priv *priv;
u64 cookie;
- u8 flags;
+ u16 flags;
struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
struct list_head encap; /* flows sharing the same encap ID */
struct list_head mod_hdr; /* flows sharing the same mod hdr ID */
@@ -100,11 +102,6 @@ struct mlx5e_tc_flow_parse_attr {
int mirred_ifindex;
};
-enum {
- MLX5_HEADER_TYPE_VXLAN = 0x0,
- MLX5_HEADER_TYPE_NVGRE = 0x1,
-};
-
#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
@@ -532,7 +529,8 @@ static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
#define UNKNOWN_MATCH_PRIO 8
static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
- struct mlx5_flow_spec *spec, u8 *match_prio)
+ struct mlx5_flow_spec *spec, u8 *match_prio,
+ struct netlink_ext_ack *extack)
{
void *headers_c, *headers_v;
u8 prio_val, prio_mask = 0;
@@ -540,8 +538,8 @@ static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
#ifdef CONFIG_MLX5_CORE_EN_DCB
if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
- netdev_warn(priv->netdev,
- "only PCP trust state supported for hairpin\n");
+ NL_SET_ERR_MSG_MOD(extack,
+ "only PCP trust state supported for hairpin");
return -EOPNOTSUPP;
}
#endif
@@ -557,8 +555,8 @@ static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
if (!vlan_present || !prio_mask) {
prio_val = UNKNOWN_MATCH_PRIO;
} else if (prio_mask != 0x7) {
- netdev_warn(priv->netdev,
- "masked priority match not supported for hairpin\n");
+ NL_SET_ERR_MSG_MOD(extack,
+ "masked priority match not supported for hairpin");
return -EOPNOTSUPP;
}
@@ -568,7 +566,8 @@ static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
- struct mlx5e_tc_flow_parse_attr *parse_attr)
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct netlink_ext_ack *extack)
{
int peer_ifindex = parse_attr->mirred_ifindex;
struct mlx5_hairpin_params params;
@@ -583,12 +582,13 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
- netdev_warn(priv->netdev, "hairpin is not supported\n");
+ NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
return -EOPNOTSUPP;
}
peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
- err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio);
+ err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
+ extack);
if (err)
return err;
hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
@@ -674,29 +674,28 @@ static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
}
}
-static struct mlx5_flow_handle *
+static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct mlx5e_tc_flow *flow)
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
{
struct mlx5_nic_flow_attr *attr = flow->nic_attr;
struct mlx5_core_dev *dev = priv->mdev;
struct mlx5_flow_destination dest[2] = {};
struct mlx5_flow_act flow_act = {
.action = attr->action,
- .has_flow_tag = true,
.flow_tag = attr->flow_tag,
- .encap_id = 0,
+ .reformat_id = 0,
+ .flags = FLOW_ACT_HAS_TAG | FLOW_ACT_NO_APPEND,
};
struct mlx5_fc *counter = NULL;
- struct mlx5_flow_handle *rule;
bool table_created = false;
int err, dest_ix = 0;
if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) {
- err = mlx5e_hairpin_flow_add(priv, flow, parse_attr);
+ err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
if (err) {
- rule = ERR_PTR(err);
goto err_add_hairpin_flow;
}
if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) {
@@ -716,22 +715,21 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
counter = mlx5_fc_create(dev, true);
if (IS_ERR(counter)) {
- rule = ERR_CAST(counter);
+ err = PTR_ERR(counter);
goto err_fc_create;
}
dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest[dest_ix].counter = counter;
+ dest[dest_ix].counter_id = mlx5_fc_id(counter);
dest_ix++;
+ attr->counter = counter;
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
flow_act.modify_id = attr->mod_hdr_id;
kfree(parse_attr->mod_hdr_actions);
- if (err) {
- rule = ERR_PTR(err);
+ if (err)
goto err_create_mod_hdr_id;
- }
}
if (IS_ERR_OR_NULL(priv->fs.tc.t)) {
@@ -753,9 +751,11 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
MLX5E_TC_TABLE_NUM_GROUPS,
MLX5E_TC_FT_LEVEL, 0);
if (IS_ERR(priv->fs.tc.t)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to create tc offload table\n");
netdev_err(priv->netdev,
"Failed to create tc offload table\n");
- rule = ERR_CAST(priv->fs.tc.t);
+ err = PTR_ERR(priv->fs.tc.t);
goto err_create_ft;
}
@@ -765,13 +765,15 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
if (attr->match_level != MLX5_MATCH_NONE)
parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
- rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
- &flow_act, dest, dest_ix);
+ flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec,
+ &flow_act, dest, dest_ix);
- if (IS_ERR(rule))
+ if (IS_ERR(flow->rule[0])) {
+ err = PTR_ERR(flow->rule[0]);
goto err_add_rule;
+ }
- return rule;
+ return 0;
err_add_rule:
if (table_created) {
@@ -787,7 +789,7 @@ err_fc_create:
if (flow->flags & MLX5E_TC_FLOW_HAIRPIN)
mlx5e_hairpin_flow_del(priv, flow);
err_add_hairpin_flow:
- return rule;
+ return err;
}
static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
@@ -796,7 +798,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
struct mlx5_nic_flow_attr *attr = flow->nic_attr;
struct mlx5_fc *counter = NULL;
- counter = mlx5_flow_rule_counter(flow->rule[0]);
+ counter = attr->counter;
mlx5_del_flow_rules(flow->rule[0]);
mlx5_fc_destroy(priv->mdev, counter);
@@ -819,30 +821,119 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct ip_tunnel_info *tun_info,
struct net_device *mirred_dev,
struct net_device **encap_dev,
- struct mlx5e_tc_flow *flow);
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack);
static struct mlx5_flow_handle *
+mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *attr)
+{
+ struct mlx5_flow_handle *rule;
+
+ rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
+ if (IS_ERR(rule))
+ return rule;
+
+ if (attr->mirror_count) {
+ flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
+ if (IS_ERR(flow->rule[1])) {
+ mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
+ return flow->rule[1];
+ }
+ }
+
+ flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
+ return rule;
+}
+
+static void
+mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_esw_flow_attr *attr)
+{
+ flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
+
+ if (attr->mirror_count)
+ mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
+
+ mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+}
+
+static struct mlx5_flow_handle *
+mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_flow_spec *spec,
+ struct mlx5_esw_flow_attr *slow_attr)
+{
+ struct mlx5_flow_handle *rule;
+
+ memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
+ slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+ slow_attr->mirror_count = 0,
+ slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN,
+
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
+ if (!IS_ERR(rule))
+ flow->flags |= MLX5E_TC_FLOW_SLOW;
+
+ return rule;
+}
+
+static void
+mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
+ struct mlx5e_tc_flow *flow,
+ struct mlx5_esw_flow_attr *slow_attr)
+{
+ memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr));
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
+ flow->flags &= ~MLX5E_TC_FLOW_SLOW;
+}
+
+static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct mlx5e_tc_flow *flow)
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ u32 max_chain = mlx5_eswitch_get_chain_range(esw);
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ u16 max_prio = mlx5_eswitch_get_prio_range(esw);
struct net_device *out_dev, *encap_dev = NULL;
- struct mlx5_flow_handle *rule = NULL;
+ struct mlx5_fc *counter = NULL;
struct mlx5e_rep_priv *rpriv;
struct mlx5e_priv *out_priv;
- int err;
+ int err = 0, encap_err = 0;
+
+ /* if prios are not supported, keep the old behaviour of using same prio
+ * for all offloaded rules.
+ */
+ if (!mlx5_eswitch_prios_supported(esw))
+ attr->prio = 1;
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
+ if (attr->chain > max_chain) {
+ NL_SET_ERR_MSG(extack, "Requested chain is out of supported range");
+ err = -EOPNOTSUPP;
+ goto err_max_prio_chain;
+ }
+
+ if (attr->prio > max_prio) {
+ NL_SET_ERR_MSG(extack, "Requested priority is out of supported range");
+ err = -EOPNOTSUPP;
+ goto err_max_prio_chain;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) {
out_dev = __dev_get_by_index(dev_net(priv->netdev),
attr->parse_attr->mirred_ifindex);
- err = mlx5e_attach_encap(priv, &parse_attr->tun_info,
- out_dev, &encap_dev, flow);
- if (err) {
- rule = ERR_PTR(err);
- if (err != -EAGAIN)
- goto err_attach_encap;
+ encap_err = mlx5e_attach_encap(priv, &parse_attr->tun_info,
+ out_dev, &encap_dev, flow,
+ extack);
+ if (encap_err && encap_err != -EAGAIN) {
+ err = encap_err;
+ goto err_attach_encap;
}
out_priv = netdev_priv(encap_dev);
rpriv = out_priv->ppriv;
@@ -851,49 +942,58 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
}
err = mlx5_eswitch_add_vlan_action(esw, attr);
- if (err) {
- rule = ERR_PTR(err);
+ if (err)
goto err_add_vlan;
- }
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
kfree(parse_attr->mod_hdr_actions);
- if (err) {
- rule = ERR_PTR(err);
+ if (err)
goto err_mod_hdr;
+ }
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ counter = mlx5_fc_create(esw->dev, true);
+ if (IS_ERR(counter)) {
+ err = PTR_ERR(counter);
+ goto err_create_counter;
}
+
+ attr->counter = counter;
}
- /* we get here if (1) there's no error (rule being null) or when
+ /* we get here if (1) there's no error or when
* (2) there's an encap action and we're on -EAGAIN (no valid neigh)
*/
- if (rule != ERR_PTR(-EAGAIN)) {
- rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
- if (IS_ERR(rule))
- goto err_add_rule;
-
- if (attr->mirror_count) {
- flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &parse_attr->spec, attr);
- if (IS_ERR(flow->rule[1]))
- goto err_fwd_rule;
- }
+ if (encap_err == -EAGAIN) {
+ /* continue with goto slow path rule instead */
+ struct mlx5_esw_flow_attr slow_attr;
+
+ flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec, &slow_attr);
+ } else {
+ flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
+ }
+
+ if (IS_ERR(flow->rule[0])) {
+ err = PTR_ERR(flow->rule[0]);
+ goto err_add_rule;
}
- return rule;
-err_fwd_rule:
- mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
- rule = flow->rule[1];
+ return 0;
+
err_add_rule:
+ mlx5_fc_destroy(esw->dev, counter);
+err_create_counter:
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
mlx5e_detach_mod_hdr(priv, flow);
err_mod_hdr:
mlx5_eswitch_del_vlan_action(esw, attr);
err_add_vlan:
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
mlx5e_detach_encap(priv, flow);
err_attach_encap:
- return rule;
+err_max_prio_chain:
+ return err;
}
static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
@@ -901,36 +1001,43 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ struct mlx5_esw_flow_attr slow_attr;
if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
- flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
- if (attr->mirror_count)
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr);
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+ if (flow->flags & MLX5E_TC_FLOW_SLOW)
+ mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
+ else
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
}
mlx5_eswitch_del_vlan_action(esw, attr);
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) {
mlx5e_detach_encap(priv, flow);
kvfree(attr->parse_attr);
}
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
mlx5e_detach_mod_hdr(priv, flow);
+
+ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+ mlx5_fc_destroy(esw->dev, attr->counter);
}
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- struct mlx5_esw_flow_attr *esw_attr;
+ struct mlx5_esw_flow_attr slow_attr, *esw_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
struct mlx5e_tc_flow *flow;
int err;
- err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
- e->encap_size, e->encap_header,
- &e->encap_id);
+ err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type,
+ e->encap_size, e->encap_header,
+ MLX5_FLOW_NAMESPACE_FDB,
+ &e->encap_id);
if (err) {
mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n",
err);
@@ -942,26 +1049,20 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
list_for_each_entry(flow, &e->flows, encap) {
esw_attr = flow->esw_attr;
esw_attr->encap_id = e->encap_id;
- flow->rule[0] = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
- if (IS_ERR(flow->rule[0])) {
- err = PTR_ERR(flow->rule[0]);
+ spec = &esw_attr->parse_attr->spec;
+
+ /* update from slow path rule to encap rule */
+ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr);
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
err);
continue;
}
- if (esw_attr->mirror_count) {
- flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
- if (IS_ERR(flow->rule[1])) {
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], esw_attr);
- err = PTR_ERR(flow->rule[1]);
- mlx5_core_warn(priv->mdev, "Failed to update cached mirror flow, %d\n",
- err);
- continue;
- }
- }
-
- flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
+ mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr);
+ flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when slow path rule removed */
+ flow->rule[0] = rule;
}
}
@@ -969,25 +1070,44 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
struct mlx5e_encap_entry *e)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ struct mlx5_esw_flow_attr slow_attr;
+ struct mlx5_flow_handle *rule;
+ struct mlx5_flow_spec *spec;
struct mlx5e_tc_flow *flow;
+ int err;
list_for_each_entry(flow, &e->flows, encap) {
- if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
- struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+ spec = &flow->esw_attr->parse_attr->spec;
- flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
- if (attr->mirror_count)
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr);
- mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
+ /* update from encap rule to slow path rule */
+ rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr);
+
+ if (IS_ERR(rule)) {
+ err = PTR_ERR(rule);
+ mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
+ err);
+ continue;
}
+
+ mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr);
+ flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when fast path rule removed */
+ flow->rule[0] = rule;
}
if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
- mlx5_encap_dealloc(priv->mdev, e->encap_id);
+ mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
}
}
+static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
+{
+ if (flow->flags & MLX5E_TC_FLOW_ESWITCH)
+ return flow->esw_attr->counter;
+ else
+ return flow->nic_attr->counter;
+}
+
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
@@ -1013,7 +1133,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
continue;
list_for_each_entry(flow, &e->flows, encap) {
if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
- counter = mlx5_flow_rule_counter(flow->rule[0]);
+ counter = mlx5e_tc_get_counter(flow);
mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
neigh_used = true;
@@ -1053,7 +1173,7 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
if (e->flags & MLX5_ENCAP_ENTRY_VALID)
- mlx5_encap_dealloc(priv->mdev, e->encap_id);
+ mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
hash_del_rcu(&e->encap_hlist);
kfree(e->encap_header);
@@ -1105,6 +1225,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct tc_cls_flower_offload *f)
{
+ struct netlink_ext_ack *extack = f->common.extack;
void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
outer_headers);
void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
@@ -1133,6 +1254,8 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
parse_vxlan_attr(spec, f);
else {
+ NL_SET_ERR_MSG_MOD(extack,
+ "port isn't an offloaded vxlan udp dport");
netdev_warn(priv->netdev,
"%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst));
return -EOPNOTSUPP;
@@ -1149,6 +1272,8 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
udp_sport, ntohs(key->src));
} else { /* udp dst port must be given */
vxlan_match_offload_err:
+ NL_SET_ERR_MSG_MOD(extack,
+ "IP tunnel decap offload supported only for vxlan, must set UDP dport");
netdev_warn(priv->netdev,
"IP tunnel decap offload supported only for vxlan, must set UDP dport\n");
return -EOPNOTSUPP;
@@ -1225,6 +1350,16 @@ vxlan_match_offload_err:
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl);
+
+ if (mask->ttl &&
+ !MLX5_CAP_ESW_FLOWTABLE_FDB
+ (priv->mdev,
+ ft_field_support.outer_ipv4_ttl)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on TTL is not supported");
+ return -EOPNOTSUPP;
+ }
+
}
/* Enforce DMAC when offloading incoming tunneled flows.
@@ -1247,6 +1382,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
struct tc_cls_flower_offload *f,
u8 *match_level)
{
+ struct netlink_ext_ack *extack = f->common.extack;
void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
outer_headers);
void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
@@ -1277,6 +1413,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
BIT(FLOW_DISSECTOR_KEY_TCP) |
BIT(FLOW_DISSECTOR_KEY_IP) |
BIT(FLOW_DISSECTOR_KEY_ENC_IP))) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
f->dissector->used_keys);
return -EOPNOTSUPP;
@@ -1553,8 +1690,11 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
if (mask->ttl &&
!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
- ft_field_support.outer_ipv4_ttl))
+ ft_field_support.outer_ipv4_ttl)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Matching on TTL is not supported");
return -EOPNOTSUPP;
+ }
if (mask->tos || mask->ttl)
*match_level = MLX5_MATCH_L3;
@@ -1596,6 +1736,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
udp_dport, ntohs(key->dst));
break;
default:
+ NL_SET_ERR_MSG_MOD(extack,
+ "Only UDP and TCP transports are supported for L4 matching");
netdev_err(priv->netdev,
"Only UDP and TCP transport are supported\n");
return -EINVAL;
@@ -1632,6 +1774,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
struct mlx5_flow_spec *spec,
struct tc_cls_flower_offload *f)
{
+ struct netlink_ext_ack *extack = f->common.extack;
struct mlx5_core_dev *dev = priv->mdev;
struct mlx5_eswitch *esw = dev->priv.eswitch;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -1646,6 +1789,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv,
if (rep->vport != FDB_UPLINK_VPORT &&
(esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
esw->offloads.inline_mode < match_level)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Flow is not offloaded due to min inline setting");
netdev_warn(priv->netdev,
"Flow is not offloaded due to min inline setting, required %d actual %d\n",
match_level, esw->offloads.inline_mode);
@@ -1747,7 +1892,8 @@ static struct mlx5_fields fields[] = {
*/
static int offload_pedit_fields(struct pedit_headers *masks,
struct pedit_headers *vals,
- struct mlx5e_tc_flow_parse_attr *parse_attr)
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct netlink_ext_ack *extack)
{
struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
int i, action_size, nactions, max_actions, first, last, next_z;
@@ -1786,11 +1932,15 @@ static int offload_pedit_fields(struct pedit_headers *masks,
continue;
if (s_mask && a_mask) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't set and add to the same HW field");
printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field);
return -EOPNOTSUPP;
}
if (nactions == max_actions) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "too many pedit actions, can't offload");
printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions);
return -EOPNOTSUPP;
}
@@ -1823,6 +1973,8 @@ static int offload_pedit_fields(struct pedit_headers *masks,
next_z = find_next_zero_bit(&mask, field_bsize, first);
last = find_last_bit(&mask, field_bsize);
if (first < next_z && next_z < last) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "rewrite of few sub-fields isn't supported");
printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
mask);
return -EOPNOTSUPP;
@@ -1881,7 +2033,8 @@ static const struct pedit_headers zero_masks = {};
static int parse_tc_pedit_action(struct mlx5e_priv *priv,
const struct tc_action *a, int namespace,
- struct mlx5e_tc_flow_parse_attr *parse_attr)
+ struct mlx5e_tc_flow_parse_attr *parse_attr,
+ struct netlink_ext_ack *extack)
{
struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks;
int nkeys, i, err = -EOPNOTSUPP;
@@ -1899,12 +2052,13 @@ static int parse_tc_pedit_action(struct mlx5e_priv *priv,
err = -EOPNOTSUPP; /* can't be all optimistic */
if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) {
- netdev_warn(priv->netdev, "legacy pedit isn't offloaded\n");
+ NL_SET_ERR_MSG_MOD(extack,
+ "legacy pedit isn't offloaded");
goto out_err;
}
if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) {
- netdev_warn(priv->netdev, "pedit cmd %d isn't offloaded\n", cmd);
+ NL_SET_ERR_MSG_MOD(extack, "pedit cmd isn't offloaded");
goto out_err;
}
@@ -1921,13 +2075,15 @@ static int parse_tc_pedit_action(struct mlx5e_priv *priv,
if (err)
goto out_err;
- err = offload_pedit_fields(masks, vals, parse_attr);
+ err = offload_pedit_fields(masks, vals, parse_attr, extack);
if (err < 0)
goto out_dealloc_parsed_actions;
for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
cmd_masks = &masks[cmd];
if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "attempt to offload an unsupported field");
netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
16, 1, cmd_masks, sizeof(zero_masks), true);
@@ -1944,19 +2100,26 @@ out_err:
return err;
}
-static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 update_flags)
+static bool csum_offload_supported(struct mlx5e_priv *priv,
+ u32 action,
+ u32 update_flags,
+ struct netlink_ext_ack *extack)
{
u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP |
TCA_CSUM_UPDATE_FLAG_UDP;
/* The HW recalcs checksums only if re-writing headers */
if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "TC csum action is only offloaded with pedit");
netdev_warn(priv->netdev,
"TC csum action is only offloaded with pedit\n");
return false;
}
if (update_flags & ~prot_flags) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't offload TC csum action for some header/s");
netdev_warn(priv->netdev,
"can't offload TC csum action for some header/s - flags %#x\n",
update_flags);
@@ -1967,7 +2130,8 @@ static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 upda
}
static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
- struct tcf_exts *exts)
+ struct tcf_exts *exts,
+ struct netlink_ext_ack *extack)
{
const struct tc_action *a;
bool modify_ip_header;
@@ -2005,6 +2169,8 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
if (modify_ip_header && ip_proto != IPPROTO_TCP &&
ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't offload re-write of non TCP/UDP");
pr_info("can't offload re-write of ip proto %d\n", ip_proto);
return false;
}
@@ -2016,7 +2182,8 @@ out_ok:
static bool actions_match_supported(struct mlx5e_priv *priv,
struct tcf_exts *exts,
struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct mlx5e_tc_flow *flow)
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
{
u32 actions;
@@ -2030,7 +2197,8 @@ static bool actions_match_supported(struct mlx5e_priv *priv,
return false;
if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
- return modify_header_match_supported(&parse_attr->spec, exts);
+ return modify_header_match_supported(&parse_attr->spec, exts,
+ extack);
return true;
}
@@ -2043,15 +2211,16 @@ static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
fmdev = priv->mdev;
pmdev = peer_priv->mdev;
- mlx5_query_nic_vport_system_image_guid(fmdev, &fsystem_guid);
- mlx5_query_nic_vport_system_image_guid(pmdev, &psystem_guid);
+ fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
+ psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
return (fsystem_guid == psystem_guid);
}
static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct mlx5e_tc_flow *flow)
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
{
struct mlx5_nic_flow_attr *attr = flow->nic_attr;
const struct tc_action *a;
@@ -2075,7 +2244,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
if (is_tcf_pedit(a)) {
err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL,
- parse_attr);
+ parse_attr, extack);
if (err)
return err;
@@ -2086,7 +2255,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
if (is_tcf_csum(a)) {
if (csum_offload_supported(priv, action,
- tcf_csum_update_flags(a)))
+ tcf_csum_update_flags(a),
+ extack))
continue;
return -EOPNOTSUPP;
@@ -2102,6 +2272,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
} else {
+ NL_SET_ERR_MSG_MOD(extack,
+ "device is not on same HW, can't offload");
netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n",
peer_dev->name);
return -EINVAL;
@@ -2113,8 +2285,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
u32 mark = tcf_skbedit_mark(a);
if (mark & ~MLX5E_TC_FLOW_ID_MASK) {
- netdev_warn(priv->netdev, "Bad flow mark - only 16 bit is supported: 0x%x\n",
- mark);
+ NL_SET_ERR_MSG_MOD(extack,
+ "Bad flow mark - only 16 bit is supported");
return -EINVAL;
}
@@ -2127,7 +2299,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
}
attr->action = action;
- if (!actions_match_supported(priv, exts, parse_attr, flow))
+ if (!actions_match_supported(priv, exts, parse_attr, flow, extack))
return -EOPNOTSUPP;
return 0;
@@ -2331,7 +2503,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
return -ENOMEM;
switch (e->tunnel_type) {
- case MLX5_HEADER_TYPE_VXLAN:
+ case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
fl4.flowi4_proto = IPPROTO_UDP;
fl4.fl4_dport = tun_key->tp_dst;
break;
@@ -2375,7 +2547,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
read_unlock_bh(&n->lock);
switch (e->tunnel_type) {
- case MLX5_HEADER_TYPE_VXLAN:
+ case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
gen_vxlan_header_ipv4(out_dev, encap_header,
ipv4_encap_size, e->h_dest, tos, ttl,
fl4.daddr,
@@ -2395,8 +2567,10 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
goto out;
}
- err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
- ipv4_encap_size, encap_header, &e->encap_id);
+ err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type,
+ ipv4_encap_size, encap_header,
+ MLX5_FLOW_NAMESPACE_FDB,
+ &e->encap_id);
if (err)
goto destroy_neigh_entry;
@@ -2440,7 +2614,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
return -ENOMEM;
switch (e->tunnel_type) {
- case MLX5_HEADER_TYPE_VXLAN:
+ case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
fl6.flowi6_proto = IPPROTO_UDP;
fl6.fl6_dport = tun_key->tp_dst;
break;
@@ -2484,7 +2658,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
read_unlock_bh(&n->lock);
switch (e->tunnel_type) {
- case MLX5_HEADER_TYPE_VXLAN:
+ case MLX5_REFORMAT_TYPE_L2_TO_VXLAN:
gen_vxlan_header_ipv6(out_dev, encap_header,
ipv6_encap_size, e->h_dest, tos, ttl,
&fl6.daddr,
@@ -2505,8 +2679,10 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv,
goto out;
}
- err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
- ipv6_encap_size, encap_header, &e->encap_id);
+ err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type,
+ ipv6_encap_size, encap_header,
+ MLX5_FLOW_NAMESPACE_FDB,
+ &e->encap_id);
if (err)
goto destroy_neigh_entry;
@@ -2529,7 +2705,8 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct ip_tunnel_info *tun_info,
struct net_device *mirred_dev,
struct net_device **encap_dev,
- struct mlx5e_tc_flow *flow)
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
unsigned short family = ip_tunnel_info_af(tun_info);
@@ -2547,6 +2724,8 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
/* setting udp src port isn't supported */
if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) {
vxlan_encap_offload_err:
+ NL_SET_ERR_MSG_MOD(extack,
+ "must set udp dst port and not set udp src port");
netdev_warn(priv->netdev,
"must set udp dst port and not set udp src port\n");
return -EOPNOTSUPP;
@@ -2554,8 +2733,10 @@ vxlan_encap_offload_err:
if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) &&
MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
- tunnel_type = MLX5_HEADER_TYPE_VXLAN;
+ tunnel_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN;
} else {
+ NL_SET_ERR_MSG_MOD(extack,
+ "port isn't an offloaded vxlan udp dport");
netdev_warn(priv->netdev,
"%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst));
return -EOPNOTSUPP;
@@ -2660,8 +2841,10 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv,
static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
struct mlx5e_tc_flow_parse_attr *parse_attr,
- struct mlx5e_tc_flow *flow)
+ struct mlx5e_tc_flow *flow,
+ struct netlink_ext_ack *extack)
{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_esw_flow_attr *attr = flow->esw_attr;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct ip_tunnel_info *info = NULL;
@@ -2686,7 +2869,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
if (is_tcf_pedit(a)) {
err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB,
- parse_attr);
+ parse_attr, extack);
if (err)
return err;
@@ -2697,7 +2880,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
if (is_tcf_csum(a)) {
if (csum_offload_supported(priv, action,
- tcf_csum_update_flags(a)))
+ tcf_csum_update_flags(a),
+ extack))
continue;
return -EOPNOTSUPP;
@@ -2710,6 +2894,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
out_dev = tcf_mirred_dev(a);
if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't support more output ports, can't offload forwarding");
pr_err("can't support more than %d output ports, can't offload forwarding\n",
attr->out_count);
return -EOPNOTSUPP;
@@ -2728,11 +2914,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
parse_attr->mirred_ifindex = out_dev->ifindex;
parse_attr->tun_info = *info;
attr->parse_attr = parse_attr;
- action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
+ action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
MLX5_FLOW_CONTEXT_ACTION_COUNT;
/* attr->out_rep is resolved when we handle encap */
} else {
+ NL_SET_ERR_MSG_MOD(extack,
+ "devices are not on same switch HW, can't offload forwarding");
pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
priv->netdev->name, out_dev->name);
return -EINVAL;
@@ -2765,14 +2953,35 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
continue;
}
+ if (is_tcf_gact_goto_chain(a)) {
+ u32 dest_chain = tcf_gact_goto_chain_index(a);
+ u32 max_chain = mlx5_eswitch_get_chain_range(esw);
+
+ if (dest_chain <= attr->chain) {
+ NL_SET_ERR_MSG(extack, "Goto earlier chain isn't supported");
+ return -EOPNOTSUPP;
+ }
+ if (dest_chain > max_chain) {
+ NL_SET_ERR_MSG(extack, "Requested destination chain is out of supported range");
+ return -EOPNOTSUPP;
+ }
+ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ attr->dest_chain = dest_chain;
+
+ continue;
+ }
+
return -EINVAL;
}
attr->action = action;
- if (!actions_match_supported(priv, exts, parse_attr, flow))
+ if (!actions_match_supported(priv, exts, parse_attr, flow, extack))
return -EOPNOTSUPP;
if (attr->out_count > 1 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "current firmware doesn't support split rule for port mirroring");
netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n");
return -EOPNOTSUPP;
}
@@ -2780,9 +2989,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
return 0;
}
-static void get_flags(int flags, u8 *flow_flags)
+static void get_flags(int flags, u16 *flow_flags)
{
- u8 __flow_flags = 0;
+ u16 __flow_flags = 0;
if (flags & MLX5E_TC_INGRESS)
__flow_flags |= MLX5E_TC_FLOW_INGRESS;
@@ -2811,31 +3020,15 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv)
return &priv->fs.tc.ht;
}
-int mlx5e_configure_flower(struct mlx5e_priv *priv,
- struct tc_cls_flower_offload *f, int flags)
+static int
+mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
+ struct tc_cls_flower_offload *f, u16 flow_flags,
+ struct mlx5e_tc_flow_parse_attr **__parse_attr,
+ struct mlx5e_tc_flow **__flow)
{
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_flow_parse_attr *parse_attr;
- struct rhashtable *tc_ht = get_tc_ht(priv);
struct mlx5e_tc_flow *flow;
- int attr_size, err = 0;
- u8 flow_flags = 0;
-
- get_flags(flags, &flow_flags);
-
- flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
- if (flow) {
- netdev_warn_once(priv->netdev, "flow cookie %lx already exists, ignoring\n", f->cookie);
- return 0;
- }
-
- if (esw && esw->mode == SRIOV_OFFLOADS) {
- flow_flags |= MLX5E_TC_FLOW_ESWITCH;
- attr_size = sizeof(struct mlx5_esw_flow_attr);
- } else {
- flow_flags |= MLX5E_TC_FLOW_NIC;
- attr_size = sizeof(struct mlx5_nic_flow_attr);
- }
+ int err;
flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL);
parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
@@ -2849,45 +3042,161 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,
flow->priv = priv;
err = parse_cls_flower(priv, flow, &parse_attr->spec, f);
- if (err < 0)
+ if (err)
goto err_free;
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
- err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow);
- if (err < 0)
- goto err_free;
- flow->rule[0] = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow);
- } else {
- err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow);
- if (err < 0)
- goto err_free;
- flow->rule[0] = mlx5e_tc_add_nic_flow(priv, parse_attr, flow);
- }
+ *__flow = flow;
+ *__parse_attr = parse_attr;
- if (IS_ERR(flow->rule[0])) {
- err = PTR_ERR(flow->rule[0]);
- if (err != -EAGAIN)
- goto err_free;
- }
+ return 0;
- if (err != -EAGAIN)
- flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
+err_free:
+ kfree(flow);
+ kvfree(parse_attr);
+ return err;
+}
+
+static int
+mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f,
+ u16 flow_flags,
+ struct mlx5e_tc_flow **__flow)
+{
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5e_tc_flow *flow;
+ int attr_size, err;
- if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) ||
- !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP))
+ flow_flags |= MLX5E_TC_FLOW_ESWITCH;
+ attr_size = sizeof(struct mlx5_esw_flow_attr);
+ err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
+ &parse_attr, &flow);
+ if (err)
+ goto out;
+
+ flow->esw_attr->chain = f->common.chain_index;
+ flow->esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16;
+ err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow, extack);
+ if (err)
+ goto err_free;
+
+ err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack);
+ if (err)
+ goto err_free;
+
+ if (!(flow->esw_attr->action &
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT))
kvfree(parse_attr);
- err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params);
- if (err) {
- mlx5e_tc_del_flow(priv, flow);
- kfree(flow);
- }
+ *__flow = flow;
+
+ return 0;
+err_free:
+ kfree(flow);
+ kvfree(parse_attr);
+out:
return err;
+}
+
+static int
+mlx5e_add_nic_flow(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f,
+ u16 flow_flags,
+ struct mlx5e_tc_flow **__flow)
+{
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct mlx5e_tc_flow_parse_attr *parse_attr;
+ struct mlx5e_tc_flow *flow;
+ int attr_size, err;
+
+ /* multi-chain not supported for NIC rules */
+ if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
+ return -EOPNOTSUPP;
+
+ flow_flags |= MLX5E_TC_FLOW_NIC;
+ attr_size = sizeof(struct mlx5_nic_flow_attr);
+ err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
+ &parse_attr, &flow);
+ if (err)
+ goto out;
+
+ err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow, extack);
+ if (err)
+ goto err_free;
+
+ err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack);
+ if (err)
+ goto err_free;
+
+ flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
+ kvfree(parse_attr);
+ *__flow = flow;
+
+ return 0;
err_free:
+ kfree(flow);
kvfree(parse_attr);
+out:
+ return err;
+}
+
+static int
+mlx5e_tc_add_flow(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f,
+ int flags,
+ struct mlx5e_tc_flow **flow)
+{
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ u16 flow_flags;
+ int err;
+
+ get_flags(flags, &flow_flags);
+
+ if (!tc_can_offload_extack(priv->netdev, f->common.extack))
+ return -EOPNOTSUPP;
+
+ if (esw && esw->mode == SRIOV_OFFLOADS)
+ err = mlx5e_add_fdb_flow(priv, f, flow_flags, flow);
+ else
+ err = mlx5e_add_nic_flow(priv, f, flow_flags, flow);
+
+ return err;
+}
+
+int mlx5e_configure_flower(struct mlx5e_priv *priv,
+ struct tc_cls_flower_offload *f, int flags)
+{
+ struct netlink_ext_ack *extack = f->common.extack;
+ struct rhashtable *tc_ht = get_tc_ht(priv);
+ struct mlx5e_tc_flow *flow;
+ int err = 0;
+
+ flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params);
+ if (flow) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "flow cookie already exists, ignoring");
+ netdev_warn_once(priv->netdev,
+ "flow cookie %lx already exists, ignoring\n",
+ f->cookie);
+ goto out;
+ }
+
+ err = mlx5e_tc_add_flow(priv, f, flags, &flow);
+ if (err)
+ goto out;
+
+ err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params);
+ if (err)
+ goto err_free;
+
+ return 0;
+
+err_free:
+ mlx5e_tc_del_flow(priv, flow);
kfree(flow);
+out:
return err;
}
@@ -2938,7 +3247,7 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv,
if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED))
return 0;
- counter = mlx5_flow_rule_counter(flow->rule[0]);
+ counter = mlx5e_tc_get_counter(flow);
if (!counter)
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index ea7dedc2d5ad..d004957328f9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -263,7 +263,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw)
esw_debug(dev, "Create FDB log_max_size(%d)\n",
MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
- root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+ root_ns = mlx5_get_fdb_sub_ns(dev, 0);
if (!root_ns) {
esw_warn(dev, "Failed to get FDB flow namespace\n");
return -EOPNOTSUPP;
@@ -1198,7 +1198,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
if (counter) {
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- drop_ctr_dst.counter = counter;
+ drop_ctr_dst.counter_id = mlx5_fc_id(counter);
dst = &drop_ctr_dst;
dest_num++;
}
@@ -1285,7 +1285,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
if (counter) {
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- drop_ctr_dst.counter = counter;
+ drop_ctr_dst.counter_id = mlx5_fc_id(counter);
dst = &drop_ctr_dst;
dest_num++;
}
@@ -1746,7 +1746,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
esw->enabled_vports = 0;
esw->mode = SRIOV_NONE;
esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
- if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) &&
+ if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) &&
MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
else
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index c17bfcab517c..aaafc9f17115 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -59,6 +59,10 @@
#define mlx5_esw_has_fwd_fdb(dev) \
MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table)
+#define FDB_MAX_CHAIN 3
+#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
+#define FDB_MAX_PRIO 16
+
struct vport_ingress {
struct mlx5_flow_table *acl;
struct mlx5_flow_group *allow_untagged_spoofchk_grp;
@@ -120,6 +124,13 @@ struct mlx5_vport {
u16 enabled_events;
};
+enum offloads_fdb_flags {
+ ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED = BIT(0),
+};
+
+extern const unsigned int ESW_POOLS[4];
+
+#define PRIO_LEVELS 2
struct mlx5_eswitch_fdb {
union {
struct legacy_fdb {
@@ -130,16 +141,24 @@ struct mlx5_eswitch_fdb {
} legacy;
struct offloads_fdb {
- struct mlx5_flow_table *fast_fdb;
- struct mlx5_flow_table *fwd_fdb;
struct mlx5_flow_table *slow_fdb;
struct mlx5_flow_group *send_to_vport_grp;
struct mlx5_flow_group *miss_grp;
struct mlx5_flow_handle *miss_rule_uni;
struct mlx5_flow_handle *miss_rule_multi;
int vlan_push_pop_refcount;
+
+ struct {
+ struct mlx5_flow_table *fdb;
+ u32 num_rules;
+ } fdb_prio[FDB_MAX_CHAIN + 1][FDB_MAX_PRIO + 1][PRIO_LEVELS];
+ /* Protects fdb_prio table */
+ struct mutex fdb_prio_lock;
+
+ int fdb_left[ARRAY_SIZE(ESW_POOLS)];
} offloads;
};
+ u32 flags;
};
struct mlx5_esw_offload {
@@ -181,6 +200,7 @@ struct mlx5_eswitch {
struct mlx5_esw_offload offloads;
int mode;
+ int nvports;
};
void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports);
@@ -228,9 +248,23 @@ void
mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule,
struct mlx5_esw_flow_attr *attr);
+void
+mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_esw_flow_attr *attr);
+
+bool
+mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw);
+
+u16
+mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw);
+
+u32
+mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw);
struct mlx5_flow_handle *
-mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn);
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport,
+ struct mlx5_flow_destination *dest);
enum {
SET_VLAN_STRIP = BIT(0),
@@ -265,15 +299,22 @@ struct mlx5_esw_flow_attr {
u32 encap_id;
u32 mod_hdr_id;
u8 match_level;
+ struct mlx5_fc *counter;
+ u32 chain;
+ u16 prio;
+ u32 dest_chain;
struct mlx5e_tc_flow_parse_attr *parse_attr;
};
-int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode);
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack);
int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
-int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode);
+int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
+ struct netlink_ext_ack *extack);
int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
-int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap);
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
+ struct netlink_ext_ack *extack);
int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap);
void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
@@ -314,6 +355,11 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {}
static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {}
static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; }
static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {}
+
+#define FDB_MAX_CHAIN 1
+#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1)
+#define FDB_MAX_PRIO 1
+
#endif /* CONFIG_MLX5_ESWITCH */
#endif /* __MLX5_ESWITCH_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 3028e8d90920..9eac137790f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -37,33 +37,59 @@
#include <linux/mlx5/fs.h>
#include "mlx5_core.h"
#include "eswitch.h"
+#include "en.h"
+#include "fs_core.h"
enum {
FDB_FAST_PATH = 0,
FDB_SLOW_PATH
};
+#define fdb_prio_table(esw, chain, prio, level) \
+ (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)]
+
+static struct mlx5_flow_table *
+esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level);
+static void
+esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level);
+
+bool mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw)
+{
+ return (!!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED));
+}
+
+u32 mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw)
+{
+ if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)
+ return FDB_MAX_CHAIN;
+
+ return 0;
+}
+
+u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw)
+{
+ if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)
+ return FDB_MAX_PRIO;
+
+ return 1;
+}
+
struct mlx5_flow_handle *
mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_spec *spec,
struct mlx5_esw_flow_attr *attr)
{
struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
- struct mlx5_flow_act flow_act = {0};
- struct mlx5_flow_table *ft = NULL;
- struct mlx5_fc *counter = NULL;
+ struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
+ bool mirror = !!(attr->mirror_count);
struct mlx5_flow_handle *rule;
+ struct mlx5_flow_table *fdb;
int j, i = 0;
void *misc;
if (esw->mode != SRIOV_OFFLOADS)
return ERR_PTR(-EOPNOTSUPP);
- if (attr->mirror_count)
- ft = esw->fdb_table.offloads.fwd_fdb;
- else
- ft = esw->fdb_table.offloads.fast_fdb;
-
flow_act.action = attr->action;
/* if per flow vlan pop/push is emulated, don't set that into the firmware */
if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1))
@@ -81,23 +107,33 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
}
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
- for (j = attr->mirror_count; j < attr->out_count; j++) {
- dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
- dest[i].vport.num = attr->out_rep[j]->vport;
- dest[i].vport.vhca_id =
- MLX5_CAP_GEN(attr->out_mdev[j], vhca_id);
- dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
+ if (attr->dest_chain) {
+ struct mlx5_flow_table *ft;
+
+ ft = esw_get_prio_table(esw, attr->dest_chain, 1, 0);
+ if (IS_ERR(ft)) {
+ rule = ERR_CAST(ft);
+ goto err_create_goto_table;
+ }
+
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ dest[i].ft = ft;
i++;
+ } else {
+ for (j = attr->mirror_count; j < attr->out_count; j++) {
+ dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ dest[i].vport.num = attr->out_rep[j]->vport;
+ dest[i].vport.vhca_id =
+ MLX5_CAP_GEN(attr->out_mdev[j], vhca_id);
+ dest[i].vport.vhca_id_valid =
+ !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
+ i++;
+ }
}
}
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
- counter = mlx5_fc_create(esw->dev, true);
- if (IS_ERR(counter)) {
- rule = ERR_CAST(counter);
- goto err_counter_alloc;
- }
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
- dest[i].counter = counter;
+ dest[i].counter_id = mlx5_fc_id(attr->counter);
i++;
}
@@ -127,10 +163,16 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
flow_act.modify_id = attr->mod_hdr_id;
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
- flow_act.encap_id = attr->encap_id;
+ if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
+ flow_act.reformat_id = attr->encap_id;
- rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, i);
+ fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!mirror);
+ if (IS_ERR(fdb)) {
+ rule = ERR_CAST(fdb);
+ goto err_esw_get;
+ }
+
+ rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i);
if (IS_ERR(rule))
goto err_add_rule;
else
@@ -139,8 +181,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
return rule;
err_add_rule:
- mlx5_fc_destroy(esw->dev, counter);
-err_counter_alloc:
+ esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror);
+err_esw_get:
+ if (attr->dest_chain)
+ esw_put_prio_table(esw, attr->dest_chain, 1, 0);
+err_create_goto_table:
return rule;
}
@@ -150,11 +195,25 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
struct mlx5_esw_flow_attr *attr)
{
struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {};
- struct mlx5_flow_act flow_act = {0};
+ struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, };
+ struct mlx5_flow_table *fast_fdb;
+ struct mlx5_flow_table *fwd_fdb;
struct mlx5_flow_handle *rule;
void *misc;
int i;
+ fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0);
+ if (IS_ERR(fast_fdb)) {
+ rule = ERR_CAST(fast_fdb);
+ goto err_get_fast;
+ }
+
+ fwd_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 1);
+ if (IS_ERR(fwd_fdb)) {
+ rule = ERR_CAST(fwd_fdb);
+ goto err_get_fwd;
+ }
+
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
for (i = 0; i < attr->mirror_count; i++) {
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
@@ -164,7 +223,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch);
}
dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
- dest[i].ft = esw->fdb_table.offloads.fwd_fdb,
+ dest[i].ft = fwd_fdb,
i++;
misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
@@ -187,25 +246,57 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
MLX5_MATCH_MISC_PARAMETERS;
- rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fast_fdb, spec, &flow_act, dest, i);
+ rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i);
- if (!IS_ERR(rule))
- esw->offloads.num_flows++;
+ if (IS_ERR(rule))
+ goto add_err;
+ esw->offloads.num_flows++;
+
+ return rule;
+add_err:
+ esw_put_prio_table(esw, attr->chain, attr->prio, 1);
+err_get_fwd:
+ esw_put_prio_table(esw, attr->chain, attr->prio, 0);
+err_get_fast:
return rule;
}
+static void
+__mlx5_eswitch_del_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_esw_flow_attr *attr,
+ bool fwd_rule)
+{
+ bool mirror = (attr->mirror_count > 0);
+
+ mlx5_del_flow_rules(rule);
+ esw->offloads.num_flows--;
+
+ if (fwd_rule) {
+ esw_put_prio_table(esw, attr->chain, attr->prio, 1);
+ esw_put_prio_table(esw, attr->chain, attr->prio, 0);
+ } else {
+ esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror);
+ if (attr->dest_chain)
+ esw_put_prio_table(esw, attr->dest_chain, 1, 0);
+ }
+}
+
void
mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw,
struct mlx5_flow_handle *rule,
struct mlx5_esw_flow_attr *attr)
{
- struct mlx5_fc *counter = NULL;
+ __mlx5_eswitch_del_rule(esw, rule, attr, false);
+}
- counter = mlx5_flow_rule_counter(rule);
- mlx5_del_flow_rules(rule);
- mlx5_fc_destroy(esw->dev, counter);
- esw->offloads.num_flows--;
+void
+mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw,
+ struct mlx5_flow_handle *rule,
+ struct mlx5_esw_flow_attr *attr)
+{
+ __mlx5_eswitch_del_rule(esw, rule, attr, true);
}
static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val)
@@ -294,7 +385,8 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH);
pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
- fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
+ fwd = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
+ !attr->dest_chain);
err = esw_add_vlan_action_check(attr, push, pop, fwd);
if (err)
@@ -501,74 +593,170 @@ out:
#define ESW_OFFLOADS_NUM_GROUPS 4
-static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
+/* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS),
+ * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated
+ * for each flow table pool. We can allocate up to 16M of each pool,
+ * and we keep track of how much we used via put/get_sz_to_pool.
+ * Firmware doesn't report any of this for now.
+ * ESW_POOL is expected to be sorted from large to small
+ */
+#define ESW_SIZE (16 * 1024 * 1024)
+const unsigned int ESW_POOLS[4] = { 4 * 1024 * 1024, 1 * 1024 * 1024,
+ 64 * 1024, 4 * 1024 };
+
+static int
+get_sz_from_pool(struct mlx5_eswitch *esw)
+{
+ int sz = 0, i;
+
+ for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) {
+ if (esw->fdb_table.offloads.fdb_left[i]) {
+ --esw->fdb_table.offloads.fdb_left[i];
+ sz = ESW_POOLS[i];
+ break;
+ }
+ }
+
+ return sz;
+}
+
+static void
+put_sz_to_pool(struct mlx5_eswitch *esw, int sz)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) {
+ if (sz >= ESW_POOLS[i]) {
+ ++esw->fdb_table.offloads.fdb_left[i];
+ break;
+ }
+ }
+}
+
+static struct mlx5_flow_table *
+create_next_size_table(struct mlx5_eswitch *esw,
+ struct mlx5_flow_namespace *ns,
+ u16 table_prio,
+ int level,
+ u32 flags)
+{
+ struct mlx5_flow_table *fdb;
+ int sz;
+
+ sz = get_sz_from_pool(esw);
+ if (!sz)
+ return ERR_PTR(-ENOSPC);
+
+ fdb = mlx5_create_auto_grouped_flow_table(ns,
+ table_prio,
+ sz,
+ ESW_OFFLOADS_NUM_GROUPS,
+ level,
+ flags);
+ if (IS_ERR(fdb)) {
+ esw_warn(esw->dev, "Failed to create FDB Table err %d (table prio: %d, level: %d, size: %d)\n",
+ (int)PTR_ERR(fdb), table_prio, level, sz);
+ put_sz_to_pool(esw, sz);
+ }
+
+ return fdb;
+}
+
+static struct mlx5_flow_table *
+esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level)
{
struct mlx5_core_dev *dev = esw->dev;
- struct mlx5_flow_namespace *root_ns;
struct mlx5_flow_table *fdb = NULL;
- int esw_size, err = 0;
+ struct mlx5_flow_namespace *ns;
+ int table_prio, l = 0;
u32 flags = 0;
- u32 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
- MLX5_CAP_GEN(dev, max_flow_counter_15_0);
- root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
- if (!root_ns) {
- esw_warn(dev, "Failed to get FDB flow namespace\n");
- err = -EOPNOTSUPP;
- goto out_namespace;
- }
+ if (chain == FDB_SLOW_PATH_CHAIN)
+ return esw->fdb_table.offloads.slow_fdb;
- esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n",
- MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size),
- max_flow_counter, ESW_OFFLOADS_NUM_GROUPS);
+ mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock);
- esw_size = min_t(int, max_flow_counter * ESW_OFFLOADS_NUM_GROUPS,
- 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
+ fdb = fdb_prio_table(esw, chain, prio, level).fdb;
+ if (fdb) {
+ /* take ref on earlier levels as well */
+ while (level >= 0)
+ fdb_prio_table(esw, chain, prio, level--).num_rules++;
+ mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
+ return fdb;
+ }
- if (mlx5_esw_has_fwd_fdb(dev))
- esw_size >>= 1;
+ ns = mlx5_get_fdb_sub_ns(dev, chain);
+ if (!ns) {
+ esw_warn(dev, "Failed to get FDB sub namespace\n");
+ mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
- flags |= MLX5_FLOW_TABLE_TUNNEL_EN;
+ flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
+ MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
- fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
- esw_size,
- ESW_OFFLOADS_NUM_GROUPS, 0,
- flags);
- if (IS_ERR(fdb)) {
- err = PTR_ERR(fdb);
- esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err);
- goto out_namespace;
- }
- esw->fdb_table.offloads.fast_fdb = fdb;
+ table_prio = (chain * FDB_MAX_PRIO) + prio - 1;
+
+ /* create earlier levels for correct fs_core lookup when
+ * connecting tables
+ */
+ for (l = 0; l <= level; l++) {
+ if (fdb_prio_table(esw, chain, prio, l).fdb) {
+ fdb_prio_table(esw, chain, prio, l).num_rules++;
+ continue;
+ }
- if (!mlx5_esw_has_fwd_fdb(dev))
- goto out_namespace;
+ fdb = create_next_size_table(esw, ns, table_prio, l, flags);
+ if (IS_ERR(fdb)) {
+ l--;
+ goto err_create_fdb;
+ }
- fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
- esw_size,
- ESW_OFFLOADS_NUM_GROUPS, 1,
- flags);
- if (IS_ERR(fdb)) {
- err = PTR_ERR(fdb);
- esw_warn(dev, "Failed to create fwd table err %d\n", err);
- goto out_ft;
+ fdb_prio_table(esw, chain, prio, l).fdb = fdb;
+ fdb_prio_table(esw, chain, prio, l).num_rules = 1;
}
- esw->fdb_table.offloads.fwd_fdb = fdb;
- return err;
+ mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
+ return fdb;
-out_ft:
- mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb);
-out_namespace:
- return err;
+err_create_fdb:
+ mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
+ if (l >= 0)
+ esw_put_prio_table(esw, chain, prio, l);
+
+ return fdb;
+}
+
+static void
+esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level)
+{
+ int l;
+
+ if (chain == FDB_SLOW_PATH_CHAIN)
+ return;
+
+ mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock);
+
+ for (l = level; l >= 0; l--) {
+ if (--(fdb_prio_table(esw, chain, prio, l).num_rules) > 0)
+ continue;
+
+ put_sz_to_pool(esw, fdb_prio_table(esw, chain, prio, l).fdb->max_fte);
+ mlx5_destroy_flow_table(fdb_prio_table(esw, chain, prio, l).fdb);
+ fdb_prio_table(esw, chain, prio, l).fdb = NULL;
+ }
+
+ mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock);
}
-static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw)
+static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw)
{
- if (mlx5_esw_has_fwd_fdb(esw->dev))
- mlx5_destroy_flow_table(esw->fdb_table.offloads.fwd_fdb);
- mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb);
+ /* If lazy creation isn't supported, deref the fast path tables */
+ if (!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)) {
+ esw_put_prio_table(esw, 0, 1, 1);
+ esw_put_prio_table(esw, 0, 1, 0);
+ }
}
#define MAX_PF_SQ 256
@@ -579,12 +767,13 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_core_dev *dev = esw->dev;
+ u32 *flow_group_in, max_flow_counter;
struct mlx5_flow_namespace *root_ns;
struct mlx5_flow_table *fdb = NULL;
- int table_size, ix, err = 0;
+ int table_size, ix, err = 0, i;
struct mlx5_flow_group *g;
+ u32 flags = 0, fdb_max;
void *match_criteria;
- u32 *flow_group_in;
u8 *dmac;
esw_debug(esw->dev, "Create offloads FDB Tables\n");
@@ -599,12 +788,29 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
goto ns_err;
}
- err = esw_create_offloads_fast_fdb_table(esw);
- if (err)
- goto fast_fdb_err;
+ max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
+ MLX5_CAP_GEN(dev, max_flow_counter_15_0);
+ fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size);
+
+ esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(2^%d))\n",
+ MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size),
+ max_flow_counter, ESW_OFFLOADS_NUM_GROUPS,
+ fdb_max);
+
+ for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++)
+ esw->fdb_table.offloads.fdb_left[i] =
+ ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0;
table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2;
+ /* create the slow path fdb with encap set, so further table instances
+ * can be created at run time while VFs are probed if the FW allows that.
+ */
+ if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE)
+ flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
+ MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+
+ ft_attr.flags = flags;
ft_attr.max_fte = table_size;
ft_attr.prio = FDB_SLOW_PATH;
@@ -616,6 +822,18 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
}
esw->fdb_table.offloads.slow_fdb = fdb;
+ /* If lazy creation isn't supported, open the fast path tables now */
+ if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) &&
+ esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) {
+ esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
+ esw_warn(dev, "Lazy creation of flow tables isn't supported, ignoring priorities\n");
+ esw_get_prio_table(esw, 0, 1, 0);
+ esw_get_prio_table(esw, 0, 1, 1);
+ } else {
+ esw_debug(dev, "Lazy creation of flow tables supported, deferring table opening\n");
+ esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED;
+ }
+
/* create send-to-vport group */
memset(flow_group_in, 0, inlen);
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
@@ -663,6 +881,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports)
if (err)
goto miss_rule_err;
+ esw->nvports = nvports;
kvfree(flow_group_in);
return 0;
@@ -671,10 +890,9 @@ miss_rule_err:
miss_err:
mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
send_vport_err:
+ esw_destroy_offloads_fast_fdb_tables(esw);
mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
slow_fdb_err:
- esw_destroy_offloads_fast_fdb_table(esw);
-fast_fdb_err:
ns_err:
kvfree(flow_group_in);
return err;
@@ -682,7 +900,7 @@ ns_err:
static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
{
- if (!esw->fdb_table.offloads.fast_fdb)
+ if (!esw->fdb_table.offloads.slow_fdb)
return;
esw_debug(esw->dev, "Destroy offloads FDB Tables\n");
@@ -692,7 +910,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw)
mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb);
- esw_destroy_offloads_fast_fdb_table(esw);
+ esw_destroy_offloads_fast_fdb_tables(esw);
}
static int esw_create_offloads_table(struct mlx5_eswitch *esw)
@@ -775,10 +993,10 @@ static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw)
}
struct mlx5_flow_handle *
-mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn)
+mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport,
+ struct mlx5_flow_destination *dest)
{
struct mlx5_flow_act flow_act = {0};
- struct mlx5_flow_destination dest = {};
struct mlx5_flow_handle *flow_rule;
struct mlx5_flow_spec *spec;
void *misc;
@@ -796,12 +1014,10 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn)
MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
- dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
- dest.tir_num = tirn;
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
- &flow_act, &dest, 1);
+ &flow_act, dest, 1);
if (IS_ERR(flow_rule)) {
esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule));
goto out;
@@ -812,29 +1028,35 @@ out:
return flow_rule;
}
-static int esw_offloads_start(struct mlx5_eswitch *esw)
+static int esw_offloads_start(struct mlx5_eswitch *esw,
+ struct netlink_ext_ack *extack)
{
int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
if (esw->mode != SRIOV_LEGACY) {
- esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n");
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can't set offloads mode, SRIOV legacy not enabled");
return -EINVAL;
}
mlx5_eswitch_disable_sriov(esw);
err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
if (err) {
- esw_warn(esw->dev, "Failed setting eswitch to offloads, err %d\n", err);
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed setting eswitch to offloads");
err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
- if (err1)
- esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err1);
+ if (err1) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed setting eswitch back to legacy");
+ }
}
if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
if (mlx5_eswitch_inline_mode_get(esw,
num_vfs,
&esw->offloads.inline_mode)) {
esw->offloads.inline_mode = MLX5_INLINE_MODE_L2;
- esw_warn(esw->dev, "Inline mode is different between vports\n");
+ NL_SET_ERR_MSG_MOD(extack,
+ "Inline mode is different between vports");
}
}
return err;
@@ -945,6 +1167,8 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports)
{
int err;
+ mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
+
err = esw_create_offloads_fdb_tables(esw, nvports);
if (err)
return err;
@@ -975,17 +1199,20 @@ create_ft_err:
return err;
}
-static int esw_offloads_stop(struct mlx5_eswitch *esw)
+static int esw_offloads_stop(struct mlx5_eswitch *esw,
+ struct netlink_ext_ack *extack)
{
int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs;
mlx5_eswitch_disable_sriov(esw);
err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY);
if (err) {
- esw_warn(esw->dev, "Failed setting eswitch to legacy, err %d\n", err);
+ NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy");
err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS);
- if (err1)
- esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err);
+ if (err1) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed setting eswitch back to offloads");
+ }
}
/* enable back PF RoCE */
@@ -1094,7 +1321,8 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink)
return 0;
}
-int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
+int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
u16 cur_mlx5_mode, mlx5_mode = 0;
@@ -1113,9 +1341,9 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
return 0;
if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV)
- return esw_offloads_start(dev->priv.eswitch);
+ return esw_offloads_start(dev->priv.eswitch, extack);
else if (mode == DEVLINK_ESWITCH_MODE_LEGACY)
- return esw_offloads_stop(dev->priv.eswitch);
+ return esw_offloads_stop(dev->priv.eswitch, extack);
else
return -EINVAL;
}
@@ -1132,7 +1360,8 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
return esw_mode_to_devlink(dev->priv.eswitch->mode, mode);
}
-int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
+int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode,
+ struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
struct mlx5_eswitch *esw = dev->priv.eswitch;
@@ -1149,14 +1378,15 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
return 0;
/* fall through */
case MLX5_CAP_INLINE_MODE_L2:
- esw_warn(dev, "Inline mode can't be set\n");
+ NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set");
return -EOPNOTSUPP;
case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
break;
}
if (esw->offloads.num_flows > 0) {
- esw_warn(dev, "Can't set inline mode when flows are configured\n");
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can't set inline mode when flows are configured");
return -EOPNOTSUPP;
}
@@ -1167,8 +1397,8 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
for (vport = 1; vport < esw->enabled_vports; vport++) {
err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
if (err) {
- esw_warn(dev, "Failed to set min inline on vport %d\n",
- vport);
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed to set min inline on vport");
goto revert_inline_mode;
}
}
@@ -1234,7 +1464,8 @@ out:
return 0;
}
-int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap)
+int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap,
+ struct netlink_ext_ack *extack)
{
struct mlx5_core_dev *dev = devlink_priv(devlink);
struct mlx5_eswitch *esw = dev->priv.eswitch;
@@ -1245,7 +1476,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap)
return err;
if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE &&
- (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) ||
+ (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) ||
!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)))
return -EOPNOTSUPP;
@@ -1261,19 +1492,24 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap)
return 0;
if (esw->offloads.num_flows > 0) {
- esw_warn(dev, "Can't set encapsulation when flows are configured\n");
+ NL_SET_ERR_MSG_MOD(extack,
+ "Can't set encapsulation when flows are configured");
return -EOPNOTSUPP;
}
- esw_destroy_offloads_fast_fdb_table(esw);
+ esw_destroy_offloads_fdb_tables(esw);
esw->offloads.encap = encap;
- err = esw_create_offloads_fast_fdb_table(esw);
+
+ err = esw_create_offloads_fdb_tables(esw, esw->nvports);
+
if (err) {
- esw_warn(esw->dev, "Failed re-creating fast FDB table, err %d\n", err);
+ NL_SET_ERR_MSG_MOD(extack,
+ "Failed re-creating fast FDB table");
esw->offloads.encap = !encap;
- (void)esw_create_offloads_fast_fdb_table(esw);
+ (void)esw_create_offloads_fdb_tables(esw, esw->nvports);
}
+
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index b8ee9101c506..515e3d6de051 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -649,7 +649,7 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev,
(match_criteria_enable &
~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) ||
(flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) ||
- flow_act->has_flow_tag)
+ (flow_act->flags & FLOW_ACT_HAS_TAG))
return false;
return true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 8e01f818021b..08a891f9aade 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -152,7 +152,8 @@ static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
struct mlx5_flow_table *next_ft,
unsigned int *table_id, u32 flags)
{
- int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN);
+ int en_encap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT);
+ int en_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0};
int err;
@@ -169,9 +170,9 @@ static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
}
MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en,
- en_encap_decap);
- MLX5_SET(create_flow_table_in, in, flow_table_context.encap_en,
- en_encap_decap);
+ en_decap);
+ MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en,
+ en_encap);
switch (op_mod) {
case FS_FT_OP_MOD_NORMAL:
@@ -343,7 +344,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag);
MLX5_SET(flow_context, in_flow_context, action, fte->action.action);
- MLX5_SET(flow_context, in_flow_context, encap_id, fte->action.encap_id);
+ MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
+ fte->action.reformat_id);
MLX5_SET(flow_context, in_flow_context, modify_header_id,
fte->action.modify_id);
@@ -417,7 +419,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
continue;
MLX5_SET(flow_counter_list, in_dests, flow_counter_id,
- dst->dest_attr.counter->id);
+ dst->dest_attr.counter_id);
in_dests += MLX5_ST_SZ_BYTES(dest_format_struct);
list_size++;
}
@@ -594,62 +596,78 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
*bytes = MLX5_GET64(traffic_counter, stats, octets);
}
-int mlx5_encap_alloc(struct mlx5_core_dev *dev,
- int header_type,
- size_t size,
- void *encap_header,
- u32 *encap_id)
+int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
+ int reformat_type,
+ size_t size,
+ void *reformat_data,
+ enum mlx5_flow_namespace_type namespace,
+ u32 *packet_reformat_id)
{
- int max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size);
- u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)];
- void *encap_header_in;
- void *header;
+ u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)];
+ void *packet_reformat_context_in;
+ int max_encap_size;
+ void *reformat;
int inlen;
int err;
u32 *in;
+ if (namespace == MLX5_FLOW_NAMESPACE_FDB)
+ max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size);
+ else
+ max_encap_size = MLX5_CAP_FLOWTABLE(dev, max_encap_header_size);
+
if (size > max_encap_size) {
mlx5_core_warn(dev, "encap size %zd too big, max supported is %d\n",
size, max_encap_size);
return -EINVAL;
}
- in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + size,
+ in = kzalloc(MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in) + size,
GFP_KERNEL);
if (!in)
return -ENOMEM;
- encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, encap_header);
- header = MLX5_ADDR_OF(encap_header_in, encap_header_in, encap_header);
- inlen = header - (void *)in + size;
+ packet_reformat_context_in = MLX5_ADDR_OF(alloc_packet_reformat_context_in,
+ in, packet_reformat_context);
+ reformat = MLX5_ADDR_OF(packet_reformat_context_in,
+ packet_reformat_context_in,
+ reformat_data);
+ inlen = reformat - (void *)in + size;
memset(in, 0, inlen);
- MLX5_SET(alloc_encap_header_in, in, opcode,
- MLX5_CMD_OP_ALLOC_ENCAP_HEADER);
- MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size);
- MLX5_SET(encap_header_in, encap_header_in, header_type, header_type);
- memcpy(header, encap_header, size);
+ MLX5_SET(alloc_packet_reformat_context_in, in, opcode,
+ MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT);
+ MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
+ reformat_data_size, size);
+ MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
+ reformat_type, reformat_type);
+ memcpy(reformat, reformat_data, size);
memset(out, 0, sizeof(out));
err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
- *encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id);
+ *packet_reformat_id = MLX5_GET(alloc_packet_reformat_context_out,
+ out, packet_reformat_id);
kfree(in);
return err;
}
+EXPORT_SYMBOL(mlx5_packet_reformat_alloc);
-void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id)
+void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
+ u32 packet_reformat_id)
{
- u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)];
- u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)];
+ u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)];
+ u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)];
memset(in, 0, sizeof(in));
- MLX5_SET(dealloc_encap_header_in, in, opcode,
- MLX5_CMD_OP_DEALLOC_ENCAP_HEADER);
- MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id);
+ MLX5_SET(dealloc_packet_reformat_context_in, in, opcode,
+ MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
+ MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id,
+ packet_reformat_id);
mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_packet_reformat_dealloc);
int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
u8 namespace, u8 num_actions,
@@ -667,9 +685,14 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
table_type = FS_FT_FDB;
break;
case MLX5_FLOW_NAMESPACE_KERNEL:
+ case MLX5_FLOW_NAMESPACE_BYPASS:
max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions);
table_type = FS_FT_NIC_RX;
break;
+ case MLX5_FLOW_NAMESPACE_EGRESS:
+ max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions);
+ table_type = FS_FT_NIC_TX;
+ break;
default:
return -EOPNOTSUPP;
}
@@ -702,6 +725,7 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
kfree(in);
return err;
}
+EXPORT_SYMBOL(mlx5_modify_header_alloc);
void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id)
{
@@ -716,6 +740,7 @@ void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id)
mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
+EXPORT_SYMBOL(mlx5_modify_header_dealloc);
static const struct mlx5_flow_cmds mlx5_flow_cmds = {
.create_flow_table = mlx5_cmd_create_flow_table,
@@ -760,8 +785,8 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ
case FS_FT_FDB:
case FS_FT_SNIFFER_RX:
case FS_FT_SNIFFER_TX:
- return mlx5_fs_cmd_get_fw_cmds();
case FS_FT_NIC_TX:
+ return mlx5_fs_cmd_get_fw_cmds();
default:
return mlx5_fs_cmd_get_stub_cmds();
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 37d114c668b7..9d73eb955f75 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -40,6 +40,7 @@
#include "diag/fs_tracepoint.h"
#include "accel/ipsec.h"
#include "fpga/ipsec.h"
+#include "eswitch.h"
#define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\
sizeof(struct init_tree_node))
@@ -76,6 +77,14 @@
FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \
FS_CAP(flow_table_properties_nic_receive.flow_table_modify))
+#define FS_CHAINING_CAPS_EGRESS \
+ FS_REQUIRED_CAPS( \
+ FS_CAP(flow_table_properties_nic_transmit.flow_modify_en), \
+ FS_CAP(flow_table_properties_nic_transmit.modify_root), \
+ FS_CAP(flow_table_properties_nic_transmit \
+ .identified_miss_table_mode), \
+ FS_CAP(flow_table_properties_nic_transmit.flow_table_modify))
+
#define LEFTOVERS_NUM_LEVELS 1
#define LEFTOVERS_NUM_PRIOS 1
@@ -151,6 +160,17 @@ static struct init_tree_node {
}
};
+static struct init_tree_node egress_root_fs = {
+ .type = FS_TYPE_NAMESPACE,
+ .ar_size = 1,
+ .children = (struct init_tree_node[]) {
+ ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0,
+ FS_CHAINING_CAPS_EGRESS,
+ ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS,
+ BY_PASS_PRIO_NUM_LEVELS))),
+ }
+};
+
enum fs_i_lock_class {
FS_LOCK_GRANDPARENT,
FS_LOCK_PARENT,
@@ -694,7 +714,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root,
struct fs_node *iter = list_entry(start, struct fs_node, list);
struct mlx5_flow_table *ft = NULL;
- if (!root)
+ if (!root || root->type == FS_TYPE_PRIO_CHAINS)
return NULL;
list_for_each_advance_continue(iter, &root->children, reverse) {
@@ -1388,7 +1408,7 @@ static bool check_conflicting_actions(u32 action1, u32 action2)
return false;
if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP |
- MLX5_FLOW_CONTEXT_ACTION_ENCAP |
+ MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT |
MLX5_FLOW_CONTEXT_ACTION_DECAP |
MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
@@ -1408,7 +1428,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act
return -EEXIST;
}
- if (flow_act->has_flow_tag &&
+ if ((flow_act->flags & FLOW_ACT_HAS_TAG) &&
fte->action.flow_tag != flow_act->flow_tag) {
mlx5_core_warn(get_dev(&fte->node),
"FTE flow tag %u already exists with different flow tag %u\n",
@@ -1455,29 +1475,8 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
return handle;
}
-struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handle)
+static bool counter_is_valid(u32 action)
{
- struct mlx5_flow_rule *dst;
- struct fs_fte *fte;
-
- fs_get_obj(fte, handle->rule[0]->node.parent);
-
- fs_for_each_dst(dst, fte) {
- if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
- return dst->dest_attr.counter;
- }
-
- return NULL;
-}
-
-static bool counter_is_valid(struct mlx5_fc *counter, u32 action)
-{
- if (!(action & MLX5_FLOW_CONTEXT_ACTION_COUNT))
- return !counter;
-
- if (!counter)
- return false;
-
return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP |
MLX5_FLOW_CONTEXT_ACTION_FWD_DEST));
}
@@ -1487,7 +1486,7 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest,
struct mlx5_flow_table *ft)
{
if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER))
- return counter_is_valid(dest->counter, action);
+ return counter_is_valid(action);
if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
return true;
@@ -1629,6 +1628,8 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft,
search_again_locked:
version = matched_fgs_get_version(match_head);
+ if (flow_act->flags & FLOW_ACT_NO_APPEND)
+ goto skip_search;
/* Try to find a fg that already contains a matching fte */
list_for_each_entry(iter, match_head, list) {
struct fs_fte *fte_tmp;
@@ -1645,6 +1646,11 @@ search_again_locked:
return rule;
}
+skip_search:
+ /* No group with matching fte found, or we skipped the search.
+ * Try to add a new fte to any matching fg.
+ */
+
/* Check the ft version, for case that new flow group
* was added while the fgs weren't locked
*/
@@ -1975,12 +1981,24 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg)
fg->id);
}
+struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev,
+ int n)
+{
+ struct mlx5_flow_steering *steering = dev->priv.steering;
+
+ if (!steering || !steering->fdb_sub_ns)
+ return NULL;
+
+ return steering->fdb_sub_ns[n];
+}
+EXPORT_SYMBOL(mlx5_get_fdb_sub_ns);
+
struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
enum mlx5_flow_namespace_type type)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
struct mlx5_flow_root_namespace *root_ns;
- int prio;
+ int prio = 0;
struct fs_prio *fs_prio;
struct mlx5_flow_namespace *ns;
@@ -1988,40 +2006,29 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
return NULL;
switch (type) {
- case MLX5_FLOW_NAMESPACE_BYPASS:
- case MLX5_FLOW_NAMESPACE_LAG:
- case MLX5_FLOW_NAMESPACE_OFFLOADS:
- case MLX5_FLOW_NAMESPACE_ETHTOOL:
- case MLX5_FLOW_NAMESPACE_KERNEL:
- case MLX5_FLOW_NAMESPACE_LEFTOVERS:
- case MLX5_FLOW_NAMESPACE_ANCHOR:
- prio = type;
- break;
case MLX5_FLOW_NAMESPACE_FDB:
if (steering->fdb_root_ns)
return &steering->fdb_root_ns->ns;
- else
- return NULL;
+ return NULL;
case MLX5_FLOW_NAMESPACE_SNIFFER_RX:
if (steering->sniffer_rx_root_ns)
return &steering->sniffer_rx_root_ns->ns;
- else
- return NULL;
+ return NULL;
case MLX5_FLOW_NAMESPACE_SNIFFER_TX:
if (steering->sniffer_tx_root_ns)
return &steering->sniffer_tx_root_ns->ns;
- else
- return NULL;
- case MLX5_FLOW_NAMESPACE_EGRESS:
- if (steering->egress_root_ns)
- return &steering->egress_root_ns->ns;
- else
- return NULL;
- default:
return NULL;
+ default:
+ break;
+ }
+
+ if (type == MLX5_FLOW_NAMESPACE_EGRESS) {
+ root_ns = steering->egress_root_ns;
+ } else { /* Must be NIC RX */
+ root_ns = steering->root_ns;
+ prio = type;
}
- root_ns = steering->root_ns;
if (!root_ns)
return NULL;
@@ -2064,8 +2071,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d
}
}
-static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
- unsigned int prio, int num_levels)
+static struct fs_prio *_fs_create_prio(struct mlx5_flow_namespace *ns,
+ unsigned int prio,
+ int num_levels,
+ enum fs_node_type type)
{
struct fs_prio *fs_prio;
@@ -2073,7 +2082,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
if (!fs_prio)
return ERR_PTR(-ENOMEM);
- fs_prio->node.type = FS_TYPE_PRIO;
+ fs_prio->node.type = type;
tree_init_node(&fs_prio->node, NULL, del_sw_prio);
tree_add_node(&fs_prio->node, &ns->node);
fs_prio->num_levels = num_levels;
@@ -2083,6 +2092,19 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
return fs_prio;
}
+static struct fs_prio *fs_create_prio_chained(struct mlx5_flow_namespace *ns,
+ unsigned int prio,
+ int num_levels)
+{
+ return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO_CHAINS);
+}
+
+static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
+ unsigned int prio, int num_levels)
+{
+ return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO);
+}
+
static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace
*ns)
{
@@ -2387,6 +2409,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
cleanup_egress_acls_root_ns(dev);
cleanup_ingress_acls_root_ns(dev);
cleanup_root_ns(steering->fdb_root_ns);
+ steering->fdb_root_ns = NULL;
+ kfree(steering->fdb_sub_ns);
+ steering->fdb_sub_ns = NULL;
cleanup_root_ns(steering->sniffer_rx_root_ns);
cleanup_root_ns(steering->sniffer_tx_root_ns);
cleanup_root_ns(steering->egress_root_ns);
@@ -2432,27 +2457,64 @@ static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering)
static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
{
- struct fs_prio *prio;
+ struct mlx5_flow_namespace *ns;
+ struct fs_prio *maj_prio;
+ struct fs_prio *min_prio;
+ int levels;
+ int chain;
+ int prio;
+ int err;
steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB);
if (!steering->fdb_root_ns)
return -ENOMEM;
- prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 2);
- if (IS_ERR(prio))
+ steering->fdb_sub_ns = kzalloc(sizeof(steering->fdb_sub_ns) *
+ (FDB_MAX_CHAIN + 1), GFP_KERNEL);
+ if (!steering->fdb_sub_ns)
+ return -ENOMEM;
+
+ levels = 2 * FDB_MAX_PRIO * (FDB_MAX_CHAIN + 1);
+ maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, 0,
+ levels);
+ if (IS_ERR(maj_prio)) {
+ err = PTR_ERR(maj_prio);
goto out_err;
+ }
+
+ for (chain = 0; chain <= FDB_MAX_CHAIN; chain++) {
+ ns = fs_create_namespace(maj_prio);
+ if (IS_ERR(ns)) {
+ err = PTR_ERR(ns);
+ goto out_err;
+ }
+
+ for (prio = 0; prio < FDB_MAX_PRIO * (chain + 1); prio++) {
+ min_prio = fs_create_prio(ns, prio, 2);
+ if (IS_ERR(min_prio)) {
+ err = PTR_ERR(min_prio);
+ goto out_err;
+ }
+ }
+
+ steering->fdb_sub_ns[chain] = ns;
+ }
- prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1);
- if (IS_ERR(prio))
+ maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1);
+ if (IS_ERR(maj_prio)) {
+ err = PTR_ERR(maj_prio);
goto out_err;
+ }
set_prio_attrs(steering->fdb_root_ns);
return 0;
out_err:
cleanup_root_ns(steering->fdb_root_ns);
+ kfree(steering->fdb_sub_ns);
+ steering->fdb_sub_ns = NULL;
steering->fdb_root_ns = NULL;
- return PTR_ERR(prio);
+ return err;
}
static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport)
@@ -2537,16 +2599,23 @@ cleanup_root_ns:
static int init_egress_root_ns(struct mlx5_flow_steering *steering)
{
- struct fs_prio *prio;
+ int err;
steering->egress_root_ns = create_root_ns(steering,
FS_FT_NIC_TX);
if (!steering->egress_root_ns)
return -ENOMEM;
- /* create 1 prio*/
- prio = fs_create_prio(&steering->egress_root_ns->ns, 0, 1);
- return PTR_ERR_OR_ZERO(prio);
+ err = init_root_tree(steering, &egress_root_fs,
+ &steering->egress_root_ns->ns.node);
+ if (err)
+ goto cleanup;
+ set_prio_attrs(steering->egress_root_ns);
+ return 0;
+cleanup:
+ cleanup_root_ns(steering->egress_root_ns);
+ steering->egress_root_ns = NULL;
+ return err;
}
int mlx5_init_fs(struct mlx5_core_dev *dev)
@@ -2614,7 +2683,7 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
goto err;
}
- if (MLX5_IPSEC_DEV(dev)) {
+ if (MLX5_IPSEC_DEV(dev) || MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
err = init_egress_root_ns(steering);
if (err)
goto err;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 32070e5d993d..b51ad217da32 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -36,10 +36,23 @@
#include <linux/refcount.h>
#include <linux/mlx5/fs.h>
#include <linux/rhashtable.h>
+#include <linux/llist.h>
+
+/* FS_TYPE_PRIO_CHAINS is a PRIO that will have namespaces only,
+ * and those are in parallel to one another when going over them to connect
+ * a new flow table. Meaning the last flow table in a TYPE_PRIO prio in one
+ * parallel namespace will not automatically connect to the first flow table
+ * found in any prio in any next namespace, but skip the entire containing
+ * TYPE_PRIO_CHAINS prio.
+ *
+ * This is used to implement tc chains, each chain of prios is a different
+ * namespace inside a containing TYPE_PRIO_CHAINS prio.
+ */
enum fs_node_type {
FS_TYPE_NAMESPACE,
FS_TYPE_PRIO,
+ FS_TYPE_PRIO_CHAINS,
FS_TYPE_FLOW_TABLE,
FS_TYPE_FLOW_GROUP,
FS_TYPE_FLOW_ENTRY,
@@ -72,6 +85,7 @@ struct mlx5_flow_steering {
struct kmem_cache *ftes_cache;
struct mlx5_flow_root_namespace *root_ns;
struct mlx5_flow_root_namespace *fdb_root_ns;
+ struct mlx5_flow_namespace **fdb_sub_ns;
struct mlx5_flow_root_namespace **esw_egress_root_ns;
struct mlx5_flow_root_namespace **esw_ingress_root_ns;
struct mlx5_flow_root_namespace *sniffer_tx_root_ns;
@@ -138,8 +152,9 @@ struct mlx5_fc_cache {
};
struct mlx5_fc {
- struct rb_node node;
struct list_head list;
+ struct llist_node addlist;
+ struct llist_node dellist;
/* last{packets,bytes} members are used when calculating the delta since
* last reading
@@ -148,7 +163,6 @@ struct mlx5_fc {
u64 lastbytes;
u32 id;
- bool deleted;
bool aging;
struct mlx5_fc_cache cache ____cacheline_aligned_in_smp;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
index 58af6be13dfa..32accd6b041b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c
@@ -52,11 +52,13 @@
* access to counter list:
* - create (user context)
* - mlx5_fc_create() only adds to an addlist to be used by
- * mlx5_fc_stats_query_work(). addlist is protected by a spinlock.
+ * mlx5_fc_stats_query_work(). addlist is a lockless single linked list
+ * that doesn't require any additional synchronization when adding single
+ * node.
* - spawn thread to do the actual destroy
*
* - destroy (user context)
- * - mark a counter as deleted
+ * - add a counter to lockless dellist
* - spawn thread to do the actual del
*
* - dump (user context)
@@ -71,36 +73,55 @@
* elapsed, the thread will actually query the hardware.
*/
-static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter)
+static struct list_head *mlx5_fc_counters_lookup_next(struct mlx5_core_dev *dev,
+ u32 id)
{
- struct rb_node **new = &root->rb_node;
- struct rb_node *parent = NULL;
-
- while (*new) {
- struct mlx5_fc *this = rb_entry(*new, struct mlx5_fc, node);
- int result = counter->id - this->id;
-
- parent = *new;
- if (result < 0)
- new = &((*new)->rb_left);
- else
- new = &((*new)->rb_right);
- }
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ unsigned long next_id = (unsigned long)id + 1;
+ struct mlx5_fc *counter;
+
+ rcu_read_lock();
+ /* skip counters that are in idr, but not yet in counters list */
+ while ((counter = idr_get_next_ul(&fc_stats->counters_idr,
+ &next_id)) != NULL &&
+ list_empty(&counter->list))
+ next_id++;
+ rcu_read_unlock();
+
+ return counter ? &counter->list : &fc_stats->counters;
+}
+
+static void mlx5_fc_stats_insert(struct mlx5_core_dev *dev,
+ struct mlx5_fc *counter)
+{
+ struct list_head *next = mlx5_fc_counters_lookup_next(dev, counter->id);
+
+ list_add_tail(&counter->list, next);
+}
+
+static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev,
+ struct mlx5_fc *counter)
+{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
- /* Add new node and rebalance tree. */
- rb_link_node(&counter->node, parent, new);
- rb_insert_color(&counter->node, root);
+ list_del(&counter->list);
+
+ spin_lock(&fc_stats->counters_idr_lock);
+ WARN_ON(!idr_remove(&fc_stats->counters_idr, counter->id));
+ spin_unlock(&fc_stats->counters_idr_lock);
}
-/* The function returns the last node that was queried so the caller
+/* The function returns the last counter that was queried so the caller
* function can continue calling it till all counters are queried.
*/
-static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
+static struct mlx5_fc *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
struct mlx5_fc *first,
u32 last_id)
{
+ struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ struct mlx5_fc *counter = NULL;
struct mlx5_cmd_fc_bulk *b;
- struct rb_node *node = NULL;
+ bool more = false;
u32 afirst_id;
int num;
int err;
@@ -130,14 +151,16 @@ static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
goto out;
}
- for (node = &first->node; node; node = rb_next(node)) {
- struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node);
+ counter = first;
+ list_for_each_entry_from(counter, &fc_stats->counters, list) {
struct mlx5_fc_cache *c = &counter->cache;
u64 packets;
u64 bytes;
- if (counter->id > last_id)
+ if (counter->id > last_id) {
+ more = true;
break;
+ }
mlx5_cmd_fc_bulk_get(dev, b,
counter->id, &packets, &bytes);
@@ -153,7 +176,14 @@ static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev,
out:
mlx5_cmd_fc_bulk_free(b);
- return node;
+ return more ? counter : NULL;
+}
+
+static void mlx5_free_fc(struct mlx5_core_dev *dev,
+ struct mlx5_fc *counter)
+{
+ mlx5_cmd_fc_free(dev, counter->id);
+ kfree(counter);
}
static void mlx5_fc_stats_work(struct work_struct *work)
@@ -161,52 +191,36 @@ static void mlx5_fc_stats_work(struct work_struct *work)
struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev,
priv.fc_stats.work.work);
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ /* Take dellist first to ensure that counters cannot be deleted before
+ * they are inserted.
+ */
+ struct llist_node *dellist = llist_del_all(&fc_stats->dellist);
+ struct llist_node *addlist = llist_del_all(&fc_stats->addlist);
+ struct mlx5_fc *counter = NULL, *last = NULL, *tmp;
unsigned long now = jiffies;
- struct mlx5_fc *counter = NULL;
- struct mlx5_fc *last = NULL;
- struct rb_node *node;
- LIST_HEAD(tmplist);
- spin_lock(&fc_stats->addlist_lock);
-
- list_splice_tail_init(&fc_stats->addlist, &tmplist);
-
- if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters))
+ if (addlist || !list_empty(&fc_stats->counters))
queue_delayed_work(fc_stats->wq, &fc_stats->work,
fc_stats->sampling_interval);
- spin_unlock(&fc_stats->addlist_lock);
-
- list_for_each_entry(counter, &tmplist, list)
- mlx5_fc_stats_insert(&fc_stats->counters, counter);
+ llist_for_each_entry(counter, addlist, addlist)
+ mlx5_fc_stats_insert(dev, counter);
- node = rb_first(&fc_stats->counters);
- while (node) {
- counter = rb_entry(node, struct mlx5_fc, node);
+ llist_for_each_entry_safe(counter, tmp, dellist, dellist) {
+ mlx5_fc_stats_remove(dev, counter);
- node = rb_next(node);
-
- if (counter->deleted) {
- rb_erase(&counter->node, &fc_stats->counters);
-
- mlx5_cmd_fc_free(dev, counter->id);
-
- kfree(counter);
- continue;
- }
-
- last = counter;
+ mlx5_free_fc(dev, counter);
}
- if (time_before(now, fc_stats->next_query) || !last)
+ if (time_before(now, fc_stats->next_query) ||
+ list_empty(&fc_stats->counters))
return;
+ last = list_last_entry(&fc_stats->counters, struct mlx5_fc, list);
- node = rb_first(&fc_stats->counters);
- while (node) {
- counter = rb_entry(node, struct mlx5_fc, node);
-
- node = mlx5_fc_stats_query(dev, counter, last->id);
- }
+ counter = list_first_entry(&fc_stats->counters, struct mlx5_fc,
+ list);
+ while (counter)
+ counter = mlx5_fc_stats_query(dev, counter, last->id);
fc_stats->next_query = now + fc_stats->sampling_interval;
}
@@ -220,24 +234,38 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging)
counter = kzalloc(sizeof(*counter), GFP_KERNEL);
if (!counter)
return ERR_PTR(-ENOMEM);
+ INIT_LIST_HEAD(&counter->list);
err = mlx5_cmd_fc_alloc(dev, &counter->id);
if (err)
goto err_out;
if (aging) {
+ u32 id = counter->id;
+
counter->cache.lastuse = jiffies;
counter->aging = true;
- spin_lock(&fc_stats->addlist_lock);
- list_add(&counter->list, &fc_stats->addlist);
- spin_unlock(&fc_stats->addlist_lock);
+ idr_preload(GFP_KERNEL);
+ spin_lock(&fc_stats->counters_idr_lock);
+
+ err = idr_alloc_u32(&fc_stats->counters_idr, counter, &id, id,
+ GFP_NOWAIT);
+
+ spin_unlock(&fc_stats->counters_idr_lock);
+ idr_preload_end();
+ if (err)
+ goto err_out_alloc;
+
+ llist_add(&counter->addlist, &fc_stats->addlist);
mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
}
return counter;
+err_out_alloc:
+ mlx5_cmd_fc_free(dev, counter->id);
err_out:
kfree(counter);
@@ -245,6 +273,12 @@ err_out:
}
EXPORT_SYMBOL(mlx5_fc_create);
+u32 mlx5_fc_id(struct mlx5_fc *counter)
+{
+ return counter->id;
+}
+EXPORT_SYMBOL(mlx5_fc_id);
+
void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
{
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
@@ -253,13 +287,12 @@ void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter)
return;
if (counter->aging) {
- counter->deleted = true;
+ llist_add(&counter->dellist, &fc_stats->dellist);
mod_delayed_work(fc_stats->wq, &fc_stats->work, 0);
return;
}
- mlx5_cmd_fc_free(dev, counter->id);
- kfree(counter);
+ mlx5_free_fc(dev, counter);
}
EXPORT_SYMBOL(mlx5_fc_destroy);
@@ -267,9 +300,11 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
{
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
- fc_stats->counters = RB_ROOT;
- INIT_LIST_HEAD(&fc_stats->addlist);
- spin_lock_init(&fc_stats->addlist_lock);
+ spin_lock_init(&fc_stats->counters_idr_lock);
+ idr_init(&fc_stats->counters_idr);
+ INIT_LIST_HEAD(&fc_stats->counters);
+ init_llist_head(&fc_stats->addlist);
+ init_llist_head(&fc_stats->dellist);
fc_stats->wq = create_singlethread_workqueue("mlx5_fc");
if (!fc_stats->wq)
@@ -284,34 +319,22 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev)
void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev)
{
struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats;
+ struct llist_node *tmplist;
struct mlx5_fc *counter;
struct mlx5_fc *tmp;
- struct rb_node *node;
cancel_delayed_work_sync(&dev->priv.fc_stats.work);
destroy_workqueue(dev->priv.fc_stats.wq);
dev->priv.fc_stats.wq = NULL;
- list_for_each_entry_safe(counter, tmp, &fc_stats->addlist, list) {
- list_del(&counter->list);
-
- mlx5_cmd_fc_free(dev, counter->id);
+ idr_destroy(&fc_stats->counters_idr);
- kfree(counter);
- }
-
- node = rb_first(&fc_stats->counters);
- while (node) {
- counter = rb_entry(node, struct mlx5_fc, node);
-
- node = rb_next(node);
+ tmplist = llist_del_all(&fc_stats->addlist);
+ llist_for_each_entry_safe(counter, tmp, tmplist, addlist)
+ mlx5_free_fc(dev, counter);
- rb_erase(&counter->node, &fc_stats->counters);
-
- mlx5_cmd_fc_free(dev, counter->id);
-
- kfree(counter);
- }
+ list_for_each_entry_safe(counter, tmp, &fc_stats->counters, list)
+ mlx5_free_fc(dev, counter);
}
int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 41ad24f0de2c..1ab6f7e3bec6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -250,7 +250,7 @@ int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev)
if (ret)
return ret;
- force_state = MLX5_GET(teardown_hca_out, out, force_state);
+ force_state = MLX5_GET(teardown_hca_out, out, state);
if (force_state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) {
mlx5_core_warn(dev, "teardown with force mode failed, doing normal teardown\n");
return -EIO;
@@ -259,6 +259,54 @@ int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev)
return 0;
}
+#define MLX5_FAST_TEARDOWN_WAIT_MS 3000
+int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev)
+{
+ unsigned long end, delay_ms = MLX5_FAST_TEARDOWN_WAIT_MS;
+ u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0};
+ u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0};
+ int state;
+ int ret;
+
+ if (!MLX5_CAP_GEN(dev, fast_teardown)) {
+ mlx5_core_dbg(dev, "fast teardown is not supported in the firmware\n");
+ return -EOPNOTSUPP;
+ }
+
+ MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
+ MLX5_SET(teardown_hca_in, in, profile,
+ MLX5_TEARDOWN_HCA_IN_PROFILE_PREPARE_FAST_TEARDOWN);
+
+ ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (ret)
+ return ret;
+
+ state = MLX5_GET(teardown_hca_out, out, state);
+ if (state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) {
+ mlx5_core_warn(dev, "teardown with fast mode failed\n");
+ return -EIO;
+ }
+
+ mlx5_set_nic_state(dev, MLX5_NIC_IFC_DISABLED);
+
+ /* Loop until device state turns to disable */
+ end = jiffies + msecs_to_jiffies(delay_ms);
+ do {
+ if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
+ break;
+
+ cond_resched();
+ } while (!time_after(jiffies, end));
+
+ if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) {
+ dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
+ mlx5_get_nic_state(dev), delay_ms);
+ return -EIO;
+ }
+
+ return 0;
+}
+
enum mlxsw_reg_mcc_instruction {
MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE = 0x01,
MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE = 0x02,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 9f39aeca863f..43118de8ee99 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -59,22 +59,25 @@ enum {
};
enum {
- MLX5_NIC_IFC_FULL = 0,
- MLX5_NIC_IFC_DISABLED = 1,
- MLX5_NIC_IFC_NO_DRAM_NIC = 2,
- MLX5_NIC_IFC_INVALID = 3
-};
-
-enum {
MLX5_DROP_NEW_HEALTH_WORK,
MLX5_DROP_NEW_RECOVERY_WORK,
};
-static u8 get_nic_state(struct mlx5_core_dev *dev)
+u8 mlx5_get_nic_state(struct mlx5_core_dev *dev)
{
return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3;
}
+void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
+{
+ u32 cur_cmdq_addr_l_sz;
+
+ cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz);
+ iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) |
+ state << MLX5_NIC_IFC_OFFSET,
+ &dev->iseg->cmdq_addr_l_sz);
+}
+
static void trigger_cmd_completions(struct mlx5_core_dev *dev)
{
unsigned long flags;
@@ -103,7 +106,7 @@ static int in_fatal(struct mlx5_core_dev *dev)
struct mlx5_core_health *health = &dev->priv.health;
struct health_buffer __iomem *h = health->health;
- if (get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
+ if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
return 1;
if (ioread32be(&h->fw_ver) == 0xffffffff)
@@ -133,7 +136,7 @@ unlock:
static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
{
- u8 nic_interface = get_nic_state(dev);
+ u8 nic_interface = mlx5_get_nic_state(dev);
switch (nic_interface) {
case MLX5_NIC_IFC_FULL:
@@ -168,7 +171,7 @@ static void health_recover(struct work_struct *work)
priv = container_of(health, struct mlx5_priv, health);
dev = container_of(priv, struct mlx5_core_dev, priv);
- nic_state = get_nic_state(dev);
+ nic_state = mlx5_get_nic_state(dev);
if (nic_state == MLX5_NIC_IFC_INVALID) {
dev_err(&dev->pdev->dev, "health recovery flow aborted since the nic state is invalid\n");
return;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index e3797a44e074..b59953daf8b4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -45,6 +45,7 @@ static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu);
static const struct net_device_ops mlx5i_netdev_ops = {
.ndo_open = mlx5i_open,
.ndo_stop = mlx5i_close,
+ .ndo_get_stats64 = mlx5i_get_stats,
.ndo_init = mlx5i_dev_init,
.ndo_uninit = mlx5i_dev_cleanup,
.ndo_change_mtu = mlx5i_change_mtu,
@@ -70,26 +71,25 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
}
/* Called directly after IPoIB netdevice was created to initialize SW structs */
-void mlx5i_init(struct mlx5_core_dev *mdev,
- struct net_device *netdev,
- const struct mlx5e_profile *profile,
- void *ppriv)
+int mlx5i_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
{
struct mlx5e_priv *priv = mlx5i_epriv(netdev);
u16 max_mtu;
+ int err;
- /* priv init */
- priv->mdev = mdev;
- priv->netdev = netdev;
- priv->profile = profile;
- priv->ppriv = ppriv;
- mutex_init(&priv->state_lock);
+ err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv);
+ if (err)
+ return err;
mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
netdev->mtu = max_mtu;
mlx5e_build_nic_params(mdev, &priv->channels.params,
- profile->max_nch(mdev), netdev->mtu);
+ mlx5e_get_netdev_max_channels(netdev),
+ netdev->mtu);
mlx5i_build_nic_params(mdev, &priv->channels.params);
mlx5e_timestamp_init(priv);
@@ -106,12 +106,56 @@ void mlx5i_init(struct mlx5_core_dev *mdev,
netdev->netdev_ops = &mlx5i_netdev_ops;
netdev->ethtool_ops = &mlx5i_ethtool_ops;
+
+ return 0;
}
/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */
-static void mlx5i_cleanup(struct mlx5e_priv *priv)
+void mlx5i_cleanup(struct mlx5e_priv *priv)
+{
+ mlx5e_netdev_cleanup(priv->netdev, priv);
+}
+
+static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv)
+{
+ int max_nch = mlx5e_get_netdev_max_channels(priv->netdev);
+ struct mlx5e_sw_stats s = { 0 };
+ int i, j;
+
+ for (i = 0; i < max_nch; i++) {
+ struct mlx5e_channel_stats *channel_stats;
+ struct mlx5e_rq_stats *rq_stats;
+
+ channel_stats = &priv->channel_stats[i];
+ rq_stats = &channel_stats->rq;
+
+ s.rx_packets += rq_stats->packets;
+ s.rx_bytes += rq_stats->bytes;
+
+ for (j = 0; j < priv->max_opened_tc; j++) {
+ struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
+
+ s.tx_packets += sq_stats->packets;
+ s.tx_bytes += sq_stats->bytes;
+ s.tx_queue_dropped += sq_stats->dropped;
+ }
+ }
+
+ memcpy(&priv->stats.sw, &s, sizeof(s));
+}
+
+void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
{
- /* Do nothing .. */
+ struct mlx5e_priv *priv = mlx5i_epriv(dev);
+ struct mlx5e_sw_stats *sstats = &priv->stats.sw;
+
+ mlx5i_grp_sw_update_stats(priv);
+
+ stats->rx_packets = sstats->rx_packets;
+ stats->rx_bytes = sstats->rx_bytes;
+ stats->tx_packets = sstats->tx_packets;
+ stats->tx_bytes = sstats->tx_bytes;
+ stats->tx_dropped = sstats->tx_queue_dropped;
}
int mlx5i_init_underlay_qp(struct mlx5e_priv *priv)
@@ -306,17 +350,26 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv)
static int mlx5i_init_rx(struct mlx5e_priv *priv)
{
+ struct mlx5_core_dev *mdev = priv->mdev;
int err;
+ mlx5e_create_q_counters(priv);
+
+ err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
+ if (err) {
+ mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
+ goto err_destroy_q_counters;
+ }
+
err = mlx5e_create_indirect_rqt(priv);
if (err)
- return err;
+ goto err_close_drop_rq;
err = mlx5e_create_direct_rqts(priv);
if (err)
goto err_destroy_indirect_rqts;
- err = mlx5e_create_indirect_tirs(priv);
+ err = mlx5e_create_indirect_tirs(priv, true);
if (err)
goto err_destroy_direct_rqts;
@@ -333,11 +386,15 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv)
err_destroy_direct_tirs:
mlx5e_destroy_direct_tirs(priv);
err_destroy_indirect_tirs:
- mlx5e_destroy_indirect_tirs(priv);
+ mlx5e_destroy_indirect_tirs(priv, true);
err_destroy_direct_rqts:
mlx5e_destroy_direct_rqts(priv);
err_destroy_indirect_rqts:
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+err_close_drop_rq:
+ mlx5e_close_drop_rq(&priv->drop_rq);
+err_destroy_q_counters:
+ mlx5e_destroy_q_counters(priv);
return err;
}
@@ -345,9 +402,11 @@ static void mlx5i_cleanup_rx(struct mlx5e_priv *priv)
{
mlx5i_destroy_flow_steering(priv);
mlx5e_destroy_direct_tirs(priv);
- mlx5e_destroy_indirect_tirs(priv);
+ mlx5e_destroy_indirect_tirs(priv, true);
mlx5e_destroy_direct_rqts(priv);
mlx5e_destroy_rqt(priv, &priv->indir_rqt);
+ mlx5e_close_drop_rq(&priv->drop_rq);
+ mlx5e_destroy_q_counters(priv);
}
static const struct mlx5e_profile mlx5i_nic_profile = {
@@ -360,7 +419,6 @@ static const struct mlx5e_profile mlx5i_nic_profile = {
.enable = NULL, /* mlx5i_enable */
.disable = NULL, /* mlx5i_disable */
.update_stats = NULL, /* mlx5i_update_stats */
- .max_nch = mlx5e_get_max_num_channels,
.update_carrier = NULL, /* no HW update in IB link */
.rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
.rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
@@ -592,7 +650,6 @@ static void mlx5_rdma_netdev_free(struct net_device *netdev)
mlx5e_detach_netdev(priv);
profile->cleanup(priv);
- destroy_workqueue(priv->wq);
if (!ipriv->sub_interface) {
mlx5i_pkey_qpn_ht_cleanup(netdev);
@@ -600,58 +657,37 @@ static void mlx5_rdma_netdev_free(struct net_device *netdev)
}
}
-struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
- struct ib_device *ibdev,
- const char *name,
- void (*setup)(struct net_device *))
+static bool mlx5_is_sub_interface(struct mlx5_core_dev *mdev)
+{
+ return mdev->mlx5e_res.pdn != 0;
+}
+
+static const struct mlx5e_profile *mlx5_get_profile(struct mlx5_core_dev *mdev)
+{
+ if (mlx5_is_sub_interface(mdev))
+ return mlx5i_pkey_get_profile();
+ return &mlx5i_nic_profile;
+}
+
+static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u8 port_num,
+ struct net_device *netdev, void *param)
{
- const struct mlx5e_profile *profile;
- struct net_device *netdev;
+ struct mlx5_core_dev *mdev = (struct mlx5_core_dev *)param;
+ const struct mlx5e_profile *prof = mlx5_get_profile(mdev);
struct mlx5i_priv *ipriv;
struct mlx5e_priv *epriv;
struct rdma_netdev *rn;
- bool sub_interface;
- int nch;
int err;
- if (mlx5i_check_required_hca_cap(mdev)) {
- mlx5_core_warn(mdev, "Accelerated mode is not supported\n");
- return ERR_PTR(-EOPNOTSUPP);
- }
-
- /* TODO: Need to find a better way to check if child device*/
- sub_interface = (mdev->mlx5e_res.pdn != 0);
-
- if (sub_interface)
- profile = mlx5i_pkey_get_profile();
- else
- profile = &mlx5i_nic_profile;
-
- nch = profile->max_nch(mdev);
-
- netdev = alloc_netdev_mqs(sizeof(struct mlx5i_priv) + sizeof(struct mlx5e_priv),
- name, NET_NAME_UNKNOWN,
- setup,
- nch * MLX5E_MAX_NUM_TC,
- nch);
- if (!netdev) {
- mlx5_core_warn(mdev, "alloc_netdev_mqs failed\n");
- return NULL;
- }
-
ipriv = netdev_priv(netdev);
epriv = mlx5i_epriv(netdev);
- epriv->wq = create_singlethread_workqueue("mlx5i");
- if (!epriv->wq)
- goto err_free_netdev;
-
- ipriv->sub_interface = sub_interface;
+ ipriv->sub_interface = mlx5_is_sub_interface(mdev);
if (!ipriv->sub_interface) {
err = mlx5i_pkey_qpn_ht_init(netdev);
if (err) {
mlx5_core_warn(mdev, "allocate qpn_to_netdev ht failed\n");
- goto destroy_wq;
+ return err;
}
/* This should only be called once per mdev */
@@ -660,7 +696,7 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
goto destroy_ht;
}
- profile->init(mdev, netdev, profile, ipriv);
+ prof->init(mdev, netdev, prof, ipriv);
mlx5e_attach_netdev(epriv);
netif_carrier_off(netdev);
@@ -676,15 +712,35 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev,
netdev->priv_destructor = mlx5_rdma_netdev_free;
netdev->needs_free_netdev = 1;
- return netdev;
+ return 0;
destroy_ht:
mlx5i_pkey_qpn_ht_cleanup(netdev);
-destroy_wq:
- destroy_workqueue(epriv->wq);
-err_free_netdev:
- free_netdev(netdev);
+ return err;
+}
- return NULL;
+int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev,
+ struct ib_device *device,
+ struct rdma_netdev_alloc_params *params)
+{
+ int nch;
+ int rc;
+
+ rc = mlx5i_check_required_hca_cap(mdev);
+ if (rc)
+ return rc;
+
+ nch = mlx5e_get_max_num_channels(mdev);
+
+ *params = (struct rdma_netdev_alloc_params){
+ .sizeof_priv = sizeof(struct mlx5i_priv) +
+ sizeof(struct mlx5e_priv),
+ .txqs = nch * MLX5E_MAX_NUM_TC,
+ .rxqs = nch,
+ .param = mdev,
+ .initialize_rdma_netdev = mlx5_rdma_setup_rn,
+ };
+
+ return 0;
}
-EXPORT_SYMBOL(mlx5_rdma_netdev_alloc);
+EXPORT_SYMBOL(mlx5_rdma_rn_get_params);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
index 0982c579ec74..9165ca567047 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -84,10 +84,11 @@ void mlx5i_dev_cleanup(struct net_device *dev);
int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
/* Parent profile functions */
-void mlx5i_init(struct mlx5_core_dev *mdev,
- struct net_device *netdev,
- const struct mlx5e_profile *profile,
- void *ppriv);
+int mlx5i_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv);
+void mlx5i_cleanup(struct mlx5e_priv *priv);
/* Get child interface nic profile */
const struct mlx5e_profile *mlx5i_pkey_get_profile(void);
@@ -120,6 +121,7 @@ static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq,
netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
struct mlx5_av *av, u32 dqpn, u32 dqkey);
void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
#endif /* CONFIG_MLX5_CORE_IPOIB */
#endif /* __MLX5E_IPOB_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
index 54a188f41f90..b491b8f5fd6b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -146,6 +146,7 @@ static const struct net_device_ops mlx5i_pkey_netdev_ops = {
.ndo_open = mlx5i_pkey_open,
.ndo_stop = mlx5i_pkey_close,
.ndo_init = mlx5i_pkey_dev_init,
+ .ndo_get_stats64 = mlx5i_get_stats,
.ndo_uninit = mlx5i_pkey_dev_cleanup,
.ndo_change_mtu = mlx5i_pkey_change_mtu,
.ndo_do_ioctl = mlx5i_pkey_ioctl,
@@ -274,14 +275,17 @@ static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu)
}
/* Called directly after IPoIB netdevice was created to initialize SW structs */
-static void mlx5i_pkey_init(struct mlx5_core_dev *mdev,
- struct net_device *netdev,
- const struct mlx5e_profile *profile,
- void *ppriv)
+static int mlx5i_pkey_init(struct mlx5_core_dev *mdev,
+ struct net_device *netdev,
+ const struct mlx5e_profile *profile,
+ void *ppriv)
{
struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+ int err;
- mlx5i_init(mdev, netdev, profile, ppriv);
+ err = mlx5i_init(mdev, netdev, profile, ppriv);
+ if (err)
+ return err;
/* Override parent ndo */
netdev->netdev_ops = &mlx5i_pkey_netdev_ops;
@@ -291,12 +295,14 @@ static void mlx5i_pkey_init(struct mlx5_core_dev *mdev,
/* Use dummy rqs */
priv->channels.params.log_rq_mtu_frames = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
+
+ return 0;
}
/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */
static void mlx5i_pkey_cleanup(struct mlx5e_priv *priv)
{
- /* Do nothing .. */
+ mlx5i_cleanup(priv);
}
static int mlx5i_pkey_init_tx(struct mlx5e_priv *priv)
@@ -345,7 +351,6 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = {
.enable = NULL,
.disable = NULL,
.update_stats = NULL,
- .max_nch = mlx5e_get_max_num_channels,
.rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe,
.rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
.max_tc = MLX5I_MAX_NUM_TC,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 3f767cde4c1d..0d90b1b4a3d3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -111,10 +111,10 @@ static void mlx5_pps_out(struct work_struct *work)
for (i = 0; i < clock->ptp_info.n_pins; i++) {
u64 tstart;
- write_lock_irqsave(&clock->lock, flags);
+ write_seqlock_irqsave(&clock->lock, flags);
tstart = clock->pps_info.start[i];
clock->pps_info.start[i] = 0;
- write_unlock_irqrestore(&clock->lock, flags);
+ write_sequnlock_irqrestore(&clock->lock, flags);
if (!tstart)
continue;
@@ -132,10 +132,10 @@ static void mlx5_timestamp_overflow(struct work_struct *work)
overflow_work);
unsigned long flags;
- write_lock_irqsave(&clock->lock, flags);
+ write_seqlock_irqsave(&clock->lock, flags);
timecounter_read(&clock->tc);
mlx5_update_clock_info_page(clock->mdev);
- write_unlock_irqrestore(&clock->lock, flags);
+ write_sequnlock_irqrestore(&clock->lock, flags);
schedule_delayed_work(&clock->overflow_work, clock->overflow_period);
}
@@ -147,10 +147,10 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
u64 ns = timespec64_to_ns(ts);
unsigned long flags;
- write_lock_irqsave(&clock->lock, flags);
+ write_seqlock_irqsave(&clock->lock, flags);
timecounter_init(&clock->tc, &clock->cycles, ns);
mlx5_update_clock_info_page(clock->mdev);
- write_unlock_irqrestore(&clock->lock, flags);
+ write_sequnlock_irqrestore(&clock->lock, flags);
return 0;
}
@@ -162,9 +162,9 @@ static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
u64 ns;
unsigned long flags;
- write_lock_irqsave(&clock->lock, flags);
+ write_seqlock_irqsave(&clock->lock, flags);
ns = timecounter_read(&clock->tc);
- write_unlock_irqrestore(&clock->lock, flags);
+ write_sequnlock_irqrestore(&clock->lock, flags);
*ts = ns_to_timespec64(ns);
@@ -177,10 +177,10 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
ptp_info);
unsigned long flags;
- write_lock_irqsave(&clock->lock, flags);
+ write_seqlock_irqsave(&clock->lock, flags);
timecounter_adjtime(&clock->tc, delta);
mlx5_update_clock_info_page(clock->mdev);
- write_unlock_irqrestore(&clock->lock, flags);
+ write_sequnlock_irqrestore(&clock->lock, flags);
return 0;
}
@@ -203,12 +203,12 @@ static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
adj *= delta;
diff = div_u64(adj, 1000000000ULL);
- write_lock_irqsave(&clock->lock, flags);
+ write_seqlock_irqsave(&clock->lock, flags);
timecounter_read(&clock->tc);
clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
clock->nominal_c_mult + diff;
mlx5_update_clock_info_page(clock->mdev);
- write_unlock_irqrestore(&clock->lock, flags);
+ write_sequnlock_irqrestore(&clock->lock, flags);
return 0;
}
@@ -307,12 +307,12 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
ts.tv_nsec = rq->perout.start.nsec;
ns = timespec64_to_ns(&ts);
cycles_now = mlx5_read_internal_timer(mdev);
- write_lock_irqsave(&clock->lock, flags);
+ write_seqlock_irqsave(&clock->lock, flags);
nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
nsec_delta = ns - nsec_now;
cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
clock->cycles.mult);
- write_unlock_irqrestore(&clock->lock, flags);
+ write_sequnlock_irqrestore(&clock->lock, flags);
time_stamp = cycles_now + cycles_delta;
field_select = MLX5_MTPPS_FS_PIN_MODE |
MLX5_MTPPS_FS_PATTERN |
@@ -471,14 +471,14 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev,
ts.tv_sec += 1;
ts.tv_nsec = 0;
ns = timespec64_to_ns(&ts);
- write_lock_irqsave(&clock->lock, flags);
+ write_seqlock_irqsave(&clock->lock, flags);
nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
nsec_delta = ns - nsec_now;
cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
clock->cycles.mult);
clock->pps_info.start[pin] = cycles_now + cycles_delta;
schedule_work(&clock->pps_info.out_work);
- write_unlock_irqrestore(&clock->lock, flags);
+ write_sequnlock_irqrestore(&clock->lock, flags);
break;
default:
mlx5_core_err(mdev, " Unhandled event\n");
@@ -498,7 +498,7 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
return;
}
- rwlock_init(&clock->lock);
+ seqlock_init(&clock->lock);
clock->cycles.read = read_internal_timer;
clock->cycles.shift = MLX5_CYCLES_SHIFT;
clock->cycles.mult = clocksource_khz2mult(dev_freq,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
index 02e2e4575e4f..263cb6e2aeee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
@@ -46,11 +46,13 @@ static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
u64 timestamp)
{
+ unsigned int seq;
u64 nsec;
- read_lock(&clock->lock);
- nsec = timecounter_cyc2time(&clock->tc, timestamp);
- read_unlock(&clock->lock);
+ do {
+ seq = read_seqbegin(&clock->lock);
+ nsec = timecounter_cyc2time(&clock->tc, timestamp);
+ } while (read_seqretry(&clock->lock, seq));
return ns_to_ktime(nsec);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index b5e9f664fc66..28132c7dc05f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1594,12 +1594,17 @@ static const struct pci_error_handlers mlx5_err_handler = {
static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
{
- int ret;
+ bool fast_teardown = false, force_teardown = false;
+ int ret = 1;
+
+ fast_teardown = MLX5_CAP_GEN(dev, fast_teardown);
+ force_teardown = MLX5_CAP_GEN(dev, force_teardown);
+
+ mlx5_core_dbg(dev, "force teardown firmware support=%d\n", force_teardown);
+ mlx5_core_dbg(dev, "fast teardown firmware support=%d\n", fast_teardown);
- if (!MLX5_CAP_GEN(dev, force_teardown)) {
- mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n");
+ if (!fast_teardown && !force_teardown)
return -EOPNOTSUPP;
- }
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
mlx5_core_dbg(dev, "Device in internal error state, giving up\n");
@@ -1612,13 +1617,19 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev)
mlx5_drain_health_wq(dev);
mlx5_stop_health_poll(dev, false);
+ ret = mlx5_cmd_fast_teardown_hca(dev);
+ if (!ret)
+ goto succeed;
+
ret = mlx5_cmd_force_teardown_hca(dev);
- if (ret) {
- mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret);
- mlx5_start_health_poll(dev);
- return ret;
- }
+ if (!ret)
+ goto succeed;
+
+ mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret);
+ mlx5_start_health_poll(dev);
+ return ret;
+succeed:
mlx5_enter_error_state(dev, true);
/* Some platforms requiring freeing the IRQ's in the shutdown
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index b4134fa0bba3..0594d0961cb3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -39,6 +39,7 @@
#include <linux/if_link.h>
#include <linux/firmware.h>
#include <linux/mlx5/cq.h>
+#include <linux/mlx5/fs.h>
#define DRIVER_NAME "mlx5_core"
#define DRIVER_VERSION "5.0-0"
@@ -95,6 +96,8 @@ int mlx5_query_board_id(struct mlx5_core_dev *dev);
int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id);
int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev);
+int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev);
+
void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
unsigned long param);
void mlx5_core_page_fault(struct mlx5_core_dev *dev,
@@ -169,17 +172,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
void mlx5_dev_list_lock(void);
void mlx5_dev_list_unlock(void);
int mlx5_dev_list_trylock(void);
-int mlx5_encap_alloc(struct mlx5_core_dev *dev,
- int header_type,
- size_t size,
- void *encap_header,
- u32 *encap_id);
-void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id);
-
-int mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
- u8 namespace, u8 num_actions,
- void *modify_actions, u32 *modify_header_id);
-void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id);
bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv);
@@ -214,4 +206,14 @@ int mlx5_lag_allow(struct mlx5_core_dev *dev);
int mlx5_lag_forbid(struct mlx5_core_dev *dev);
void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);
+
+enum {
+ MLX5_NIC_IFC_FULL = 0,
+ MLX5_NIC_IFC_DISABLED = 1,
+ MLX5_NIC_IFC_NO_DRAM_NIC = 2,
+ MLX5_NIC_IFC_INVALID = 3
+};
+
+u8 mlx5_get_nic_state(struct mlx5_core_dev *dev);
+void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state);
#endif /* __MLX5_CORE_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 4ca07bfb6b14..91b8139a388d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -211,6 +211,7 @@ int mlx5_core_create_dct(struct mlx5_core_dev *dev,
}
qp->qpn = MLX5_GET(create_dct_out, out, dctn);
+ qp->uid = MLX5_GET(create_dct_in, in, uid);
err = create_resource_common(dev, qp, MLX5_RES_DCT);
if (err)
goto err_cmd;
@@ -219,6 +220,7 @@ int mlx5_core_create_dct(struct mlx5_core_dev *dev,
err_cmd:
MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
MLX5_SET(destroy_dct_in, din, dctn, qp->qpn);
+ MLX5_SET(destroy_dct_in, din, uid, qp->uid);
mlx5_cmd_exec(dev, (void *)&in, sizeof(din),
(void *)&out, sizeof(dout));
return err;
@@ -240,6 +242,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
if (err)
return err;
+ qp->uid = MLX5_GET(create_qp_in, in, uid);
qp->qpn = MLX5_GET(create_qp_out, out, qpn);
mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
@@ -261,6 +264,7 @@ err_cmd:
memset(dout, 0, sizeof(dout));
MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
+ MLX5_SET(destroy_qp_in, din, uid, qp->uid);
mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
return err;
}
@@ -275,6 +279,7 @@ static int mlx5_core_drain_dct(struct mlx5_core_dev *dev,
MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT);
MLX5_SET(drain_dct_in, in, dctn, qp->qpn);
+ MLX5_SET(drain_dct_in, in, uid, qp->uid);
return mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
(void *)&out, sizeof(out));
}
@@ -301,6 +306,7 @@ destroy:
destroy_resource_common(dev, &dct->mqp);
MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT);
MLX5_SET(destroy_dct_in, in, dctn, qp->qpn);
+ MLX5_SET(destroy_dct_in, in, uid, qp->uid);
err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
(void *)&out, sizeof(out));
return err;
@@ -320,6 +326,7 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+ MLX5_SET(destroy_qp_in, in, uid, qp->uid);
err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
if (err)
return err;
@@ -373,7 +380,7 @@ static void mbox_free(struct mbox_info *mbox)
static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
u32 opt_param_mask, void *qpc,
- struct mbox_info *mbox)
+ struct mbox_info *mbox, u16 uid)
{
mbox->out = NULL;
mbox->in = NULL;
@@ -381,26 +388,32 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
#define MBOX_ALLOC(mbox, typ) \
mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out))
-#define MOD_QP_IN_SET(typ, in, _opcode, _qpn) \
- MLX5_SET(typ##_in, in, opcode, _opcode); \
- MLX5_SET(typ##_in, in, qpn, _qpn)
-
-#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc) \
- MOD_QP_IN_SET(typ, in, _opcode, _qpn); \
- MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \
- memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, MLX5_ST_SZ_BYTES(qpc))
+#define MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid) \
+ do { \
+ MLX5_SET(typ##_in, in, opcode, _opcode); \
+ MLX5_SET(typ##_in, in, qpn, _qpn); \
+ MLX5_SET(typ##_in, in, uid, _uid); \
+ } while (0)
+
+#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc, _uid) \
+ do { \
+ MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid); \
+ MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \
+ memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, \
+ MLX5_ST_SZ_BYTES(qpc)); \
+ } while (0)
switch (opcode) {
/* 2RST & 2ERR */
case MLX5_CMD_OP_2RST_QP:
if (MBOX_ALLOC(mbox, qp_2rst))
return -ENOMEM;
- MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn);
+ MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn, uid);
break;
case MLX5_CMD_OP_2ERR_QP:
if (MBOX_ALLOC(mbox, qp_2err))
return -ENOMEM;
- MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn);
+ MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn, uid);
break;
/* MODIFY with QPC */
@@ -408,37 +421,37 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
if (MBOX_ALLOC(mbox, rst2init_qp))
return -ENOMEM;
MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn,
- opt_param_mask, qpc);
+ opt_param_mask, qpc, uid);
break;
case MLX5_CMD_OP_INIT2RTR_QP:
if (MBOX_ALLOC(mbox, init2rtr_qp))
return -ENOMEM;
MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn,
- opt_param_mask, qpc);
+ opt_param_mask, qpc, uid);
break;
case MLX5_CMD_OP_RTR2RTS_QP:
if (MBOX_ALLOC(mbox, rtr2rts_qp))
return -ENOMEM;
MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn,
- opt_param_mask, qpc);
+ opt_param_mask, qpc, uid);
break;
case MLX5_CMD_OP_RTS2RTS_QP:
if (MBOX_ALLOC(mbox, rts2rts_qp))
return -ENOMEM;
MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn,
- opt_param_mask, qpc);
+ opt_param_mask, qpc, uid);
break;
case MLX5_CMD_OP_SQERR2RTS_QP:
if (MBOX_ALLOC(mbox, sqerr2rts_qp))
return -ENOMEM;
MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn,
- opt_param_mask, qpc);
+ opt_param_mask, qpc, uid);
break;
case MLX5_CMD_OP_INIT2INIT_QP:
if (MBOX_ALLOC(mbox, init2init_qp))
return -ENOMEM;
MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn,
- opt_param_mask, qpc);
+ opt_param_mask, qpc, uid);
break;
default:
mlx5_core_err(dev, "Unknown transition for modify QP: OP(0x%x) QPN(0x%x)\n",
@@ -456,7 +469,7 @@ int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode,
int err;
err = modify_qp_mbox_alloc(dev, opcode, qp->qpn,
- opt_param_mask, qpc, &mbox);
+ opt_param_mask, qpc, &mbox, qp->uid);
if (err)
return err;
@@ -531,6 +544,17 @@ int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn)
}
EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
+static void destroy_rq_tracked(struct mlx5_core_dev *dev, u32 rqn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {};
+
+ MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ);
+ MLX5_SET(destroy_rq_in, in, rqn, rqn);
+ MLX5_SET(destroy_rq_in, in, uid, uid);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
struct mlx5_core_qp *rq)
{
@@ -541,6 +565,7 @@ int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
if (err)
return err;
+ rq->uid = MLX5_GET(create_rq_in, in, uid);
rq->qpn = rqn;
err = create_resource_common(dev, rq, MLX5_RES_RQ);
if (err)
@@ -549,7 +574,7 @@ int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
return 0;
err_destroy_rq:
- mlx5_core_destroy_rq(dev, rq->qpn);
+ destroy_rq_tracked(dev, rq->qpn, rq->uid);
return err;
}
@@ -559,10 +584,21 @@ void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
struct mlx5_core_qp *rq)
{
destroy_resource_common(dev, rq);
- mlx5_core_destroy_rq(dev, rq->qpn);
+ destroy_rq_tracked(dev, rq->qpn, rq->uid);
}
EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked);
+static void destroy_sq_tracked(struct mlx5_core_dev *dev, u32 sqn, u16 uid)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_sq_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {};
+
+ MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ);
+ MLX5_SET(destroy_sq_in, in, sqn, sqn);
+ MLX5_SET(destroy_sq_in, in, uid, uid);
+ mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
struct mlx5_core_qp *sq)
{
@@ -573,6 +609,7 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
if (err)
return err;
+ sq->uid = MLX5_GET(create_sq_in, in, uid);
sq->qpn = sqn;
err = create_resource_common(dev, sq, MLX5_RES_SQ);
if (err)
@@ -581,7 +618,7 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
return 0;
err_destroy_sq:
- mlx5_core_destroy_sq(dev, sq->qpn);
+ destroy_sq_tracked(dev, sq->qpn, sq->uid);
return err;
}
@@ -591,7 +628,7 @@ void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
struct mlx5_core_qp *sq)
{
destroy_resource_common(dev, sq);
- mlx5_core_destroy_sq(dev, sq->qpn);
+ destroy_sq_tracked(dev, sq->qpn, sq->uid);
}
EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
index 23cc337a96c9..6a6fc9be01e6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c
@@ -73,7 +73,7 @@ static int get_pas_size(struct mlx5_srq_attr *in)
u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride);
u32 page_size = 1 << log_page_size;
u32 rq_sz_po = rq_sz + (page_offset * po_quanta);
- u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size;
+ u32 rq_num_pas = DIV_ROUND_UP(rq_sz_po, page_size);
return rq_num_pas * sizeof(u64);
}
@@ -166,6 +166,7 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
if (!create_in)
return -ENOMEM;
+ MLX5_SET(create_srq_in, create_in, uid, in->uid);
srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry);
pas = MLX5_ADDR_OF(create_srq_in, create_in, pas);
@@ -178,8 +179,10 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
sizeof(create_out));
kvfree(create_in);
- if (!err)
+ if (!err) {
srq->srqn = MLX5_GET(create_srq_out, create_out, srqn);
+ srq->uid = in->uid;
+ }
return err;
}
@@ -193,6 +196,7 @@ static int destroy_srq_cmd(struct mlx5_core_dev *dev,
MLX5_SET(destroy_srq_in, srq_in, opcode,
MLX5_CMD_OP_DESTROY_SRQ);
MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn);
+ MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid);
return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
srq_out, sizeof(srq_out));
@@ -208,6 +212,7 @@ static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ);
MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn);
MLX5_SET(arm_rq_in, srq_in, lwm, lwm);
+ MLX5_SET(arm_rq_in, srq_in, uid, srq->uid);
return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in),
srq_out, sizeof(srq_out));
@@ -260,6 +265,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
if (!create_in)
return -ENOMEM;
+ MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid);
xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in,
xrc_srq_context_entry);
pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas);
@@ -277,6 +283,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev,
goto out;
srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn);
+ srq->uid = in->uid;
out:
kvfree(create_in);
return err;
@@ -291,6 +298,7 @@ static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev,
MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode,
MLX5_CMD_OP_DESTROY_XRC_SRQ);
MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
+ MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid);
return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
xrcsrq_out, sizeof(xrcsrq_out));
@@ -306,6 +314,7 @@ static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev,
MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ);
MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn);
MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm);
+ MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid);
return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in),
xrcsrq_out, sizeof(xrcsrq_out));
@@ -365,10 +374,13 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
wq = MLX5_ADDR_OF(rmpc, rmpc, wq);
MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
+ MLX5_SET(create_rmp_in, create_in, uid, in->uid);
set_wq(wq, in);
memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size);
err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn);
+ if (!err)
+ srq->uid = in->uid;
kvfree(create_in);
return err;
@@ -377,7 +389,13 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
static int destroy_rmp_cmd(struct mlx5_core_dev *dev,
struct mlx5_core_srq *srq)
{
- return mlx5_core_destroy_rmp(dev, srq->srqn);
+ u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {};
+
+ MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP);
+ MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn);
+ MLX5_SET(destroy_rmp_in, in, uid, srq->uid);
+ return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
static int arm_rmp_cmd(struct mlx5_core_dev *dev,
@@ -400,6 +418,7 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev,
MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY);
MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn);
+ MLX5_SET(modify_rmp_in, in, uid, srq->uid);
MLX5_SET(wq, wq, lwm, lwm);
MLX5_SET(rmp_bitmask, bitmask, lwm, 1);
MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY);
@@ -469,11 +488,14 @@ static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq,
MLX5_SET(xrqc, xrqc, user_index, in->user_index);
MLX5_SET(xrqc, xrqc, cqn, in->cqn);
MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ);
+ MLX5_SET(create_xrq_in, create_in, uid, in->uid);
err = mlx5_cmd_exec(dev, create_in, inlen, create_out,
sizeof(create_out));
kvfree(create_in);
- if (!err)
+ if (!err) {
srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn);
+ srq->uid = in->uid;
+ }
return err;
}
@@ -485,6 +507,7 @@ static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq)
MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ);
MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn);
+ MLX5_SET(destroy_xrq_in, in, uid, srq->uid);
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
@@ -500,6 +523,7 @@ static int arm_xrq_cmd(struct mlx5_core_dev *dev,
MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ);
MLX5_SET(arm_rq_in, in, srq_number, srq->srqn);
MLX5_SET(arm_rq_in, in, lwm, lwm);
+ MLX5_SET(arm_rq_in, in, uid, srq->uid);
return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index b02af317c125..cfbea66b4879 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -1201,3 +1201,12 @@ int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev)
return err;
}
EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport);
+
+u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev)
+{
+ if (!mdev->sys_image_guid)
+ mlx5_query_nic_vport_system_image_guid(mdev, &mdev->sys_image_guid);
+
+ return mdev->sys_image_guid;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index ddca327e8950..2dcbf1ebfd6a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -49,54 +49,37 @@ u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
return (u32)wq->fbc.sz_m1 + 1;
}
-static u32 mlx5_wq_cyc_get_byte_size(struct mlx5_wq_cyc *wq)
+static u32 wq_get_byte_sz(u8 log_sz, u8 log_stride)
{
- return mlx5_wq_cyc_get_size(wq) << wq->fbc.log_stride;
-}
-
-static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq)
-{
- return mlx5_wq_cyc_get_byte_size(&wq->rq) +
- mlx5_wq_cyc_get_byte_size(&wq->sq);
-}
-
-static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq)
-{
- return mlx5_cqwq_get_size(wq) << wq->fbc.log_stride;
-}
-
-static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq)
-{
- return mlx5_wq_ll_get_size(wq) << wq->fbc.log_stride;
+ return ((u32)1 << log_sz) << log_stride;
}
int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *wqc, struct mlx5_wq_cyc *wq,
struct mlx5_wq_ctrl *wq_ctrl)
{
+ u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride);
+ u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz);
struct mlx5_frag_buf_ctrl *fbc = &wq->fbc;
int err;
- mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride),
- MLX5_GET(wq, wqc, log_wq_sz),
- fbc);
- wq->sz = wq->fbc.sz_m1 + 1;
-
err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
if (err) {
mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
return err;
}
- err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq),
+ wq->db = wq_ctrl->db.db;
+
+ err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride),
&wq_ctrl->buf, param->buf_numa_node);
if (err) {
mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
goto err_db_free;
}
- fbc->frag_buf = wq_ctrl->buf;
- wq->db = wq_ctrl->db.db;
+ mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc);
+ wq->sz = mlx5_wq_cyc_get_size(wq);
wq_ctrl->mdev = mdev;
@@ -108,46 +91,19 @@ err_db_free:
return err;
}
-static void mlx5_qp_set_frag_buf(struct mlx5_frag_buf *buf,
- struct mlx5_wq_qp *qp)
-{
- struct mlx5_frag_buf_ctrl *sq_fbc;
- struct mlx5_frag_buf *rqb, *sqb;
-
- rqb = &qp->rq.fbc.frag_buf;
- *rqb = *buf;
- rqb->size = mlx5_wq_cyc_get_byte_size(&qp->rq);
- rqb->npages = DIV_ROUND_UP(rqb->size, PAGE_SIZE);
-
- sq_fbc = &qp->sq.fbc;
- sqb = &sq_fbc->frag_buf;
- *sqb = *buf;
- sqb->size = mlx5_wq_cyc_get_byte_size(&qp->sq);
- sqb->npages = DIV_ROUND_UP(sqb->size, PAGE_SIZE);
- sqb->frags += rqb->npages; /* first part is for the rq */
- if (sq_fbc->strides_offset)
- sqb->frags--;
-}
-
int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *qpc, struct mlx5_wq_qp *wq,
struct mlx5_wq_ctrl *wq_ctrl)
{
- u16 sq_strides_offset;
- u32 rq_pg_remainder;
- int err;
+ u8 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4;
+ u8 log_rq_sz = MLX5_GET(qpc, qpc, log_rq_size);
+ u8 log_sq_stride = ilog2(MLX5_SEND_WQE_BB);
+ u8 log_sq_sz = MLX5_GET(qpc, qpc, log_sq_size);
- mlx5_fill_fbc(MLX5_GET(qpc, qpc, log_rq_stride) + 4,
- MLX5_GET(qpc, qpc, log_rq_size),
- &wq->rq.fbc);
+ u32 rq_byte_size;
+ int err;
- rq_pg_remainder = mlx5_wq_cyc_get_byte_size(&wq->rq) % PAGE_SIZE;
- sq_strides_offset = rq_pg_remainder / MLX5_SEND_WQE_BB;
- mlx5_fill_fbc_offset(ilog2(MLX5_SEND_WQE_BB),
- MLX5_GET(qpc, qpc, log_sq_size),
- sq_strides_offset,
- &wq->sq.fbc);
err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
if (err) {
@@ -155,14 +111,32 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
return err;
}
- err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq),
+ err = mlx5_frag_buf_alloc_node(mdev,
+ wq_get_byte_sz(log_rq_sz, log_rq_stride) +
+ wq_get_byte_sz(log_sq_sz, log_sq_stride),
&wq_ctrl->buf, param->buf_numa_node);
if (err) {
mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
goto err_db_free;
}
- mlx5_qp_set_frag_buf(&wq_ctrl->buf, wq);
+ mlx5_init_fbc(wq_ctrl->buf.frags, log_rq_stride, log_rq_sz, &wq->rq.fbc);
+
+ rq_byte_size = wq_get_byte_sz(log_rq_sz, log_rq_stride);
+
+ if (rq_byte_size < PAGE_SIZE) {
+ /* SQ starts within the same page of the RQ */
+ u16 sq_strides_offset = rq_byte_size / MLX5_SEND_WQE_BB;
+
+ mlx5_init_fbc_offset(wq_ctrl->buf.frags,
+ log_sq_stride, log_sq_sz, sq_strides_offset,
+ &wq->sq.fbc);
+ } else {
+ u16 rq_npages = rq_byte_size >> PAGE_SHIFT;
+
+ mlx5_init_fbc(wq_ctrl->buf.frags + rq_npages,
+ log_sq_stride, log_sq_sz, &wq->sq.fbc);
+ }
wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR];
wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR];
@@ -181,17 +155,19 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *cqc, struct mlx5_cqwq *wq,
struct mlx5_wq_ctrl *wq_ctrl)
{
+ u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) + 6;
+ u8 log_wq_sz = MLX5_GET(cqc, cqc, log_cq_size);
int err;
- mlx5_core_init_cq_frag_buf(&wq->fbc, cqc);
-
err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
if (err) {
mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
return err;
}
- err = mlx5_frag_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq),
+ wq->db = wq_ctrl->db.db;
+
+ err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride),
&wq_ctrl->buf,
param->buf_numa_node);
if (err) {
@@ -200,8 +176,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
goto err_db_free;
}
- wq->fbc.frag_buf = wq_ctrl->buf;
- wq->db = wq_ctrl->db.db;
+ mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, &wq->fbc);
wq_ctrl->mdev = mdev;
@@ -217,30 +192,29 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
void *wqc, struct mlx5_wq_ll *wq,
struct mlx5_wq_ctrl *wq_ctrl)
{
+ u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride);
+ u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz);
struct mlx5_frag_buf_ctrl *fbc = &wq->fbc;
struct mlx5_wqe_srq_next_seg *next_seg;
int err;
int i;
- mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride),
- MLX5_GET(wq, wqc, log_wq_sz),
- fbc);
-
err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
if (err) {
mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err);
return err;
}
- err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq),
+ wq->db = wq_ctrl->db.db;
+
+ err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride),
&wq_ctrl->buf, param->buf_numa_node);
if (err) {
mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err);
goto err_db_free;
}
- wq->fbc.frag_buf = wq_ctrl->buf;
- wq->db = wq_ctrl->db.db;
+ mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc);
for (i = 0; i < fbc->sz_m1; i++) {
next_seg = mlx5_wq_ll_get_wqe(wq, i);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 68fa44a41485..1f77e97e2d7a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -27,7 +27,8 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \
spectrum_acl_flex_keys.o \
spectrum1_mr_tcam.o spectrum2_mr_tcam.o \
spectrum_mr_tcam.o spectrum_mr.o \
- spectrum_qdisc.o spectrum_span.o
+ spectrum_qdisc.o spectrum_span.o \
+ spectrum_nve.o spectrum_nve_vxlan.o
mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o
mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o
obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
index 83f452b7ccbb..bb99f6d41fe0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -221,7 +221,7 @@ MLXSW_ITEM32(pci, eqe, event_type, 0x0C, 24, 8);
MLXSW_ITEM32(pci, eqe, event_sub_type, 0x0C, 16, 8);
/* pci_eqe_cqn
- * Completion Queue that triggeret this EQE.
+ * Completion Queue that triggered this EQE.
*/
MLXSW_ITEM32(pci, eqe, cqn, 0x0C, 8, 7);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 6e8b619b769b..32cb6718bb17 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -295,6 +295,7 @@ enum mlxsw_reg_sfd_rec_type {
MLXSW_REG_SFD_REC_TYPE_UNICAST = 0x0,
MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG = 0x1,
MLXSW_REG_SFD_REC_TYPE_MULTICAST = 0x2,
+ MLXSW_REG_SFD_REC_TYPE_UNICAST_TUNNEL = 0xC,
};
/* reg_sfd_rec_type
@@ -525,6 +526,61 @@ mlxsw_reg_sfd_mc_pack(char *payload, int rec_index,
mlxsw_reg_sfd_mc_mid_set(payload, rec_index, mid);
}
+/* reg_sfd_uc_tunnel_uip_msb
+ * When protocol is IPv4, the most significant byte of the underlay IPv4
+ * destination IP.
+ * When protocol is IPv6, reserved.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_tunnel_uip_msb, MLXSW_REG_SFD_BASE_LEN, 24,
+ 8, MLXSW_REG_SFD_REC_LEN, 0x08, false);
+
+/* reg_sfd_uc_tunnel_fid
+ * Filtering ID.
+ * Access: Index
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_tunnel_fid, MLXSW_REG_SFD_BASE_LEN, 0, 16,
+ MLXSW_REG_SFD_REC_LEN, 0x08, false);
+
+enum mlxsw_reg_sfd_uc_tunnel_protocol {
+ MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV4,
+ MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV6,
+};
+
+/* reg_sfd_uc_tunnel_protocol
+ * IP protocol.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_tunnel_protocol, MLXSW_REG_SFD_BASE_LEN, 27,
+ 1, MLXSW_REG_SFD_REC_LEN, 0x0C, false);
+
+/* reg_sfd_uc_tunnel_uip_lsb
+ * When protocol is IPv4, the least significant bytes of the underlay
+ * IPv4 destination IP.
+ * When protocol is IPv6, pointer to the underlay IPv6 destination IP
+ * which is configured by RIPS.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, sfd, uc_tunnel_uip_lsb, MLXSW_REG_SFD_BASE_LEN, 0,
+ 24, MLXSW_REG_SFD_REC_LEN, 0x0C, false);
+
+static inline void
+mlxsw_reg_sfd_uc_tunnel_pack(char *payload, int rec_index,
+ enum mlxsw_reg_sfd_rec_policy policy,
+ const char *mac, u16 fid,
+ enum mlxsw_reg_sfd_rec_action action, u32 uip,
+ enum mlxsw_reg_sfd_uc_tunnel_protocol proto)
+{
+ mlxsw_reg_sfd_rec_pack(payload, rec_index,
+ MLXSW_REG_SFD_REC_TYPE_UNICAST_TUNNEL, mac,
+ action);
+ mlxsw_reg_sfd_rec_policy_set(payload, rec_index, policy);
+ mlxsw_reg_sfd_uc_tunnel_uip_msb_set(payload, rec_index, uip >> 24);
+ mlxsw_reg_sfd_uc_tunnel_uip_lsb_set(payload, rec_index, uip);
+ mlxsw_reg_sfd_uc_tunnel_fid_set(payload, rec_index, fid);
+ mlxsw_reg_sfd_uc_tunnel_protocol_set(payload, rec_index, proto);
+}
+
/* SFN - Switch FDB Notification Register
* -------------------------------------------
* The switch provides notifications on newly learned FDB entries and
@@ -1069,6 +1125,8 @@ enum mlxsw_reg_sfdf_flush_type {
MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID,
MLXSW_REG_SFDF_FLUSH_PER_LAG,
MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID,
+ MLXSW_REG_SFDF_FLUSH_PER_NVE,
+ MLXSW_REG_SFDF_FLUSH_PER_NVE_AND_FID,
};
/* reg_sfdf_flush_type
@@ -1079,6 +1137,10 @@ enum mlxsw_reg_sfdf_flush_type {
* 3 - All FID dynamic entries pointing to port are flushed.
* 4 - All dynamic entries pointing to LAG are flushed.
* 5 - All FID dynamic entries pointing to LAG are flushed.
+ * 6 - All entries of type "Unicast Tunnel" or "Multicast Tunnel" are
+ * flushed.
+ * 7 - All entries of type "Unicast Tunnel" or "Multicast Tunnel" are
+ * flushed, per FID.
* Access: RW
*/
MLXSW_ITEM32(reg, sfdf, flush_type, 0x04, 28, 4);
@@ -1315,12 +1377,19 @@ MLXSW_ITEM32(reg, slcr, type, 0x00, 0, 4);
*/
MLXSW_ITEM32(reg, slcr, lag_hash, 0x04, 0, 20);
-static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash)
+/* reg_slcr_seed
+ * LAG seed value. The seed is the same for all ports.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, slcr, seed, 0x08, 0, 32);
+
+static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash, u32 seed)
{
MLXSW_REG_ZERO(slcr, payload);
mlxsw_reg_slcr_pp_set(payload, MLXSW_REG_SLCR_PP_GLOBAL);
mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_CRC);
mlxsw_reg_slcr_lag_hash_set(payload, lag_hash);
+ mlxsw_reg_slcr_seed_set(payload, seed);
}
/* SLCOR - Switch LAG Collector Register
@@ -8279,6 +8348,508 @@ static inline void mlxsw_reg_mgpc_pack(char *payload, u32 counter_index,
mlxsw_reg_mgpc_opcode_set(payload, opcode);
}
+/* MPRS - Monitoring Parsing State Register
+ * ----------------------------------------
+ * The MPRS register is used for setting up the parsing for hash,
+ * policy-engine and routing.
+ */
+#define MLXSW_REG_MPRS_ID 0x9083
+#define MLXSW_REG_MPRS_LEN 0x14
+
+MLXSW_REG_DEFINE(mprs, MLXSW_REG_MPRS_ID, MLXSW_REG_MPRS_LEN);
+
+/* reg_mprs_parsing_depth
+ * Minimum parsing depth.
+ * Need to enlarge parsing depth according to L3, MPLS, tunnels, ACL
+ * rules, traps, hash, etc. Default is 96 bytes. Reserved when SwitchX-2.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mprs, parsing_depth, 0x00, 0, 16);
+
+/* reg_mprs_parsing_en
+ * Parsing enable.
+ * Bit 0 - Enable parsing of NVE of types VxLAN, VxLAN-GPE, GENEVE and
+ * NVGRE. Default is enabled. Reserved when SwitchX-2.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mprs, parsing_en, 0x04, 0, 16);
+
+/* reg_mprs_vxlan_udp_dport
+ * VxLAN UDP destination port.
+ * Used for identifying VxLAN packets and for dport field in
+ * encapsulation. Default is 4789.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mprs, vxlan_udp_dport, 0x10, 0, 16);
+
+static inline void mlxsw_reg_mprs_pack(char *payload, u16 parsing_depth,
+ u16 vxlan_udp_dport)
+{
+ MLXSW_REG_ZERO(mprs, payload);
+ mlxsw_reg_mprs_parsing_depth_set(payload, parsing_depth);
+ mlxsw_reg_mprs_parsing_en_set(payload, true);
+ mlxsw_reg_mprs_vxlan_udp_dport_set(payload, vxlan_udp_dport);
+}
+
+/* TNGCR - Tunneling NVE General Configuration Register
+ * ----------------------------------------------------
+ * The TNGCR register is used for setting up the NVE Tunneling configuration.
+ */
+#define MLXSW_REG_TNGCR_ID 0xA001
+#define MLXSW_REG_TNGCR_LEN 0x44
+
+MLXSW_REG_DEFINE(tngcr, MLXSW_REG_TNGCR_ID, MLXSW_REG_TNGCR_LEN);
+
+enum mlxsw_reg_tngcr_type {
+ MLXSW_REG_TNGCR_TYPE_VXLAN,
+ MLXSW_REG_TNGCR_TYPE_VXLAN_GPE,
+ MLXSW_REG_TNGCR_TYPE_GENEVE,
+ MLXSW_REG_TNGCR_TYPE_NVGRE,
+};
+
+/* reg_tngcr_type
+ * Tunnel type for encapsulation and decapsulation. The types are mutually
+ * exclusive.
+ * Note: For Spectrum the NVE parsing must be enabled in MPRS.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, type, 0x00, 0, 4);
+
+/* reg_tngcr_nve_valid
+ * The VTEP is valid. Allows adding FDB entries for tunnel encapsulation.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_valid, 0x04, 31, 1);
+
+/* reg_tngcr_nve_ttl_uc
+ * The TTL for NVE tunnel encapsulation underlay unicast packets.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_ttl_uc, 0x04, 0, 8);
+
+/* reg_tngcr_nve_ttl_mc
+ * The TTL for NVE tunnel encapsulation underlay multicast packets.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_ttl_mc, 0x08, 0, 8);
+
+enum {
+ /* Do not copy flow label. Calculate flow label using nve_flh. */
+ MLXSW_REG_TNGCR_FL_NO_COPY,
+ /* Copy flow label from inner packet if packet is IPv6 and
+ * encapsulation is by IPv6. Otherwise, calculate flow label using
+ * nve_flh.
+ */
+ MLXSW_REG_TNGCR_FL_COPY,
+};
+
+/* reg_tngcr_nve_flc
+ * For NVE tunnel encapsulation: Flow label copy from inner packet.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_flc, 0x0C, 25, 1);
+
+enum {
+ /* Flow label is static. In Spectrum this means '0'. Spectrum-2
+ * uses {nve_fl_prefix, nve_fl_suffix}.
+ */
+ MLXSW_REG_TNGCR_FL_NO_HASH,
+ /* 8 LSBs of the flow label are calculated from ECMP hash of the
+ * inner packet. 12 MSBs are configured by nve_fl_prefix.
+ */
+ MLXSW_REG_TNGCR_FL_HASH,
+};
+
+/* reg_tngcr_nve_flh
+ * NVE flow label hash.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_flh, 0x0C, 24, 1);
+
+/* reg_tngcr_nve_fl_prefix
+ * NVE flow label prefix. Constant 12 MSBs of the flow label.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_fl_prefix, 0x0C, 8, 12);
+
+/* reg_tngcr_nve_fl_suffix
+ * NVE flow label suffix. Constant 8 LSBs of the flow label.
+ * Reserved when nve_flh=1 and for Spectrum.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_fl_suffix, 0x0C, 0, 8);
+
+enum {
+ /* Source UDP port is fixed (default '0') */
+ MLXSW_REG_TNGCR_UDP_SPORT_NO_HASH,
+ /* Source UDP port is calculated based on hash */
+ MLXSW_REG_TNGCR_UDP_SPORT_HASH,
+};
+
+/* reg_tngcr_nve_udp_sport_type
+ * NVE UDP source port type.
+ * Spectrum uses LAG hash (SLCRv2). Spectrum-2 uses ECMP hash (RECRv2).
+ * When the source UDP port is calculated based on hash, then the 8 LSBs
+ * are calculated from hash the 8 MSBs are configured by
+ * nve_udp_sport_prefix.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_udp_sport_type, 0x10, 24, 1);
+
+/* reg_tngcr_nve_udp_sport_prefix
+ * NVE UDP source port prefix. Constant 8 MSBs of the UDP source port.
+ * Reserved when NVE type is NVGRE.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_udp_sport_prefix, 0x10, 8, 8);
+
+/* reg_tngcr_nve_group_size_mc
+ * The amount of sequential linked lists of MC entries. The first linked
+ * list is configured by SFD.underlay_mc_ptr.
+ * Valid values: 1, 2, 4, 8, 16, 32, 64
+ * The linked list are configured by TNUMT.
+ * The hash is set by LAG hash.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_group_size_mc, 0x18, 0, 8);
+
+/* reg_tngcr_nve_group_size_flood
+ * The amount of sequential linked lists of flooding entries. The first
+ * linked list is configured by SFMR.nve_tunnel_flood_ptr
+ * Valid values: 1, 2, 4, 8, 16, 32, 64
+ * The linked list are configured by TNUMT.
+ * The hash is set by LAG hash.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, nve_group_size_flood, 0x1C, 0, 8);
+
+/* reg_tngcr_learn_enable
+ * During decapsulation, whether to learn from NVE port.
+ * Reserved when Spectrum-2. See TNPC.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, learn_enable, 0x20, 31, 1);
+
+/* reg_tngcr_underlay_virtual_router
+ * Underlay virtual router.
+ * Reserved when Spectrum-2.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, underlay_virtual_router, 0x20, 0, 16);
+
+/* reg_tngcr_underlay_rif
+ * Underlay ingress router interface. RIF type should be loopback generic.
+ * Reserved when Spectrum.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, underlay_rif, 0x24, 0, 16);
+
+/* reg_tngcr_usipv4
+ * Underlay source IPv4 address of the NVE.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tngcr, usipv4, 0x28, 0, 32);
+
+/* reg_tngcr_usipv6
+ * Underlay source IPv6 address of the NVE. For Spectrum, must not be
+ * modified under traffic of NVE tunneling encapsulation.
+ * Access: RW
+ */
+MLXSW_ITEM_BUF(reg, tngcr, usipv6, 0x30, 16);
+
+static inline void mlxsw_reg_tngcr_pack(char *payload,
+ enum mlxsw_reg_tngcr_type type,
+ bool valid, u8 ttl)
+{
+ MLXSW_REG_ZERO(tngcr, payload);
+ mlxsw_reg_tngcr_type_set(payload, type);
+ mlxsw_reg_tngcr_nve_valid_set(payload, valid);
+ mlxsw_reg_tngcr_nve_ttl_uc_set(payload, ttl);
+ mlxsw_reg_tngcr_nve_ttl_mc_set(payload, ttl);
+ mlxsw_reg_tngcr_nve_flc_set(payload, MLXSW_REG_TNGCR_FL_NO_COPY);
+ mlxsw_reg_tngcr_nve_flh_set(payload, 0);
+ mlxsw_reg_tngcr_nve_udp_sport_type_set(payload,
+ MLXSW_REG_TNGCR_UDP_SPORT_HASH);
+ mlxsw_reg_tngcr_nve_udp_sport_prefix_set(payload, 0);
+ mlxsw_reg_tngcr_nve_group_size_mc_set(payload, 1);
+ mlxsw_reg_tngcr_nve_group_size_flood_set(payload, 1);
+}
+
+/* TNUMT - Tunneling NVE Underlay Multicast Table Register
+ * -------------------------------------------------------
+ * The TNUMT register is for building the underlay MC table. It is used
+ * for MC, flooding and BC traffic into the NVE tunnel.
+ */
+#define MLXSW_REG_TNUMT_ID 0xA003
+#define MLXSW_REG_TNUMT_LEN 0x20
+
+MLXSW_REG_DEFINE(tnumt, MLXSW_REG_TNUMT_ID, MLXSW_REG_TNUMT_LEN);
+
+enum mlxsw_reg_tnumt_record_type {
+ MLXSW_REG_TNUMT_RECORD_TYPE_IPV4,
+ MLXSW_REG_TNUMT_RECORD_TYPE_IPV6,
+ MLXSW_REG_TNUMT_RECORD_TYPE_LABEL,
+};
+
+/* reg_tnumt_record_type
+ * Record type.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tnumt, record_type, 0x00, 28, 4);
+
+enum mlxsw_reg_tnumt_tunnel_port {
+ MLXSW_REG_TNUMT_TUNNEL_PORT_NVE,
+ MLXSW_REG_TNUMT_TUNNEL_PORT_VPLS,
+ MLXSW_REG_TNUMT_TUNNEL_FLEX_TUNNEL0,
+ MLXSW_REG_TNUMT_TUNNEL_FLEX_TUNNEL1,
+};
+
+/* reg_tnumt_tunnel_port
+ * Tunnel port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tnumt, tunnel_port, 0x00, 24, 4);
+
+/* reg_tnumt_underlay_mc_ptr
+ * Index to the underlay multicast table.
+ * For Spectrum the index is to the KVD linear.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, tnumt, underlay_mc_ptr, 0x00, 0, 24);
+
+/* reg_tnumt_vnext
+ * The next_underlay_mc_ptr is valid.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tnumt, vnext, 0x04, 31, 1);
+
+/* reg_tnumt_next_underlay_mc_ptr
+ * The next index to the underlay multicast table.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tnumt, next_underlay_mc_ptr, 0x04, 0, 24);
+
+/* reg_tnumt_record_size
+ * Number of IP addresses in the record.
+ * Range is 1..cap_max_nve_mc_entries_ipv{4,6}
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tnumt, record_size, 0x08, 0, 3);
+
+/* reg_tnumt_udip
+ * The underlay IPv4 addresses. udip[i] is reserved if i >= size
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, tnumt, udip, 0x0C, 0, 32, 0x04, 0x00, false);
+
+/* reg_tnumt_udip_ptr
+ * The pointer to the underlay IPv6 addresses. udip_ptr[i] is reserved if
+ * i >= size. The IPv6 addresses are configured by RIPS.
+ * Access: RW
+ */
+MLXSW_ITEM32_INDEXED(reg, tnumt, udip_ptr, 0x0C, 0, 24, 0x04, 0x00, false);
+
+static inline void mlxsw_reg_tnumt_pack(char *payload,
+ enum mlxsw_reg_tnumt_record_type type,
+ enum mlxsw_reg_tnumt_tunnel_port tport,
+ u32 underlay_mc_ptr, bool vnext,
+ u32 next_underlay_mc_ptr,
+ u8 record_size)
+{
+ MLXSW_REG_ZERO(tnumt, payload);
+ mlxsw_reg_tnumt_record_type_set(payload, type);
+ mlxsw_reg_tnumt_tunnel_port_set(payload, tport);
+ mlxsw_reg_tnumt_underlay_mc_ptr_set(payload, underlay_mc_ptr);
+ mlxsw_reg_tnumt_vnext_set(payload, vnext);
+ mlxsw_reg_tnumt_next_underlay_mc_ptr_set(payload, next_underlay_mc_ptr);
+ mlxsw_reg_tnumt_record_size_set(payload, record_size);
+}
+
+/* TNQCR - Tunneling NVE QoS Configuration Register
+ * ------------------------------------------------
+ * The TNQCR register configures how QoS is set in encapsulation into the
+ * underlay network.
+ */
+#define MLXSW_REG_TNQCR_ID 0xA010
+#define MLXSW_REG_TNQCR_LEN 0x0C
+
+MLXSW_REG_DEFINE(tnqcr, MLXSW_REG_TNQCR_ID, MLXSW_REG_TNQCR_LEN);
+
+/* reg_tnqcr_enc_set_dscp
+ * For encapsulation: How to set DSCP field:
+ * 0 - Copy the DSCP from the overlay (inner) IP header to the underlay
+ * (outer) IP header. If there is no IP header, use TNQDR.dscp
+ * 1 - Set the DSCP field as TNQDR.dscp
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tnqcr, enc_set_dscp, 0x04, 28, 1);
+
+static inline void mlxsw_reg_tnqcr_pack(char *payload)
+{
+ MLXSW_REG_ZERO(tnqcr, payload);
+ mlxsw_reg_tnqcr_enc_set_dscp_set(payload, 0);
+}
+
+/* TNQDR - Tunneling NVE QoS Default Register
+ * ------------------------------------------
+ * The TNQDR register configures the default QoS settings for NVE
+ * encapsulation.
+ */
+#define MLXSW_REG_TNQDR_ID 0xA011
+#define MLXSW_REG_TNQDR_LEN 0x08
+
+MLXSW_REG_DEFINE(tnqdr, MLXSW_REG_TNQDR_ID, MLXSW_REG_TNQDR_LEN);
+
+/* reg_tnqdr_local_port
+ * Local port number (receive port). CPU port is supported.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, tnqdr, local_port, 0x00, 16, 8);
+
+/* reg_tnqdr_dscp
+ * For encapsulation, the default DSCP.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tnqdr, dscp, 0x04, 0, 6);
+
+static inline void mlxsw_reg_tnqdr_pack(char *payload, u8 local_port)
+{
+ MLXSW_REG_ZERO(tnqdr, payload);
+ mlxsw_reg_tnqdr_local_port_set(payload, local_port);
+ mlxsw_reg_tnqdr_dscp_set(payload, 0);
+}
+
+/* TNEEM - Tunneling NVE Encapsulation ECN Mapping Register
+ * --------------------------------------------------------
+ * The TNEEM register maps ECN of the IP header at the ingress to the
+ * encapsulation to the ECN of the underlay network.
+ */
+#define MLXSW_REG_TNEEM_ID 0xA012
+#define MLXSW_REG_TNEEM_LEN 0x0C
+
+MLXSW_REG_DEFINE(tneem, MLXSW_REG_TNEEM_ID, MLXSW_REG_TNEEM_LEN);
+
+/* reg_tneem_overlay_ecn
+ * ECN of the IP header in the overlay network.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, tneem, overlay_ecn, 0x04, 24, 2);
+
+/* reg_tneem_underlay_ecn
+ * ECN of the IP header in the underlay network.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tneem, underlay_ecn, 0x04, 16, 2);
+
+static inline void mlxsw_reg_tneem_pack(char *payload, u8 overlay_ecn,
+ u8 underlay_ecn)
+{
+ MLXSW_REG_ZERO(tneem, payload);
+ mlxsw_reg_tneem_overlay_ecn_set(payload, overlay_ecn);
+ mlxsw_reg_tneem_underlay_ecn_set(payload, underlay_ecn);
+}
+
+/* TNDEM - Tunneling NVE Decapsulation ECN Mapping Register
+ * --------------------------------------------------------
+ * The TNDEM register configures the actions that are done in the
+ * decapsulation.
+ */
+#define MLXSW_REG_TNDEM_ID 0xA013
+#define MLXSW_REG_TNDEM_LEN 0x0C
+
+MLXSW_REG_DEFINE(tndem, MLXSW_REG_TNDEM_ID, MLXSW_REG_TNDEM_LEN);
+
+/* reg_tndem_underlay_ecn
+ * ECN field of the IP header in the underlay network.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, tndem, underlay_ecn, 0x04, 24, 2);
+
+/* reg_tndem_overlay_ecn
+ * ECN field of the IP header in the overlay network.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, tndem, overlay_ecn, 0x04, 16, 2);
+
+/* reg_tndem_eip_ecn
+ * Egress IP ECN. ECN field of the IP header of the packet which goes out
+ * from the decapsulation.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tndem, eip_ecn, 0x04, 8, 2);
+
+/* reg_tndem_trap_en
+ * Trap enable:
+ * 0 - No trap due to decap ECN
+ * 1 - Trap enable with trap_id
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tndem, trap_en, 0x08, 28, 4);
+
+/* reg_tndem_trap_id
+ * Trap ID. Either DECAP_ECN0 or DECAP_ECN1.
+ * Reserved when trap_en is '0'.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tndem, trap_id, 0x08, 0, 9);
+
+static inline void mlxsw_reg_tndem_pack(char *payload, u8 underlay_ecn,
+ u8 overlay_ecn, u8 ecn, bool trap_en,
+ u16 trap_id)
+{
+ MLXSW_REG_ZERO(tndem, payload);
+ mlxsw_reg_tndem_underlay_ecn_set(payload, underlay_ecn);
+ mlxsw_reg_tndem_overlay_ecn_set(payload, overlay_ecn);
+ mlxsw_reg_tndem_eip_ecn_set(payload, ecn);
+ mlxsw_reg_tndem_trap_en_set(payload, trap_en);
+ mlxsw_reg_tndem_trap_id_set(payload, trap_id);
+}
+
+/* TNPC - Tunnel Port Configuration Register
+ * -----------------------------------------
+ * The TNPC register is used for tunnel port configuration.
+ * Reserved when Spectrum.
+ */
+#define MLXSW_REG_TNPC_ID 0xA020
+#define MLXSW_REG_TNPC_LEN 0x18
+
+MLXSW_REG_DEFINE(tnpc, MLXSW_REG_TNPC_ID, MLXSW_REG_TNPC_LEN);
+
+enum mlxsw_reg_tnpc_tunnel_port {
+ MLXSW_REG_TNPC_TUNNEL_PORT_NVE,
+ MLXSW_REG_TNPC_TUNNEL_PORT_VPLS,
+ MLXSW_REG_TNPC_TUNNEL_FLEX_TUNNEL0,
+ MLXSW_REG_TNPC_TUNNEL_FLEX_TUNNEL1,
+};
+
+/* reg_tnpc_tunnel_port
+ * Tunnel port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, tnpc, tunnel_port, 0x00, 0, 4);
+
+/* reg_tnpc_learn_enable_v6
+ * During IPv6 underlay decapsulation, whether to learn from tunnel port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tnpc, learn_enable_v6, 0x04, 1, 1);
+
+/* reg_tnpc_learn_enable_v4
+ * During IPv4 underlay decapsulation, whether to learn from tunnel port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, tnpc, learn_enable_v4, 0x04, 0, 1);
+
+static inline void mlxsw_reg_tnpc_pack(char *payload,
+ enum mlxsw_reg_tnpc_tunnel_port tport,
+ bool learn_enable)
+{
+ MLXSW_REG_ZERO(tnpc, payload);
+ mlxsw_reg_tnpc_tunnel_port_set(payload, tport);
+ mlxsw_reg_tnpc_learn_enable_v4_set(payload, learn_enable);
+ mlxsw_reg_tnpc_learn_enable_v6_set(payload, learn_enable);
+}
+
/* TIGCR - Tunneling IPinIP General Configuration Register
* -------------------------------------------------------
* The TIGCR register is used for setting up the IPinIP Tunnel configuration.
@@ -8336,8 +8907,15 @@ MLXSW_ITEM32(reg, sbpr, dir, 0x00, 24, 2);
*/
MLXSW_ITEM32(reg, sbpr, pool, 0x00, 0, 4);
+/* reg_sbpr_infi_size
+ * Size is infinite.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sbpr, infi_size, 0x04, 31, 1);
+
/* reg_sbpr_size
* Pool size in buffer cells.
+ * Reserved when infi_size = 1.
* Access: RW
*/
MLXSW_ITEM32(reg, sbpr, size, 0x04, 0, 24);
@@ -8355,13 +8933,15 @@ MLXSW_ITEM32(reg, sbpr, mode, 0x08, 0, 4);
static inline void mlxsw_reg_sbpr_pack(char *payload, u8 pool,
enum mlxsw_reg_sbxx_dir dir,
- enum mlxsw_reg_sbpr_mode mode, u32 size)
+ enum mlxsw_reg_sbpr_mode mode, u32 size,
+ bool infi_size)
{
MLXSW_REG_ZERO(sbpr, payload);
mlxsw_reg_sbpr_pool_set(payload, pool);
mlxsw_reg_sbpr_dir_set(payload, dir);
mlxsw_reg_sbpr_mode_set(payload, mode);
mlxsw_reg_sbpr_size_set(payload, size);
+ mlxsw_reg_sbpr_infi_size_set(payload, infi_size);
}
/* SBCM - Shared Buffer Class Management Register
@@ -8409,6 +8989,12 @@ MLXSW_ITEM32(reg, sbcm, min_buff, 0x18, 0, 24);
#define MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN 1
#define MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX 14
+/* reg_sbcm_infi_max
+ * Max buffer is infinite.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, sbcm, infi_max, 0x1C, 31, 1);
+
/* reg_sbcm_max_buff
* When the pool associated to the port-pg/tclass is configured to
* static, Maximum buffer size for the limiter configured in cells.
@@ -8418,6 +9004,7 @@ MLXSW_ITEM32(reg, sbcm, min_buff, 0x18, 0, 24);
* 0: 0
* i: (1/128)*2^(i-1), for i=1..14
* 0xFF: Infinity
+ * Reserved when infi_max = 1.
* Access: RW
*/
MLXSW_ITEM32(reg, sbcm, max_buff, 0x1C, 0, 24);
@@ -8430,7 +9017,8 @@ MLXSW_ITEM32(reg, sbcm, pool, 0x24, 0, 4);
static inline void mlxsw_reg_sbcm_pack(char *payload, u8 local_port, u8 pg_buff,
enum mlxsw_reg_sbxx_dir dir,
- u32 min_buff, u32 max_buff, u8 pool)
+ u32 min_buff, u32 max_buff,
+ bool infi_max, u8 pool)
{
MLXSW_REG_ZERO(sbcm, payload);
mlxsw_reg_sbcm_local_port_set(payload, local_port);
@@ -8438,6 +9026,7 @@ static inline void mlxsw_reg_sbcm_pack(char *payload, u8 local_port, u8 pg_buff,
mlxsw_reg_sbcm_dir_set(payload, dir);
mlxsw_reg_sbcm_min_buff_set(payload, min_buff);
mlxsw_reg_sbcm_max_buff_set(payload, max_buff);
+ mlxsw_reg_sbcm_infi_max_set(payload, infi_max);
mlxsw_reg_sbcm_pool_set(payload, pool);
}
@@ -8810,6 +9399,14 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(mcc),
MLXSW_REG(mcda),
MLXSW_REG(mgpc),
+ MLXSW_REG(mprs),
+ MLXSW_REG(tngcr),
+ MLXSW_REG(tnumt),
+ MLXSW_REG(tnqcr),
+ MLXSW_REG(tnqdr),
+ MLXSW_REG(tneem),
+ MLXSW_REG(tndem),
+ MLXSW_REG(tnpc),
MLXSW_REG(tigcr),
MLXSW_REG(sbpr),
MLXSW_REG(sbcm),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h
index 79a31de7c825..99b341539870 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/resources.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h
@@ -46,6 +46,8 @@ enum mlxsw_res_id {
MLXSW_RES_ID_MAX_RIFS,
MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES,
MLXSW_RES_ID_MAX_LPM_TREES,
+ MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV4,
+ MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV6,
/* Internal resources.
* Determined by the SW, not queried from the HW.
@@ -96,6 +98,8 @@ static u16 mlxsw_res_ids[] = {
[MLXSW_RES_ID_MAX_RIFS] = 0x2C02,
[MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES] = 0x2C10,
[MLXSW_RES_ID_MAX_LPM_TREES] = 0x2C30,
+ [MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV4] = 0x2E02,
+ [MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV6] = 0x2E03,
};
struct mlxsw_res {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 30bb2c533cec..8a4983adae94 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -21,6 +21,7 @@
#include <linux/dcbnl.h>
#include <linux/inetdevice.h>
#include <linux/netlink.h>
+#include <linux/random.h>
#include <net/switchdev.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_mirred.h>
@@ -331,7 +332,10 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp)
return -EINVAL;
}
if (MLXSW_SP_FWREV_MINOR_TO_BRANCH(rev->minor) ==
- MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor))
+ MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor) &&
+ (rev->minor > req_rev->minor ||
+ (rev->minor == req_rev->minor &&
+ rev->subminor >= req_rev->subminor)))
return 0;
dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver\n",
@@ -2804,6 +2808,13 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
MLXSW_REG_QEEC_MAS_DIS);
if (err)
return err;
+
+ err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HIERARCY_TC,
+ i + 8, i,
+ MLXSW_REG_QEEC_MAS_DIS);
+ if (err)
+ return err;
}
/* Map all priorities to traffic class 0. */
@@ -2983,6 +2994,13 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
goto err_port_qdiscs_init;
}
+ err = mlxsw_sp_port_nve_init(mlxsw_sp_port);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize NVE\n",
+ mlxsw_sp_port->local_port);
+ goto err_port_nve_init;
+ }
+
mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1);
if (IS_ERR(mlxsw_sp_port_vlan)) {
dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create VID 1\n",
@@ -3011,6 +3029,8 @@ err_register_netdev:
mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan);
err_port_vlan_get:
+ mlxsw_sp_port_nve_fini(mlxsw_sp_port);
+err_port_nve_init:
mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port);
err_port_qdiscs_init:
mlxsw_sp_port_fids_fini(mlxsw_sp_port);
@@ -3050,6 +3070,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
mlxsw_sp->ports[local_port] = NULL;
mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
mlxsw_sp_port_vlan_flush(mlxsw_sp_port);
+ mlxsw_sp_port_nve_fini(mlxsw_sp_port);
mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port);
mlxsw_sp_port_fids_fini(mlxsw_sp_port);
mlxsw_sp_port_dcb_fini(mlxsw_sp_port);
@@ -3459,6 +3480,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false),
MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(DECAP_ECN0, TRAP_TO_CPU, ROUTER_EXP, false),
MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, ROUTER_EXP, false),
MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, ROUTER_EXP, false),
/* PKT Sample trap */
@@ -3472,6 +3494,8 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false),
MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false),
MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
+ /* NVE traps */
+ MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, ARP, false),
};
static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
@@ -3656,8 +3680,10 @@ static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
{
char slcr_pl[MLXSW_REG_SLCR_LEN];
+ u32 seed;
int err;
+ get_random_bytes(&seed, sizeof(seed));
mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC |
MLXSW_REG_SLCR_LAG_HASH_DMAC |
MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE |
@@ -3666,7 +3692,7 @@ static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
MLXSW_REG_SLCR_LAG_HASH_DIP |
MLXSW_REG_SLCR_LAG_HASH_SPORT |
MLXSW_REG_SLCR_LAG_HASH_DPORT |
- MLXSW_REG_SLCR_LAG_HASH_IPPROTO);
+ MLXSW_REG_SLCR_LAG_HASH_IPPROTO, seed);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl);
if (err)
return err;
@@ -3779,6 +3805,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
goto err_afa_init;
}
+ err = mlxsw_sp_nve_init(mlxsw_sp);
+ if (err) {
+ dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize NVE\n");
+ goto err_nve_init;
+ }
+
err = mlxsw_sp_router_init(mlxsw_sp);
if (err) {
dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n");
@@ -3825,6 +3857,8 @@ err_acl_init:
err_netdev_notifier:
mlxsw_sp_router_fini(mlxsw_sp);
err_router_init:
+ mlxsw_sp_nve_fini(mlxsw_sp);
+err_nve_init:
mlxsw_sp_afa_fini(mlxsw_sp);
err_afa_init:
mlxsw_sp_counter_pool_fini(mlxsw_sp);
@@ -3857,6 +3891,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops;
mlxsw_sp->mr_tcam_ops = &mlxsw_sp1_mr_tcam_ops;
mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops;
+ mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr;
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
}
@@ -3871,6 +3906,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core,
mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops;
mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops;
mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops;
+ mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr;
return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info);
}
@@ -3884,6 +3920,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_acl_fini(mlxsw_sp);
unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb);
mlxsw_sp_router_fini(mlxsw_sp);
+ mlxsw_sp_nve_fini(mlxsw_sp);
mlxsw_sp_afa_fini(mlxsw_sp);
mlxsw_sp_counter_pool_fini(mlxsw_sp);
mlxsw_sp_switchdev_fini(mlxsw_sp);
@@ -4550,6 +4587,41 @@ static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port)
mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false);
}
+static bool mlxsw_sp_bridge_has_multiple_vxlans(struct net_device *br_dev)
+{
+ unsigned int num_vxlans = 0;
+ struct net_device *dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(br_dev, dev, iter) {
+ if (netif_is_vxlan(dev))
+ num_vxlans++;
+ }
+
+ return num_vxlans > 1;
+}
+
+static bool mlxsw_sp_bridge_vxlan_is_valid(struct net_device *br_dev,
+ struct netlink_ext_ack *extack)
+{
+ if (br_multicast_enabled(br_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Multicast can not be enabled on a bridge with a VxLAN device");
+ return false;
+ }
+
+ if (br_vlan_enabled(br_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "VLAN filtering can not be enabled on a bridge with a VxLAN device");
+ return false;
+ }
+
+ if (mlxsw_sp_bridge_has_multiple_vxlans(br_dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "Multiple VxLAN devices are not supported in a VLAN-unaware bridge");
+ return false;
+ }
+
+ return true;
+}
+
static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
struct net_device *dev,
unsigned long event, void *ptr)
@@ -4579,6 +4651,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev,
}
if (!info->linking)
break;
+ if (netif_is_bridge_master(upper_dev) &&
+ !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) &&
+ mlxsw_sp_bridge_has_vxlan(upper_dev) &&
+ !mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack))
+ return -EOPNOTSUPP;
if (netdev_has_any_upper_dev(upper_dev) &&
(!netif_is_bridge_master(upper_dev) ||
!mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
@@ -4736,6 +4813,11 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev,
}
if (!info->linking)
break;
+ if (netif_is_bridge_master(upper_dev) &&
+ !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) &&
+ mlxsw_sp_bridge_has_vxlan(upper_dev) &&
+ !mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack))
+ return -EOPNOTSUPP;
if (netdev_has_any_upper_dev(upper_dev) &&
(!netif_is_bridge_master(upper_dev) ||
!mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp,
@@ -4882,6 +4964,63 @@ static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr)
return netif_is_l3_master(info->upper_dev);
}
+static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp,
+ struct net_device *dev,
+ unsigned long event, void *ptr)
+{
+ struct netdev_notifier_changeupper_info *cu_info;
+ struct netdev_notifier_info *info = ptr;
+ struct netlink_ext_ack *extack;
+ struct net_device *upper_dev;
+
+ extack = netdev_notifier_info_to_extack(info);
+
+ switch (event) {
+ case NETDEV_CHANGEUPPER:
+ cu_info = container_of(info,
+ struct netdev_notifier_changeupper_info,
+ info);
+ upper_dev = cu_info->upper_dev;
+ if (!netif_is_bridge_master(upper_dev))
+ return 0;
+ if (!mlxsw_sp_lower_get(upper_dev))
+ return 0;
+ if (!mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack))
+ return -EOPNOTSUPP;
+ if (cu_info->linking) {
+ if (!netif_running(dev))
+ return 0;
+ return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev,
+ dev, extack);
+ } else {
+ mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, upper_dev, dev);
+ }
+ break;
+ case NETDEV_PRE_UP:
+ upper_dev = netdev_master_upper_dev_get(dev);
+ if (!upper_dev)
+ return 0;
+ if (!netif_is_bridge_master(upper_dev))
+ return 0;
+ if (!mlxsw_sp_lower_get(upper_dev))
+ return 0;
+ return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, dev,
+ extack);
+ case NETDEV_DOWN:
+ upper_dev = netdev_master_upper_dev_get(dev);
+ if (!upper_dev)
+ return 0;
+ if (!netif_is_bridge_master(upper_dev))
+ return 0;
+ if (!mlxsw_sp_lower_get(upper_dev))
+ return 0;
+ mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, upper_dev, dev);
+ break;
+ }
+
+ return 0;
+}
+
static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
@@ -4898,6 +5037,8 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb,
}
mlxsw_sp_span_respin(mlxsw_sp);
+ if (netif_is_vxlan(dev))
+ err = mlxsw_sp_netdevice_vxlan_event(mlxsw_sp, dev, event, ptr);
if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev))
err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev,
event, ptr);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 3cdb7aca90b7..0875a79cbe7b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -16,6 +16,7 @@
#include <net/psample.h>
#include <net/pkt_cls.h>
#include <net/red.h>
+#include <net/vxlan.h>
#include "port.h"
#include "core.h"
@@ -55,6 +56,8 @@ enum mlxsw_sp_resource_id {
struct mlxsw_sp_port;
struct mlxsw_sp_rif;
struct mlxsw_sp_span_entry;
+enum mlxsw_sp_l3proto;
+union mlxsw_sp_l3addr;
struct mlxsw_sp_upper {
struct net_device *dev;
@@ -113,9 +116,11 @@ struct mlxsw_sp_acl;
struct mlxsw_sp_counter_pool;
struct mlxsw_sp_fid_core;
struct mlxsw_sp_kvdl;
+struct mlxsw_sp_nve;
struct mlxsw_sp_kvdl_ops;
struct mlxsw_sp_mr_tcam_ops;
struct mlxsw_sp_acl_tcam_ops;
+struct mlxsw_sp_nve_ops;
struct mlxsw_sp {
struct mlxsw_sp_port **ports;
@@ -132,6 +137,7 @@ struct mlxsw_sp {
struct mlxsw_sp_acl *acl;
struct mlxsw_sp_fid_core *fid_core;
struct mlxsw_sp_kvdl *kvdl;
+ struct mlxsw_sp_nve *nve;
struct notifier_block netdevice_nb;
struct mlxsw_sp_counter_pool *counter_pool;
@@ -146,6 +152,7 @@ struct mlxsw_sp {
const struct mlxsw_afk_ops *afk_ops;
const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops;
const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops;
+ const struct mlxsw_sp_nve_ops **nve_ops_arr;
};
static inline struct mlxsw_sp_upper *
@@ -235,6 +242,25 @@ struct mlxsw_sp_port {
struct mlxsw_sp_acl_block *eg_acl_block;
};
+static inline struct net_device *
+mlxsw_sp_bridge_vxlan_dev_find(struct net_device *br_dev)
+{
+ struct net_device *dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(br_dev, dev, iter) {
+ if (netif_is_vxlan(dev))
+ return dev;
+ }
+
+ return NULL;
+}
+
+static inline bool mlxsw_sp_bridge_has_vxlan(struct net_device *br_dev)
+{
+ return !!mlxsw_sp_bridge_vxlan_dev_find(br_dev);
+}
+
static inline bool
mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port)
{
@@ -330,6 +356,13 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
struct net_device *br_dev);
bool mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *br_dev);
+int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *br_dev,
+ const struct net_device *vxlan_dev,
+ struct netlink_ext_ack *extack);
+void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *br_dev,
+ const struct net_device *vxlan_dev);
/* spectrum.c */
int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -383,6 +416,17 @@ static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port)
#endif
/* spectrum_router.c */
+enum mlxsw_sp_l3proto {
+ MLXSW_SP_L3_PROTO_IPV4,
+ MLXSW_SP_L3_PROTO_IPV6,
+#define MLXSW_SP_L3_PROTO_MAX (MLXSW_SP_L3_PROTO_IPV6 + 1)
+};
+
+union mlxsw_sp_l3addr {
+ __be32 addr4;
+ struct in6_addr addr6;
+};
+
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp);
int mlxsw_sp_netdevice_router_port_event(struct net_device *dev);
@@ -416,6 +460,19 @@ mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan);
void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
struct net_device *dev);
+struct mlxsw_sp_rif *mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *dev);
+u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp);
+struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif);
+int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
+ enum mlxsw_sp_l3proto ul_proto,
+ const union mlxsw_sp_l3addr *ul_sip,
+ u32 tunnel_index);
+void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
+ enum mlxsw_sp_l3proto ul_proto,
+ const union mlxsw_sp_l3addr *ul_sip);
+int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
+ u16 *vr_id);
/* spectrum_kvdl.c */
enum mlxsw_sp_kvdl_entry_type {
@@ -423,6 +480,7 @@ enum mlxsw_sp_kvdl_entry_type {
MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET,
MLXSW_SP_KVDL_ENTRY_TYPE_PBS,
MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR,
+ MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT,
};
static inline unsigned int
@@ -433,6 +491,7 @@ mlxsw_sp_kvdl_entry_size(enum mlxsw_sp_kvdl_entry_type type)
case MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET: /* fall through */
case MLXSW_SP_KVDL_ENTRY_TYPE_PBS: /* fall through */
case MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR: /* fall through */
+ case MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT: /* fall through */
default:
return 1;
}
@@ -662,6 +721,16 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_prio_qopt_offload *p);
/* spectrum_fid.c */
+struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp,
+ __be32 vni);
+int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni);
+int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid,
+ u32 nve_flood_index);
+void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid);
+bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid);
+int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni);
+void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid);
+bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid);
int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid,
enum mlxsw_sp_flood_type packet_type, u8 local_port,
bool member);
@@ -680,6 +749,8 @@ u16 mlxsw_sp_fid_8021q_vid(const struct mlxsw_sp_fid *fid);
struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_get(struct mlxsw_sp *mlxsw_sp, u16 vid);
struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp,
int br_ifindex);
+struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp,
+ int br_ifindex);
struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp,
u16 rif_index);
struct mlxsw_sp_fid *mlxsw_sp_fid_dummy_get(struct mlxsw_sp *mlxsw_sp);
@@ -725,4 +796,39 @@ extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops;
/* spectrum2_mr_tcam.c */
extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops;
+/* spectrum_nve.c */
+enum mlxsw_sp_nve_type {
+ MLXSW_SP_NVE_TYPE_VXLAN,
+};
+
+struct mlxsw_sp_nve_params {
+ enum mlxsw_sp_nve_type type;
+ __be32 vni;
+ const struct net_device *dev;
+};
+
+extern const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[];
+extern const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[];
+
+int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fid *fid,
+ enum mlxsw_sp_l3proto proto,
+ union mlxsw_sp_l3addr *addr);
+void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fid *fid,
+ enum mlxsw_sp_l3proto proto,
+ union mlxsw_sp_l3addr *addr);
+u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp);
+bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp,
+ u32 tb_id, __be32 addr);
+int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_nve_params *params,
+ struct netlink_ext_ack *extack);
+void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fid *fid);
+int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port);
+void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port);
+int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp);
+void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp);
+
#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c
index 68c8b148bef2..8d14770766b4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c
@@ -35,6 +35,7 @@ static const struct mlxsw_sp2_kvdl_part_info mlxsw_sp2_kvdl_parts_info[] = {
MAX_KVD_ACTION_SETS),
MLXSW_SP2_KVDL_PART_INFO(PBS, 0x24, KVD_SIZE, KVD_SIZE),
MLXSW_SP2_KVDL_PART_INFO(MCRIGR, 0x26, KVD_SIZE, KVD_SIZE),
+ MLXSW_SP2_KVDL_PART_INFO(TNUMT, 0x29, KVD_SIZE, KVD_SIZE),
};
#define MLXSW_SP2_KVDL_PARTS_INFO_LEN ARRAY_SIZE(mlxsw_sp2_kvdl_parts_info)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
index 3589432d1643..12c61e0cc570 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -25,28 +25,52 @@ struct mlxsw_cp_sb_occ {
struct mlxsw_sp_sb_cm {
u32 min_buff;
u32 max_buff;
- u8 pool;
+ u16 pool_index;
struct mlxsw_cp_sb_occ occ;
};
+#define MLXSW_SP_SB_INFI -1U
+
struct mlxsw_sp_sb_pm {
u32 min_buff;
u32 max_buff;
struct mlxsw_cp_sb_occ occ;
};
-#define MLXSW_SP_SB_POOL_COUNT 4
-#define MLXSW_SP_SB_TC_COUNT 8
+struct mlxsw_sp_sb_pool_des {
+ enum mlxsw_reg_sbxx_dir dir;
+ u8 pool;
+};
+
+/* Order ingress pools before egress pools. */
+static const struct mlxsw_sp_sb_pool_des mlxsw_sp_sb_pool_dess[] = {
+ {MLXSW_REG_SBXX_DIR_INGRESS, 0},
+ {MLXSW_REG_SBXX_DIR_INGRESS, 1},
+ {MLXSW_REG_SBXX_DIR_INGRESS, 2},
+ {MLXSW_REG_SBXX_DIR_INGRESS, 3},
+ {MLXSW_REG_SBXX_DIR_EGRESS, 0},
+ {MLXSW_REG_SBXX_DIR_EGRESS, 1},
+ {MLXSW_REG_SBXX_DIR_EGRESS, 2},
+ {MLXSW_REG_SBXX_DIR_EGRESS, 3},
+ {MLXSW_REG_SBXX_DIR_EGRESS, 15},
+};
+
+#define MLXSW_SP_SB_POOL_DESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pool_dess)
+
+#define MLXSW_SP_SB_ING_TC_COUNT 8
+#define MLXSW_SP_SB_EG_TC_COUNT 16
struct mlxsw_sp_sb_port {
- struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT];
- struct mlxsw_sp_sb_pm pms[2][MLXSW_SP_SB_POOL_COUNT];
+ struct mlxsw_sp_sb_cm ing_cms[MLXSW_SP_SB_ING_TC_COUNT];
+ struct mlxsw_sp_sb_cm eg_cms[MLXSW_SP_SB_EG_TC_COUNT];
+ struct mlxsw_sp_sb_pm pms[MLXSW_SP_SB_POOL_DESS_LEN];
};
struct mlxsw_sp_sb {
- struct mlxsw_sp_sb_pr prs[2][MLXSW_SP_SB_POOL_COUNT];
+ struct mlxsw_sp_sb_pr prs[MLXSW_SP_SB_POOL_DESS_LEN];
struct mlxsw_sp_sb_port *ports;
u32 cell_size;
+ u64 sb_size;
};
u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells)
@@ -60,95 +84,122 @@ u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes)
}
static struct mlxsw_sp_sb_pr *mlxsw_sp_sb_pr_get(struct mlxsw_sp *mlxsw_sp,
- u8 pool,
- enum mlxsw_reg_sbxx_dir dir)
+ u16 pool_index)
{
- return &mlxsw_sp->sb->prs[dir][pool];
+ return &mlxsw_sp->sb->prs[pool_index];
+}
+
+static bool mlxsw_sp_sb_cm_exists(u8 pg_buff, enum mlxsw_reg_sbxx_dir dir)
+{
+ if (dir == MLXSW_REG_SBXX_DIR_INGRESS)
+ return pg_buff < MLXSW_SP_SB_ING_TC_COUNT;
+ else
+ return pg_buff < MLXSW_SP_SB_EG_TC_COUNT;
}
static struct mlxsw_sp_sb_cm *mlxsw_sp_sb_cm_get(struct mlxsw_sp *mlxsw_sp,
u8 local_port, u8 pg_buff,
enum mlxsw_reg_sbxx_dir dir)
{
- return &mlxsw_sp->sb->ports[local_port].cms[dir][pg_buff];
+ struct mlxsw_sp_sb_port *sb_port = &mlxsw_sp->sb->ports[local_port];
+
+ WARN_ON(!mlxsw_sp_sb_cm_exists(pg_buff, dir));
+ if (dir == MLXSW_REG_SBXX_DIR_INGRESS)
+ return &sb_port->ing_cms[pg_buff];
+ else
+ return &sb_port->eg_cms[pg_buff];
}
static struct mlxsw_sp_sb_pm *mlxsw_sp_sb_pm_get(struct mlxsw_sp *mlxsw_sp,
- u8 local_port, u8 pool,
- enum mlxsw_reg_sbxx_dir dir)
+ u8 local_port, u16 pool_index)
{
- return &mlxsw_sp->sb->ports[local_port].pms[dir][pool];
+ return &mlxsw_sp->sb->ports[local_port].pms[pool_index];
}
-static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u8 pool,
- enum mlxsw_reg_sbxx_dir dir,
- enum mlxsw_reg_sbpr_mode mode, u32 size)
+static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u16 pool_index,
+ enum mlxsw_reg_sbpr_mode mode,
+ u32 size, bool infi_size)
{
+ const struct mlxsw_sp_sb_pool_des *des =
+ &mlxsw_sp_sb_pool_dess[pool_index];
char sbpr_pl[MLXSW_REG_SBPR_LEN];
struct mlxsw_sp_sb_pr *pr;
int err;
- mlxsw_reg_sbpr_pack(sbpr_pl, pool, dir, mode, size);
+ mlxsw_reg_sbpr_pack(sbpr_pl, des->pool, des->dir, mode,
+ size, infi_size);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpr), sbpr_pl);
if (err)
return err;
- pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir);
+ if (infi_size)
+ size = mlxsw_sp_bytes_cells(mlxsw_sp, mlxsw_sp->sb->sb_size);
+ pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index);
pr->mode = mode;
pr->size = size;
return 0;
}
static int mlxsw_sp_sb_cm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port,
- u8 pg_buff, enum mlxsw_reg_sbxx_dir dir,
- u32 min_buff, u32 max_buff, u8 pool)
+ u8 pg_buff, u32 min_buff, u32 max_buff,
+ bool infi_max, u16 pool_index)
{
+ const struct mlxsw_sp_sb_pool_des *des =
+ &mlxsw_sp_sb_pool_dess[pool_index];
char sbcm_pl[MLXSW_REG_SBCM_LEN];
+ struct mlxsw_sp_sb_cm *cm;
int err;
- mlxsw_reg_sbcm_pack(sbcm_pl, local_port, pg_buff, dir,
- min_buff, max_buff, pool);
+ mlxsw_reg_sbcm_pack(sbcm_pl, local_port, pg_buff, des->dir,
+ min_buff, max_buff, infi_max, des->pool);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbcm), sbcm_pl);
if (err)
return err;
- if (pg_buff < MLXSW_SP_SB_TC_COUNT) {
- struct mlxsw_sp_sb_cm *cm;
- cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir);
+ if (mlxsw_sp_sb_cm_exists(pg_buff, des->dir)) {
+ if (infi_max)
+ max_buff = mlxsw_sp_bytes_cells(mlxsw_sp,
+ mlxsw_sp->sb->sb_size);
+
+ cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff,
+ des->dir);
cm->min_buff = min_buff;
cm->max_buff = max_buff;
- cm->pool = pool;
+ cm->pool_index = pool_index;
}
return 0;
}
static int mlxsw_sp_sb_pm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port,
- u8 pool, enum mlxsw_reg_sbxx_dir dir,
- u32 min_buff, u32 max_buff)
+ u16 pool_index, u32 min_buff, u32 max_buff)
{
+ const struct mlxsw_sp_sb_pool_des *des =
+ &mlxsw_sp_sb_pool_dess[pool_index];
char sbpm_pl[MLXSW_REG_SBPM_LEN];
struct mlxsw_sp_sb_pm *pm;
int err;
- mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, false,
+ mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir, false,
min_buff, max_buff);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl);
if (err)
return err;
- pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir);
+ pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool_index);
pm->min_buff = min_buff;
pm->max_buff = max_buff;
return 0;
}
static int mlxsw_sp_sb_pm_occ_clear(struct mlxsw_sp *mlxsw_sp, u8 local_port,
- u8 pool, enum mlxsw_reg_sbxx_dir dir,
- struct list_head *bulk_list)
+ u16 pool_index, struct list_head *bulk_list)
{
+ const struct mlxsw_sp_sb_pool_des *des =
+ &mlxsw_sp_sb_pool_dess[pool_index];
char sbpm_pl[MLXSW_REG_SBPM_LEN];
- mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, true, 0, 0);
+ mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir,
+ true, 0, 0);
return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl,
bulk_list, NULL, 0);
}
@@ -163,14 +214,16 @@ static void mlxsw_sp_sb_pm_occ_query_cb(struct mlxsw_core *mlxsw_core,
}
static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port,
- u8 pool, enum mlxsw_reg_sbxx_dir dir,
- struct list_head *bulk_list)
+ u16 pool_index, struct list_head *bulk_list)
{
+ const struct mlxsw_sp_sb_pool_des *des =
+ &mlxsw_sp_sb_pool_dess[pool_index];
char sbpm_pl[MLXSW_REG_SBPM_LEN];
struct mlxsw_sp_sb_pm *pm;
- pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir);
- mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, false, 0, 0);
+ pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool_index);
+ mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir,
+ false, 0, 0);
return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl,
bulk_list,
mlxsw_sp_sb_pm_occ_query_cb,
@@ -254,63 +307,54 @@ static void mlxsw_sp_sb_ports_fini(struct mlxsw_sp *mlxsw_sp)
.size = _size, \
}
-static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_ingress[] = {
+static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs[] = {
+ /* Ingress pools. */
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
MLXSW_SP_SB_PR_INGRESS_SIZE),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC,
MLXSW_SP_SB_PR_INGRESS_MNG_SIZE),
-};
-
-#define MLXSW_SP_SB_PRS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_ingress)
-
-static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_egress[] = {
+ /* Egress pools. */
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_PR_EGRESS_SIZE),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0),
+ MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI),
};
-#define MLXSW_SP_SB_PRS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_egress)
+#define MLXSW_SP_SB_PRS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs)
-static int __mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp,
- enum mlxsw_reg_sbxx_dir dir,
- const struct mlxsw_sp_sb_pr *prs,
- size_t prs_len)
+static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_sb_pr *prs,
+ size_t prs_len)
{
int i;
int err;
for (i = 0; i < prs_len; i++) {
- u32 size = mlxsw_sp_bytes_cells(mlxsw_sp, prs[i].size);
-
- err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, dir, prs[i].mode, size);
+ u32 size = prs[i].size;
+ u32 size_cells;
+
+ if (size == MLXSW_SP_SB_INFI) {
+ err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode,
+ 0, true);
+ } else {
+ size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size);
+ err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode,
+ size_cells, false);
+ }
if (err)
return err;
}
return 0;
}
-static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp)
-{
- int err;
-
- err = __mlxsw_sp_sb_prs_init(mlxsw_sp, MLXSW_REG_SBXX_DIR_INGRESS,
- mlxsw_sp_sb_prs_ingress,
- MLXSW_SP_SB_PRS_INGRESS_LEN);
- if (err)
- return err;
- return __mlxsw_sp_sb_prs_init(mlxsw_sp, MLXSW_REG_SBXX_DIR_EGRESS,
- mlxsw_sp_sb_prs_egress,
- MLXSW_SP_SB_PRS_EGRESS_LEN);
-}
-
#define MLXSW_SP_SB_CM(_min_buff, _max_buff, _pool) \
{ \
.min_buff = _min_buff, \
.max_buff = _max_buff, \
- .pool = _pool, \
+ .pool_index = _pool, \
}
static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = {
@@ -329,38 +373,38 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = {
#define MLXSW_SP_SB_CMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_ingress)
static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = {
- MLXSW_SP_SB_CM(1500, 9, 0),
- MLXSW_SP_SB_CM(1500, 9, 0),
- MLXSW_SP_SB_CM(1500, 9, 0),
- MLXSW_SP_SB_CM(1500, 9, 0),
- MLXSW_SP_SB_CM(1500, 9, 0),
- MLXSW_SP_SB_CM(1500, 9, 0),
- MLXSW_SP_SB_CM(1500, 9, 0),
- MLXSW_SP_SB_CM(1500, 9, 0),
- MLXSW_SP_SB_CM(0, 140000, 15),
- MLXSW_SP_SB_CM(0, 140000, 15),
- MLXSW_SP_SB_CM(0, 140000, 15),
- MLXSW_SP_SB_CM(0, 140000, 15),
- MLXSW_SP_SB_CM(0, 140000, 15),
- MLXSW_SP_SB_CM(0, 140000, 15),
- MLXSW_SP_SB_CM(0, 140000, 15),
- MLXSW_SP_SB_CM(0, 140000, 15),
- MLXSW_SP_SB_CM(1, 0xff, 0),
+ MLXSW_SP_SB_CM(1500, 9, 4),
+ MLXSW_SP_SB_CM(1500, 9, 4),
+ MLXSW_SP_SB_CM(1500, 9, 4),
+ MLXSW_SP_SB_CM(1500, 9, 4),
+ MLXSW_SP_SB_CM(1500, 9, 4),
+ MLXSW_SP_SB_CM(1500, 9, 4),
+ MLXSW_SP_SB_CM(1500, 9, 4),
+ MLXSW_SP_SB_CM(1500, 9, 4),
+ MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
+ MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
+ MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
+ MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
+ MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
+ MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
+ MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
+ MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8),
+ MLXSW_SP_SB_CM(1, 0xff, 4),
};
#define MLXSW_SP_SB_CMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_egress)
-#define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, 0)
+#define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, 4)
static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = {
MLXSW_SP_CPU_PORT_SB_CM,
- MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
- MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
- MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
- MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
- MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
+ MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4),
+ MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4),
+ MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4),
+ MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4),
+ MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4),
MLXSW_SP_CPU_PORT_SB_CM,
- MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0),
+ MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4),
MLXSW_SP_CPU_PORT_SB_CM,
MLXSW_SP_CPU_PORT_SB_CM,
MLXSW_SP_CPU_PORT_SB_CM,
@@ -390,6 +434,14 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = {
#define MLXSW_SP_CPU_PORT_SB_MCS_LEN \
ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms)
+static bool
+mlxsw_sp_sb_pool_is_static(struct mlxsw_sp *mlxsw_sp, u16 pool_index)
+{
+ struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index);
+
+ return pr->mode == MLXSW_REG_SBPR_MODE_STATIC;
+}
+
static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port,
enum mlxsw_reg_sbxx_dir dir,
const struct mlxsw_sp_sb_cm *cms,
@@ -401,16 +453,29 @@ static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port,
for (i = 0; i < cms_len; i++) {
const struct mlxsw_sp_sb_cm *cm;
u32 min_buff;
+ u32 max_buff;
if (i == 8 && dir == MLXSW_REG_SBXX_DIR_INGRESS)
continue; /* PG number 8 does not exist, skip it */
cm = &cms[i];
- /* All pools are initialized using dynamic thresholds,
- * therefore 'max_buff' isn't specified in cells.
- */
+ if (WARN_ON(mlxsw_sp_sb_pool_dess[cm->pool_index].dir != dir))
+ continue;
+
min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, cm->min_buff);
- err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, dir,
- min_buff, cm->max_buff, cm->pool);
+ max_buff = cm->max_buff;
+ if (max_buff == MLXSW_SP_SB_INFI) {
+ err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i,
+ min_buff, 0,
+ true, cm->pool_index);
+ } else {
+ if (mlxsw_sp_sb_pool_is_static(mlxsw_sp,
+ cm->pool_index))
+ max_buff = mlxsw_sp_bytes_cells(mlxsw_sp,
+ max_buff);
+ err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i,
+ min_buff, max_buff,
+ false, cm->pool_index);
+ }
if (err)
return err;
}
@@ -448,91 +513,74 @@ static int mlxsw_sp_cpu_port_sb_cms_init(struct mlxsw_sp *mlxsw_sp)
.max_buff = _max_buff, \
}
-static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms_ingress[] = {
+static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms[] = {
+ /* Ingress pools. */
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX),
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX),
-};
-
-#define MLXSW_SP_SB_PMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms_ingress)
-
-static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms_egress[] = {
+ /* Egress pools. */
MLXSW_SP_SB_PM(0, 7),
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN),
+ MLXSW_SP_SB_PM(10000, 90000),
};
-#define MLXSW_SP_SB_PMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms_egress)
+#define MLXSW_SP_SB_PMS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms)
-static int __mlxsw_sp_port_sb_pms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port,
- enum mlxsw_reg_sbxx_dir dir,
- const struct mlxsw_sp_sb_pm *pms,
- size_t pms_len)
+static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port)
{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
int i;
int err;
- for (i = 0; i < pms_len; i++) {
- const struct mlxsw_sp_sb_pm *pm;
+ for (i = 0; i < MLXSW_SP_SB_PMS_LEN; i++) {
+ const struct mlxsw_sp_sb_pm *pm = &mlxsw_sp_sb_pms[i];
+ u32 max_buff;
+ u32 min_buff;
- pm = &pms[i];
- err = mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, i, dir,
- pm->min_buff, pm->max_buff);
+ min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, pm->min_buff);
+ max_buff = pm->max_buff;
+ if (mlxsw_sp_sb_pool_is_static(mlxsw_sp, i))
+ max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, max_buff);
+ err = mlxsw_sp_sb_pm_write(mlxsw_sp, mlxsw_sp_port->local_port,
+ i, min_buff, max_buff);
if (err)
return err;
}
return 0;
}
-static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port)
-{
- int err;
-
- err = __mlxsw_sp_port_sb_pms_init(mlxsw_sp_port->mlxsw_sp,
- mlxsw_sp_port->local_port,
- MLXSW_REG_SBXX_DIR_INGRESS,
- mlxsw_sp_sb_pms_ingress,
- MLXSW_SP_SB_PMS_INGRESS_LEN);
- if (err)
- return err;
- return __mlxsw_sp_port_sb_pms_init(mlxsw_sp_port->mlxsw_sp,
- mlxsw_sp_port->local_port,
- MLXSW_REG_SBXX_DIR_EGRESS,
- mlxsw_sp_sb_pms_egress,
- MLXSW_SP_SB_PMS_EGRESS_LEN);
-}
-
struct mlxsw_sp_sb_mm {
u32 min_buff;
u32 max_buff;
- u8 pool;
+ u16 pool_index;
};
#define MLXSW_SP_SB_MM(_min_buff, _max_buff, _pool) \
{ \
.min_buff = _min_buff, \
.max_buff = _max_buff, \
- .pool = _pool, \
+ .pool_index = _pool, \
}
static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = {
- MLXSW_SP_SB_MM(20000, 0xff, 0),
- MLXSW_SP_SB_MM(20000, 0xff, 0),
- MLXSW_SP_SB_MM(20000, 0xff, 0),
- MLXSW_SP_SB_MM(20000, 0xff, 0),
- MLXSW_SP_SB_MM(20000, 0xff, 0),
- MLXSW_SP_SB_MM(20000, 0xff, 0),
- MLXSW_SP_SB_MM(20000, 0xff, 0),
- MLXSW_SP_SB_MM(20000, 0xff, 0),
- MLXSW_SP_SB_MM(20000, 0xff, 0),
- MLXSW_SP_SB_MM(20000, 0xff, 0),
- MLXSW_SP_SB_MM(20000, 0xff, 0),
- MLXSW_SP_SB_MM(20000, 0xff, 0),
- MLXSW_SP_SB_MM(20000, 0xff, 0),
- MLXSW_SP_SB_MM(20000, 0xff, 0),
- MLXSW_SP_SB_MM(20000, 0xff, 0),
+ MLXSW_SP_SB_MM(0, 6, 4),
+ MLXSW_SP_SB_MM(0, 6, 4),
+ MLXSW_SP_SB_MM(0, 6, 4),
+ MLXSW_SP_SB_MM(0, 6, 4),
+ MLXSW_SP_SB_MM(0, 6, 4),
+ MLXSW_SP_SB_MM(0, 6, 4),
+ MLXSW_SP_SB_MM(0, 6, 4),
+ MLXSW_SP_SB_MM(0, 6, 4),
+ MLXSW_SP_SB_MM(0, 6, 4),
+ MLXSW_SP_SB_MM(0, 6, 4),
+ MLXSW_SP_SB_MM(0, 6, 4),
+ MLXSW_SP_SB_MM(0, 6, 4),
+ MLXSW_SP_SB_MM(0, 6, 4),
+ MLXSW_SP_SB_MM(0, 6, 4),
+ MLXSW_SP_SB_MM(0, 6, 4),
};
#define MLXSW_SP_SB_MMS_LEN ARRAY_SIZE(mlxsw_sp_sb_mms)
@@ -544,16 +592,18 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp)
int err;
for (i = 0; i < MLXSW_SP_SB_MMS_LEN; i++) {
+ const struct mlxsw_sp_sb_pool_des *des;
const struct mlxsw_sp_sb_mm *mc;
u32 min_buff;
mc = &mlxsw_sp_sb_mms[i];
- /* All pools are initialized using dynamic thresholds,
- * therefore 'max_buff' isn't specified in cells.
+ des = &mlxsw_sp_sb_pool_dess[mc->pool_index];
+ /* All pools used by sb_mm's are initialized using dynamic
+ * thresholds, therefore 'max_buff' isn't specified in cells.
*/
min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, mc->min_buff);
mlxsw_reg_sbmm_pack(sbmm_pl, i, min_buff, mc->max_buff,
- mc->pool);
+ des->pool);
err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbmm), sbmm_pl);
if (err)
return err;
@@ -561,9 +611,24 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp)
return 0;
}
+static void mlxsw_sp_pool_count(u16 *p_ingress_len, u16 *p_egress_len)
+{
+ int i;
+
+ for (i = 0; i < MLXSW_SP_SB_POOL_DESS_LEN; ++i)
+ if (mlxsw_sp_sb_pool_dess[i].dir == MLXSW_REG_SBXX_DIR_EGRESS)
+ goto out;
+ WARN(1, "No egress pools\n");
+
+out:
+ *p_ingress_len = i;
+ *p_egress_len = MLXSW_SP_SB_POOL_DESS_LEN - i;
+}
+
int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp)
{
- u64 sb_size;
+ u16 ing_pool_count;
+ u16 eg_pool_count;
int err;
if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, CELL_SIZE))
@@ -571,17 +636,19 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp)
if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_BUFFER_SIZE))
return -EIO;
- sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE);
mlxsw_sp->sb = kzalloc(sizeof(*mlxsw_sp->sb), GFP_KERNEL);
if (!mlxsw_sp->sb)
return -ENOMEM;
mlxsw_sp->sb->cell_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, CELL_SIZE);
+ mlxsw_sp->sb->sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+ MAX_BUFFER_SIZE);
err = mlxsw_sp_sb_ports_init(mlxsw_sp);
if (err)
goto err_sb_ports_init;
- err = mlxsw_sp_sb_prs_init(mlxsw_sp);
+ err = mlxsw_sp_sb_prs_init(mlxsw_sp, mlxsw_sp_sb_prs,
+ MLXSW_SP_SB_PRS_LEN);
if (err)
goto err_sb_prs_init;
err = mlxsw_sp_cpu_port_sb_cms_init(mlxsw_sp);
@@ -590,11 +657,13 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp)
err = mlxsw_sp_sb_mms_init(mlxsw_sp);
if (err)
goto err_sb_mms_init;
- err = devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, sb_size,
- MLXSW_SP_SB_POOL_COUNT,
- MLXSW_SP_SB_POOL_COUNT,
- MLXSW_SP_SB_TC_COUNT,
- MLXSW_SP_SB_TC_COUNT);
+ mlxsw_sp_pool_count(&ing_pool_count, &eg_pool_count);
+ err = devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0,
+ mlxsw_sp->sb->sb_size,
+ ing_pool_count,
+ eg_pool_count,
+ MLXSW_SP_SB_ING_TC_COUNT,
+ MLXSW_SP_SB_EG_TC_COUNT);
if (err)
goto err_devlink_sb_register;
@@ -632,36 +701,15 @@ int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port)
return err;
}
-static u8 pool_get(u16 pool_index)
-{
- return pool_index % MLXSW_SP_SB_POOL_COUNT;
-}
-
-static u16 pool_index_get(u8 pool, enum mlxsw_reg_sbxx_dir dir)
-{
- u16 pool_index;
-
- pool_index = pool;
- if (dir == MLXSW_REG_SBXX_DIR_EGRESS)
- pool_index += MLXSW_SP_SB_POOL_COUNT;
- return pool_index;
-}
-
-static enum mlxsw_reg_sbxx_dir dir_get(u16 pool_index)
-{
- return pool_index < MLXSW_SP_SB_POOL_COUNT ?
- MLXSW_REG_SBXX_DIR_INGRESS : MLXSW_REG_SBXX_DIR_EGRESS;
-}
-
int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core,
unsigned int sb_index, u16 pool_index,
struct devlink_sb_pool_info *pool_info)
{
+ enum mlxsw_reg_sbxx_dir dir = mlxsw_sp_sb_pool_dess[pool_index].dir;
struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
- u8 pool = pool_get(pool_index);
- enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
- struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir);
+ struct mlxsw_sp_sb_pr *pr;
+ pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index);
pool_info->pool_type = (enum devlink_sb_pool_type) dir;
pool_info->size = mlxsw_sp_cells_bytes(mlxsw_sp, pr->size);
pool_info->threshold_type = (enum devlink_sb_threshold_type) pr->mode;
@@ -674,34 +722,32 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core,
{
struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
u32 pool_size = mlxsw_sp_bytes_cells(mlxsw_sp, size);
- u8 pool = pool_get(pool_index);
- enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
enum mlxsw_reg_sbpr_mode mode;
if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE))
return -EINVAL;
mode = (enum mlxsw_reg_sbpr_mode) threshold_type;
- return mlxsw_sp_sb_pr_write(mlxsw_sp, pool, dir, mode, pool_size);
+ return mlxsw_sp_sb_pr_write(mlxsw_sp, pool_index, mode,
+ pool_size, false);
}
#define MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET (-2) /* 3->1, 16->14 */
-static u32 mlxsw_sp_sb_threshold_out(struct mlxsw_sp *mlxsw_sp, u8 pool,
- enum mlxsw_reg_sbxx_dir dir, u32 max_buff)
+static u32 mlxsw_sp_sb_threshold_out(struct mlxsw_sp *mlxsw_sp, u16 pool_index,
+ u32 max_buff)
{
- struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir);
+ struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index);
if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC)
return max_buff - MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET;
return mlxsw_sp_cells_bytes(mlxsw_sp, max_buff);
}
-static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u8 pool,
- enum mlxsw_reg_sbxx_dir dir, u32 threshold,
- u32 *p_max_buff)
+static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u16 pool_index,
+ u32 threshold, u32 *p_max_buff)
{
- struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir);
+ struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index);
if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC) {
int val;
@@ -725,12 +771,10 @@ int mlxsw_sp_sb_port_pool_get(struct mlxsw_core_port *mlxsw_core_port,
mlxsw_core_port_driver_priv(mlxsw_core_port);
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
u8 local_port = mlxsw_sp_port->local_port;
- u8 pool = pool_get(pool_index);
- enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port,
- pool, dir);
+ pool_index);
- *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, pool, dir,
+ *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, pool_index,
pm->max_buff);
return 0;
}
@@ -743,17 +787,15 @@ int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port,
mlxsw_core_port_driver_priv(mlxsw_core_port);
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
u8 local_port = mlxsw_sp_port->local_port;
- u8 pool = pool_get(pool_index);
- enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
u32 max_buff;
int err;
- err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir,
+ err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool_index,
threshold, &max_buff);
if (err)
return err;
- return mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, pool, dir,
+ return mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, pool_index,
0, max_buff);
}
@@ -771,9 +813,9 @@ int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port,
struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port,
pg_buff, dir);
- *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool, dir,
+ *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool_index,
cm->max_buff);
- *p_pool_index = pool_index_get(cm->pool, dir);
+ *p_pool_index = cm->pool_index;
return 0;
}
@@ -788,24 +830,24 @@ int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port,
u8 local_port = mlxsw_sp_port->local_port;
u8 pg_buff = tc_index;
enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type;
- u8 pool = pool_get(pool_index);
u32 max_buff;
int err;
- if (dir != dir_get(pool_index))
+ if (dir != mlxsw_sp_sb_pool_dess[pool_index].dir)
return -EINVAL;
- err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir,
+ err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool_index,
threshold, &max_buff);
if (err)
return err;
- return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff, dir,
- 0, max_buff, pool);
+ return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff,
+ 0, max_buff, false, pool_index);
}
#define MASKED_COUNT_MAX \
- (MLXSW_REG_SBSR_REC_MAX_COUNT / (MLXSW_SP_SB_TC_COUNT * 2))
+ (MLXSW_REG_SBSR_REC_MAX_COUNT / \
+ (MLXSW_SP_SB_ING_TC_COUNT + MLXSW_SP_SB_EG_TC_COUNT))
struct mlxsw_sp_sb_sr_occ_query_cb_ctx {
u8 masked_count;
@@ -831,7 +873,7 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core,
local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
if (!mlxsw_sp->ports[local_port])
continue;
- for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) {
+ for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++) {
cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i,
MLXSW_REG_SBXX_DIR_INGRESS);
mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++,
@@ -845,7 +887,7 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core,
local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
if (!mlxsw_sp->ports[local_port])
continue;
- for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) {
+ for (i = 0; i < MLXSW_SP_SB_EG_TC_COUNT; i++) {
cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i,
MLXSW_REG_SBXX_DIR_EGRESS);
mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++,
@@ -880,23 +922,17 @@ next_batch:
local_port_1 = local_port;
masked_count = 0;
mlxsw_reg_sbsr_pack(sbsr_pl, false);
- for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) {
+ for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++)
mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1);
+ for (i = 0; i < MLXSW_SP_SB_EG_TC_COUNT; i++)
mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1);
- }
for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
if (!mlxsw_sp->ports[local_port])
continue;
mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1);
mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1);
- for (i = 0; i < MLXSW_SP_SB_POOL_COUNT; i++) {
- err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i,
- MLXSW_REG_SBXX_DIR_INGRESS,
- &bulk_list);
- if (err)
- goto out;
+ for (i = 0; i < MLXSW_SP_SB_POOL_DESS_LEN; i++) {
err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i,
- MLXSW_REG_SBXX_DIR_EGRESS,
&bulk_list);
if (err)
goto out;
@@ -945,23 +981,17 @@ next_batch:
local_port++;
masked_count = 0;
mlxsw_reg_sbsr_pack(sbsr_pl, true);
- for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) {
+ for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++)
mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1);
+ for (i = 0; i < MLXSW_SP_SB_EG_TC_COUNT; i++)
mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1);
- }
for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) {
if (!mlxsw_sp->ports[local_port])
continue;
mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1);
mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1);
- for (i = 0; i < MLXSW_SP_SB_POOL_COUNT; i++) {
- err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i,
- MLXSW_REG_SBXX_DIR_INGRESS,
- &bulk_list);
- if (err)
- goto out;
+ for (i = 0; i < MLXSW_SP_SB_POOL_DESS_LEN; i++) {
err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i,
- MLXSW_REG_SBXX_DIR_EGRESS,
&bulk_list);
if (err)
goto out;
@@ -994,10 +1024,8 @@ int mlxsw_sp_sb_occ_port_pool_get(struct mlxsw_core_port *mlxsw_core_port,
mlxsw_core_port_driver_priv(mlxsw_core_port);
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
u8 local_port = mlxsw_sp_port->local_port;
- u8 pool = pool_get(pool_index);
- enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index);
struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port,
- pool, dir);
+ pool_index);
*p_cur = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.cur);
*p_max = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.max);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
index 715d24ff937e..a3db033d7399 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c
@@ -6,6 +6,7 @@
#include <linux/if_vlan.h>
#include <linux/if_bridge.h>
#include <linux/netdevice.h>
+#include <linux/rhashtable.h>
#include <linux/rtnetlink.h>
#include "spectrum.h"
@@ -14,6 +15,7 @@
struct mlxsw_sp_fid_family;
struct mlxsw_sp_fid_core {
+ struct rhashtable vni_ht;
struct mlxsw_sp_fid_family *fid_family_arr[MLXSW_SP_FID_TYPE_MAX];
unsigned int *port_fid_mappings;
};
@@ -24,6 +26,12 @@ struct mlxsw_sp_fid {
unsigned int ref_count;
u16 fid_index;
struct mlxsw_sp_fid_family *fid_family;
+
+ struct rhash_head vni_ht_node;
+ __be32 vni;
+ u32 nve_flood_index;
+ u8 vni_valid:1,
+ nve_flood_index_valid:1;
};
struct mlxsw_sp_fid_8021q {
@@ -36,6 +44,12 @@ struct mlxsw_sp_fid_8021d {
int br_ifindex;
};
+static const struct rhashtable_params mlxsw_sp_fid_vni_ht_params = {
+ .key_len = sizeof_field(struct mlxsw_sp_fid, vni),
+ .key_offset = offsetof(struct mlxsw_sp_fid, vni),
+ .head_offset = offsetof(struct mlxsw_sp_fid, vni_ht_node),
+};
+
struct mlxsw_sp_flood_table {
enum mlxsw_sp_flood_type packet_type;
enum mlxsw_reg_sfgc_bridge_type bridge_type;
@@ -56,6 +70,11 @@ struct mlxsw_sp_fid_ops {
struct mlxsw_sp_port *port, u16 vid);
void (*port_vid_unmap)(struct mlxsw_sp_fid *fid,
struct mlxsw_sp_port *port, u16 vid);
+ int (*vni_set)(struct mlxsw_sp_fid *fid, __be32 vni);
+ void (*vni_clear)(struct mlxsw_sp_fid *fid);
+ int (*nve_flood_index_set)(struct mlxsw_sp_fid *fid,
+ u32 nve_flood_index);
+ void (*nve_flood_index_clear)(struct mlxsw_sp_fid *fid);
};
struct mlxsw_sp_fid_family {
@@ -94,6 +113,117 @@ static const int *mlxsw_sp_packet_type_sfgc_types[] = {
[MLXSW_SP_FLOOD_TYPE_MC] = mlxsw_sp_sfgc_mc_packet_types,
};
+struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp,
+ __be32 vni)
+{
+ struct mlxsw_sp_fid *fid;
+
+ fid = rhashtable_lookup_fast(&mlxsw_sp->fid_core->vni_ht, &vni,
+ mlxsw_sp_fid_vni_ht_params);
+ if (fid)
+ fid->ref_count++;
+
+ return fid;
+}
+
+int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni)
+{
+ if (!fid->vni_valid)
+ return -EINVAL;
+
+ *vni = fid->vni;
+
+ return 0;
+}
+
+int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid,
+ u32 nve_flood_index)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+ const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+ int err;
+
+ if (WARN_ON(!ops->nve_flood_index_set || fid->nve_flood_index_valid))
+ return -EINVAL;
+
+ err = ops->nve_flood_index_set(fid, nve_flood_index);
+ if (err)
+ return err;
+
+ fid->nve_flood_index = nve_flood_index;
+ fid->nve_flood_index_valid = true;
+
+ return 0;
+}
+
+void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+ const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+
+ if (WARN_ON(!ops->nve_flood_index_clear || !fid->nve_flood_index_valid))
+ return;
+
+ fid->nve_flood_index_valid = false;
+ ops->nve_flood_index_clear(fid);
+}
+
+bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid)
+{
+ return fid->nve_flood_index_valid;
+}
+
+int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+ const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+ struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp;
+ int err;
+
+ if (WARN_ON(!ops->vni_set || fid->vni_valid))
+ return -EINVAL;
+
+ fid->vni = vni;
+ err = rhashtable_lookup_insert_fast(&mlxsw_sp->fid_core->vni_ht,
+ &fid->vni_ht_node,
+ mlxsw_sp_fid_vni_ht_params);
+ if (err)
+ return err;
+
+ err = ops->vni_set(fid, vni);
+ if (err)
+ goto err_vni_set;
+
+ fid->vni_valid = true;
+
+ return 0;
+
+err_vni_set:
+ rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node,
+ mlxsw_sp_fid_vni_ht_params);
+ return err;
+}
+
+void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+ const struct mlxsw_sp_fid_ops *ops = fid_family->ops;
+ struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp;
+
+ if (WARN_ON(!ops->vni_clear || !fid->vni_valid))
+ return;
+
+ fid->vni_valid = false;
+ ops->vni_clear(fid);
+ rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node,
+ mlxsw_sp_fid_vni_ht_params);
+}
+
+bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid)
+{
+ return fid->vni_valid;
+}
+
static const struct mlxsw_sp_flood_table *
mlxsw_sp_fid_flood_table_lookup(const struct mlxsw_sp_fid *fid,
enum mlxsw_sp_flood_type packet_type)
@@ -217,6 +347,21 @@ static int mlxsw_sp_fid_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index,
return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
}
+static int mlxsw_sp_fid_vni_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index,
+ __be32 vni, bool vni_valid, u32 nve_flood_index,
+ bool nve_flood_index_valid)
+{
+ char sfmr_pl[MLXSW_REG_SFMR_LEN];
+
+ mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid_index,
+ 0);
+ mlxsw_reg_sfmr_vv_set(sfmr_pl, vni_valid);
+ mlxsw_reg_sfmr_vni_set(sfmr_pl, be32_to_cpu(vni));
+ mlxsw_reg_sfmr_vtfp_set(sfmr_pl, nve_flood_index_valid);
+ mlxsw_reg_sfmr_nve_tunnel_flood_ptr_set(sfmr_pl, nve_flood_index);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl);
+}
+
static int mlxsw_sp_fid_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index,
u16 vid, bool valid)
{
@@ -393,6 +538,8 @@ static int mlxsw_sp_fid_8021d_configure(struct mlxsw_sp_fid *fid)
static void mlxsw_sp_fid_8021d_deconfigure(struct mlxsw_sp_fid *fid)
{
+ if (fid->vni_valid)
+ mlxsw_sp_nve_fid_disable(fid->fid_family->mlxsw_sp, fid);
mlxsw_sp_fid_op(fid->fid_family->mlxsw_sp, fid->fid_index, 0, false);
}
@@ -531,6 +678,41 @@ mlxsw_sp_fid_8021d_port_vid_unmap(struct mlxsw_sp_fid *fid,
mlxsw_sp_port->local_port, vid, false);
}
+static int mlxsw_sp_fid_8021d_vni_set(struct mlxsw_sp_fid *fid, __be32 vni)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+
+ return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, vni,
+ true, fid->nve_flood_index,
+ fid->nve_flood_index_valid);
+}
+
+static void mlxsw_sp_fid_8021d_vni_clear(struct mlxsw_sp_fid *fid)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+
+ mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, 0, false,
+ fid->nve_flood_index, fid->nve_flood_index_valid);
+}
+
+static int mlxsw_sp_fid_8021d_nve_flood_index_set(struct mlxsw_sp_fid *fid,
+ u32 nve_flood_index)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+
+ return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index,
+ fid->vni, fid->vni_valid, nve_flood_index,
+ true);
+}
+
+static void mlxsw_sp_fid_8021d_nve_flood_index_clear(struct mlxsw_sp_fid *fid)
+{
+ struct mlxsw_sp_fid_family *fid_family = fid->fid_family;
+
+ mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, fid->vni,
+ fid->vni_valid, 0, false);
+}
+
static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = {
.setup = mlxsw_sp_fid_8021d_setup,
.configure = mlxsw_sp_fid_8021d_configure,
@@ -540,6 +722,10 @@ static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = {
.flood_index = mlxsw_sp_fid_8021d_flood_index,
.port_vid_map = mlxsw_sp_fid_8021d_port_vid_map,
.port_vid_unmap = mlxsw_sp_fid_8021d_port_vid_unmap,
+ .vni_set = mlxsw_sp_fid_8021d_vni_set,
+ .vni_clear = mlxsw_sp_fid_8021d_vni_clear,
+ .nve_flood_index_set = mlxsw_sp_fid_8021d_nve_flood_index_set,
+ .nve_flood_index_clear = mlxsw_sp_fid_8021d_nve_flood_index_clear,
};
static const struct mlxsw_sp_flood_table mlxsw_sp_fid_8021d_flood_tables[] = {
@@ -708,14 +894,12 @@ static const struct mlxsw_sp_fid_family *mlxsw_sp_fid_family_arr[] = {
[MLXSW_SP_FID_TYPE_DUMMY] = &mlxsw_sp_fid_dummy_family,
};
-static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp,
- enum mlxsw_sp_fid_type type,
- const void *arg)
+static struct mlxsw_sp_fid *mlxsw_sp_fid_lookup(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_fid_type type,
+ const void *arg)
{
struct mlxsw_sp_fid_family *fid_family;
struct mlxsw_sp_fid *fid;
- u16 fid_index;
- int err;
fid_family = mlxsw_sp->fid_core->fid_family_arr[type];
list_for_each_entry(fid, &fid_family->fids_list, list) {
@@ -725,6 +909,23 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp,
return fid;
}
+ return NULL;
+}
+
+static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp,
+ enum mlxsw_sp_fid_type type,
+ const void *arg)
+{
+ struct mlxsw_sp_fid_family *fid_family;
+ struct mlxsw_sp_fid *fid;
+ u16 fid_index;
+ int err;
+
+ fid = mlxsw_sp_fid_lookup(mlxsw_sp, type, arg);
+ if (fid)
+ return fid;
+
+ fid_family = mlxsw_sp->fid_core->fid_family_arr[type];
fid = kzalloc(fid_family->fid_size, GFP_KERNEL);
if (!fid)
return ERR_PTR(-ENOMEM);
@@ -784,6 +985,13 @@ struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp,
return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D, &br_ifindex);
}
+struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp,
+ int br_ifindex)
+{
+ return mlxsw_sp_fid_lookup(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D,
+ &br_ifindex);
+}
+
struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp,
u16 rif_index)
{
@@ -918,6 +1126,10 @@ int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp)
return -ENOMEM;
mlxsw_sp->fid_core = fid_core;
+ err = rhashtable_init(&fid_core->vni_ht, &mlxsw_sp_fid_vni_ht_params);
+ if (err)
+ goto err_rhashtable_init;
+
fid_core->port_fid_mappings = kcalloc(max_ports, sizeof(unsigned int),
GFP_KERNEL);
if (!fid_core->port_fid_mappings) {
@@ -944,6 +1156,8 @@ err_fid_ops_register:
}
kfree(fid_core->port_fid_mappings);
err_alloc_port_fid_mappings:
+ rhashtable_destroy(&fid_core->vni_ht);
+err_rhashtable_init:
kfree(fid_core);
return err;
}
@@ -957,5 +1171,6 @@ void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp)
mlxsw_sp_fid_family_unregister(mlxsw_sp,
fid_core->fid_family_arr[i]);
kfree(fid_core->port_fid_mappings);
+ rhashtable_destroy(&fid_core->vni_ht);
kfree(fid_core);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
new file mode 100644
index 000000000000..ad06d9969bc1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c
@@ -0,0 +1,982 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/err.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include <net/inet_ecn.h>
+#include <net/ipv6.h>
+
+#include "reg.h"
+#include "spectrum.h"
+#include "spectrum_nve.h"
+
+const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[] = {
+ [MLXSW_SP_NVE_TYPE_VXLAN] = &mlxsw_sp1_nve_vxlan_ops,
+};
+
+const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[] = {
+ [MLXSW_SP_NVE_TYPE_VXLAN] = &mlxsw_sp2_nve_vxlan_ops,
+};
+
+struct mlxsw_sp_nve_mc_entry;
+struct mlxsw_sp_nve_mc_record;
+struct mlxsw_sp_nve_mc_list;
+
+struct mlxsw_sp_nve_mc_record_ops {
+ enum mlxsw_reg_tnumt_record_type type;
+ int (*entry_add)(struct mlxsw_sp_nve_mc_record *mc_record,
+ struct mlxsw_sp_nve_mc_entry *mc_entry,
+ const union mlxsw_sp_l3addr *addr);
+ void (*entry_del)(const struct mlxsw_sp_nve_mc_record *mc_record,
+ const struct mlxsw_sp_nve_mc_entry *mc_entry);
+ void (*entry_set)(const struct mlxsw_sp_nve_mc_record *mc_record,
+ const struct mlxsw_sp_nve_mc_entry *mc_entry,
+ char *tnumt_pl, unsigned int entry_index);
+ bool (*entry_compare)(const struct mlxsw_sp_nve_mc_record *mc_record,
+ const struct mlxsw_sp_nve_mc_entry *mc_entry,
+ const union mlxsw_sp_l3addr *addr);
+};
+
+struct mlxsw_sp_nve_mc_list_key {
+ u16 fid_index;
+};
+
+struct mlxsw_sp_nve_mc_ipv6_entry {
+ struct in6_addr addr6;
+ u32 addr6_kvdl_index;
+};
+
+struct mlxsw_sp_nve_mc_entry {
+ union {
+ __be32 addr4;
+ struct mlxsw_sp_nve_mc_ipv6_entry ipv6_entry;
+ };
+ u8 valid:1;
+};
+
+struct mlxsw_sp_nve_mc_record {
+ struct list_head list;
+ enum mlxsw_sp_l3proto proto;
+ unsigned int num_entries;
+ struct mlxsw_sp *mlxsw_sp;
+ struct mlxsw_sp_nve_mc_list *mc_list;
+ const struct mlxsw_sp_nve_mc_record_ops *ops;
+ u32 kvdl_index;
+ struct mlxsw_sp_nve_mc_entry entries[0];
+};
+
+struct mlxsw_sp_nve_mc_list {
+ struct list_head records_list;
+ struct rhash_head ht_node;
+ struct mlxsw_sp_nve_mc_list_key key;
+};
+
+static const struct rhashtable_params mlxsw_sp_nve_mc_list_ht_params = {
+ .key_len = sizeof(struct mlxsw_sp_nve_mc_list_key),
+ .key_offset = offsetof(struct mlxsw_sp_nve_mc_list, key),
+ .head_offset = offsetof(struct mlxsw_sp_nve_mc_list, ht_node),
+};
+
+static int
+mlxsw_sp_nve_mc_record_ipv4_entry_add(struct mlxsw_sp_nve_mc_record *mc_record,
+ struct mlxsw_sp_nve_mc_entry *mc_entry,
+ const union mlxsw_sp_l3addr *addr)
+{
+ mc_entry->addr4 = addr->addr4;
+
+ return 0;
+}
+
+static void
+mlxsw_sp_nve_mc_record_ipv4_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record,
+ const struct mlxsw_sp_nve_mc_entry *mc_entry)
+{
+}
+
+static void
+mlxsw_sp_nve_mc_record_ipv4_entry_set(const struct mlxsw_sp_nve_mc_record *mc_record,
+ const struct mlxsw_sp_nve_mc_entry *mc_entry,
+ char *tnumt_pl, unsigned int entry_index)
+{
+ u32 udip = be32_to_cpu(mc_entry->addr4);
+
+ mlxsw_reg_tnumt_udip_set(tnumt_pl, entry_index, udip);
+}
+
+static bool
+mlxsw_sp_nve_mc_record_ipv4_entry_compare(const struct mlxsw_sp_nve_mc_record *mc_record,
+ const struct mlxsw_sp_nve_mc_entry *mc_entry,
+ const union mlxsw_sp_l3addr *addr)
+{
+ return mc_entry->addr4 == addr->addr4;
+}
+
+static const struct mlxsw_sp_nve_mc_record_ops
+mlxsw_sp_nve_mc_record_ipv4_ops = {
+ .type = MLXSW_REG_TNUMT_RECORD_TYPE_IPV4,
+ .entry_add = &mlxsw_sp_nve_mc_record_ipv4_entry_add,
+ .entry_del = &mlxsw_sp_nve_mc_record_ipv4_entry_del,
+ .entry_set = &mlxsw_sp_nve_mc_record_ipv4_entry_set,
+ .entry_compare = &mlxsw_sp_nve_mc_record_ipv4_entry_compare,
+};
+
+static int
+mlxsw_sp_nve_mc_record_ipv6_entry_add(struct mlxsw_sp_nve_mc_record *mc_record,
+ struct mlxsw_sp_nve_mc_entry *mc_entry,
+ const union mlxsw_sp_l3addr *addr)
+{
+ WARN_ON(1);
+
+ return -EINVAL;
+}
+
+static void
+mlxsw_sp_nve_mc_record_ipv6_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record,
+ const struct mlxsw_sp_nve_mc_entry *mc_entry)
+{
+}
+
+static void
+mlxsw_sp_nve_mc_record_ipv6_entry_set(const struct mlxsw_sp_nve_mc_record *mc_record,
+ const struct mlxsw_sp_nve_mc_entry *mc_entry,
+ char *tnumt_pl, unsigned int entry_index)
+{
+ u32 udip_ptr = mc_entry->ipv6_entry.addr6_kvdl_index;
+
+ mlxsw_reg_tnumt_udip_ptr_set(tnumt_pl, entry_index, udip_ptr);
+}
+
+static bool
+mlxsw_sp_nve_mc_record_ipv6_entry_compare(const struct mlxsw_sp_nve_mc_record *mc_record,
+ const struct mlxsw_sp_nve_mc_entry *mc_entry,
+ const union mlxsw_sp_l3addr *addr)
+{
+ return ipv6_addr_equal(&mc_entry->ipv6_entry.addr6, &addr->addr6);
+}
+
+static const struct mlxsw_sp_nve_mc_record_ops
+mlxsw_sp_nve_mc_record_ipv6_ops = {
+ .type = MLXSW_REG_TNUMT_RECORD_TYPE_IPV6,
+ .entry_add = &mlxsw_sp_nve_mc_record_ipv6_entry_add,
+ .entry_del = &mlxsw_sp_nve_mc_record_ipv6_entry_del,
+ .entry_set = &mlxsw_sp_nve_mc_record_ipv6_entry_set,
+ .entry_compare = &mlxsw_sp_nve_mc_record_ipv6_entry_compare,
+};
+
+static const struct mlxsw_sp_nve_mc_record_ops *
+mlxsw_sp_nve_mc_record_ops_arr[] = {
+ [MLXSW_SP_L3_PROTO_IPV4] = &mlxsw_sp_nve_mc_record_ipv4_ops,
+ [MLXSW_SP_L3_PROTO_IPV6] = &mlxsw_sp_nve_mc_record_ipv6_ops,
+};
+
+static struct mlxsw_sp_nve_mc_list *
+mlxsw_sp_nve_mc_list_find(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_nve_mc_list_key *key)
+{
+ struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+
+ return rhashtable_lookup_fast(&nve->mc_list_ht, key,
+ mlxsw_sp_nve_mc_list_ht_params);
+}
+
+static struct mlxsw_sp_nve_mc_list *
+mlxsw_sp_nve_mc_list_create(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_nve_mc_list_key *key)
+{
+ struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+ struct mlxsw_sp_nve_mc_list *mc_list;
+ int err;
+
+ mc_list = kmalloc(sizeof(*mc_list), GFP_KERNEL);
+ if (!mc_list)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&mc_list->records_list);
+ mc_list->key = *key;
+
+ err = rhashtable_insert_fast(&nve->mc_list_ht, &mc_list->ht_node,
+ mlxsw_sp_nve_mc_list_ht_params);
+ if (err)
+ goto err_rhashtable_insert;
+
+ return mc_list;
+
+err_rhashtable_insert:
+ kfree(mc_list);
+ return ERR_PTR(err);
+}
+
+static void mlxsw_sp_nve_mc_list_destroy(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nve_mc_list *mc_list)
+{
+ struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+
+ rhashtable_remove_fast(&nve->mc_list_ht, &mc_list->ht_node,
+ mlxsw_sp_nve_mc_list_ht_params);
+ WARN_ON(!list_empty(&mc_list->records_list));
+ kfree(mc_list);
+}
+
+static struct mlxsw_sp_nve_mc_list *
+mlxsw_sp_nve_mc_list_get(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_nve_mc_list_key *key)
+{
+ struct mlxsw_sp_nve_mc_list *mc_list;
+
+ mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, key);
+ if (mc_list)
+ return mc_list;
+
+ return mlxsw_sp_nve_mc_list_create(mlxsw_sp, key);
+}
+
+static void
+mlxsw_sp_nve_mc_list_put(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nve_mc_list *mc_list)
+{
+ if (!list_empty(&mc_list->records_list))
+ return;
+ mlxsw_sp_nve_mc_list_destroy(mlxsw_sp, mc_list);
+}
+
+static struct mlxsw_sp_nve_mc_record *
+mlxsw_sp_nve_mc_record_create(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nve_mc_list *mc_list,
+ enum mlxsw_sp_l3proto proto)
+{
+ unsigned int num_max_entries = mlxsw_sp->nve->num_max_mc_entries[proto];
+ struct mlxsw_sp_nve_mc_record *mc_record;
+ int err;
+
+ mc_record = kzalloc(sizeof(*mc_record) + num_max_entries *
+ sizeof(struct mlxsw_sp_nve_mc_entry), GFP_KERNEL);
+ if (!mc_record)
+ return ERR_PTR(-ENOMEM);
+
+ err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, 1,
+ &mc_record->kvdl_index);
+ if (err)
+ goto err_kvdl_alloc;
+
+ mc_record->ops = mlxsw_sp_nve_mc_record_ops_arr[proto];
+ mc_record->mlxsw_sp = mlxsw_sp;
+ mc_record->mc_list = mc_list;
+ mc_record->proto = proto;
+ list_add_tail(&mc_record->list, &mc_list->records_list);
+
+ return mc_record;
+
+err_kvdl_alloc:
+ kfree(mc_record);
+ return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_nve_mc_record_destroy(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+ struct mlxsw_sp *mlxsw_sp = mc_record->mlxsw_sp;
+
+ list_del(&mc_record->list);
+ mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, 1,
+ mc_record->kvdl_index);
+ WARN_ON(mc_record->num_entries);
+ kfree(mc_record);
+}
+
+static struct mlxsw_sp_nve_mc_record *
+mlxsw_sp_nve_mc_record_get(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nve_mc_list *mc_list,
+ enum mlxsw_sp_l3proto proto)
+{
+ struct mlxsw_sp_nve_mc_record *mc_record;
+
+ list_for_each_entry_reverse(mc_record, &mc_list->records_list, list) {
+ unsigned int num_entries = mc_record->num_entries;
+ struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+
+ if (mc_record->proto == proto &&
+ num_entries < nve->num_max_mc_entries[proto])
+ return mc_record;
+ }
+
+ return mlxsw_sp_nve_mc_record_create(mlxsw_sp, mc_list, proto);
+}
+
+static void
+mlxsw_sp_nve_mc_record_put(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+ if (mc_record->num_entries != 0)
+ return;
+
+ mlxsw_sp_nve_mc_record_destroy(mc_record);
+}
+
+static struct mlxsw_sp_nve_mc_entry *
+mlxsw_sp_nve_mc_free_entry_find(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+ struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve;
+ unsigned int num_max_entries;
+ int i;
+
+ num_max_entries = nve->num_max_mc_entries[mc_record->proto];
+ for (i = 0; i < num_max_entries; i++) {
+ if (mc_record->entries[i].valid)
+ continue;
+ return &mc_record->entries[i];
+ }
+
+ return NULL;
+}
+
+static int
+mlxsw_sp_nve_mc_record_refresh(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+ enum mlxsw_reg_tnumt_record_type type = mc_record->ops->type;
+ struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list;
+ struct mlxsw_sp *mlxsw_sp = mc_record->mlxsw_sp;
+ char tnumt_pl[MLXSW_REG_TNUMT_LEN];
+ unsigned int num_max_entries;
+ unsigned int num_entries = 0;
+ u32 next_kvdl_index = 0;
+ bool next_valid = false;
+ int i;
+
+ if (!list_is_last(&mc_record->list, &mc_list->records_list)) {
+ struct mlxsw_sp_nve_mc_record *next_record;
+
+ next_record = list_next_entry(mc_record, list);
+ next_kvdl_index = next_record->kvdl_index;
+ next_valid = true;
+ }
+
+ mlxsw_reg_tnumt_pack(tnumt_pl, type, MLXSW_REG_TNUMT_TUNNEL_PORT_NVE,
+ mc_record->kvdl_index, next_valid,
+ next_kvdl_index, mc_record->num_entries);
+
+ num_max_entries = mlxsw_sp->nve->num_max_mc_entries[mc_record->proto];
+ for (i = 0; i < num_max_entries; i++) {
+ struct mlxsw_sp_nve_mc_entry *mc_entry;
+
+ mc_entry = &mc_record->entries[i];
+ if (!mc_entry->valid)
+ continue;
+ mc_record->ops->entry_set(mc_record, mc_entry, tnumt_pl,
+ num_entries++);
+ }
+
+ WARN_ON(num_entries != mc_record->num_entries);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnumt), tnumt_pl);
+}
+
+static bool
+mlxsw_sp_nve_mc_record_is_first(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+ struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list;
+ struct mlxsw_sp_nve_mc_record *first_record;
+
+ first_record = list_first_entry(&mc_list->records_list,
+ struct mlxsw_sp_nve_mc_record, list);
+
+ return mc_record == first_record;
+}
+
+static struct mlxsw_sp_nve_mc_entry *
+mlxsw_sp_nve_mc_entry_find(struct mlxsw_sp_nve_mc_record *mc_record,
+ union mlxsw_sp_l3addr *addr)
+{
+ struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve;
+ unsigned int num_max_entries;
+ int i;
+
+ num_max_entries = nve->num_max_mc_entries[mc_record->proto];
+ for (i = 0; i < num_max_entries; i++) {
+ struct mlxsw_sp_nve_mc_entry *mc_entry;
+
+ mc_entry = &mc_record->entries[i];
+ if (!mc_entry->valid)
+ continue;
+ if (mc_record->ops->entry_compare(mc_record, mc_entry, addr))
+ return mc_entry;
+ }
+
+ return NULL;
+}
+
+static int
+mlxsw_sp_nve_mc_record_ip_add(struct mlxsw_sp_nve_mc_record *mc_record,
+ union mlxsw_sp_l3addr *addr)
+{
+ struct mlxsw_sp_nve_mc_entry *mc_entry = NULL;
+ int err;
+
+ mc_entry = mlxsw_sp_nve_mc_free_entry_find(mc_record);
+ if (WARN_ON(!mc_entry))
+ return -EINVAL;
+
+ err = mc_record->ops->entry_add(mc_record, mc_entry, addr);
+ if (err)
+ return err;
+ mc_record->num_entries++;
+ mc_entry->valid = true;
+
+ err = mlxsw_sp_nve_mc_record_refresh(mc_record);
+ if (err)
+ goto err_record_refresh;
+
+ /* If this is a new record and not the first one, then we need to
+ * update the next pointer of the previous entry
+ */
+ if (mc_record->num_entries != 1 ||
+ mlxsw_sp_nve_mc_record_is_first(mc_record))
+ return 0;
+
+ err = mlxsw_sp_nve_mc_record_refresh(list_prev_entry(mc_record, list));
+ if (err)
+ goto err_prev_record_refresh;
+
+ return 0;
+
+err_prev_record_refresh:
+err_record_refresh:
+ mc_entry->valid = false;
+ mc_record->num_entries--;
+ mc_record->ops->entry_del(mc_record, mc_entry);
+ return err;
+}
+
+static void
+mlxsw_sp_nve_mc_record_entry_del(struct mlxsw_sp_nve_mc_record *mc_record,
+ struct mlxsw_sp_nve_mc_entry *mc_entry)
+{
+ struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list;
+
+ mc_entry->valid = false;
+ mc_record->num_entries--;
+
+ /* When the record continues to exist we only need to invalidate
+ * the requested entry
+ */
+ if (mc_record->num_entries != 0) {
+ mlxsw_sp_nve_mc_record_refresh(mc_record);
+ mc_record->ops->entry_del(mc_record, mc_entry);
+ return;
+ }
+
+ /* If the record needs to be deleted, but it is not the first,
+ * then we need to make sure that the previous record no longer
+ * points to it. Remove deleted record from the list to reflect
+ * that and then re-add it at the end, so that it could be
+ * properly removed by the record destruction code
+ */
+ if (!mlxsw_sp_nve_mc_record_is_first(mc_record)) {
+ struct mlxsw_sp_nve_mc_record *prev_record;
+
+ prev_record = list_prev_entry(mc_record, list);
+ list_del(&mc_record->list);
+ mlxsw_sp_nve_mc_record_refresh(prev_record);
+ list_add_tail(&mc_record->list, &mc_list->records_list);
+ mc_record->ops->entry_del(mc_record, mc_entry);
+ return;
+ }
+
+ /* If the first record needs to be deleted, but the list is not
+ * singular, then the second record needs to be written in the
+ * first record's address, as this address is stored as a property
+ * of the FID
+ */
+ if (mlxsw_sp_nve_mc_record_is_first(mc_record) &&
+ !list_is_singular(&mc_list->records_list)) {
+ struct mlxsw_sp_nve_mc_record *next_record;
+
+ next_record = list_next_entry(mc_record, list);
+ swap(mc_record->kvdl_index, next_record->kvdl_index);
+ mlxsw_sp_nve_mc_record_refresh(next_record);
+ mc_record->ops->entry_del(mc_record, mc_entry);
+ return;
+ }
+
+ /* This is the last case where the last remaining record needs to
+ * be deleted. Simply delete the entry
+ */
+ mc_record->ops->entry_del(mc_record, mc_entry);
+}
+
+static struct mlxsw_sp_nve_mc_record *
+mlxsw_sp_nve_mc_record_find(struct mlxsw_sp_nve_mc_list *mc_list,
+ enum mlxsw_sp_l3proto proto,
+ union mlxsw_sp_l3addr *addr,
+ struct mlxsw_sp_nve_mc_entry **mc_entry)
+{
+ struct mlxsw_sp_nve_mc_record *mc_record;
+
+ list_for_each_entry(mc_record, &mc_list->records_list, list) {
+ if (mc_record->proto != proto)
+ continue;
+
+ *mc_entry = mlxsw_sp_nve_mc_entry_find(mc_record, addr);
+ if (*mc_entry)
+ return mc_record;
+ }
+
+ return NULL;
+}
+
+static int mlxsw_sp_nve_mc_list_ip_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nve_mc_list *mc_list,
+ enum mlxsw_sp_l3proto proto,
+ union mlxsw_sp_l3addr *addr)
+{
+ struct mlxsw_sp_nve_mc_record *mc_record;
+ int err;
+
+ mc_record = mlxsw_sp_nve_mc_record_get(mlxsw_sp, mc_list, proto);
+ if (IS_ERR(mc_record))
+ return PTR_ERR(mc_record);
+
+ err = mlxsw_sp_nve_mc_record_ip_add(mc_record, addr);
+ if (err)
+ goto err_ip_add;
+
+ return 0;
+
+err_ip_add:
+ mlxsw_sp_nve_mc_record_put(mc_record);
+ return err;
+}
+
+static void mlxsw_sp_nve_mc_list_ip_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nve_mc_list *mc_list,
+ enum mlxsw_sp_l3proto proto,
+ union mlxsw_sp_l3addr *addr)
+{
+ struct mlxsw_sp_nve_mc_record *mc_record;
+ struct mlxsw_sp_nve_mc_entry *mc_entry;
+
+ mc_record = mlxsw_sp_nve_mc_record_find(mc_list, proto, addr,
+ &mc_entry);
+ if (WARN_ON(!mc_record))
+ return;
+
+ mlxsw_sp_nve_mc_record_entry_del(mc_record, mc_entry);
+ mlxsw_sp_nve_mc_record_put(mc_record);
+}
+
+static int
+mlxsw_sp_nve_fid_flood_index_set(struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_nve_mc_list *mc_list)
+{
+ struct mlxsw_sp_nve_mc_record *mc_record;
+
+ /* The address of the first record in the list is a property of
+ * the FID and we never change it. It only needs to be set when
+ * a new list is created
+ */
+ if (mlxsw_sp_fid_nve_flood_index_is_set(fid))
+ return 0;
+
+ mc_record = list_first_entry(&mc_list->records_list,
+ struct mlxsw_sp_nve_mc_record, list);
+
+ return mlxsw_sp_fid_nve_flood_index_set(fid, mc_record->kvdl_index);
+}
+
+static void
+mlxsw_sp_nve_fid_flood_index_clear(struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_nve_mc_list *mc_list)
+{
+ struct mlxsw_sp_nve_mc_record *mc_record;
+
+ /* The address of the first record needs to be invalidated only when
+ * the last record is about to be removed
+ */
+ if (!list_is_singular(&mc_list->records_list))
+ return;
+
+ mc_record = list_first_entry(&mc_list->records_list,
+ struct mlxsw_sp_nve_mc_record, list);
+ if (mc_record->num_entries != 1)
+ return;
+
+ return mlxsw_sp_fid_nve_flood_index_clear(fid);
+}
+
+int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fid *fid,
+ enum mlxsw_sp_l3proto proto,
+ union mlxsw_sp_l3addr *addr)
+{
+ struct mlxsw_sp_nve_mc_list_key key = { 0 };
+ struct mlxsw_sp_nve_mc_list *mc_list;
+ int err;
+
+ key.fid_index = mlxsw_sp_fid_index(fid);
+ mc_list = mlxsw_sp_nve_mc_list_get(mlxsw_sp, &key);
+ if (IS_ERR(mc_list))
+ return PTR_ERR(mc_list);
+
+ err = mlxsw_sp_nve_mc_list_ip_add(mlxsw_sp, mc_list, proto, addr);
+ if (err)
+ goto err_add_ip;
+
+ err = mlxsw_sp_nve_fid_flood_index_set(fid, mc_list);
+ if (err)
+ goto err_fid_flood_index_set;
+
+ return 0;
+
+err_fid_flood_index_set:
+ mlxsw_sp_nve_mc_list_ip_del(mlxsw_sp, mc_list, proto, addr);
+err_add_ip:
+ mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list);
+ return err;
+}
+
+void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fid *fid,
+ enum mlxsw_sp_l3proto proto,
+ union mlxsw_sp_l3addr *addr)
+{
+ struct mlxsw_sp_nve_mc_list_key key = { 0 };
+ struct mlxsw_sp_nve_mc_list *mc_list;
+
+ key.fid_index = mlxsw_sp_fid_index(fid);
+ mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key);
+ if (WARN_ON(!mc_list))
+ return;
+
+ mlxsw_sp_nve_fid_flood_index_clear(fid, mc_list);
+ mlxsw_sp_nve_mc_list_ip_del(mlxsw_sp, mc_list, proto, addr);
+ mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list);
+}
+
+static void
+mlxsw_sp_nve_mc_record_delete(struct mlxsw_sp_nve_mc_record *mc_record)
+{
+ struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve;
+ unsigned int num_max_entries;
+ int i;
+
+ num_max_entries = nve->num_max_mc_entries[mc_record->proto];
+ for (i = 0; i < num_max_entries; i++) {
+ struct mlxsw_sp_nve_mc_entry *mc_entry = &mc_record->entries[i];
+
+ if (!mc_entry->valid)
+ continue;
+ mlxsw_sp_nve_mc_record_entry_del(mc_record, mc_entry);
+ }
+
+ WARN_ON(mc_record->num_entries);
+ mlxsw_sp_nve_mc_record_put(mc_record);
+}
+
+static void mlxsw_sp_nve_flood_ip_flush(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fid *fid)
+{
+ struct mlxsw_sp_nve_mc_record *mc_record, *tmp;
+ struct mlxsw_sp_nve_mc_list_key key = { 0 };
+ struct mlxsw_sp_nve_mc_list *mc_list;
+
+ if (!mlxsw_sp_fid_nve_flood_index_is_set(fid))
+ return;
+
+ mlxsw_sp_fid_nve_flood_index_clear(fid);
+
+ key.fid_index = mlxsw_sp_fid_index(fid);
+ mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key);
+ if (WARN_ON(!mc_list))
+ return;
+
+ list_for_each_entry_safe(mc_record, tmp, &mc_list->records_list, list)
+ mlxsw_sp_nve_mc_record_delete(mc_record);
+
+ WARN_ON(!list_empty(&mc_list->records_list));
+ mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list);
+}
+
+u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp)
+{
+ WARN_ON(mlxsw_sp->nve->num_nve_tunnels == 0);
+
+ return mlxsw_sp->nve->tunnel_index;
+}
+
+bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp,
+ u32 tb_id, __be32 addr)
+{
+ struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+ struct mlxsw_sp_nve_config *config = &nve->config;
+
+ if (nve->num_nve_tunnels &&
+ config->ul_proto == MLXSW_SP_L3_PROTO_IPV4 &&
+ config->ul_sip.addr4 == addr && config->ul_tb_id == tb_id)
+ return true;
+
+ return false;
+}
+
+static int mlxsw_sp_nve_tunnel_init(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_nve_config *config)
+{
+ struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+ const struct mlxsw_sp_nve_ops *ops;
+ int err;
+
+ if (nve->num_nve_tunnels++ != 0)
+ return 0;
+
+ err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
+ &nve->tunnel_index);
+ if (err)
+ goto err_kvdl_alloc;
+
+ ops = nve->nve_ops_arr[config->type];
+ err = ops->init(nve, config);
+ if (err)
+ goto err_ops_init;
+
+ return 0;
+
+err_ops_init:
+ mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
+ nve->tunnel_index);
+err_kvdl_alloc:
+ nve->num_nve_tunnels--;
+ return err;
+}
+
+static void mlxsw_sp_nve_tunnel_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+ const struct mlxsw_sp_nve_ops *ops;
+
+ ops = nve->nve_ops_arr[nve->config.type];
+
+ if (mlxsw_sp->nve->num_nve_tunnels == 1) {
+ ops->fini(nve);
+ mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
+ nve->tunnel_index);
+ }
+ nve->num_nve_tunnels--;
+}
+
+static void mlxsw_sp_nve_fdb_flush_by_fid(struct mlxsw_sp *mlxsw_sp,
+ u16 fid_index)
+{
+ char sfdf_pl[MLXSW_REG_SFDF_LEN];
+
+ mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_NVE_AND_FID);
+ mlxsw_reg_sfdf_fid_set(sfdf_pl, fid_index);
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl);
+}
+
+int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid,
+ struct mlxsw_sp_nve_params *params,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_nve *nve = mlxsw_sp->nve;
+ const struct mlxsw_sp_nve_ops *ops;
+ struct mlxsw_sp_nve_config config;
+ int err;
+
+ ops = nve->nve_ops_arr[params->type];
+
+ if (!ops->can_offload(nve, params->dev, extack))
+ return -EOPNOTSUPP;
+
+ memset(&config, 0, sizeof(config));
+ ops->nve_config(nve, params->dev, &config);
+ if (nve->num_nve_tunnels &&
+ memcmp(&config, &nve->config, sizeof(config))) {
+ NL_SET_ERR_MSG_MOD(extack, "Conflicting NVE tunnels configuration");
+ return -EOPNOTSUPP;
+ }
+
+ err = mlxsw_sp_nve_tunnel_init(mlxsw_sp, &config);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to initialize NVE tunnel");
+ return err;
+ }
+
+ err = mlxsw_sp_fid_vni_set(fid, params->vni);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to set VNI on FID");
+ goto err_fid_vni_set;
+ }
+
+ nve->config = config;
+
+ return 0;
+
+err_fid_vni_set:
+ mlxsw_sp_nve_tunnel_fini(mlxsw_sp);
+ return err;
+}
+
+void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fid *fid)
+{
+ u16 fid_index = mlxsw_sp_fid_index(fid);
+
+ mlxsw_sp_nve_flood_ip_flush(mlxsw_sp, fid);
+ mlxsw_sp_nve_fdb_flush_by_fid(mlxsw_sp, fid_index);
+ mlxsw_sp_fid_vni_clear(fid);
+ mlxsw_sp_nve_tunnel_fini(mlxsw_sp);
+}
+
+int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char tnqdr_pl[MLXSW_REG_TNQDR_LEN];
+
+ mlxsw_reg_tnqdr_pack(tnqdr_pl, mlxsw_sp_port->local_port);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnqdr), tnqdr_pl);
+}
+
+void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+}
+
+static int mlxsw_sp_nve_qos_init(struct mlxsw_sp *mlxsw_sp)
+{
+ char tnqcr_pl[MLXSW_REG_TNQCR_LEN];
+
+ mlxsw_reg_tnqcr_pack(tnqcr_pl);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnqcr), tnqcr_pl);
+}
+
+static int mlxsw_sp_nve_ecn_encap_init(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+
+ /* Iterate over inner ECN values */
+ for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) {
+ u8 outer_ecn = INET_ECN_encapsulate(0, i);
+ char tneem_pl[MLXSW_REG_TNEEM_LEN];
+ int err;
+
+ mlxsw_reg_tneem_pack(tneem_pl, i, outer_ecn);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tneem),
+ tneem_pl);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int __mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp,
+ u8 inner_ecn, u8 outer_ecn)
+{
+ char tndem_pl[MLXSW_REG_TNDEM_LEN];
+ bool trap_en, set_ce = false;
+ u8 new_inner_ecn;
+
+ trap_en = !!__INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce);
+ new_inner_ecn = set_ce ? INET_ECN_CE : inner_ecn;
+
+ mlxsw_reg_tndem_pack(tndem_pl, outer_ecn, inner_ecn, new_inner_ecn,
+ trap_en, trap_en ? MLXSW_TRAP_ID_DECAP_ECN0 : 0);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tndem), tndem_pl);
+}
+
+static int mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp)
+{
+ int i;
+
+ /* Iterate over inner ECN values */
+ for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) {
+ int j;
+
+ /* Iterate over outer ECN values */
+ for (j = INET_ECN_NOT_ECT; j <= INET_ECN_CE; j++) {
+ int err;
+
+ err = __mlxsw_sp_nve_ecn_decap_init(mlxsw_sp, i, j);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int mlxsw_sp_nve_ecn_init(struct mlxsw_sp *mlxsw_sp)
+{
+ int err;
+
+ err = mlxsw_sp_nve_ecn_encap_init(mlxsw_sp);
+ if (err)
+ return err;
+
+ return mlxsw_sp_nve_ecn_decap_init(mlxsw_sp);
+}
+
+static int mlxsw_sp_nve_resources_query(struct mlxsw_sp *mlxsw_sp)
+{
+ unsigned int max;
+
+ if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV4) ||
+ !MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV6))
+ return -EIO;
+ max = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV4);
+ mlxsw_sp->nve->num_max_mc_entries[MLXSW_SP_L3_PROTO_IPV4] = max;
+ max = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV6);
+ mlxsw_sp->nve->num_max_mc_entries[MLXSW_SP_L3_PROTO_IPV6] = max;
+
+ return 0;
+}
+
+int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp)
+{
+ struct mlxsw_sp_nve *nve;
+ int err;
+
+ nve = kzalloc(sizeof(*mlxsw_sp->nve), GFP_KERNEL);
+ if (!nve)
+ return -ENOMEM;
+ mlxsw_sp->nve = nve;
+ nve->mlxsw_sp = mlxsw_sp;
+ nve->nve_ops_arr = mlxsw_sp->nve_ops_arr;
+
+ err = rhashtable_init(&nve->mc_list_ht,
+ &mlxsw_sp_nve_mc_list_ht_params);
+ if (err)
+ goto err_rhashtable_init;
+
+ err = mlxsw_sp_nve_qos_init(mlxsw_sp);
+ if (err)
+ goto err_nve_qos_init;
+
+ err = mlxsw_sp_nve_ecn_init(mlxsw_sp);
+ if (err)
+ goto err_nve_ecn_init;
+
+ err = mlxsw_sp_nve_resources_query(mlxsw_sp);
+ if (err)
+ goto err_nve_resources_query;
+
+ return 0;
+
+err_nve_resources_query:
+err_nve_ecn_init:
+err_nve_qos_init:
+ rhashtable_destroy(&nve->mc_list_ht);
+err_rhashtable_init:
+ mlxsw_sp->nve = NULL;
+ kfree(nve);
+ return err;
+}
+
+void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ WARN_ON(mlxsw_sp->nve->num_nve_tunnels);
+ rhashtable_destroy(&mlxsw_sp->nve->mc_list_ht);
+ mlxsw_sp->nve = NULL;
+ kfree(mlxsw_sp->nve);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
new file mode 100644
index 000000000000..4cc3297e13d6
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#ifndef _MLXSW_SPECTRUM_NVE_H
+#define _MLXSW_SPECTRUM_NVE_H
+
+#include <linux/netlink.h>
+#include <linux/rhashtable.h>
+
+#include "spectrum.h"
+
+struct mlxsw_sp_nve_config {
+ enum mlxsw_sp_nve_type type;
+ u8 ttl;
+ u8 learning_en:1;
+ __be16 udp_dport;
+ __be32 flowlabel;
+ u32 ul_tb_id;
+ enum mlxsw_sp_l3proto ul_proto;
+ union mlxsw_sp_l3addr ul_sip;
+};
+
+struct mlxsw_sp_nve {
+ struct mlxsw_sp_nve_config config;
+ struct rhashtable mc_list_ht;
+ struct mlxsw_sp *mlxsw_sp;
+ const struct mlxsw_sp_nve_ops **nve_ops_arr;
+ unsigned int num_nve_tunnels; /* Protected by RTNL */
+ unsigned int num_max_mc_entries[MLXSW_SP_L3_PROTO_MAX];
+ u32 tunnel_index;
+};
+
+struct mlxsw_sp_nve_ops {
+ enum mlxsw_sp_nve_type type;
+ bool (*can_offload)(const struct mlxsw_sp_nve *nve,
+ const struct net_device *dev,
+ struct netlink_ext_ack *extack);
+ void (*nve_config)(const struct mlxsw_sp_nve *nve,
+ const struct net_device *dev,
+ struct mlxsw_sp_nve_config *config);
+ int (*init)(struct mlxsw_sp_nve *nve,
+ const struct mlxsw_sp_nve_config *config);
+ void (*fini)(struct mlxsw_sp_nve *nve);
+};
+
+extern const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops;
+extern const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops;
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
new file mode 100644
index 000000000000..d21c7be5b1c9
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */
+
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/random.h>
+#include <net/vxlan.h>
+
+#include "reg.h"
+#include "spectrum_nve.h"
+
+/* Eth (18B) | IPv6 (40B) | UDP (8B) | VxLAN (8B) | Eth (14B) | IPv6 (40B)
+ *
+ * In the worst case - where we have a VLAN tag on the outer Ethernet
+ * header and IPv6 in overlay and underlay - we need to parse 128 bytes
+ */
+#define MLXSW_SP_NVE_VXLAN_PARSING_DEPTH 128
+#define MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH 96
+
+#define MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS VXLAN_F_UDP_ZERO_CSUM_TX
+
+static bool mlxsw_sp1_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve,
+ const struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_config *cfg = &vxlan->cfg;
+
+ if (cfg->saddr.sa.sa_family != AF_INET) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only IPv4 underlay is supported");
+ return false;
+ }
+
+ if (vxlan_addr_multicast(&cfg->remote_ip)) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: Multicast destination IP is not supported");
+ return false;
+ }
+
+ if (vxlan_addr_any(&cfg->saddr)) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: Source address must be specified");
+ return false;
+ }
+
+ if (cfg->remote_ifindex) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: Local interface is not supported");
+ return false;
+ }
+
+ if (cfg->port_min || cfg->port_max) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only default UDP source port range is supported");
+ return false;
+ }
+
+ if (cfg->tos != 1) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: TOS must be configured to inherit");
+ return false;
+ }
+
+ if (cfg->flags & VXLAN_F_TTL_INHERIT) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to inherit");
+ return false;
+ }
+
+ if (cfg->flags & VXLAN_F_LEARN) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: Learning is not supported");
+ return false;
+ }
+
+ if (!(cfg->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: UDP checksum is not supported");
+ return false;
+ }
+
+ if (cfg->flags & ~MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: Unsupported flag");
+ return false;
+ }
+
+ if (cfg->ttl == 0) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to 0");
+ return false;
+ }
+
+ if (cfg->label != 0) {
+ NL_SET_ERR_MSG_MOD(extack, "VxLAN: Flow label must be configured to 0");
+ return false;
+ }
+
+ return true;
+}
+
+static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve,
+ const struct net_device *dev,
+ struct mlxsw_sp_nve_config *config)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_config *cfg = &vxlan->cfg;
+
+ config->type = MLXSW_SP_NVE_TYPE_VXLAN;
+ config->ttl = cfg->ttl;
+ config->flowlabel = cfg->label;
+ config->learning_en = cfg->flags & VXLAN_F_LEARN ? 1 : 0;
+ config->ul_tb_id = RT_TABLE_MAIN;
+ config->ul_proto = MLXSW_SP_L3_PROTO_IPV4;
+ config->ul_sip.addr4 = cfg->saddr.sin.sin_addr.s_addr;
+ config->udp_dport = cfg->dst_port;
+}
+
+static int mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp,
+ unsigned int parsing_depth,
+ __be16 udp_dport)
+{
+ char mprs_pl[MLXSW_REG_MPRS_LEN];
+
+ mlxsw_reg_mprs_pack(mprs_pl, parsing_depth, be16_to_cpu(udp_dport));
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl);
+}
+
+static int
+mlxsw_sp1_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp,
+ const struct mlxsw_sp_nve_config *config)
+{
+ char tngcr_pl[MLXSW_REG_TNGCR_LEN];
+ u16 ul_vr_id;
+ u8 udp_sport;
+ int err;
+
+ err = mlxsw_sp_router_tb_id_vr_id(mlxsw_sp, config->ul_tb_id,
+ &ul_vr_id);
+ if (err)
+ return err;
+
+ mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, true,
+ config->ttl);
+ /* VxLAN driver's default UDP source port range is 32768 (0x8000)
+ * to 60999 (0xee47). Set the upper 8 bits of the UDP source port
+ * to a random number between 0x80 and 0xee
+ */
+ get_random_bytes(&udp_sport, sizeof(udp_sport));
+ udp_sport = (udp_sport % (0xee - 0x80 + 1)) + 0x80;
+ mlxsw_reg_tngcr_nve_udp_sport_prefix_set(tngcr_pl, udp_sport);
+ mlxsw_reg_tngcr_learn_enable_set(tngcr_pl, config->learning_en);
+ mlxsw_reg_tngcr_underlay_virtual_router_set(tngcr_pl, ul_vr_id);
+ mlxsw_reg_tngcr_usipv4_set(tngcr_pl, be32_to_cpu(config->ul_sip.addr4));
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl);
+}
+
+static void mlxsw_sp1_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp)
+{
+ char tngcr_pl[MLXSW_REG_TNGCR_LEN];
+
+ mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, false, 0);
+
+ mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl);
+}
+
+static int mlxsw_sp1_nve_vxlan_rtdp_set(struct mlxsw_sp *mlxsw_sp,
+ unsigned int tunnel_index)
+{
+ char rtdp_pl[MLXSW_REG_RTDP_LEN];
+
+ mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_NVE, tunnel_index);
+
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl);
+}
+
+static int mlxsw_sp1_nve_vxlan_init(struct mlxsw_sp_nve *nve,
+ const struct mlxsw_sp_nve_config *config)
+{
+ struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
+ int err;
+
+ err = mlxsw_sp_nve_parsing_set(mlxsw_sp,
+ MLXSW_SP_NVE_VXLAN_PARSING_DEPTH,
+ config->udp_dport);
+ if (err)
+ return err;
+
+ err = mlxsw_sp1_nve_vxlan_config_set(mlxsw_sp, config);
+ if (err)
+ goto err_config_set;
+
+ err = mlxsw_sp1_nve_vxlan_rtdp_set(mlxsw_sp, nve->tunnel_index);
+ if (err)
+ goto err_rtdp_set;
+
+ err = mlxsw_sp_router_nve_promote_decap(mlxsw_sp, config->ul_tb_id,
+ config->ul_proto,
+ &config->ul_sip,
+ nve->tunnel_index);
+ if (err)
+ goto err_promote_decap;
+
+ return 0;
+
+err_promote_decap:
+err_rtdp_set:
+ mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp);
+err_config_set:
+ mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH,
+ config->udp_dport);
+ return err;
+}
+
+static void mlxsw_sp1_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
+{
+ struct mlxsw_sp_nve_config *config = &nve->config;
+ struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp;
+
+ mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id,
+ config->ul_proto, &config->ul_sip);
+ mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp);
+ mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH,
+ config->udp_dport);
+}
+
+const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops = {
+ .type = MLXSW_SP_NVE_TYPE_VXLAN,
+ .can_offload = mlxsw_sp1_nve_vxlan_can_offload,
+ .nve_config = mlxsw_sp_nve_vxlan_config,
+ .init = mlxsw_sp1_nve_vxlan_init,
+ .fini = mlxsw_sp1_nve_vxlan_fini,
+};
+
+static bool mlxsw_sp2_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve,
+ const struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ return false;
+}
+
+static int mlxsw_sp2_nve_vxlan_init(struct mlxsw_sp_nve *nve,
+ const struct mlxsw_sp_nve_config *config)
+{
+ return -EOPNOTSUPP;
+}
+
+static void mlxsw_sp2_nve_vxlan_fini(struct mlxsw_sp_nve *nve)
+{
+}
+
+const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops = {
+ .type = MLXSW_SP_NVE_TYPE_VXLAN,
+ .can_offload = mlxsw_sp2_nve_vxlan_can_offload,
+ .nve_config = mlxsw_sp_nve_vxlan_config,
+ .init = mlxsw_sp2_nve_vxlan_init,
+ .fini = mlxsw_sp2_nve_vxlan_fini,
+};
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 2ab9cf25a08a..9e9bb57134f2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -366,6 +366,7 @@ enum mlxsw_sp_fib_entry_type {
* encapsulating entries.)
*/
MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
+ MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
};
struct mlxsw_sp_nexthop_group;
@@ -741,6 +742,19 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
return NULL;
}
+int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
+ u16 *vr_id)
+{
+ struct mlxsw_sp_vr *vr;
+
+ vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
+ if (!vr)
+ return -ESRCH;
+ *vr_id = vr->id;
+
+ return 0;
+}
+
static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
enum mlxsw_sp_l3proto proto)
{
@@ -1128,6 +1142,52 @@ mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
}
+static struct mlxsw_sp_fib_entry *
+mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
+ enum mlxsw_sp_l3proto proto,
+ const union mlxsw_sp_l3addr *addr,
+ enum mlxsw_sp_fib_entry_type type)
+{
+ struct mlxsw_sp_fib_entry *fib_entry;
+ struct mlxsw_sp_fib_node *fib_node;
+ unsigned char addr_prefix_len;
+ struct mlxsw_sp_fib *fib;
+ struct mlxsw_sp_vr *vr;
+ const void *addrp;
+ size_t addr_len;
+ u32 addr4;
+
+ vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
+ if (!vr)
+ return NULL;
+ fib = mlxsw_sp_vr_fib(vr, proto);
+
+ switch (proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ addr4 = be32_to_cpu(addr->addr4);
+ addrp = &addr4;
+ addr_len = 4;
+ addr_prefix_len = 32;
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
+ default:
+ WARN_ON(1);
+ return NULL;
+ }
+
+ fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
+ addr_prefix_len);
+ if (!fib_node || list_empty(&fib_node->entry_list))
+ return NULL;
+
+ fib_entry = list_first_entry(&fib_node->entry_list,
+ struct mlxsw_sp_fib_entry, list);
+ if (fib_entry->type != type)
+ return NULL;
+
+ return fib_entry;
+}
+
/* Given an IPIP entry, find the corresponding decap route. */
static struct mlxsw_sp_fib_entry *
mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
@@ -1765,6 +1825,56 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
return 0;
}
+int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
+ enum mlxsw_sp_l3proto ul_proto,
+ const union mlxsw_sp_l3addr *ul_sip,
+ u32 tunnel_index)
+{
+ enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+ struct mlxsw_sp_fib_entry *fib_entry;
+ int err;
+
+ /* It is valid to create a tunnel with a local IP and only later
+ * assign this IP address to a local interface
+ */
+ fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
+ ul_proto, ul_sip,
+ type);
+ if (!fib_entry)
+ return 0;
+
+ fib_entry->decap.tunnel_index = tunnel_index;
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
+
+ err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+ if (err)
+ goto err_fib_entry_update;
+
+ return 0;
+
+err_fib_entry_update:
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+ mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+ return err;
+}
+
+void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
+ enum mlxsw_sp_l3proto ul_proto,
+ const union mlxsw_sp_l3addr *ul_sip)
+{
+ enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
+ struct mlxsw_sp_fib_entry *fib_entry;
+
+ fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
+ ul_proto, ul_sip,
+ type);
+ if (!fib_entry)
+ return;
+
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
+ mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
+}
+
struct mlxsw_sp_neigh_key {
struct neighbour *n;
};
@@ -3815,6 +3925,7 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
return !!nh_group->nh_rif;
case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
+ case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
return true;
default:
return false;
@@ -3848,7 +3959,8 @@ mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry)
int i;
if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL ||
- fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) {
+ fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP ||
+ fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) {
nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD;
return;
}
@@ -4072,6 +4184,18 @@ mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
fib_entry->decap.tunnel_index);
}
+static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry,
+ enum mlxsw_reg_ralue_op op)
+{
+ char ralue_pl[MLXSW_REG_RALUE_LEN];
+
+ mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
+ mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
+ fib_entry->decap.tunnel_index);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
+}
+
static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry,
enum mlxsw_reg_ralue_op op)
@@ -4086,6 +4210,8 @@ static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
fib_entry, op);
+ case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
+ return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
}
return -EINVAL;
}
@@ -4121,6 +4247,7 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry)
{
union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
+ u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
struct net_device *dev = fen_info->fi->fib_dev;
struct mlxsw_sp_ipip_entry *ipip_entry;
struct fib_info *fi = fen_info->fi;
@@ -4135,6 +4262,15 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
fib_entry,
ipip_entry);
}
+ if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id,
+ dip.addr4)) {
+ u32 t_index;
+
+ t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp);
+ fib_entry->decap.tunnel_index = t_index;
+ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
+ return 0;
+ }
/* fall through */
case RTN_BROADCAST:
fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
index 1a60391daafa..3dbafdeaab2b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h
@@ -7,17 +7,6 @@
#include "spectrum.h"
#include "reg.h"
-enum mlxsw_sp_l3proto {
- MLXSW_SP_L3_PROTO_IPV4,
- MLXSW_SP_L3_PROTO_IPV6,
-#define MLXSW_SP_L3_PROTO_MAX (MLXSW_SP_L3_PROTO_IPV6 + 1)
-};
-
-union mlxsw_sp_l3addr {
- __be32 addr4;
- struct in6_addr addr6;
-};
-
struct mlxsw_sp_rif_ipip_lb;
struct mlxsw_sp_rif_ipip_lb_config {
enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt;
@@ -35,8 +24,6 @@ struct mlxsw_sp_neigh_entry;
struct mlxsw_sp_nexthop;
struct mlxsw_sp_ipip_entry;
-struct mlxsw_sp_rif *mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
- const struct net_device *dev);
struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
u16 rif_index);
u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif);
@@ -44,9 +31,7 @@ u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif);
u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif);
u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev);
int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif);
-u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp);
const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif);
-struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif);
int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_rif *rif,
enum mlxsw_sp_rif_counter_dir dir,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index db715da7bab7..bc60d7a8b49d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -15,9 +15,9 @@
#include <linux/rtnetlink.h>
#include <linux/netlink.h>
#include <net/switchdev.h>
+#include <net/vxlan.h>
#include "spectrum_span.h"
-#include "spectrum_router.h"
#include "spectrum_switchdev.h"
#include "spectrum.h"
#include "core.h"
@@ -84,9 +84,19 @@ struct mlxsw_sp_bridge_ops {
void (*port_leave)(struct mlxsw_sp_bridge_device *bridge_device,
struct mlxsw_sp_bridge_port *bridge_port,
struct mlxsw_sp_port *mlxsw_sp_port);
+ int (*vxlan_join)(struct mlxsw_sp_bridge_device *bridge_device,
+ const struct net_device *vxlan_dev,
+ struct netlink_ext_ack *extack);
+ void (*vxlan_leave)(struct mlxsw_sp_bridge_device *bridge_device,
+ const struct net_device *vxlan_dev);
struct mlxsw_sp_fid *
(*fid_get)(struct mlxsw_sp_bridge_device *bridge_device,
u16 vid);
+ struct mlxsw_sp_fid *
+ (*fid_lookup)(struct mlxsw_sp_bridge_device *bridge_device,
+ u16 vid);
+ u16 (*fid_vid)(struct mlxsw_sp_bridge_device *bridge_device,
+ const struct mlxsw_sp_fid *fid);
};
static int
@@ -1237,6 +1247,51 @@ static enum mlxsw_reg_sfd_op mlxsw_sp_sfd_op(bool adding)
MLXSW_REG_SFD_OP_WRITE_REMOVE;
}
+static int mlxsw_sp_port_fdb_tunnel_uc_op(struct mlxsw_sp *mlxsw_sp,
+ const char *mac, u16 fid,
+ enum mlxsw_sp_l3proto proto,
+ const union mlxsw_sp_l3addr *addr,
+ bool adding, bool dynamic)
+{
+ enum mlxsw_reg_sfd_uc_tunnel_protocol sfd_proto;
+ char *sfd_pl;
+ u8 num_rec;
+ u32 uip;
+ int err;
+
+ switch (proto) {
+ case MLXSW_SP_L3_PROTO_IPV4:
+ uip = be32_to_cpu(addr->addr4);
+ sfd_proto = MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV4;
+ break;
+ case MLXSW_SP_L3_PROTO_IPV6: /* fall through */
+ default:
+ WARN_ON(1);
+ return -EOPNOTSUPP;
+ }
+
+ sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL);
+ if (!sfd_pl)
+ return -ENOMEM;
+
+ mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0);
+ mlxsw_reg_sfd_uc_tunnel_pack(sfd_pl, 0,
+ mlxsw_sp_sfd_rec_policy(dynamic), mac, fid,
+ MLXSW_REG_SFD_REC_ACTION_NOP, uip,
+ sfd_proto);
+ num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl);
+ if (err)
+ goto out;
+
+ if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl))
+ err = -EBUSY;
+
+out:
+ kfree(sfd_pl);
+ return err;
+}
+
static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port,
const char *mac, u16 fid, bool adding,
enum mlxsw_reg_sfd_rec_action action,
@@ -1950,6 +2005,21 @@ mlxsw_sp_bridge_8021q_port_leave(struct mlxsw_sp_bridge_device *bridge_device,
mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1);
}
+static int
+mlxsw_sp_bridge_8021q_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device,
+ const struct net_device *vxlan_dev,
+ struct netlink_ext_ack *extack)
+{
+ WARN_ON(1);
+ return -EINVAL;
+}
+
+static void
+mlxsw_sp_bridge_8021q_vxlan_leave(struct mlxsw_sp_bridge_device *bridge_device,
+ const struct net_device *vxlan_dev)
+{
+}
+
static struct mlxsw_sp_fid *
mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device,
u16 vid)
@@ -1959,10 +2029,29 @@ mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device,
return mlxsw_sp_fid_8021q_get(mlxsw_sp, vid);
}
+static struct mlxsw_sp_fid *
+mlxsw_sp_bridge_8021q_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device,
+ u16 vid)
+{
+ WARN_ON(1);
+ return NULL;
+}
+
+static u16
+mlxsw_sp_bridge_8021q_fid_vid(struct mlxsw_sp_bridge_device *bridge_device,
+ const struct mlxsw_sp_fid *fid)
+{
+ return mlxsw_sp_fid_8021q_vid(fid);
+}
+
static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021q_ops = {
.port_join = mlxsw_sp_bridge_8021q_port_join,
.port_leave = mlxsw_sp_bridge_8021q_port_leave,
+ .vxlan_join = mlxsw_sp_bridge_8021q_vxlan_join,
+ .vxlan_leave = mlxsw_sp_bridge_8021q_vxlan_leave,
.fid_get = mlxsw_sp_bridge_8021q_fid_get,
+ .fid_lookup = mlxsw_sp_bridge_8021q_fid_lookup,
+ .fid_vid = mlxsw_sp_bridge_8021q_fid_vid,
};
static bool
@@ -2026,19 +2115,126 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device,
mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan);
}
+static int
+mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device,
+ const struct net_device *vxlan_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
+ struct vxlan_dev *vxlan = netdev_priv(vxlan_dev);
+ struct mlxsw_sp_nve_params params = {
+ .type = MLXSW_SP_NVE_TYPE_VXLAN,
+ .vni = vxlan->cfg.vni,
+ .dev = vxlan_dev,
+ };
+ struct mlxsw_sp_fid *fid;
+ int err;
+
+ fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex);
+ if (!fid)
+ return -EINVAL;
+
+ if (mlxsw_sp_fid_vni_is_set(fid))
+ return -EINVAL;
+
+ err = mlxsw_sp_nve_fid_enable(mlxsw_sp, fid, &params, extack);
+ if (err)
+ goto err_nve_fid_enable;
+
+ /* The tunnel port does not hold a reference on the FID. Only
+ * local ports and the router port
+ */
+ mlxsw_sp_fid_put(fid);
+
+ return 0;
+
+err_nve_fid_enable:
+ mlxsw_sp_fid_put(fid);
+ return err;
+}
+
+static void
+mlxsw_sp_bridge_8021d_vxlan_leave(struct mlxsw_sp_bridge_device *bridge_device,
+ const struct net_device *vxlan_dev)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
+ struct mlxsw_sp_fid *fid;
+
+ fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex);
+ if (WARN_ON(!fid))
+ return;
+
+ /* If the VxLAN device is down, then the FID does not have a VNI */
+ if (!mlxsw_sp_fid_vni_is_set(fid))
+ goto out;
+
+ mlxsw_sp_nve_fid_disable(mlxsw_sp, fid);
+out:
+ mlxsw_sp_fid_put(fid);
+}
+
static struct mlxsw_sp_fid *
mlxsw_sp_bridge_8021d_fid_get(struct mlxsw_sp_bridge_device *bridge_device,
u16 vid)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
+ struct net_device *vxlan_dev;
+ struct mlxsw_sp_fid *fid;
+ int err;
+
+ fid = mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex);
+ if (IS_ERR(fid))
+ return fid;
+
+ if (mlxsw_sp_fid_vni_is_set(fid))
+ return fid;
+
+ vxlan_dev = mlxsw_sp_bridge_vxlan_dev_find(bridge_device->dev);
+ if (!vxlan_dev)
+ return fid;
+
+ if (!netif_running(vxlan_dev))
+ return fid;
+
+ err = mlxsw_sp_bridge_8021d_vxlan_join(bridge_device, vxlan_dev, NULL);
+ if (err)
+ goto err_vxlan_join;
+
+ return fid;
+
+err_vxlan_join:
+ mlxsw_sp_fid_put(fid);
+ return ERR_PTR(err);
+}
+
+static struct mlxsw_sp_fid *
+mlxsw_sp_bridge_8021d_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device,
+ u16 vid)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev);
+
+ /* The only valid VLAN for a VLAN-unaware bridge is 0 */
+ if (vid)
+ return NULL;
- return mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex);
+ return mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex);
+}
+
+static u16
+mlxsw_sp_bridge_8021d_fid_vid(struct mlxsw_sp_bridge_device *bridge_device,
+ const struct mlxsw_sp_fid *fid)
+{
+ return 0;
}
static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021d_ops = {
.port_join = mlxsw_sp_bridge_8021d_port_join,
.port_leave = mlxsw_sp_bridge_8021d_port_leave,
+ .vxlan_join = mlxsw_sp_bridge_8021d_vxlan_join,
+ .vxlan_leave = mlxsw_sp_bridge_8021d_vxlan_leave,
.fid_get = mlxsw_sp_bridge_8021d_fid_get,
+ .fid_lookup = mlxsw_sp_bridge_8021d_fid_lookup,
+ .fid_vid = mlxsw_sp_bridge_8021d_fid_vid,
};
int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port,
@@ -2088,15 +2284,43 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port,
mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port);
}
+int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *br_dev,
+ const struct net_device *vxlan_dev,
+ struct netlink_ext_ack *extack)
+{
+ struct mlxsw_sp_bridge_device *bridge_device;
+
+ bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+ if (WARN_ON(!bridge_device))
+ return -EINVAL;
+
+ return bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, extack);
+}
+
+void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp,
+ const struct net_device *br_dev,
+ const struct net_device *vxlan_dev)
+{
+ struct mlxsw_sp_bridge_device *bridge_device;
+
+ bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+ if (WARN_ON(!bridge_device))
+ return;
+
+ bridge_device->ops->vxlan_leave(bridge_device, vxlan_dev);
+}
+
static void
mlxsw_sp_fdb_call_notifiers(enum switchdev_notifier_type type,
const char *mac, u16 vid,
- struct net_device *dev)
+ struct net_device *dev, bool offloaded)
{
struct switchdev_notifier_fdb_info info;
info.addr = mac;
info.vid = vid;
+ info.offloaded = offloaded;
call_switchdev_notifiers(type, dev, &info.info);
}
@@ -2148,7 +2372,7 @@ do_fdb_op:
if (!do_notification)
return;
type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE;
- mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev);
+ mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev, adding);
return;
@@ -2208,7 +2432,7 @@ do_fdb_op:
if (!do_notification)
return;
type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE;
- mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev);
+ mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev, adding);
return;
@@ -2284,12 +2508,127 @@ out:
struct mlxsw_sp_switchdev_event_work {
struct work_struct work;
- struct switchdev_notifier_fdb_info fdb_info;
+ union {
+ struct switchdev_notifier_fdb_info fdb_info;
+ struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info;
+ };
struct net_device *dev;
unsigned long event;
};
-static void mlxsw_sp_switchdev_event_work(struct work_struct *work)
+static void
+mlxsw_sp_switchdev_vxlan_addr_convert(const union vxlan_addr *vxlan_addr,
+ enum mlxsw_sp_l3proto *proto,
+ union mlxsw_sp_l3addr *addr)
+{
+ if (vxlan_addr->sa.sa_family == AF_INET) {
+ addr->addr4 = vxlan_addr->sin.sin_addr.s_addr;
+ *proto = MLXSW_SP_L3_PROTO_IPV4;
+ } else {
+ addr->addr6 = vxlan_addr->sin6.sin6_addr;
+ *proto = MLXSW_SP_L3_PROTO_IPV6;
+ }
+}
+
+static void
+mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_switchdev_event_work *
+ switchdev_work,
+ struct mlxsw_sp_fid *fid, __be32 vni)
+{
+ struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info;
+ struct switchdev_notifier_fdb_info *fdb_info;
+ struct net_device *dev = switchdev_work->dev;
+ enum mlxsw_sp_l3proto proto;
+ union mlxsw_sp_l3addr addr;
+ int err;
+
+ fdb_info = &switchdev_work->fdb_info;
+ err = vxlan_fdb_find_uc(dev, fdb_info->addr, vni, &vxlan_fdb_info);
+ if (err)
+ return;
+
+ mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info.remote_ip,
+ &proto, &addr);
+
+ switch (switchdev_work->event) {
+ case SWITCHDEV_FDB_ADD_TO_DEVICE:
+ err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp,
+ vxlan_fdb_info.eth_addr,
+ mlxsw_sp_fid_index(fid),
+ proto, &addr, true, false);
+ if (err)
+ return;
+ vxlan_fdb_info.offloaded = true;
+ call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
+ &vxlan_fdb_info.info);
+ mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
+ vxlan_fdb_info.eth_addr,
+ fdb_info->vid, dev, true);
+ break;
+ case SWITCHDEV_FDB_DEL_TO_DEVICE:
+ err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp,
+ vxlan_fdb_info.eth_addr,
+ mlxsw_sp_fid_index(fid),
+ proto, &addr, false,
+ false);
+ vxlan_fdb_info.offloaded = false;
+ call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
+ &vxlan_fdb_info.info);
+ break;
+ }
+}
+
+static void
+mlxsw_sp_switchdev_bridge_nve_fdb_event(struct mlxsw_sp_switchdev_event_work *
+ switchdev_work)
+{
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct net_device *dev = switchdev_work->dev;
+ struct net_device *br_dev;
+ struct mlxsw_sp *mlxsw_sp;
+ struct mlxsw_sp_fid *fid;
+ __be32 vni;
+ int err;
+
+ if (switchdev_work->event != SWITCHDEV_FDB_ADD_TO_DEVICE &&
+ switchdev_work->event != SWITCHDEV_FDB_DEL_TO_DEVICE)
+ return;
+
+ if (!switchdev_work->fdb_info.added_by_user)
+ return;
+
+ if (!netif_running(dev))
+ return;
+ br_dev = netdev_master_upper_dev_get(dev);
+ if (!br_dev)
+ return;
+ if (!netif_is_bridge_master(br_dev))
+ return;
+ mlxsw_sp = mlxsw_sp_lower_get(br_dev);
+ if (!mlxsw_sp)
+ return;
+ bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+ if (!bridge_device)
+ return;
+
+ fid = bridge_device->ops->fid_lookup(bridge_device,
+ switchdev_work->fdb_info.vid);
+ if (!fid)
+ return;
+
+ err = mlxsw_sp_fid_vni(fid, &vni);
+ if (err)
+ goto out;
+
+ mlxsw_sp_switchdev_bridge_vxlan_fdb_event(mlxsw_sp, switchdev_work, fid,
+ vni);
+
+out:
+ mlxsw_sp_fid_put(fid);
+}
+
+static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work)
{
struct mlxsw_sp_switchdev_event_work *switchdev_work =
container_of(work, struct mlxsw_sp_switchdev_event_work, work);
@@ -2299,6 +2638,11 @@ static void mlxsw_sp_switchdev_event_work(struct work_struct *work)
int err;
rtnl_lock();
+ if (netif_is_vxlan(dev)) {
+ mlxsw_sp_switchdev_bridge_nve_fdb_event(switchdev_work);
+ goto out;
+ }
+
mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev);
if (!mlxsw_sp_port)
goto out;
@@ -2313,7 +2657,7 @@ static void mlxsw_sp_switchdev_event_work(struct work_struct *work)
break;
mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
fdb_info->addr,
- fdb_info->vid, dev);
+ fdb_info->vid, dev, true);
break;
case SWITCHDEV_FDB_DEL_TO_DEVICE:
fdb_info = &switchdev_work->fdb_info;
@@ -2338,22 +2682,213 @@ out:
dev_put(dev);
}
+static void
+mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_switchdev_event_work *
+ switchdev_work)
+{
+ struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info;
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct net_device *dev = switchdev_work->dev;
+ u8 all_zeros_mac[ETH_ALEN] = { 0 };
+ enum mlxsw_sp_l3proto proto;
+ union mlxsw_sp_l3addr addr;
+ struct net_device *br_dev;
+ struct mlxsw_sp_fid *fid;
+ u16 vid;
+ int err;
+
+ vxlan_fdb_info = &switchdev_work->vxlan_fdb_info;
+ br_dev = netdev_master_upper_dev_get(dev);
+
+ bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+ if (!bridge_device)
+ return;
+
+ fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan_fdb_info->vni);
+ if (!fid)
+ return;
+
+ mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip,
+ &proto, &addr);
+
+ if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) {
+ err = mlxsw_sp_nve_flood_ip_add(mlxsw_sp, fid, proto, &addr);
+ if (err) {
+ mlxsw_sp_fid_put(fid);
+ return;
+ }
+ vxlan_fdb_info->offloaded = true;
+ call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
+ &vxlan_fdb_info->info);
+ mlxsw_sp_fid_put(fid);
+ return;
+ }
+
+ /* The device has a single FDB table, whereas Linux has two - one
+ * in the bridge driver and another in the VxLAN driver. We only
+ * program an entry to the device if the MAC points to the VxLAN
+ * device in the bridge's FDB table
+ */
+ vid = bridge_device->ops->fid_vid(bridge_device, fid);
+ if (br_fdb_find_port(br_dev, vxlan_fdb_info->eth_addr, vid) != dev)
+ goto err_br_fdb_find;
+
+ err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, vxlan_fdb_info->eth_addr,
+ mlxsw_sp_fid_index(fid), proto,
+ &addr, true, false);
+ if (err)
+ goto err_fdb_tunnel_uc_op;
+ vxlan_fdb_info->offloaded = true;
+ call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev,
+ &vxlan_fdb_info->info);
+ mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
+ vxlan_fdb_info->eth_addr, vid, dev, true);
+
+ mlxsw_sp_fid_put(fid);
+
+ return;
+
+err_fdb_tunnel_uc_op:
+err_br_fdb_find:
+ mlxsw_sp_fid_put(fid);
+}
+
+static void
+mlxsw_sp_switchdev_vxlan_fdb_del(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_switchdev_event_work *
+ switchdev_work)
+{
+ struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info;
+ struct mlxsw_sp_bridge_device *bridge_device;
+ struct net_device *dev = switchdev_work->dev;
+ struct net_device *br_dev = netdev_master_upper_dev_get(dev);
+ u8 all_zeros_mac[ETH_ALEN] = { 0 };
+ enum mlxsw_sp_l3proto proto;
+ union mlxsw_sp_l3addr addr;
+ struct mlxsw_sp_fid *fid;
+ u16 vid;
+
+ vxlan_fdb_info = &switchdev_work->vxlan_fdb_info;
+
+ bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev);
+ if (!bridge_device)
+ return;
+
+ fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan_fdb_info->vni);
+ if (!fid)
+ return;
+
+ mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip,
+ &proto, &addr);
+
+ if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) {
+ mlxsw_sp_nve_flood_ip_del(mlxsw_sp, fid, proto, &addr);
+ mlxsw_sp_fid_put(fid);
+ return;
+ }
+
+ mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, vxlan_fdb_info->eth_addr,
+ mlxsw_sp_fid_index(fid), proto, &addr,
+ false, false);
+ vid = bridge_device->ops->fid_vid(bridge_device, fid);
+ mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED,
+ vxlan_fdb_info->eth_addr, vid, dev, false);
+
+ mlxsw_sp_fid_put(fid);
+}
+
+static void mlxsw_sp_switchdev_vxlan_fdb_event_work(struct work_struct *work)
+{
+ struct mlxsw_sp_switchdev_event_work *switchdev_work =
+ container_of(work, struct mlxsw_sp_switchdev_event_work, work);
+ struct net_device *dev = switchdev_work->dev;
+ struct mlxsw_sp *mlxsw_sp;
+ struct net_device *br_dev;
+
+ rtnl_lock();
+
+ if (!netif_running(dev))
+ goto out;
+ br_dev = netdev_master_upper_dev_get(dev);
+ if (!br_dev)
+ goto out;
+ if (!netif_is_bridge_master(br_dev))
+ goto out;
+ mlxsw_sp = mlxsw_sp_lower_get(br_dev);
+ if (!mlxsw_sp)
+ goto out;
+
+ switch (switchdev_work->event) {
+ case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE:
+ mlxsw_sp_switchdev_vxlan_fdb_add(mlxsw_sp, switchdev_work);
+ break;
+ case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE:
+ mlxsw_sp_switchdev_vxlan_fdb_del(mlxsw_sp, switchdev_work);
+ break;
+ }
+
+out:
+ rtnl_unlock();
+ kfree(switchdev_work);
+ dev_put(dev);
+}
+
+static int
+mlxsw_sp_switchdev_vxlan_work_prepare(struct mlxsw_sp_switchdev_event_work *
+ switchdev_work,
+ struct switchdev_notifier_info *info)
+{
+ struct vxlan_dev *vxlan = netdev_priv(switchdev_work->dev);
+ struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info;
+ struct vxlan_config *cfg = &vxlan->cfg;
+
+ vxlan_fdb_info = container_of(info,
+ struct switchdev_notifier_vxlan_fdb_info,
+ info);
+
+ if (vxlan_fdb_info->remote_port != cfg->dst_port)
+ return -EOPNOTSUPP;
+ if (vxlan_fdb_info->remote_vni != cfg->vni)
+ return -EOPNOTSUPP;
+ if (vxlan_fdb_info->vni != cfg->vni)
+ return -EOPNOTSUPP;
+ if (vxlan_fdb_info->remote_ifindex)
+ return -EOPNOTSUPP;
+ if (is_multicast_ether_addr(vxlan_fdb_info->eth_addr))
+ return -EOPNOTSUPP;
+ if (vxlan_addr_multicast(&vxlan_fdb_info->remote_ip))
+ return -EOPNOTSUPP;
+
+ switchdev_work->vxlan_fdb_info = *vxlan_fdb_info;
+
+ return 0;
+}
+
/* Called under rcu_read_lock() */
static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
struct mlxsw_sp_switchdev_event_work *switchdev_work;
- struct switchdev_notifier_fdb_info *fdb_info = ptr;
+ struct switchdev_notifier_fdb_info *fdb_info;
+ struct switchdev_notifier_info *info = ptr;
+ struct net_device *br_dev;
+ int err;
- if (!mlxsw_sp_port_dev_lower_find_rcu(dev))
+ /* Tunnel devices are not our uppers, so check their master instead */
+ br_dev = netdev_master_upper_dev_get_rcu(dev);
+ if (!br_dev)
+ return NOTIFY_DONE;
+ if (!netif_is_bridge_master(br_dev))
+ return NOTIFY_DONE;
+ if (!mlxsw_sp_port_dev_lower_find_rcu(br_dev))
return NOTIFY_DONE;
switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC);
if (!switchdev_work)
return NOTIFY_BAD;
- INIT_WORK(&switchdev_work->work, mlxsw_sp_switchdev_event_work);
switchdev_work->dev = dev;
switchdev_work->event = event;
@@ -2362,6 +2897,11 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
case SWITCHDEV_FDB_DEL_TO_DEVICE: /* fall through */
case SWITCHDEV_FDB_ADD_TO_BRIDGE: /* fall through */
case SWITCHDEV_FDB_DEL_TO_BRIDGE:
+ fdb_info = container_of(info,
+ struct switchdev_notifier_fdb_info,
+ info);
+ INIT_WORK(&switchdev_work->work,
+ mlxsw_sp_switchdev_bridge_fdb_event_work);
memcpy(&switchdev_work->fdb_info, ptr,
sizeof(switchdev_work->fdb_info));
switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC);
@@ -2375,6 +2915,16 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
*/
dev_hold(dev);
break;
+ case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE: /* fall through */
+ case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE:
+ INIT_WORK(&switchdev_work->work,
+ mlxsw_sp_switchdev_vxlan_fdb_event_work);
+ err = mlxsw_sp_switchdev_vxlan_work_prepare(switchdev_work,
+ info);
+ if (err)
+ goto err_vxlan_work_prepare;
+ dev_hold(dev);
+ break;
default:
kfree(switchdev_work);
return NOTIFY_DONE;
@@ -2384,6 +2934,7 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused,
return NOTIFY_DONE;
+err_vxlan_work_prepare:
err_addr_alloc:
kfree(switchdev_work);
return NOTIFY_BAD;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 53020724c2f6..6f18f4d3322a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -24,6 +24,7 @@ enum {
MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34,
MLXSW_TRAP_ID_PKT_SAMPLE = 0x38,
MLXSW_TRAP_ID_FID_MISS = 0x3D,
+ MLXSW_TRAP_ID_DECAP_ECN0 = 0x40,
MLXSW_TRAP_ID_ARPBC = 0x50,
MLXSW_TRAP_ID_ARPUC = 0x51,
MLXSW_TRAP_ID_MTUERROR = 0x52,
@@ -59,6 +60,7 @@ enum {
MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91,
MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92,
MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1,
+ MLXSW_TRAP_ID_NVE_ENCAP_ARP = 0xBD,
MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
MLXSW_TRAP_ID_ACL0 = 0x1C0,