Diffstat (limited to 'drivers/net/ethernet/mellanox')
57 files changed, 5884 insertions, 1255 deletions
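For context, the first hunks below (mlx4 `alloc.c` and `icm.c`) replace open-coded ceiling division with the kernel's `DIV_ROUND_UP()` helper. A minimal sketch of the equivalence, not part of the patch (the wrapper function name is hypothetical, for illustration only):

```c
#include <linux/kernel.h>	/* DIV_ROUND_UP() */
#include <linux/mm.h>		/* PAGE_SIZE */

/* Hypothetical helper showing the idiom change in the hunks below:
 * the old open-coded form and DIV_ROUND_UP() compute the same value,
 * i.e. the number of PAGE_SIZE chunks needed to hold 'size' bytes.
 */
static inline int nbufs_for_size(int size)
{
	/* Before: (size + PAGE_SIZE - 1) / PAGE_SIZE */
	/* After:  DIV_ROUND_UP(size, PAGE_SIZE)      */
	return DIV_ROUND_UP(size, PAGE_SIZE);
}
```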
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index 4bdf25059542..deef5a998985 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -614,7 +614,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, int i; buf->direct.buf = NULL; - buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE; + buf->nbufs = DIV_ROUND_UP(size, PAGE_SIZE); buf->npages = buf->nbufs; buf->page_shift = PAGE_SHIFT; buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index d25e16d2c319..109472d6b61f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -167,8 +167,13 @@ static void mlx4_en_get_profile(struct mlx4_en_dev *mdev) params->prof[i].rx_ppp = pfcrx; params->prof[i].tx_pause = !(pfcrx || pfctx); params->prof[i].tx_ppp = pfctx; - params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE; - params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE; + if (mlx4_low_memory_profile()) { + params->prof[i].tx_ring_size = MLX4_EN_MIN_TX_SIZE; + params->prof[i].rx_ring_size = MLX4_EN_MIN_RX_SIZE; + } else { + params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE; + params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE; + } params->prof[i].num_up = MLX4_EN_NUM_UP_LOW; params->prof[i].num_tx_rings_p_up = params->max_num_tx_rings_p_up; params->prof[i].tx_ring_num[TX] = params->max_num_tx_rings_p_up * diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 7262c6310650..4b4351141b94 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -406,7 +406,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size; if (WARN_ON(!obj_per_chunk)) return -EINVAL; - num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk; + num_icm = DIV_ROUND_UP(nobj, obj_per_chunk); table->icm = kvcalloc(num_icm, sizeof(*table->icm), GFP_KERNEL); if (!table->icm) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index c3228b89df46..485d856546c6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -72,7 +72,7 @@ #define MLX4_EN_PAGE_SIZE (1 << MLX4_EN_PAGE_SHIFT) #define DEF_RX_RINGS 16 #define MAX_RX_RINGS 128 -#define MIN_RX_RINGS 4 +#define MIN_RX_RINGS 1 #define LOG_TXBB_SIZE 6 #define TXBB_SIZE BIT(LOG_TXBB_SIZE) #define HEADROOM (2048 / TXBB_SIZE + 1) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a53736c26c0c..a5a0823e5ada 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -308,10 +308,11 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_MODIFY_FLOW_TABLE: case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: - case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT: case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT: case MLX5_CMD_OP_FPGA_DESTROY_QP: case MLX5_CMD_OP_DESTROY_GENERAL_OBJECT: + case MLX5_CMD_OP_DEALLOC_MEMIC: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -426,7 +427,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, 
u16 op, case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: case MLX5_CMD_OP_QUERY_FLOW_COUNTER: - case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: + case MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT: case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: case MLX5_CMD_OP_FPGA_CREATE_QP: case MLX5_CMD_OP_FPGA_MODIFY_QP: @@ -435,6 +436,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_CREATE_GENERAL_OBJECT: case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT: case MLX5_CMD_OP_QUERY_GENERAL_OBJECT: + case MLX5_CMD_OP_ALLOC_MEMIC: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -599,8 +601,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER); MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE); - MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER); - MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER); + MLX5_COMMAND_STR_CASE(ALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_COMMAND_STR_CASE(DEALLOC_PACKET_REFORMAT_CONTEXT); MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT); MLX5_COMMAND_STR_CASE(FPGA_CREATE_QP); @@ -617,6 +619,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT); MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT); MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT); + MLX5_COMMAND_STR_CASE(ALLOC_MEMIC); + MLX5_COMMAND_STR_CASE(DEALLOC_MEMIC); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index a4179122a279..4b85abb5c9f7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -109,6 +109,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, cq->cons_index = 0; cq->arm_sn = 0; cq->eq = eq; + cq->uid = MLX5_GET(create_cq_in, in, uid); refcount_set(&cq->refcount, 1); init_completion(&cq->free); if (!cq->comp) @@ -144,6 +145,7 @@ err_cmd: memset(dout, 0, sizeof(dout)); MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ); MLX5_SET(destroy_cq_in, din, cqn, cq->cqn); + MLX5_SET(destroy_cq_in, din, uid, cq->uid); mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); return err; } @@ -165,6 +167,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ); MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); + MLX5_SET(destroy_cq_in, in, uid, cq->uid); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) return err; @@ -196,6 +199,7 @@ int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0}; MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ); + MLX5_SET(modify_cq_in, in, uid, cq->uid); return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_modify_cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h index 0240aee9189e..d027ce00c8ce 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -133,7 +133,7 @@ TRACE_EVENT(mlx5_fs_del_fg, {MLX5_FLOW_CONTEXT_ACTION_DROP, "DROP"},\ {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, "FWD"},\ {MLX5_FLOW_CONTEXT_ACTION_COUNT, "CNT"},\ - {MLX5_FLOW_CONTEXT_ACTION_ENCAP, "ENCAP"},\ + 
{MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT, "REFORMAT"},\ {MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\ {MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\ {MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH, "VLAN_PUSH"},\ @@ -252,10 +252,10 @@ TRACE_EVENT(mlx5_fs_add_rule, memcpy(__entry->destination, &rule->dest_attr, sizeof(__entry->destination)); - if (rule->dest_attr.type & MLX5_FLOW_DESTINATION_TYPE_COUNTER && - rule->dest_attr.counter) + if (rule->dest_attr.type & + MLX5_FLOW_DESTINATION_TYPE_COUNTER) __entry->counter_id = - rule->dest_attr.counter->id; + rule->dest_attr.counter_id; ), TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n", __entry->rule, __entry->fte, __entry->index, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 0f189f873859..d7fbd5b6ac95 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -173,6 +173,7 @@ static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) } } +/* Use this function to get max num channels (rxqs/txqs) only to create netdev */ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) { return is_kdump_kernel() ? @@ -181,6 +182,13 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) MLX5E_MAX_NUM_CHANNELS); } +/* Use this function to get max num channels after netdev was created */ +static inline int mlx5e_get_netdev_max_channels(struct net_device *netdev) +{ + return min_t(unsigned int, netdev->num_rx_queues, + netdev->num_tx_queues); +} + struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_eth_seg eth; @@ -205,18 +213,12 @@ struct mlx5e_umr_wqe { extern const char mlx5e_self_tests[][ETH_GSTRING_LEN]; -static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = { - "rx_cqe_moder", - "tx_cqe_moder", - "rx_cqe_compress", - "rx_striding_rq", -}; - enum mlx5e_priv_flag { MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0), MLX5E_PFLAG_TX_CQE_BASED_MODER = (1 << 1), MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 2), MLX5E_PFLAG_RX_STRIDING_RQ = (1 << 3), + MLX5E_PFLAG_RX_NO_CSUM_COMPLETE = (1 << 4), }; #define MLX5E_SET_PFLAG(params, pflag, enable) \ @@ -298,6 +300,7 @@ struct mlx5e_dcbx_dp { enum { MLX5E_RQ_STATE_ENABLED, MLX5E_RQ_STATE_AM, + MLX5E_RQ_STATE_NO_CSUM_COMPLETE, }; struct mlx5e_cq { @@ -678,7 +681,7 @@ struct mlx5e_priv { struct work_struct update_carrier_work; struct work_struct set_rx_mode_work; struct work_struct tx_timeout_work; - struct delayed_work update_stats_work; + struct work_struct update_stats_work; struct mlx5_core_dev *mdev; struct net_device *netdev; @@ -703,7 +706,7 @@ struct mlx5e_priv { }; struct mlx5e_profile { - void (*init)(struct mlx5_core_dev *mdev, + int (*init)(struct mlx5_core_dev *mdev, struct net_device *netdev, const struct mlx5e_profile *profile, void *ppriv); void (*cleanup)(struct mlx5e_priv *priv); @@ -715,7 +718,6 @@ struct mlx5e_profile { void (*disable)(struct mlx5e_priv *priv); void (*update_stats)(struct mlx5e_priv *priv); void (*update_carrier)(struct mlx5e_priv *priv); - int (*max_nch)(struct mlx5_core_dev *mdev); struct { mlx5e_fp_handle_rx_cqe handle_rx_cqe; mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe; @@ -906,10 +908,16 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb); /* common netdev helpers */ +void mlx5e_create_q_counters(struct mlx5e_priv *priv); +void mlx5e_destroy_q_counters(struct mlx5e_priv *priv); +int mlx5e_open_drop_rq(struct mlx5e_priv *priv, + struct 
mlx5e_rq *drop_rq); +void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq); + int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); -int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv); -void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv); +int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); +void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv); @@ -925,8 +933,8 @@ int mlx5e_create_tises(struct mlx5e_priv *priv); void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); -void mlx5e_update_stats_work(struct work_struct *work); +void mlx5e_queue_update_stats(struct mlx5e_priv *priv); int mlx5e_bits_invert(unsigned long a, int size); typedef int (*change_hw_mtu_cb)(struct mlx5e_priv *priv); @@ -953,21 +961,32 @@ int mlx5e_ethtool_get_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal); int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal); +u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv); +u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info); int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, struct ethtool_flash *flash); /* mlx5e generic netdev management API */ +int mlx5e_netdev_init(struct net_device *netdev, + struct mlx5e_priv *priv, + struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, + void *ppriv); +void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv); struct net_device* mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, - void *ppriv); + int nch, void *ppriv); int mlx5e_attach_netdev(struct mlx5e_priv *priv); void mlx5e_detach_netdev(struct mlx5e_priv *priv); void mlx5e_destroy_netdev(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u16 max_channels, u16 mtu); +void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params); +void mlx5e_build_rss_params(struct mlx5e_params *params); u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev); void mlx5e_rx_dim_work(struct work_struct *work); void mlx5e_tx_dim_work(struct work_struct *work); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 24e3b564964f..023dc4bccd28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -235,3 +235,211 @@ out: kfree(out); return err; } + +static u32 fec_supported_speeds[] = { + 10000, + 40000, + 25000, + 50000, + 56000, + 100000 +}; + +#define MLX5E_FEC_SUPPORTED_SPEEDS ARRAY_SIZE(fec_supported_speeds) + +/* get/set FEC admin field for a given speed */ +static int mlx5e_fec_admin_field(u32 *pplm, + u8 *fec_policy, + bool write, + u32 speed) +{ + switch (speed) { + case 10000: + case 40000: + if (!write) + *fec_policy = MLX5_GET(pplm_reg, pplm, + fec_override_cap_10g_40g); + else + MLX5_SET(pplm_reg, pplm, + fec_override_admin_10g_40g, *fec_policy); + break; + case 25000: + if (!write) + *fec_policy = MLX5_GET(pplm_reg, pplm, + fec_override_admin_25g); + else + MLX5_SET(pplm_reg, pplm, + fec_override_admin_25g, *fec_policy); + break; + case 50000: + if (!write) + *fec_policy = MLX5_GET(pplm_reg, pplm, + 
fec_override_admin_50g); + else + MLX5_SET(pplm_reg, pplm, + fec_override_admin_50g, *fec_policy); + break; + case 56000: + if (!write) + *fec_policy = MLX5_GET(pplm_reg, pplm, + fec_override_admin_56g); + else + MLX5_SET(pplm_reg, pplm, + fec_override_admin_56g, *fec_policy); + break; + case 100000: + if (!write) + *fec_policy = MLX5_GET(pplm_reg, pplm, + fec_override_admin_100g); + else + MLX5_SET(pplm_reg, pplm, + fec_override_admin_100g, *fec_policy); + break; + default: + return -EINVAL; + } + return 0; +} + +/* returns FEC capabilities for a given speed */ +static int mlx5e_get_fec_cap_field(u32 *pplm, + u8 *fec_cap, + u32 speed) +{ + switch (speed) { + case 10000: + case 40000: + *fec_cap = MLX5_GET(pplm_reg, pplm, + fec_override_admin_10g_40g); + break; + case 25000: + *fec_cap = MLX5_GET(pplm_reg, pplm, + fec_override_cap_25g); + break; + case 50000: + *fec_cap = MLX5_GET(pplm_reg, pplm, + fec_override_cap_50g); + break; + case 56000: + *fec_cap = MLX5_GET(pplm_reg, pplm, + fec_override_cap_56g); + break; + case 100000: + *fec_cap = MLX5_GET(pplm_reg, pplm, + fec_override_cap_100g); + break; + default: + return -EINVAL; + } + return 0; +} + +int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps) +{ + u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(pplm_reg); + u32 current_fec_speed; + int err; + + if (!MLX5_CAP_GEN(dev, pcam_reg)) + return -EOPNOTSUPP; + + if (!MLX5_CAP_PCAM_REG(dev, pplm)) + return -EOPNOTSUPP; + + MLX5_SET(pplm_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); + if (err) + return err; + + err = mlx5e_port_linkspeed(dev, ¤t_fec_speed); + if (err) + return err; + + return mlx5e_get_fec_cap_field(out, fec_caps, current_fec_speed); +} + +int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, + u8 *fec_configured_mode) +{ + u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(pplm_reg); + u32 link_speed; + int err; + + if (!MLX5_CAP_GEN(dev, pcam_reg)) + return -EOPNOTSUPP; + + if (!MLX5_CAP_PCAM_REG(dev, pplm)) + return -EOPNOTSUPP; + + MLX5_SET(pplm_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); + if (err) + return err; + + *fec_mode_active = MLX5_GET(pplm_reg, out, fec_mode_active); + + if (!fec_configured_mode) + return 0; + + err = mlx5e_port_linkspeed(dev, &link_speed); + if (err) + return err; + + return mlx5e_fec_admin_field(out, fec_configured_mode, 0, link_speed); +} + +int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy) +{ + bool fec_mode_not_supp_in_speed = false; + u8 no_fec_policy = BIT(MLX5E_FEC_NOFEC); + u32 out[MLX5_ST_SZ_DW(pplm_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(pplm_reg)] = {}; + int sz = MLX5_ST_SZ_BYTES(pplm_reg); + u32 current_fec_speed; + u8 fec_caps = 0; + int err; + int i; + + if (!MLX5_CAP_GEN(dev, pcam_reg)) + return -EOPNOTSUPP; + + if (!MLX5_CAP_PCAM_REG(dev, pplm)) + return -EOPNOTSUPP; + + MLX5_SET(pplm_reg, in, local_port, 1); + err = mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 0); + if (err) + return err; + + err = mlx5e_port_linkspeed(dev, ¤t_fec_speed); + if (err) + return err; + + memset(in, 0, sz); + MLX5_SET(pplm_reg, in, local_port, 1); + for (i = 0; i < MLX5E_FEC_SUPPORTED_SPEEDS && !!fec_policy; i++) { + mlx5e_get_fec_cap_field(out, &fec_caps, fec_supported_speeds[i]); + /* policy supported for link speed */ + if (!!(fec_caps & fec_policy)) { + 
mlx5e_fec_admin_field(in, &fec_policy, 1, + fec_supported_speeds[i]); + } else { + if (fec_supported_speeds[i] == current_fec_speed) + return -EOPNOTSUPP; + mlx5e_fec_admin_field(in, &no_fec_policy, 1, + fec_supported_speeds[i]); + fec_mode_not_supp_in_speed = true; + } + } + + if (fec_mode_not_supp_in_speed) + mlx5_core_dbg(dev, + "FEC policy 0x%x is not supported for some speeds", + fec_policy); + + return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPLM, 0, 1); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index f8cbd8194179..cd2160b8c9bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -45,4 +45,16 @@ int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out); int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); + +int mlx5e_get_fec_caps(struct mlx5_core_dev *dev, u8 *fec_caps); +int mlx5e_get_fec_mode(struct mlx5_core_dev *dev, u32 *fec_mode_active, + u8 *fec_configured_mode); +int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u8 fec_policy); + +enum { + MLX5E_FEC_NOFEC, + MLX5E_FEC_FIRECODE, + MLX5E_FEC_RS_528_514, +}; + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 45cdde694d20..8657e0f26995 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -543,8 +543,11 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); - netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n", - __func__, arfs_rule->filter_id, arfs_rule->rxq, err); + priv->channel_stats[arfs_rule->rxq].rq.arfs_err++; + mlx5e_dbg(HW, priv, + "%s: add rule(filter id=%d, rq idx=%d, ip proto=0x%x) failed,err=%d\n", + __func__, arfs_rule->filter_id, arfs_rule->rxq, + tuple->ip_proto, err); } out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index db3278cc052b..3078491cc0d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -153,7 +153,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) if (enable_uc_lb) MLX5_SET(modify_tir_in, in, ctx.self_lb_block, - MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_); + MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST); MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 98dd3e0ada72..3e770abfd802 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -135,6 +135,14 @@ void mlx5e_build_ptys2ethtool_map(void) ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT); } +static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = { + "rx_cqe_moder", + "tx_cqe_moder", + "rx_cqe_compress", + "rx_striding_rq", + "rx_no_csum_complete", +}; + int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) { int i, num_stats = 0; @@ -311,7 +319,7 @@ static int mlx5e_set_ringparam(struct net_device *dev, void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv, struct 
ethtool_channels *ch) { - ch->max_combined = priv->profile->max_nch(priv->mdev); + ch->max_combined = mlx5e_get_netdev_max_channels(priv->netdev); ch->combined_count = priv->channels.params.num_channels; } @@ -539,6 +547,70 @@ static void ptys2ethtool_adver_link(unsigned long *advertising_modes, __ETHTOOL_LINK_MODE_MASK_NBITS); } +static const u32 pplm_fec_2_ethtool[] = { + [MLX5E_FEC_NOFEC] = ETHTOOL_FEC_OFF, + [MLX5E_FEC_FIRECODE] = ETHTOOL_FEC_BASER, + [MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS, +}; + +static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size) +{ + int mode = 0; + + if (!fec_mode) + return ETHTOOL_FEC_AUTO; + + mode = find_first_bit(&fec_mode, size); + + if (mode < ARRAY_SIZE(pplm_fec_2_ethtool)) + return pplm_fec_2_ethtool[mode]; + + return 0; +} + +/* we use ETHTOOL_FEC_* offset and apply it to ETHTOOL_LINK_MODE_FEC_*_BIT */ +static u32 ethtool_fec2ethtool_caps(u_long ethtool_fec_code) +{ + u32 offset; + + offset = find_first_bit(ðtool_fec_code, sizeof(u32)); + offset -= ETHTOOL_FEC_OFF_BIT; + offset += ETHTOOL_LINK_MODE_FEC_NONE_BIT; + + return offset; +} + +static int get_fec_supported_advertised(struct mlx5_core_dev *dev, + struct ethtool_link_ksettings *link_ksettings) +{ + u_long fec_caps = 0; + u32 active_fec = 0; + u32 offset; + u32 bitn; + int err; + + err = mlx5e_get_fec_caps(dev, (u8 *)&fec_caps); + if (err) + return (err == -EOPNOTSUPP) ? 0 : err; + + err = mlx5e_get_fec_mode(dev, &active_fec, NULL); + if (err) + return err; + + for_each_set_bit(bitn, &fec_caps, ARRAY_SIZE(pplm_fec_2_ethtool)) { + u_long ethtool_bitmask = pplm_fec_2_ethtool[bitn]; + + offset = ethtool_fec2ethtool_caps(ethtool_bitmask); + __set_bit(offset, link_ksettings->link_modes.supported); + } + + active_fec = pplm2ethtool_fec(active_fec, sizeof(u32) * BITS_PER_BYTE); + offset = ethtool_fec2ethtool_caps(active_fec); + __set_bit(offset, link_ksettings->link_modes.advertising); + + return 0; +} + static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings *link_ksettings, u32 eth_proto_cap, u8 connector_type) @@ -734,7 +806,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, if (err) { netdev_err(netdev, "%s: query port ptys failed: %d\n", __func__, err); - goto err_query_ptys; + goto err_query_regs; } eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); @@ -770,11 +842,17 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, AUTONEG_ENABLE; ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Autoneg); + + err = get_fec_supported_advertised(mdev, link_ksettings); + if (err) + netdev_dbg(netdev, "%s: FEC caps query failed: %d\n", + __func__, err); + if (!an_disable_admin) ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Autoneg); -err_query_ptys: +err_query_regs: return err; } @@ -852,18 +930,30 @@ out: return err; } +u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv) +{ + return sizeof(priv->channels.params.toeplitz_hash_key); +} + static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - return sizeof(priv->channels.params.toeplitz_hash_key); + return mlx5e_ethtool_get_rxfh_key_size(priv); } -static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev) +u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv) { return MLX5E_INDIR_RQT_SIZE; } +static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_rxfh_indir_size(priv); +} + static int 
mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { @@ -1257,6 +1347,58 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return mlx5_set_port_wol(mdev, mlx5_wol_mode); } +static int mlx5e_get_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fecparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 fec_configured = 0; + u32 fec_active = 0; + int err; + + err = mlx5e_get_fec_mode(mdev, &fec_active, &fec_configured); + + if (err) + return err; + + fecparam->active_fec = pplm2ethtool_fec((u_long)fec_active, + sizeof(u32) * BITS_PER_BYTE); + + if (!fecparam->active_fec) + return -EOPNOTSUPP; + + fecparam->fec = pplm2ethtool_fec((u_long)fec_configured, + sizeof(u8) * BITS_PER_BYTE); + + return 0; +} + +static int mlx5e_set_fecparam(struct net_device *netdev, + struct ethtool_fecparam *fecparam) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u8 fec_policy = 0; + int mode; + int err; + + for (mode = 0; mode < ARRAY_SIZE(pplm_fec_2_ethtool); mode++) { + if (!(pplm_fec_2_ethtool[mode] & fecparam->fec)) + continue; + fec_policy |= (1 << mode); + break; + } + + err = mlx5e_set_fec_mode(mdev, fec_policy); + + if (err) + return err; + + mlx5_toggle_port_link(mdev); + + return 0; +} + static u32 mlx5e_get_msglevel(struct net_device *dev) { return ((struct mlx5e_priv *)netdev_priv(dev))->msglevel; @@ -1512,6 +1654,27 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable) return 0; } +static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_channels *channels = &priv->channels; + struct mlx5e_channel *c; + int i; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + for (i = 0; i < channels->num; i++) { + c = channels->c[i]; + if (enable) + __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + else + __clear_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + } + + return 0; +} + static int mlx5e_handle_pflag(struct net_device *netdev, u32 wanted_flags, enum mlx5e_priv_flag flag, @@ -1563,6 +1726,12 @@ static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags) err = mlx5e_handle_pflag(netdev, pflags, MLX5E_PFLAG_RX_STRIDING_RQ, set_pflag_rx_striding_rq); + if (err) + goto out; + + err = mlx5e_handle_pflag(netdev, pflags, + MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, + set_pflag_rx_no_csum_complete); out: mutex_unlock(&priv->state_lock); @@ -1652,4 +1821,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .self_test = mlx5e_self_test, .get_msglevel = mlx5e_get_msglevel, .set_msglevel = mlx5e_set_msglevel, + .get_fecparam = mlx5e_get_fecparam, + .set_fecparam = mlx5e_set_fecparam, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 41cde926cdab..c18dcebe1462 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -131,14 +131,14 @@ set_ip4(void *headers_c, void *headers_v, __be32 ip4src_m, if (ip4src_m) { memcpy(MLX5E_FTE_ADDR_OF(headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4), &ip4src_v, sizeof(ip4src_v)); - memset(MLX5E_FTE_ADDR_OF(headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4), - 0xff, sizeof(ip4src_m)); + memcpy(MLX5E_FTE_ADDR_OF(headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4), + &ip4src_m, sizeof(ip4src_m)); } if (ip4dst_m) { 
memcpy(MLX5E_FTE_ADDR_OF(headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), &ip4dst_v, sizeof(ip4dst_v)); - memset(MLX5E_FTE_ADDR_OF(headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - 0xff, sizeof(ip4dst_m)); + memcpy(MLX5E_FTE_ADDR_OF(headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &ip4dst_m, sizeof(ip4dst_m)); } MLX5E_FTE_SET(headers_c, ethertype, 0xffff); @@ -173,11 +173,11 @@ set_tcp(void *headers_c, void *headers_v, __be16 psrc_m, __be16 psrc_v, __be16 pdst_m, __be16 pdst_v) { if (psrc_m) { - MLX5E_FTE_SET(headers_c, tcp_sport, 0xffff); + MLX5E_FTE_SET(headers_c, tcp_sport, ntohs(psrc_m)); MLX5E_FTE_SET(headers_v, tcp_sport, ntohs(psrc_v)); } if (pdst_m) { - MLX5E_FTE_SET(headers_c, tcp_dport, 0xffff); + MLX5E_FTE_SET(headers_c, tcp_dport, ntohs(pdst_m)); MLX5E_FTE_SET(headers_v, tcp_dport, ntohs(pdst_v)); } @@ -190,12 +190,12 @@ set_udp(void *headers_c, void *headers_v, __be16 psrc_m, __be16 psrc_v, __be16 pdst_m, __be16 pdst_v) { if (psrc_m) { - MLX5E_FTE_SET(headers_c, udp_sport, 0xffff); + MLX5E_FTE_SET(headers_c, udp_sport, ntohs(psrc_m)); MLX5E_FTE_SET(headers_v, udp_sport, ntohs(psrc_v)); } if (pdst_m) { - MLX5E_FTE_SET(headers_c, udp_dport, 0xffff); + MLX5E_FTE_SET(headers_c, udp_dport, ntohs(pdst_m)); MLX5E_FTE_SET(headers_v, udp_dport, ntohs(pdst_v)); } @@ -508,26 +508,14 @@ static int validate_tcpudp4(struct ethtool_rx_flow_spec *fs) if (l4_mask->tos) return -EINVAL; - if (l4_mask->ip4src) { - if (!all_ones(l4_mask->ip4src)) - return -EINVAL; + if (l4_mask->ip4src) ntuples++; - } - if (l4_mask->ip4dst) { - if (!all_ones(l4_mask->ip4dst)) - return -EINVAL; + if (l4_mask->ip4dst) ntuples++; - } - if (l4_mask->psrc) { - if (!all_ones(l4_mask->psrc)) - return -EINVAL; + if (l4_mask->psrc) ntuples++; - } - if (l4_mask->pdst) { - if (!all_ones(l4_mask->pdst)) - return -EINVAL; + if (l4_mask->pdst) ntuples++; - } /* Flow is TCP/UDP */ return ++ntuples; } @@ -540,16 +528,10 @@ static int validate_ip4(struct ethtool_rx_flow_spec *fs) if (l3_mask->l4_4_bytes || l3_mask->tos || fs->h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4) return -EINVAL; - if (l3_mask->ip4src) { - if (!all_ones(l3_mask->ip4src)) - return -EINVAL; + if (l3_mask->ip4src) ntuples++; - } - if (l3_mask->ip4dst) { - if (!all_ones(l3_mask->ip4dst)) - return -EINVAL; + if (l3_mask->ip4dst) ntuples++; - } if (l3_mask->proto) ntuples++; /* Flow is IPv4 */ @@ -588,16 +570,10 @@ static int validate_tcpudp6(struct ethtool_rx_flow_spec *fs) if (!ipv6_addr_any((struct in6_addr *)l4_mask->ip6dst)) ntuples++; - if (l4_mask->psrc) { - if (!all_ones(l4_mask->psrc)) - return -EINVAL; + if (l4_mask->psrc) ntuples++; - } - if (l4_mask->pdst) { - if (!all_ones(l4_mask->pdst)) - return -EINVAL; + if (l4_mask->pdst) ntuples++; - } /* Flow is TCP/UDP */ return ++ntuples; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f291d1bf1558..1243edbedc9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -272,10 +272,9 @@ static void mlx5e_update_ndo_stats(struct mlx5e_priv *priv) mlx5e_stats_grps[i].update_stats(priv); } -void mlx5e_update_stats_work(struct work_struct *work) +static void mlx5e_update_stats_work(struct work_struct *work) { - struct delayed_work *dwork = to_delayed_work(work); - struct mlx5e_priv *priv = container_of(dwork, struct mlx5e_priv, + struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, update_stats_work); mutex_lock(&priv->state_lock); @@ -283,6 +282,17 @@ void 
mlx5e_update_stats_work(struct work_struct *work) mutex_unlock(&priv->state_lock); } +void mlx5e_queue_update_stats(struct mlx5e_priv *priv) +{ + if (!priv->profile->update_stats) + return; + + if (unlikely(test_bit(MLX5E_STATE_DESTROYING, &priv->state))) + return; + + queue_work(priv->wq, &priv->update_stats_work); +} + static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, enum mlx5_dev_event event, unsigned long param) { @@ -929,6 +939,9 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (params->rx_dim_enabled) __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); + if (params->pflags & MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) + __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); + return 0; err_destroy_rq: @@ -1786,7 +1799,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c, struct mlx5e_channel_param *cparam) { struct mlx5e_priv *priv = c->priv; - int err, tc, max_nch = priv->profile->max_nch(priv->mdev); + int err, tc, max_nch = mlx5e_get_netdev_max_channels(priv->netdev); for (tc = 0; tc < params->num_tc; tc++) { int txq_ix = c->ix + tc * max_nch; @@ -2426,7 +2439,7 @@ int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) int err; int ix; - for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { + for (ix = 0; ix < mlx5e_get_netdev_max_channels(priv->netdev); ix++) { rqt = &priv->direct_tir[ix].rqt; err = mlx5e_create_rqt(priv, 1 /*size */, rqt); if (err) @@ -2447,7 +2460,7 @@ void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv) { int i; - for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) + for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); } @@ -2541,7 +2554,7 @@ static void mlx5e_redirect_rqts(struct mlx5e_priv *priv, mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp); } - for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { + for (ix = 0; ix < mlx5e_get_netdev_max_channels(priv->netdev); ix++) { struct mlx5e_redirect_rqt_param direct_rrp = { .is_rss = false, { @@ -2742,7 +2755,7 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) goto free_in; } - for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { + for (ix = 0; ix < mlx5e_get_netdev_max_channels(priv->netdev); ix++) { err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn, in, inlen); if (err) @@ -2842,7 +2855,7 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev) static void mlx5e_build_tc2txq_maps(struct mlx5e_priv *priv) { - int max_nch = priv->profile->max_nch(priv->mdev); + int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); int i, tc; for (i = 0; i < max_nch; i++) @@ -2954,9 +2967,7 @@ int mlx5e_open_locked(struct net_device *netdev) if (priv->profile->update_carrier) priv->profile->update_carrier(priv); - if (priv->profile->update_stats) - queue_delayed_work(priv->wq, &priv->update_stats_work, 0); - + mlx5e_queue_update_stats(priv); return 0; err_clear_state_opened_flag: @@ -3049,8 +3060,8 @@ static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev, return mlx5e_alloc_cq_common(mdev, param, cq); } -static int mlx5e_open_drop_rq(struct mlx5e_priv *priv, - struct mlx5e_rq *drop_rq) +int mlx5e_open_drop_rq(struct mlx5e_priv *priv, + struct mlx5e_rq *drop_rq) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_cq_param cq_param = {}; @@ -3094,7 +3105,7 @@ err_free_cq: return err; } -static void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq) +void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq) { mlx5e_destroy_rq(drop_rq); mlx5e_free_rq(drop_rq); @@ -3175,7 +3186,7 @@ static 
void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *t MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); } -int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) +int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc) { struct mlx5e_tir *tir; void *tirc; @@ -3202,7 +3213,7 @@ int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) } } - if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev)) goto out; for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) { @@ -3236,7 +3247,7 @@ err_destroy_inner_tirs: int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) { - int nch = priv->profile->max_nch(priv->mdev); + int nch = mlx5e_get_netdev_max_channels(priv->netdev); struct mlx5e_tir *tir; void *tirc; int inlen; @@ -3273,14 +3284,14 @@ err_destroy_ch_tirs: return err; } -void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) +void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc) { int i; for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]); - if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev)) return; for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) @@ -3289,7 +3300,7 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) { - int nch = priv->profile->max_nch(priv->mdev); + int nch = mlx5e_get_netdev_max_channels(priv->netdev); int i; for (i = 0; i < nch; i++) @@ -3381,9 +3392,6 @@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, { struct mlx5e_priv *priv = cb_priv; - if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data)) - return -EOPNOTSUPP; - switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS); @@ -3438,7 +3446,7 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) struct mlx5e_pport_stats *pstats = &priv->stats.pport; /* update HW stats in background for next time */ - queue_delayed_work(priv->wq, &priv->update_stats_work, 0); + mlx5e_queue_update_stats(priv); if (mlx5e_is_uplink_rep(priv)) { stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok); @@ -4480,6 +4488,31 @@ static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeo return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); } +void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) +{ + /* Prefer Striding RQ, unless any of the following holds: + * - Striding RQ configuration is not possible/supported. + * - Slow PCI heuristic. + * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. 
+ */ + if (!slow_pci_heuristic(mdev) && + mlx5e_striding_rq_possible(mdev, params) && + (mlx5e_rx_mpwqe_is_linear_skb(mdev, params) || + !mlx5e_rx_is_linear_skb(mdev, params))) + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true); + mlx5e_set_rq_type(mdev, params); + mlx5e_init_rq_type_params(mdev, params); +} + +void mlx5e_build_rss_params(struct mlx5e_params *params) +{ + params->rss_hfunc = ETH_RSS_HASH_XOR; + netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key)); + mlx5e_build_default_indir_rqt(params->indirection_rqt, + MLX5E_INDIR_RQT_SIZE, params->num_channels); +} + void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u16 max_channels, u16 mtu) @@ -4503,20 +4536,10 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, params->rx_cqe_compress_def = slow_pci_heuristic(mdev); MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, false); /* RQ */ - /* Prefer Striding RQ, unless any of the following holds: - * - Striding RQ configuration is not possible/supported. - * - Slow PCI heuristic. - * - Legacy RQ would use linear SKB while Striding RQ would use non-linear. - */ - if (!slow_pci_heuristic(mdev) && - mlx5e_striding_rq_possible(mdev, params) && - (mlx5e_rx_mpwqe_is_linear_skb(mdev, params) || - !mlx5e_rx_is_linear_skb(mdev, params))) - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true); - mlx5e_set_rq_type(mdev, params); - mlx5e_init_rq_type_params(mdev, params); + mlx5e_build_rq_params(mdev, params); /* HW LRO */ @@ -4539,37 +4562,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev); /* RSS */ - params->rss_hfunc = ETH_RSS_HASH_XOR; - netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key)); - mlx5e_build_default_indir_rqt(params->indirection_rqt, - MLX5E_INDIR_RQT_SIZE, max_channels); -} - -static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv) -{ - struct mlx5e_priv *priv = netdev_priv(netdev); - - priv->mdev = mdev; - priv->netdev = netdev; - priv->profile = profile; - priv->ppriv = ppriv; - priv->msglevel = MLX5E_MSG_LEVEL; - priv->max_opened_tc = 1; - - mlx5e_build_nic_params(mdev, &priv->channels.params, - profile->max_nch(mdev), netdev->mtu); - - mutex_init(&priv->state_lock); - - INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); - INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); - INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work); - INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); - - mlx5e_timestamp_init(priv); + mlx5e_build_rss_params(params); } static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) @@ -4707,7 +4700,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) mlx5e_tls_build_netdev(priv); } -static void mlx5e_create_q_counters(struct mlx5e_priv *priv) +void mlx5e_create_q_counters(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; int err; @@ -4725,7 +4718,7 @@ static void mlx5e_create_q_counters(struct mlx5e_priv *priv) } } -static void mlx5e_destroy_q_counters(struct mlx5e_priv *priv) +void mlx5e_destroy_q_counters(struct mlx5e_priv *priv) { if (priv->q_counter) mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter); @@ -4734,15 +4727,23 @@ static void mlx5e_destroy_q_counters(struct mlx5e_priv *priv) 
mlx5_core_dealloc_q_counter(priv->mdev, priv->drop_rq_q_counter); } -static void mlx5e_nic_init(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv) +static int mlx5e_nic_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) { struct mlx5e_priv *priv = netdev_priv(netdev); int err; - mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv); + err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv); + if (err) + return err; + + mlx5e_build_nic_params(mdev, &priv->channels.params, + mlx5e_get_netdev_max_channels(netdev), netdev->mtu); + + mlx5e_timestamp_init(priv); + err = mlx5e_ipsec_init(priv); if (err) mlx5_core_err(mdev, "IPSec initialization failed, %d\n", err); @@ -4751,12 +4752,15 @@ static void mlx5e_nic_init(struct mlx5_core_dev *mdev, mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); mlx5e_build_nic_netdev(netdev); mlx5e_build_tc2txq_maps(priv); + + return 0; } static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { mlx5e_tls_cleanup(priv); mlx5e_ipsec_cleanup(priv); + mlx5e_netdev_cleanup(priv->netdev, priv); } static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) @@ -4764,15 +4768,23 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) struct mlx5_core_dev *mdev = priv->mdev; int err; + mlx5e_create_q_counters(priv); + + err = mlx5e_open_drop_rq(priv, &priv->drop_rq); + if (err) { + mlx5_core_err(mdev, "open drop rq failed, %d\n", err); + goto err_destroy_q_counters; + } + err = mlx5e_create_indirect_rqt(priv); if (err) - return err; + goto err_close_drop_rq; err = mlx5e_create_direct_rqts(priv); if (err) goto err_destroy_indirect_rqts; - err = mlx5e_create_indirect_tirs(priv); + err = mlx5e_create_indirect_tirs(priv, true); if (err) goto err_destroy_direct_rqts; @@ -4797,11 +4809,15 @@ err_destroy_flow_steering: err_destroy_direct_tirs: mlx5e_destroy_direct_tirs(priv); err_destroy_indirect_tirs: - mlx5e_destroy_indirect_tirs(priv); + mlx5e_destroy_indirect_tirs(priv, true); err_destroy_direct_rqts: mlx5e_destroy_direct_rqts(priv); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); +err_close_drop_rq: + mlx5e_close_drop_rq(&priv->drop_rq); +err_destroy_q_counters: + mlx5e_destroy_q_counters(priv); return err; } @@ -4810,9 +4826,11 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) mlx5e_tc_nic_cleanup(priv); mlx5e_destroy_flow_steering(priv); mlx5e_destroy_direct_tirs(priv); - mlx5e_destroy_indirect_tirs(priv); + mlx5e_destroy_indirect_tirs(priv, true); mlx5e_destroy_direct_rqts(priv); mlx5e_destroy_rqt(priv, &priv->indir_rqt); + mlx5e_close_drop_rq(&priv->drop_rq); + mlx5e_destroy_q_counters(priv); } static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) @@ -4905,7 +4923,6 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .enable = mlx5e_nic_enable, .disable = mlx5e_nic_disable, .update_stats = mlx5e_update_ndo_stats, - .max_nch = mlx5e_get_max_num_channels, .update_carrier = mlx5e_update_carrier, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe, .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, @@ -4914,13 +4931,53 @@ static const struct mlx5e_profile mlx5e_nic_profile = { /* mlx5e generic netdev management API (move to en_common.c) */ +/* mlx5e_netdev_init/cleanup must be called from profile->init/cleanup callbacks */ +int mlx5e_netdev_init(struct net_device *netdev, + struct mlx5e_priv *priv, + struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + /* priv 
init */ + priv->mdev = mdev; + priv->netdev = netdev; + priv->profile = profile; + priv->ppriv = ppriv; + priv->msglevel = MLX5E_MSG_LEVEL; + priv->max_opened_tc = 1; + + mutex_init(&priv->state_lock); + INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); + INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); + INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work); + INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work); + + priv->wq = create_singlethread_workqueue("mlx5e"); + if (!priv->wq) + return -ENOMEM; + + /* netdev init */ + netif_carrier_off(netdev); + +#ifdef CONFIG_MLX5_EN_ARFS + netdev->rx_cpu_rmap = mdev->rmap; +#endif + + return 0; +} + +void mlx5e_netdev_cleanup(struct net_device *netdev, struct mlx5e_priv *priv) +{ + destroy_workqueue(priv->wq); +} + struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, + int nch, void *ppriv) { - int nch = profile->max_nch(mdev); struct net_device *netdev; - struct mlx5e_priv *priv; + int err; netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), nch * profile->max_tc, @@ -4930,25 +4987,15 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, return NULL; } -#ifdef CONFIG_MLX5_EN_ARFS - netdev->rx_cpu_rmap = mdev->rmap; -#endif - - profile->init(mdev, netdev, profile, ppriv); - - netif_carrier_off(netdev); - - priv = netdev_priv(netdev); - - priv->wq = create_singlethread_workqueue("mlx5e"); - if (!priv->wq) - goto err_cleanup_nic; + err = profile->init(mdev, netdev, profile, ppriv); + if (err) { + mlx5_core_err(mdev, "failed to init mlx5e profile %d\n", err); + goto err_free_netdev; + } return netdev; -err_cleanup_nic: - if (profile->cleanup) - profile->cleanup(priv); +err_free_netdev: free_netdev(netdev); return NULL; @@ -4956,7 +5003,6 @@ err_cleanup_nic: int mlx5e_attach_netdev(struct mlx5e_priv *priv) { - struct mlx5_core_dev *mdev = priv->mdev; const struct mlx5e_profile *profile; int err; @@ -4967,28 +5013,16 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv) if (err) goto out; - mlx5e_create_q_counters(priv); - - err = mlx5e_open_drop_rq(priv, &priv->drop_rq); - if (err) { - mlx5_core_err(mdev, "open drop rq failed, %d\n", err); - goto err_destroy_q_counters; - } - err = profile->init_rx(priv); if (err) - goto err_close_drop_rq; + goto err_cleanup_tx; if (profile->enable) profile->enable(priv); return 0; -err_close_drop_rq: - mlx5e_close_drop_rq(&priv->drop_rq); - -err_destroy_q_counters: - mlx5e_destroy_q_counters(priv); +err_cleanup_tx: profile->cleanup_tx(priv); out: @@ -5006,10 +5040,8 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv) flush_workqueue(priv->wq); profile->cleanup_rx(priv); - mlx5e_close_drop_rq(&priv->drop_rq); - mlx5e_destroy_q_counters(priv); profile->cleanup_tx(priv); - cancel_delayed_work_sync(&priv->update_stats_work); + cancel_work_sync(&priv->update_stats_work); } void mlx5e_destroy_netdev(struct mlx5e_priv *priv) @@ -5017,7 +5049,6 @@ void mlx5e_destroy_netdev(struct mlx5e_priv *priv) const struct mlx5e_profile *profile = priv->profile; struct net_device *netdev = priv->netdev; - destroy_workqueue(priv->wq); if (profile->cleanup) profile->cleanup(priv); free_netdev(netdev); @@ -5066,6 +5097,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) void *rpriv = NULL; void *priv; int err; + int nch; err = mlx5e_check_required_hca_cap(mdev); if (err) @@ -5081,7 +5113,8 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) } #endif - netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, rpriv); + nch = 
mlx5e_get_max_num_channels(mdev); + netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, nch, rpriv); if (!netdev) { mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); goto err_free_rpriv; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index c9cc9747d21d..c3c657548824 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -46,8 +46,6 @@ #define MLX5E_REP_PARAMS_LOG_SQ_SIZE \ max(0x6, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE) -#define MLX5E_REP_PARAMS_LOG_RQ_SIZE \ - max(0x6, MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE) static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; @@ -182,12 +180,108 @@ static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset) } } +static void mlx5e_rep_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_ringparam(priv, param); +} + +static int mlx5e_rep_set_ringparam(struct net_device *dev, + struct ethtool_ringparam *param) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_set_ringparam(priv, param); +} + +static int mlx5e_replace_rep_vport_rx_rule(struct mlx5e_priv *priv, + struct mlx5_flow_destination *dest) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + struct mlx5_flow_handle *flow_rule; + + flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, + rep->vport, + dest); + if (IS_ERR(flow_rule)) + return PTR_ERR(flow_rule); + + mlx5_del_flow_rules(rpriv->vport_rx_rule); + rpriv->vport_rx_rule = flow_rule; + return 0; +} + +static void mlx5e_rep_get_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + mlx5e_ethtool_get_channels(priv, ch); +} + +static int mlx5e_rep_set_channels(struct net_device *dev, + struct ethtool_channels *ch) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + u16 curr_channels_amount = priv->channels.params.num_channels; + u32 new_channels_amount = ch->combined_count; + struct mlx5_flow_destination new_dest; + int err = 0; + + err = mlx5e_ethtool_set_channels(priv, ch); + if (err) + return err; + + if (curr_channels_amount == 1 && new_channels_amount > 1) { + new_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + new_dest.ft = priv->fs.ttc.ft.t; + } else if (new_channels_amount == 1 && curr_channels_amount > 1) { + new_dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + new_dest.tir_num = priv->direct_tir[0].tirn; + } else { + return 0; + } + + err = mlx5e_replace_rep_vport_rx_rule(priv, &new_dest); + if (err) { + netdev_warn(priv->netdev, "Failed to update vport rx rule, when going from (%d) channels to (%d) channels\n", + curr_channels_amount, new_channels_amount); + return err; + } + + return 0; +} + +static u32 mlx5e_rep_get_rxfh_key_size(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_rxfh_key_size(priv); +} + +static u32 mlx5e_rep_get_rxfh_indir_size(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return mlx5e_ethtool_get_rxfh_indir_size(priv); +} + static const struct ethtool_ops mlx5e_rep_ethtool_ops = { .get_drvinfo = mlx5e_rep_get_drvinfo, .get_link = ethtool_op_get_link, .get_strings = mlx5e_rep_get_strings, .get_sset_count = mlx5e_rep_get_sset_count, .get_ethtool_stats = mlx5e_rep_get_ethtool_stats, + .get_ringparam = 
mlx5e_rep_get_ringparam, + .set_ringparam = mlx5e_rep_set_ringparam, + .get_channels = mlx5e_rep_get_channels, + .set_channels = mlx5e_rep_set_channels, + .get_rxfh_key_size = mlx5e_rep_get_rxfh_key_size, + .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size, }; int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) @@ -759,9 +853,6 @@ static int mlx5e_rep_setup_tc_cb_egdev(enum tc_setup_type type, void *type_data, { struct mlx5e_priv *priv = cb_priv; - if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data)) - return -EOPNOTSUPP; - switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_EGRESS); @@ -775,9 +866,6 @@ static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data, { struct mlx5e_priv *priv = cb_priv; - if (!tc_cls_can_offload_and_chain0(priv->netdev, type_data)) - return -EOPNOTSUPP; - switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_rep_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS); @@ -898,8 +986,7 @@ mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) struct mlx5e_priv *priv = netdev_priv(dev); /* update HW stats in background for next time */ - queue_delayed_work(priv->wq, &priv->update_stats_work, 0); - + mlx5e_queue_update_stats(priv); memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); } @@ -934,16 +1021,20 @@ static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, params->hard_mtu = MLX5E_ETH_HARD_MTU; params->sw_mtu = mtu; params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE; - params->rq_wq_type = MLX5_WQ_TYPE_CYCLIC; - params->log_rq_mtu_frames = MLX5E_REP_PARAMS_LOG_RQ_SIZE; + /* RQ */ + mlx5e_build_rq_params(mdev, params); + + /* CQ moderation params */ params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(params, cq_period_mode); params->num_tc = 1; - params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); + + /* RSS */ + mlx5e_build_rss_params(params); } static void mlx5e_build_rep_netdev(struct net_device *netdev) @@ -963,6 +1054,16 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->features |= NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL; netdev->hw_features |= NETIF_F_HW_TC; + netdev->hw_features |= NETIF_F_SG; + netdev->hw_features |= NETIF_F_IP_CSUM; + netdev->hw_features |= NETIF_F_IPV6_CSUM; + netdev->hw_features |= NETIF_F_GRO; + netdev->hw_features |= NETIF_F_TSO; + netdev->hw_features |= NETIF_F_TSO6; + netdev->hw_features |= NETIF_F_RXCSUM; + + netdev->features |= netdev->hw_features; + eth_hw_addr_random(netdev); netdev->min_mtu = ETH_MIN_MTU; @@ -970,63 +1071,127 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev) netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu); } -static void mlx5e_init_rep(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv) +static int mlx5e_init_rep(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) { struct mlx5e_priv *priv = netdev_priv(netdev); + int err; - priv->mdev = mdev; - priv->netdev = netdev; - priv->profile = profile; - priv->ppriv = ppriv; - - mutex_init(&priv->state_lock); + err = mlx5e_netdev_init(netdev, priv, mdev, profile, ppriv); + if (err) + return err; - INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); - priv->channels.params.num_channels = profile->max_nch(mdev); + 
priv->channels.params.num_channels = + mlx5e_get_netdev_max_channels(netdev); mlx5e_build_rep_params(mdev, &priv->channels.params, netdev->mtu); mlx5e_build_rep_netdev(netdev); mlx5e_timestamp_init(priv); + + return 0; } -static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) +static void mlx5e_cleanup_rep(struct mlx5e_priv *priv) +{ + mlx5e_netdev_cleanup(priv->netdev, priv); +} + +static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) +{ + struct ttc_params ttc_params = {}; + int tt, err; + + priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + + /* The inner_ttc in the ttc params is intentionally not set */ + ttc_params.any_tt_tirn = priv->direct_tir[0].tirn; + mlx5e_set_ttc_ft_params(&ttc_params); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + ttc_params.indir_tirn[tt] = priv->indir_tir[tt].tirn; + + err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); + if (err) { + netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n", err); + return err; + } + return 0; +} + +static int mlx5e_create_rep_vport_rx_rule(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_flow_handle *flow_rule; + struct mlx5_flow_destination dest; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = priv->direct_tir[0].tirn; + flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, + rep->vport, + &dest); + if (IS_ERR(flow_rule)) + return PTR_ERR(flow_rule); + rpriv->vport_rx_rule = flow_rule; + return 0; +} + +static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; int err; mlx5e_init_l2_addr(priv); + err = mlx5e_open_drop_rq(priv, &priv->drop_rq); + if (err) { + mlx5_core_err(mdev, "open drop rq failed, %d\n", err); + return err; + } + + err = mlx5e_create_indirect_rqt(priv); + if (err) + goto err_close_drop_rq; + err = mlx5e_create_direct_rqts(priv); if (err) - return err; + goto err_destroy_indirect_rqts; - err = mlx5e_create_direct_tirs(priv); + err = mlx5e_create_indirect_tirs(priv, false); if (err) goto err_destroy_direct_rqts; - flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, - rep->vport, - priv->direct_tir[0].tirn); - if (IS_ERR(flow_rule)) { - err = PTR_ERR(flow_rule); + err = mlx5e_create_direct_tirs(priv); + if (err) + goto err_destroy_indirect_tirs; + + err = mlx5e_create_rep_ttc_table(priv); + if (err) goto err_destroy_direct_tirs; - } - rpriv->vport_rx_rule = flow_rule; + + err = mlx5e_create_rep_vport_rx_rule(priv); + if (err) + goto err_destroy_ttc_table; return 0; +err_destroy_ttc_table: + mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); err_destroy_direct_tirs: mlx5e_destroy_direct_tirs(priv); +err_destroy_indirect_tirs: + mlx5e_destroy_indirect_tirs(priv, false); err_destroy_direct_rqts: mlx5e_destroy_direct_rqts(priv); +err_destroy_indirect_rqts: + mlx5e_destroy_rqt(priv, &priv->indir_rqt); +err_close_drop_rq: + mlx5e_close_drop_rq(&priv->drop_rq); return err; } @@ -1035,8 +1200,12 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) struct mlx5e_rep_priv *rpriv = priv->ppriv; mlx5_del_flow_rules(rpriv->vport_rx_rule); + mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_indirect_tirs(priv, false); mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_rqt(priv, &priv->indir_rqt); + mlx5e_close_drop_rq(&priv->drop_rq); } static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) @@ -1051,23 +1220,17 @@ static int 
mlx5e_init_rep_tx(struct mlx5e_priv *priv) return 0; } -static int mlx5e_get_rep_max_num_channels(struct mlx5_core_dev *mdev) -{ -#define MLX5E_PORT_REPRESENTOR_NCH 1 - return MLX5E_PORT_REPRESENTOR_NCH; -} - static const struct mlx5e_profile mlx5e_rep_profile = { .init = mlx5e_init_rep, + .cleanup = mlx5e_cleanup_rep, .init_rx = mlx5e_init_rep_rx, .cleanup_rx = mlx5e_cleanup_rep_rx, .init_tx = mlx5e_init_rep_tx, .cleanup_tx = mlx5e_cleanup_nic_tx, .update_stats = mlx5e_rep_update_hw_counters, - .max_nch = mlx5e_get_rep_max_num_channels, .update_carrier = NULL, .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, - .rx_handlers.handle_rx_cqe_mpwqe = NULL /* Not supported */, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, .max_tc = 1, }; @@ -1127,13 +1290,14 @@ mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) struct mlx5e_rep_priv *rpriv; struct net_device *netdev; struct mlx5e_priv *upriv; - int err; + int nch, err; rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); if (!rpriv) return -ENOMEM; - netdev = mlx5e_create_netdev(dev, &mlx5e_rep_profile, rpriv); + nch = mlx5e_get_max_num_channels(dev); + netdev = mlx5e_create_netdev(dev, &mlx5e_rep_profile, nch, rpriv); if (!netdev) { pr_warn("Failed to create representor netdev for vport %d\n", rep->vport); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 00172dee5339..2f7fb8de6967 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -37,6 +37,7 @@ #include <net/busy_poll.h> #include <net/ip6_checksum.h> #include <net/page_pool.h> +#include <net/inet_ecn.h> #include "en.h" #include "en_tc.h" #include "eswitch.h" @@ -688,12 +689,29 @@ static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe, skb_set_hash(skb, be32_to_cpu(cqe->rss_hash_result), ht); } -static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth) +static inline bool is_last_ethertype_ip(struct sk_buff *skb, int *network_depth, + __be16 *proto) { - __be16 ethertype = ((struct ethhdr *)skb->data)->h_proto; + *proto = ((struct ethhdr *)skb->data)->h_proto; + *proto = __vlan_get_protocol(skb, *proto, network_depth); + return (*proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6)); +} + +static inline void mlx5e_enable_ecn(struct mlx5e_rq *rq, struct sk_buff *skb) +{ + int network_depth = 0; + __be16 proto; + void *ip; + int rc; - ethertype = __vlan_get_protocol(skb, ethertype, network_depth); - return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6)); + if (unlikely(!is_last_ethertype_ip(skb, &network_depth, &proto))) + return; + + ip = skb->data + network_depth; + rc = ((proto == htons(ETH_P_IP)) ? IP_ECN_set_ce((struct iphdr *)ip) : + IP6_ECN_set_ce(skb, (struct ipv6hdr *)ip)); + + rq->stats->ecn_mark += !!rc; } static __be32 mlx5e_get_fcs(struct sk_buff *skb) @@ -735,6 +753,14 @@ static __be32 mlx5e_get_fcs(struct sk_buff *skb) return fcs_bytes; } +static u8 get_ip_proto(struct sk_buff *skb, __be16 proto) +{ + void *ip_p = skb->data + sizeof(struct ethhdr); + + return (proto == htons(ETH_P_IP)) ? 
((struct iphdr *)ip_p)->protocol : + ((struct ipv6hdr *)ip_p)->nexthdr; +} + static inline void mlx5e_handle_csum(struct net_device *netdev, struct mlx5_cqe64 *cqe, struct mlx5e_rq *rq, @@ -743,6 +769,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, { struct mlx5e_rq_stats *stats = rq->stats; int network_depth = 0; + __be16 proto; if (unlikely(!(netdev->features & NETIF_F_RXCSUM))) goto csum_none; @@ -753,7 +780,13 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; } - if (likely(is_last_ethertype_ip(skb, &network_depth))) { + if (unlikely(test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state))) + goto csum_unnecessary; + + if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) { + if (unlikely(get_ip_proto(skb, proto) == IPPROTO_SCTP)) + goto csum_unnecessary; + skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); if (network_depth > ETH_HLEN) @@ -771,8 +804,10 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, return; } +csum_unnecessary: if (likely((cqe->hds_ip_ext & CQE_L3_OK) && - (cqe->hds_ip_ext & CQE_L4_OK))) { + ((cqe->hds_ip_ext & CQE_L4_OK) || + (get_cqe_l4_hdr_type(cqe) == CQE_L4_HDR_TYPE_NONE)))) { skb->ip_summed = CHECKSUM_UNNECESSARY; if (cqe_is_tunneled(cqe)) { skb->csum_level = 1; @@ -788,6 +823,8 @@ csum_none: stats->csum_none++; } +#define MLX5E_CE_BIT_MASK 0x80 + static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, u32 cqe_bcnt, struct mlx5e_rq *rq, @@ -832,6 +869,10 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK; mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg); + /* checking CE bit in cqe - MSB in ml_path field */ + if (unlikely(cqe->ml_path & MLX5E_CE_BIT_MASK)) + mlx5e_enable_ecn(rq, skb); + skb->protocol = eth_type_trans(skb, netdev); } @@ -1228,8 +1269,8 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, u32 cqe_bcnt, struct sk_buff *skb) { - struct mlx5e_rq_stats *stats = rq->stats; struct hwtstamp_config *tstamp; + struct mlx5e_rq_stats *stats; struct net_device *netdev; struct mlx5e_priv *priv; char *pseudo_header; @@ -1252,6 +1293,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, priv = mlx5i_epriv(netdev); tstamp = &priv->tstamp; + stats = &priv->channel_stats[rq->ix].rq; g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 6839481f7697..1e55b9c27ffc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -53,6 +53,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_ecn_mark) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_removed_vlan_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, @@ -92,6 +93,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) }, { MLX5E_DECLARE_STAT(struct 
mlx5e_sw_stats, ch_events) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_poll) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, ch_arm) }, @@ -131,7 +133,7 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) memset(s, 0, sizeof(*s)); - for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) { + for (i = 0; i < mlx5e_get_netdev_max_channels(priv->netdev); i++) { struct mlx5e_channel_stats *channel_stats = &priv->channel_stats[i]; struct mlx5e_xdpsq_stats *xdpsq_red_stats = &channel_stats->xdpsq; @@ -144,6 +146,7 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_bytes += rq_stats->bytes; s->rx_lro_packets += rq_stats->lro_packets; s->rx_lro_bytes += rq_stats->lro_bytes; + s->rx_ecn_mark += rq_stats->ecn_mark; s->rx_removed_vlan_packets += rq_stats->removed_vlan_packets; s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; @@ -168,6 +171,7 @@ void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv) s->rx_cache_busy += rq_stats->cache_busy; s->rx_cache_waive += rq_stats->cache_waive; s->rx_congst_umr += rq_stats->congst_umr; + s->rx_arfs_err += rq_stats->arfs_err; s->ch_events += ch_stats->events; s->ch_poll += ch_stats->poll; s->ch_arm += ch_stats->arm; @@ -610,46 +614,82 @@ static const struct counter_desc pport_phy_statistical_stats_desc[] = { { "rx_corrected_bits_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits) }, }; -#define NUM_PPORT_PHY_STATISTICAL_COUNTERS ARRAY_SIZE(pport_phy_statistical_stats_desc) +static const struct counter_desc +pport_phy_statistical_err_lanes_stats_desc[] = { + { "rx_err_lane_0_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane0) }, + { "rx_err_lane_1_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane1) }, + { "rx_err_lane_2_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane2) }, + { "rx_err_lane_3_phy", PPORT_PHY_STATISTICAL_OFF(phy_corrected_bits_lane3) }, +}; + +#define NUM_PPORT_PHY_STATISTICAL_COUNTERS \ + ARRAY_SIZE(pport_phy_statistical_stats_desc) +#define NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS \ + ARRAY_SIZE(pport_phy_statistical_err_lanes_stats_desc) static int mlx5e_grp_phy_get_num_stats(struct mlx5e_priv *priv) { + struct mlx5_core_dev *mdev = priv->mdev; + int num_stats; + /* "1" for link_down_events special counter */ - return MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group) ? - NUM_PPORT_PHY_STATISTICAL_COUNTERS + 1 : 1; + num_stats = 1; + + num_stats += MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group) ? + NUM_PPORT_PHY_STATISTICAL_COUNTERS : 0; + + num_stats += MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters) ? 
+ NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS : 0; + + return num_stats; } static int mlx5e_grp_phy_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) { + struct mlx5_core_dev *mdev = priv->mdev; int i; strcpy(data + (idx++) * ETH_GSTRING_LEN, "link_down_events_phy"); - if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group)) + if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) return idx; for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_phy_statistical_stats_desc[i].format); + + if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_phy_statistical_err_lanes_stats_desc[i].format); + return idx; } static int mlx5e_grp_phy_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) { + struct mlx5_core_dev *mdev = priv->mdev; int i; /* link_down_events_phy has special handling since it is not stored in __be64 format */ data[idx++] = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, counter_set.phys_layer_cntrs.link_down_events); - if (!MLX5_CAP_PCAM_FEATURE((priv)->mdev, ppcnt_statistical_group)) + if (!MLX5_CAP_PCAM_FEATURE(mdev, ppcnt_statistical_group)) return idx; for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, pport_phy_statistical_stats_desc, i); + + if (MLX5_CAP_PCAM_FEATURE(mdev, per_lane_error_counters)) + for (i = 0; i < NUM_PPORT_PHY_STATISTICAL_PER_LANE_COUNTERS; i++) + data[idx++] = + MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, + pport_phy_statistical_err_lanes_stats_desc, + i); return idx; } @@ -1144,6 +1184,7 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_redirect) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, ecn_mark) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, removed_vlan_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) }, @@ -1158,6 +1199,7 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) }, }; static const struct counter_desc sq_stats_desc[] = { @@ -1211,7 +1253,7 @@ static const struct counter_desc ch_stats_desc[] = { static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) { - int max_nch = priv->profile->max_nch(priv->mdev); + int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); return (NUM_RQ_STATS * max_nch) + (NUM_CH_STATS * max_nch) + @@ -1223,7 +1265,7 @@ static int mlx5e_grp_channels_get_num_stats(struct mlx5e_priv *priv) static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, int idx) { - int max_nch = priv->profile->max_nch(priv->mdev); + int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); int i, j, tc; for (i = 0; i < max_nch; i++) @@ -1258,7 +1300,7 @@ static int mlx5e_grp_channels_fill_strings(struct mlx5e_priv *priv, u8 *data, static int mlx5e_grp_channels_fill_stats(struct mlx5e_priv *priv, u64 *data, int idx) { - int max_nch = priv->profile->max_nch(priv->mdev); + int max_nch = 
mlx5e_get_netdev_max_channels(priv->netdev); int i, j, tc; for (i = 0; i < max_nch; i++) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index a4c035aedd46..77f74ce11280 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -66,6 +66,7 @@ struct mlx5e_sw_stats { u64 tx_nop; u64 rx_lro_packets; u64 rx_lro_bytes; + u64 rx_ecn_mark; u64 rx_removed_vlan_packets; u64 rx_csum_unnecessary; u64 rx_csum_none; @@ -105,6 +106,7 @@ struct mlx5e_sw_stats { u64 rx_cache_busy; u64 rx_cache_waive; u64 rx_congst_umr; + u64 rx_arfs_err; u64 ch_events; u64 ch_poll; u64 ch_arm; @@ -184,6 +186,7 @@ struct mlx5e_rq_stats { u64 csum_none; u64 lro_packets; u64 lro_bytes; + u64 ecn_mark; u64 removed_vlan_packets; u64 xdp_drop; u64 xdp_redirect; @@ -200,6 +203,7 @@ struct mlx5e_rq_stats { u64 cache_busy; u64 cache_waive; u64 congst_umr; + u64 arfs_err; }; struct mlx5e_sq_stats { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 85796727093e..608025ca5c04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -61,6 +61,7 @@ struct mlx5_nic_flow_attr { u32 hairpin_tirn; u8 match_level; struct mlx5_flow_table *hairpin_ft; + struct mlx5_fc *counter; }; #define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1) @@ -73,6 +74,7 @@ enum { MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE + 2), MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 3), MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 4), + MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 5), }; #define MLX5E_TC_MAX_SPLITS 1 @@ -81,7 +83,7 @@ struct mlx5e_tc_flow { struct rhash_head node; struct mlx5e_priv *priv; u64 cookie; - u8 flags; + u16 flags; struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; struct list_head encap; /* flows sharing the same encap ID */ struct list_head mod_hdr; /* flows sharing the same mod hdr ID */ @@ -100,11 +102,6 @@ struct mlx5e_tc_flow_parse_attr { int mirred_ifindex; }; -enum { - MLX5_HEADER_TYPE_VXLAN = 0x0, - MLX5_HEADER_TYPE_NVGRE = 0x1, -}; - #define MLX5E_TC_TABLE_NUM_GROUPS 4 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16) @@ -532,7 +529,8 @@ static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv, #define UNKNOWN_MATCH_PRIO 8 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, u8 *match_prio) + struct mlx5_flow_spec *spec, u8 *match_prio, + struct netlink_ext_ack *extack) { void *headers_c, *headers_v; u8 prio_val, prio_mask = 0; @@ -540,8 +538,8 @@ static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv, #ifdef CONFIG_MLX5_CORE_EN_DCB if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) { - netdev_warn(priv->netdev, - "only PCP trust state supported for hairpin\n"); + NL_SET_ERR_MSG_MOD(extack, + "only PCP trust state supported for hairpin"); return -EOPNOTSUPP; } #endif @@ -557,8 +555,8 @@ static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv, if (!vlan_present || !prio_mask) { prio_val = UNKNOWN_MATCH_PRIO; } else if (prio_mask != 0x7) { - netdev_warn(priv->netdev, - "masked priority match not supported for hairpin\n"); + NL_SET_ERR_MSG_MOD(extack, + "masked priority match not supported for hairpin"); return -EOPNOTSUPP; } @@ -568,7 +566,8 @@ static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv, static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, - 
struct mlx5e_tc_flow_parse_attr *parse_attr) + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct netlink_ext_ack *extack) { int peer_ifindex = parse_attr->mirred_ifindex; struct mlx5_hairpin_params params; @@ -583,12 +582,13 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) { - netdev_warn(priv->netdev, "hairpin is not supported\n"); + NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported"); return -EOPNOTSUPP; } peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id); - err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio); + err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio, + extack); if (err) return err; hpe = mlx5e_hairpin_get(priv, peer_id, match_prio); @@ -674,29 +674,28 @@ static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, } } -static struct mlx5_flow_handle * +static int mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct mlx5e_tc_flow *flow) + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) { struct mlx5_nic_flow_attr *attr = flow->nic_attr; struct mlx5_core_dev *dev = priv->mdev; struct mlx5_flow_destination dest[2] = {}; struct mlx5_flow_act flow_act = { .action = attr->action, - .has_flow_tag = true, .flow_tag = attr->flow_tag, - .encap_id = 0, + .reformat_id = 0, + .flags = FLOW_ACT_HAS_TAG | FLOW_ACT_NO_APPEND, }; struct mlx5_fc *counter = NULL; - struct mlx5_flow_handle *rule; bool table_created = false; int err, dest_ix = 0; if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { - err = mlx5e_hairpin_flow_add(priv, flow, parse_attr); + err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); if (err) { - rule = ERR_PTR(err); goto err_add_hairpin_flow; } if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) { @@ -716,22 +715,21 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(dev, true); if (IS_ERR(counter)) { - rule = ERR_CAST(counter); + err = PTR_ERR(counter); goto err_fc_create; } dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[dest_ix].counter = counter; + dest[dest_ix].counter_id = mlx5_fc_id(counter); dest_ix++; + attr->counter = counter; } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); flow_act.modify_id = attr->mod_hdr_id; kfree(parse_attr->mod_hdr_actions); - if (err) { - rule = ERR_PTR(err); + if (err) goto err_create_mod_hdr_id; - } } if (IS_ERR_OR_NULL(priv->fs.tc.t)) { @@ -753,9 +751,11 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, MLX5E_TC_TABLE_NUM_GROUPS, MLX5E_TC_FT_LEVEL, 0); if (IS_ERR(priv->fs.tc.t)) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to create tc offload table\n"); netdev_err(priv->netdev, "Failed to create tc offload table\n"); - rule = ERR_CAST(priv->fs.tc.t); + err = PTR_ERR(priv->fs.tc.t); goto err_create_ft; } @@ -765,13 +765,15 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, if (attr->match_level != MLX5_MATCH_NONE) parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, - &flow_act, dest, dest_ix); + flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, + &flow_act, dest, dest_ix); - if (IS_ERR(rule)) + if (IS_ERR(flow->rule[0])) { + err = PTR_ERR(flow->rule[0]); goto err_add_rule; + } - return rule; + return 0; err_add_rule: if (table_created) { @@ -787,7 
+789,7 @@ err_fc_create: if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) mlx5e_hairpin_flow_del(priv, flow); err_add_hairpin_flow: - return rule; + return err; } static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, @@ -796,7 +798,7 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, struct mlx5_nic_flow_attr *attr = flow->nic_attr; struct mlx5_fc *counter = NULL; - counter = mlx5_flow_rule_counter(flow->rule[0]); + counter = attr->counter; mlx5_del_flow_rules(flow->rule[0]); mlx5_fc_destroy(priv->mdev, counter); @@ -819,30 +821,119 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct ip_tunnel_info *tun_info, struct net_device *mirred_dev, struct net_device **encap_dev, - struct mlx5e_tc_flow *flow); + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack); static struct mlx5_flow_handle * +mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *attr) +{ + struct mlx5_flow_handle *rule; + + rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + if (IS_ERR(rule)) + return rule; + + if (attr->mirror_count) { + flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); + if (IS_ERR(flow->rule[1])) { + mlx5_eswitch_del_offloaded_rule(esw, rule, attr); + return flow->rule[1]; + } + } + + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + return rule; +} + +static void +mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_esw_flow_attr *attr) +{ + flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + + if (attr->mirror_count) + mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); + + mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); +} + +static struct mlx5_flow_handle * +mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_flow_spec *spec, + struct mlx5_esw_flow_attr *slow_attr) +{ + struct mlx5_flow_handle *rule; + + memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); + slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + slow_attr->mirror_count = 0, + slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN, + + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); + if (!IS_ERR(rule)) + flow->flags |= MLX5E_TC_FLOW_SLOW; + + return rule; +} + +static void +mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw, + struct mlx5e_tc_flow *flow, + struct mlx5_esw_flow_attr *slow_attr) +{ + memcpy(slow_attr, flow->esw_attr, sizeof(*slow_attr)); + mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); + flow->flags &= ~MLX5E_TC_FLOW_SLOW; +} + +static int mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct mlx5e_tc_flow *flow) + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + u32 max_chain = mlx5_eswitch_get_chain_range(esw); struct mlx5_esw_flow_attr *attr = flow->esw_attr; + u16 max_prio = mlx5_eswitch_get_prio_range(esw); struct net_device *out_dev, *encap_dev = NULL; - struct mlx5_flow_handle *rule = NULL; + struct mlx5_fc *counter = NULL; struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; - int err; + int err = 0, encap_err = 0; + + /* if prios are not supported, keep the old behaviour of using same prio + * for all offloaded rules. 
+ */ + if (!mlx5_eswitch_prios_supported(esw)) + attr->prio = 1; - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { + if (attr->chain > max_chain) { + NL_SET_ERR_MSG(extack, "Requested chain is out of supported range"); + err = -EOPNOTSUPP; + goto err_max_prio_chain; + } + + if (attr->prio > max_prio) { + NL_SET_ERR_MSG(extack, "Requested priority is out of supported range"); + err = -EOPNOTSUPP; + goto err_max_prio_chain; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { out_dev = __dev_get_by_index(dev_net(priv->netdev), attr->parse_attr->mirred_ifindex); - err = mlx5e_attach_encap(priv, &parse_attr->tun_info, - out_dev, &encap_dev, flow); - if (err) { - rule = ERR_PTR(err); - if (err != -EAGAIN) - goto err_attach_encap; + encap_err = mlx5e_attach_encap(priv, &parse_attr->tun_info, + out_dev, &encap_dev, flow, + extack); + if (encap_err && encap_err != -EAGAIN) { + err = encap_err; + goto err_attach_encap; } out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; @@ -851,49 +942,58 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, } err = mlx5_eswitch_add_vlan_action(esw, attr); - if (err) { - rule = ERR_PTR(err); + if (err) goto err_add_vlan; - } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); kfree(parse_attr->mod_hdr_actions); - if (err) { - rule = ERR_PTR(err); + if (err) goto err_mod_hdr; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + counter = mlx5_fc_create(esw->dev, true); + if (IS_ERR(counter)) { + err = PTR_ERR(counter); + goto err_create_counter; } + + attr->counter = counter; } - /* we get here if (1) there's no error (rule being null) or when + /* we get here if (1) there's no error or when * (2) there's an encap action and we're on -EAGAIN (no valid neigh) */ - if (rule != ERR_PTR(-EAGAIN)) { - rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr); - if (IS_ERR(rule)) - goto err_add_rule; - - if (attr->mirror_count) { - flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &parse_attr->spec, attr); - if (IS_ERR(flow->rule[1])) - goto err_fwd_rule; - } + if (encap_err == -EAGAIN) { + /* continue with goto slow path rule instead */ + struct mlx5_esw_flow_attr slow_attr; + + flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec, &slow_attr); + } else { + flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); + } + + if (IS_ERR(flow->rule[0])) { + err = PTR_ERR(flow->rule[0]); + goto err_add_rule; } - return rule; -err_fwd_rule: - mlx5_eswitch_del_offloaded_rule(esw, rule, attr); - rule = flow->rule[1]; + return 0; + err_add_rule: + mlx5_fc_destroy(esw->dev, counter); +err_create_counter: if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); err_mod_hdr: mlx5_eswitch_del_vlan_action(esw, attr); err_add_vlan: - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) mlx5e_detach_encap(priv, flow); err_attach_encap: - return rule; +err_max_prio_chain: + return err; } static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, @@ -901,36 +1001,43 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5_esw_flow_attr slow_attr; if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { - flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; - if (attr->mirror_count) - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr); - 
mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + if (flow->flags & MLX5E_TC_FLOW_SLOW) + mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); + else + mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); } mlx5_eswitch_del_vlan_action(esw, attr); - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) { mlx5e_detach_encap(priv, flow); kvfree(attr->parse_attr); } if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow); + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + mlx5_fc_destroy(esw->dev, attr->counter); } void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_esw_flow_attr *esw_attr; + struct mlx5_esw_flow_attr slow_attr, *esw_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; struct mlx5e_tc_flow *flow; int err; - err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, - e->encap_size, e->encap_header, - &e->encap_id); + err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, + e->encap_size, e->encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); if (err) { mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n", err); @@ -942,26 +1049,20 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, list_for_each_entry(flow, &e->flows, encap) { esw_attr = flow->esw_attr; esw_attr->encap_id = e->encap_id; - flow->rule[0] = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr); - if (IS_ERR(flow->rule[0])) { - err = PTR_ERR(flow->rule[0]); + spec = &esw_attr->parse_attr->spec; + + /* update from slow path rule to encap rule */ + rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", err); continue; } - if (esw_attr->mirror_count) { - flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, &esw_attr->parse_attr->spec, esw_attr); - if (IS_ERR(flow->rule[1])) { - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], esw_attr); - err = PTR_ERR(flow->rule[1]); - mlx5_core_warn(priv->mdev, "Failed to update cached mirror flow, %d\n", - err); - continue; - } - } - - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when slow path rule removed */ + flow->rule[0] = rule; } } @@ -969,25 +1070,44 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr slow_attr; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; struct mlx5e_tc_flow *flow; + int err; list_for_each_entry(flow, &e->flows, encap) { - if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { - struct mlx5_esw_flow_attr *attr = flow->esw_attr; + spec = &flow->esw_attr->parse_attr->spec; - flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; - if (attr->mirror_count) - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[1], attr); - mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr); + /* update from encap rule to slow path rule */ + rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec, &slow_attr); + + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n", + err); + continue; } + + mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr); + flow->flags |= 
MLX5E_TC_FLOW_OFFLOADED; /* was unset when fast path rule removed */ + flow->rule[0] = rule; } if (e->flags & MLX5_ENCAP_ENTRY_VALID) { e->flags &= ~MLX5_ENCAP_ENTRY_VALID; - mlx5_encap_dealloc(priv->mdev, e->encap_id); + mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); } } +static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) +{ + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + return flow->esw_attr->counter; + else + return flow->nic_attr->counter; +} + void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) { struct mlx5e_neigh *m_neigh = &nhe->m_neigh; @@ -1013,7 +1133,7 @@ void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) continue; list_for_each_entry(flow, &e->flows, encap) { if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { - counter = mlx5_flow_rule_counter(flow->rule[0]); + counter = mlx5e_tc_get_counter(flow); mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { neigh_used = true; @@ -1053,7 +1173,7 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); if (e->flags & MLX5_ENCAP_ENTRY_VALID) - mlx5_encap_dealloc(priv->mdev, e->encap_id); + mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); hash_del_rcu(&e->encap_hlist); kfree(e->encap_header); @@ -1105,6 +1225,7 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f) { + struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, @@ -1133,6 +1254,8 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) parse_vxlan_attr(spec, f); else { + NL_SET_ERR_MSG_MOD(extack, + "port isn't an offloaded vxlan udp dport"); netdev_warn(priv->netdev, "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->dst)); return -EOPNOTSUPP; @@ -1149,6 +1272,8 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, udp_sport, ntohs(key->src)); } else { /* udp dst port must be given */ vxlan_match_offload_err: + NL_SET_ERR_MSG_MOD(extack, + "IP tunnel decap offload supported only for vxlan, must set UDP dport"); netdev_warn(priv->netdev, "IP tunnel decap offload supported only for vxlan, must set UDP dport\n"); return -EOPNOTSUPP; @@ -1225,6 +1350,16 @@ vxlan_match_offload_err: MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl); + + if (mask->ttl && + !MLX5_CAP_ESW_FLOWTABLE_FDB + (priv->mdev, + ft_field_support.outer_ipv4_ttl)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on TTL is not supported"); + return -EOPNOTSUPP; + } + } /* Enforce DMAC when offloading incoming tunneled flows. 
@@ -1247,6 +1382,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, u8 *match_level) { + struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, @@ -1277,6 +1413,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, BIT(FLOW_DISSECTOR_KEY_TCP) | BIT(FLOW_DISSECTOR_KEY_IP) | BIT(FLOW_DISSECTOR_KEY_ENC_IP))) { + NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", f->dissector->used_keys); return -EOPNOTSUPP; @@ -1553,8 +1690,11 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, if (mask->ttl && !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, - ft_field_support.outer_ipv4_ttl)) + ft_field_support.outer_ipv4_ttl)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on TTL is not supported"); return -EOPNOTSUPP; + } if (mask->tos || mask->ttl) *match_level = MLX5_MATCH_L3; @@ -1596,6 +1736,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, udp_dport, ntohs(key->dst)); break; default: + NL_SET_ERR_MSG_MOD(extack, + "Only UDP and TCP transports are supported for L4 matching"); netdev_err(priv->netdev, "Only UDP and TCP transport are supported\n"); return -EINVAL; @@ -1632,6 +1774,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f) { + struct netlink_ext_ack *extack = f->common.extack; struct mlx5_core_dev *dev = priv->mdev; struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; @@ -1646,6 +1789,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, if (rep->vport != FDB_UPLINK_VPORT && (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && esw->offloads.inline_mode < match_level)) { + NL_SET_ERR_MSG_MOD(extack, + "Flow is not offloaded due to min inline setting"); netdev_warn(priv->netdev, "Flow is not offloaded due to min inline setting, required %d actual %d\n", match_level, esw->offloads.inline_mode); @@ -1747,7 +1892,8 @@ static struct mlx5_fields fields[] = { */ static int offload_pedit_fields(struct pedit_headers *masks, struct pedit_headers *vals, - struct mlx5e_tc_flow_parse_attr *parse_attr) + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct netlink_ext_ack *extack) { struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; int i, action_size, nactions, max_actions, first, last, next_z; @@ -1786,11 +1932,15 @@ static int offload_pedit_fields(struct pedit_headers *masks, continue; if (s_mask && a_mask) { + NL_SET_ERR_MSG_MOD(extack, + "can't set and add to the same HW field"); printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field); return -EOPNOTSUPP; } if (nactions == max_actions) { + NL_SET_ERR_MSG_MOD(extack, + "too many pedit actions, can't offload"); printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions); return -EOPNOTSUPP; } @@ -1823,6 +1973,8 @@ static int offload_pedit_fields(struct pedit_headers *masks, next_z = find_next_zero_bit(&mask, field_bsize, first); last = find_last_bit(&mask, field_bsize); if (first < next_z && next_z < last) { + NL_SET_ERR_MSG_MOD(extack, + "rewrite of few sub-fields isn't supported"); printk(KERN_WARNING "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n", mask); return -EOPNOTSUPP; @@ -1881,7 +2033,8 @@ static const struct pedit_headers zero_masks = {}; static int parse_tc_pedit_action(struct mlx5e_priv *priv, const struct 
tc_action *a, int namespace, - struct mlx5e_tc_flow_parse_attr *parse_attr) + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct netlink_ext_ack *extack) { struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks; int nkeys, i, err = -EOPNOTSUPP; @@ -1899,12 +2052,13 @@ static int parse_tc_pedit_action(struct mlx5e_priv *priv, err = -EOPNOTSUPP; /* can't be all optimistic */ if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) { - netdev_warn(priv->netdev, "legacy pedit isn't offloaded\n"); + NL_SET_ERR_MSG_MOD(extack, + "legacy pedit isn't offloaded"); goto out_err; } if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) { - netdev_warn(priv->netdev, "pedit cmd %d isn't offloaded\n", cmd); + NL_SET_ERR_MSG_MOD(extack, "pedit cmd isn't offloaded"); goto out_err; } @@ -1921,13 +2075,15 @@ static int parse_tc_pedit_action(struct mlx5e_priv *priv, if (err) goto out_err; - err = offload_pedit_fields(masks, vals, parse_attr); + err = offload_pedit_fields(masks, vals, parse_attr, extack); if (err < 0) goto out_dealloc_parsed_actions; for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { cmd_masks = &masks[cmd]; if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { + NL_SET_ERR_MSG_MOD(extack, + "attempt to offload an unsupported field"); netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd); print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS, 16, 1, cmd_masks, sizeof(zero_masks), true); @@ -1944,19 +2100,26 @@ out_err: return err; } -static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 update_flags) +static bool csum_offload_supported(struct mlx5e_priv *priv, + u32 action, + u32 update_flags, + struct netlink_ext_ack *extack) { u32 prot_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR | TCA_CSUM_UPDATE_FLAG_TCP | TCA_CSUM_UPDATE_FLAG_UDP; /* The HW recalcs checksums only if re-writing headers */ if (!(action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) { + NL_SET_ERR_MSG_MOD(extack, + "TC csum action is only offloaded with pedit"); netdev_warn(priv->netdev, "TC csum action is only offloaded with pedit\n"); return false; } if (update_flags & ~prot_flags) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload TC csum action for some header/s"); netdev_warn(priv->netdev, "can't offload TC csum action for some header/s - flags %#x\n", update_flags); @@ -1967,7 +2130,8 @@ static bool csum_offload_supported(struct mlx5e_priv *priv, u32 action, u32 upda } static bool modify_header_match_supported(struct mlx5_flow_spec *spec, - struct tcf_exts *exts) + struct tcf_exts *exts, + struct netlink_ext_ack *extack) { const struct tc_action *a; bool modify_ip_header; @@ -2005,6 +2169,8 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); if (modify_ip_header && ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload re-write of non TCP/UDP"); pr_info("can't offload re-write of ip proto %d\n", ip_proto); return false; } @@ -2016,7 +2182,8 @@ out_ok: static bool actions_match_supported(struct mlx5e_priv *priv, struct tcf_exts *exts, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct mlx5e_tc_flow *flow) + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) { u32 actions; @@ -2030,7 +2197,8 @@ static bool actions_match_supported(struct mlx5e_priv *priv, return false; if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - return modify_header_match_supported(&parse_attr->spec, 
exts); + return modify_header_match_supported(&parse_attr->spec, exts, + extack); return true; } @@ -2043,15 +2211,16 @@ static bool same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) fmdev = priv->mdev; pmdev = peer_priv->mdev; - mlx5_query_nic_vport_system_image_guid(fmdev, &fsystem_guid); - mlx5_query_nic_vport_system_image_guid(pmdev, &psystem_guid); + fsystem_guid = mlx5_query_nic_system_image_guid(fmdev); + psystem_guid = mlx5_query_nic_system_image_guid(pmdev); return (fsystem_guid == psystem_guid); } static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct mlx5e_tc_flow *flow) + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) { struct mlx5_nic_flow_attr *attr = flow->nic_attr; const struct tc_action *a; @@ -2075,7 +2244,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (is_tcf_pedit(a)) { err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL, - parse_attr); + parse_attr, extack); if (err) return err; @@ -2086,7 +2255,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (is_tcf_csum(a)) { if (csum_offload_supported(priv, action, - tcf_csum_update_flags(a))) + tcf_csum_update_flags(a), + extack)) continue; return -EOPNOTSUPP; @@ -2102,6 +2272,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; } else { + NL_SET_ERR_MSG_MOD(extack, + "device is not on same HW, can't offload"); netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n", peer_dev->name); return -EINVAL; @@ -2113,8 +2285,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, u32 mark = tcf_skbedit_mark(a); if (mark & ~MLX5E_TC_FLOW_ID_MASK) { - netdev_warn(priv->netdev, "Bad flow mark - only 16 bit is supported: 0x%x\n", - mark); + NL_SET_ERR_MSG_MOD(extack, + "Bad flow mark - only 16 bit is supported"); return -EINVAL; } @@ -2127,7 +2299,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, } attr->action = action; - if (!actions_match_supported(priv, exts, parse_attr, flow)) + if (!actions_match_supported(priv, exts, parse_attr, flow, extack)) return -EOPNOTSUPP; return 0; @@ -2331,7 +2503,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, return -ENOMEM; switch (e->tunnel_type) { - case MLX5_HEADER_TYPE_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: fl4.flowi4_proto = IPPROTO_UDP; fl4.fl4_dport = tun_key->tp_dst; break; @@ -2375,7 +2547,7 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, read_unlock_bh(&n->lock); switch (e->tunnel_type) { - case MLX5_HEADER_TYPE_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: gen_vxlan_header_ipv4(out_dev, encap_header, ipv4_encap_size, e->h_dest, tos, ttl, fl4.daddr, @@ -2395,8 +2567,10 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, goto out; } - err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, - ipv4_encap_size, encap_header, &e->encap_id); + err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, + ipv4_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); if (err) goto destroy_neigh_entry; @@ -2440,7 +2614,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, return -ENOMEM; switch (e->tunnel_type) { - case MLX5_HEADER_TYPE_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: fl6.flowi6_proto = IPPROTO_UDP; fl6.fl6_dport 
= tun_key->tp_dst; break; @@ -2484,7 +2658,7 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, read_unlock_bh(&n->lock); switch (e->tunnel_type) { - case MLX5_HEADER_TYPE_VXLAN: + case MLX5_REFORMAT_TYPE_L2_TO_VXLAN: gen_vxlan_header_ipv6(out_dev, encap_header, ipv6_encap_size, e->h_dest, tos, ttl, &fl6.daddr, @@ -2505,8 +2679,10 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, goto out; } - err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, - ipv6_encap_size, encap_header, &e->encap_id); + err = mlx5_packet_reformat_alloc(priv->mdev, e->tunnel_type, + ipv6_encap_size, encap_header, + MLX5_FLOW_NAMESPACE_FDB, + &e->encap_id); if (err) goto destroy_neigh_entry; @@ -2529,7 +2705,8 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct ip_tunnel_info *tun_info, struct net_device *mirred_dev, struct net_device **encap_dev, - struct mlx5e_tc_flow *flow) + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; unsigned short family = ip_tunnel_info_af(tun_info); @@ -2547,6 +2724,8 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, /* setting udp src port isn't supported */ if (memchr_inv(&key->tp_src, 0, sizeof(key->tp_src))) { vxlan_encap_offload_err: + NL_SET_ERR_MSG_MOD(extack, + "must set udp dst port and not set udp src port"); netdev_warn(priv->netdev, "must set udp dst port and not set udp src port\n"); return -EOPNOTSUPP; @@ -2554,8 +2733,10 @@ vxlan_encap_offload_err: if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->tp_dst)) && MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) { - tunnel_type = MLX5_HEADER_TYPE_VXLAN; + tunnel_type = MLX5_REFORMAT_TYPE_L2_TO_VXLAN; } else { + NL_SET_ERR_MSG_MOD(extack, + "port isn't an offloaded vxlan udp dport"); netdev_warn(priv->netdev, "%d isn't an offloaded vxlan udp dport\n", be16_to_cpu(key->tp_dst)); return -EOPNOTSUPP; @@ -2660,8 +2841,10 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv, static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, struct mlx5e_tc_flow_parse_attr *parse_attr, - struct mlx5e_tc_flow *flow) + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_rep_priv *rpriv = priv->ppriv; struct ip_tunnel_info *info = NULL; @@ -2686,7 +2869,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (is_tcf_pedit(a)) { err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB, - parse_attr); + parse_attr, extack); if (err) return err; @@ -2697,7 +2880,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (is_tcf_csum(a)) { if (csum_offload_supported(priv, action, - tcf_csum_update_flags(a))) + tcf_csum_update_flags(a), + extack)) continue; return -EOPNOTSUPP; @@ -2710,6 +2894,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, out_dev = tcf_mirred_dev(a); if (attr->out_count >= MLX5_MAX_FLOW_FWD_VPORTS) { + NL_SET_ERR_MSG_MOD(extack, + "can't support more output ports, can't offload forwarding"); pr_err("can't support more than %d output ports, can't offload forwarding\n", attr->out_count); return -EOPNOTSUPP; @@ -2728,11 +2914,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, parse_attr->mirred_ifindex = out_dev->ifindex; parse_attr->tun_info = *info; attr->parse_attr = parse_attr; - action |= 
MLX5_FLOW_CONTEXT_ACTION_ENCAP | + action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; /* attr->out_rep is resolved when we handle encap */ } else { + NL_SET_ERR_MSG_MOD(extack, + "devices are not on same switch HW, can't offload forwarding"); pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", priv->netdev->name, out_dev->name); return -EINVAL; @@ -2765,14 +2953,35 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, continue; } + if (is_tcf_gact_goto_chain(a)) { + u32 dest_chain = tcf_gact_goto_chain_index(a); + u32 max_chain = mlx5_eswitch_get_chain_range(esw); + + if (dest_chain <= attr->chain) { + NL_SET_ERR_MSG(extack, "Goto earlier chain isn't supported"); + return -EOPNOTSUPP; + } + if (dest_chain > max_chain) { + NL_SET_ERR_MSG(extack, "Requested destination chain is out of supported range"); + return -EOPNOTSUPP; + } + action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->dest_chain = dest_chain; + + continue; + } + return -EINVAL; } attr->action = action; - if (!actions_match_supported(priv, exts, parse_attr, flow)) + if (!actions_match_supported(priv, exts, parse_attr, flow, extack)) return -EOPNOTSUPP; if (attr->out_count > 1 && !mlx5_esw_has_fwd_fdb(priv->mdev)) { + NL_SET_ERR_MSG_MOD(extack, + "current firmware doesn't support split rule for port mirroring"); netdev_warn_once(priv->netdev, "current firmware doesn't support split rule for port mirroring\n"); return -EOPNOTSUPP; } @@ -2780,9 +2989,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return 0; } -static void get_flags(int flags, u8 *flow_flags) +static void get_flags(int flags, u16 *flow_flags) { - u8 __flow_flags = 0; + u16 __flow_flags = 0; if (flags & MLX5E_TC_INGRESS) __flow_flags |= MLX5E_TC_FLOW_INGRESS; @@ -2811,31 +3020,15 @@ static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv) return &priv->fs.tc.ht; } -int mlx5e_configure_flower(struct mlx5e_priv *priv, - struct tc_cls_flower_offload *f, int flags) +static int +mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, + struct tc_cls_flower_offload *f, u16 flow_flags, + struct mlx5e_tc_flow_parse_attr **__parse_attr, + struct mlx5e_tc_flow **__flow) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; - struct rhashtable *tc_ht = get_tc_ht(priv); struct mlx5e_tc_flow *flow; - int attr_size, err = 0; - u8 flow_flags = 0; - - get_flags(flags, &flow_flags); - - flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); - if (flow) { - netdev_warn_once(priv->netdev, "flow cookie %lx already exists, ignoring\n", f->cookie); - return 0; - } - - if (esw && esw->mode == SRIOV_OFFLOADS) { - flow_flags |= MLX5E_TC_FLOW_ESWITCH; - attr_size = sizeof(struct mlx5_esw_flow_attr); - } else { - flow_flags |= MLX5E_TC_FLOW_NIC; - attr_size = sizeof(struct mlx5_nic_flow_attr); - } + int err; flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); @@ -2849,45 +3042,161 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, flow->priv = priv; err = parse_cls_flower(priv, flow, &parse_attr->spec, f); - if (err < 0) + if (err) goto err_free; - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { - err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow); - if (err < 0) - goto err_free; - flow->rule[0] = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow); - } else { - err = 
parse_tc_nic_actions(priv, f->exts, parse_attr, flow); - if (err < 0) - goto err_free; - flow->rule[0] = mlx5e_tc_add_nic_flow(priv, parse_attr, flow); - } + *__flow = flow; + *__parse_attr = parse_attr; - if (IS_ERR(flow->rule[0])) { - err = PTR_ERR(flow->rule[0]); - if (err != -EAGAIN) - goto err_free; - } + return 0; - if (err != -EAGAIN) - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; +err_free: + kfree(flow); + kvfree(parse_attr); + return err; +} + +static int +mlx5e_add_fdb_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + u16 flow_flags, + struct mlx5e_tc_flow **__flow) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_tc_flow *flow; + int attr_size, err; - if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) || - !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)) + flow_flags |= MLX5E_TC_FLOW_ESWITCH; + attr_size = sizeof(struct mlx5_esw_flow_attr); + err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, + &parse_attr, &flow); + if (err) + goto out; + + flow->esw_attr->chain = f->common.chain_index; + flow->esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16; + err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow, extack); + if (err) + goto err_free; + + err = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow, extack); + if (err) + goto err_free; + + if (!(flow->esw_attr->action & + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)) kvfree(parse_attr); - err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params); - if (err) { - mlx5e_tc_del_flow(priv, flow); - kfree(flow); - } + *__flow = flow; + + return 0; +err_free: + kfree(flow); + kvfree(parse_attr); +out: return err; +} + +static int +mlx5e_add_nic_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + u16 flow_flags, + struct mlx5e_tc_flow **__flow) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_tc_flow *flow; + int attr_size, err; + + /* multi-chain not supported for NIC rules */ + if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common)) + return -EOPNOTSUPP; + + flow_flags |= MLX5E_TC_FLOW_NIC; + attr_size = sizeof(struct mlx5_nic_flow_attr); + err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, + &parse_attr, &flow); + if (err) + goto out; + + err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow, extack); + if (err) + goto err_free; + + err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack); + if (err) + goto err_free; + + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + kvfree(parse_attr); + *__flow = flow; + + return 0; err_free: + kfree(flow); kvfree(parse_attr); +out: + return err; +} + +static int +mlx5e_tc_add_flow(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, + int flags, + struct mlx5e_tc_flow **flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + u16 flow_flags; + int err; + + get_flags(flags, &flow_flags); + + if (!tc_can_offload_extack(priv->netdev, f->common.extack)) + return -EOPNOTSUPP; + + if (esw && esw->mode == SRIOV_OFFLOADS) + err = mlx5e_add_fdb_flow(priv, f, flow_flags, flow); + else + err = mlx5e_add_nic_flow(priv, f, flow_flags, flow); + + return err; +} + +int mlx5e_configure_flower(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f, int flags) +{ + struct netlink_ext_ack *extack = f->common.extack; + struct rhashtable *tc_ht = get_tc_ht(priv); + struct mlx5e_tc_flow *flow; + int err = 0; + + flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); + if (flow) { + NL_SET_ERR_MSG_MOD(extack, + 
"flow cookie already exists, ignoring"); + netdev_warn_once(priv->netdev, + "flow cookie %lx already exists, ignoring\n", + f->cookie); + goto out; + } + + err = mlx5e_tc_add_flow(priv, f, flags, &flow); + if (err) + goto out; + + err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params); + if (err) + goto err_free; + + return 0; + +err_free: + mlx5e_tc_del_flow(priv, flow); kfree(flow); +out: return err; } @@ -2938,7 +3247,7 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED)) return 0; - counter = mlx5_flow_rule_counter(flow->rule[0]); + counter = mlx5e_tc_get_counter(flow); if (!counter) return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index ea7dedc2d5ad..d004957328f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -263,7 +263,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw) esw_debug(dev, "Create FDB log_max_size(%d)\n", MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + root_ns = mlx5_get_fdb_sub_ns(dev, 0); if (!root_ns) { esw_warn(dev, "Failed to get FDB flow namespace\n"); return -EOPNOTSUPP; @@ -1198,7 +1198,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, if (counter) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - drop_ctr_dst.counter = counter; + drop_ctr_dst.counter_id = mlx5_fc_id(counter); dst = &drop_ctr_dst; dest_num++; } @@ -1285,7 +1285,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, if (counter) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - drop_ctr_dst.counter = counter; + drop_ctr_dst.counter_id = mlx5_fc_id(counter); dst = &drop_ctr_dst; dest_num++; } @@ -1746,7 +1746,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->enabled_vports = 0; esw->mode = SRIOV_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) && + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC; else diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index c17bfcab517c..aaafc9f17115 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -59,6 +59,10 @@ #define mlx5_esw_has_fwd_fdb(dev) \ MLX5_CAP_ESW_FLOWTABLE(dev, fdb_multi_path_to_table) +#define FDB_MAX_CHAIN 3 +#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) +#define FDB_MAX_PRIO 16 + struct vport_ingress { struct mlx5_flow_table *acl; struct mlx5_flow_group *allow_untagged_spoofchk_grp; @@ -120,6 +124,13 @@ struct mlx5_vport { u16 enabled_events; }; +enum offloads_fdb_flags { + ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED = BIT(0), +}; + +extern const unsigned int ESW_POOLS[4]; + +#define PRIO_LEVELS 2 struct mlx5_eswitch_fdb { union { struct legacy_fdb { @@ -130,16 +141,24 @@ struct mlx5_eswitch_fdb { } legacy; struct offloads_fdb { - struct mlx5_flow_table *fast_fdb; - struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_table *slow_fdb; struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *miss_grp; struct mlx5_flow_handle *miss_rule_uni; struct mlx5_flow_handle *miss_rule_multi; int vlan_push_pop_refcount; + + struct { + 
struct mlx5_flow_table *fdb; + u32 num_rules; + } fdb_prio[FDB_MAX_CHAIN + 1][FDB_MAX_PRIO + 1][PRIO_LEVELS]; + /* Protects fdb_prio table */ + struct mutex fdb_prio_lock; + + int fdb_left[ARRAY_SIZE(ESW_POOLS)]; } offloads; }; + u32 flags; }; struct mlx5_esw_offload { @@ -181,6 +200,7 @@ struct mlx5_eswitch { struct mlx5_esw_offload offloads; int mode; + int nvports; }; void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); @@ -228,9 +248,23 @@ void mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, struct mlx5_esw_flow_attr *attr); +void +mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *attr); + +bool +mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw); + +u16 +mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw); + +u32 +mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw); struct mlx5_flow_handle * -mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, + struct mlx5_flow_destination *dest); enum { SET_VLAN_STRIP = BIT(0), @@ -265,15 +299,22 @@ struct mlx5_esw_flow_attr { u32 encap_id; u32 mod_hdr_id; u8 match_level; + struct mlx5_fc *counter; + u32 chain; + u16 prio; + u32 dest_chain; struct mlx5e_tc_flow_parse_attr *parse_attr; }; -int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode); +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); -int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode); +int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, + struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode); int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode); -int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap); +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, + struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); @@ -314,6 +355,11 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {} static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} + +#define FDB_MAX_CHAIN 1 +#define FDB_SLOW_PATH_CHAIN (FDB_MAX_CHAIN + 1) +#define FDB_MAX_PRIO 1 + #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 3028e8d90920..9eac137790f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -37,33 +37,59 @@ #include <linux/mlx5/fs.h> #include "mlx5_core.h" #include "eswitch.h" +#include "en.h" +#include "fs_core.h" enum { FDB_FAST_PATH = 0, FDB_SLOW_PATH }; +#define fdb_prio_table(esw, chain, prio, level) \ + (esw)->fdb_table.offloads.fdb_prio[(chain)][(prio)][(level)] + +static struct mlx5_flow_table * +esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); +static void 
+esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level); + +bool mlx5_eswitch_prios_supported(struct mlx5_eswitch *esw) +{ + return (!!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)); +} + +u32 mlx5_eswitch_get_chain_range(struct mlx5_eswitch *esw) +{ + if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED) + return FDB_MAX_CHAIN; + + return 0; +} + +u16 mlx5_eswitch_get_prio_range(struct mlx5_eswitch *esw) +{ + if (esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED) + return FDB_MAX_PRIO; + + return 1; +} + struct mlx5_flow_handle * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, struct mlx5_esw_flow_attr *attr) { struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; - struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_table *ft = NULL; - struct mlx5_fc *counter = NULL; + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + bool mirror = !!(attr->mirror_count); struct mlx5_flow_handle *rule; + struct mlx5_flow_table *fdb; int j, i = 0; void *misc; if (esw->mode != SRIOV_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); - if (attr->mirror_count) - ft = esw->fdb_table.offloads.fwd_fdb; - else - ft = esw->fdb_table.offloads.fast_fdb; - flow_act.action = attr->action; /* if per flow vlan pop/push is emulated, don't set that into the firmware */ if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) @@ -81,23 +107,33 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - for (j = attr->mirror_count; j < attr->out_count; j++) { - dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest[i].vport.num = attr->out_rep[j]->vport; - dest[i].vport.vhca_id = - MLX5_CAP_GEN(attr->out_mdev[j], vhca_id); - dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch); + if (attr->dest_chain) { + struct mlx5_flow_table *ft; + + ft = esw_get_prio_table(esw, attr->dest_chain, 1, 0); + if (IS_ERR(ft)) { + rule = ERR_CAST(ft); + goto err_create_goto_table; + } + + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = ft; i++; + } else { + for (j = attr->mirror_count; j < attr->out_count; j++) { + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest[i].vport.num = attr->out_rep[j]->vport; + dest[i].vport.vhca_id = + MLX5_CAP_GEN(attr->out_mdev[j], vhca_id); + dest[i].vport.vhca_id_valid = + !!MLX5_CAP_ESW(esw->dev, merged_eswitch); + i++; + } } } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - counter = mlx5_fc_create(esw->dev, true); - if (IS_ERR(counter)) { - rule = ERR_CAST(counter); - goto err_counter_alloc; - } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[i].counter = counter; + dest[i].counter_id = mlx5_fc_id(attr->counter); i++; } @@ -127,10 +163,16 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_id = attr->mod_hdr_id; - if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) - flow_act.encap_id = attr->encap_id; + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT) + flow_act.reformat_id = attr->encap_id; - rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, i); + fdb = esw_get_prio_table(esw, attr->chain, attr->prio, !!mirror); + if (IS_ERR(fdb)) { + rule = ERR_CAST(fdb); + goto err_esw_get; + } + + rule = mlx5_add_flow_rules(fdb, spec, &flow_act, dest, i); if (IS_ERR(rule)) goto err_add_rule; else @@ -139,8 +181,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, 
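/*
 * Illustrative sketch, not part of the patch: a standalone user-space model
 * of how the advertised chain/prio ranges collapse when the device lacks the
 * chains-and-prios capability, and of the idea that a "goto chain N" action
 * is resolved to the level-0 table of (chain N, prio 1). All names ending in
 * _model or model_ are invented for this example.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define MODEL_FDB_MAX_CHAIN 3
#define MODEL_FDB_MAX_PRIO 16

struct eswitch_model {
	bool chains_and_prios_supported;
};

static unsigned int model_get_chain_range(const struct eswitch_model *esw)
{
	return esw->chains_and_prios_supported ? MODEL_FDB_MAX_CHAIN : 0;
}

static unsigned int model_get_prio_range(const struct eswitch_model *esw)
{
	return esw->chains_and_prios_supported ? MODEL_FDB_MAX_PRIO : 1;
}

int main(void)
{
	struct eswitch_model esw = { .chains_and_prios_supported = true };

	/* tc filters may use chains 0..3 and prios 1..16 on capable devices */
	assert(model_get_chain_range(&esw) == 3);
	assert(model_get_prio_range(&esw) == 16);

	/* without the capability everything lands in chain 0, prio 1 */
	esw.chains_and_prios_supported = false;
	assert(model_get_chain_range(&esw) == 0);
	assert(model_get_prio_range(&esw) == 1);

	/* a "goto chain 2" action forwards to the level-0 table of (2, prio 1) */
	printf("goto chain 2 -> table (chain=2, prio=1, level=0)\n");
	return 0;
}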
return rule; err_add_rule: - mlx5_fc_destroy(esw->dev, counter); -err_counter_alloc: + esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror); +err_esw_get: + if (attr->dest_chain) + esw_put_prio_table(esw, attr->dest_chain, 1, 0); +err_create_goto_table: return rule; } @@ -150,11 +195,25 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, struct mlx5_esw_flow_attr *attr) { struct mlx5_flow_destination dest[MLX5_MAX_FLOW_FWD_VPORTS + 1] = {}; - struct mlx5_flow_act flow_act = {0}; + struct mlx5_flow_act flow_act = { .flags = FLOW_ACT_NO_APPEND, }; + struct mlx5_flow_table *fast_fdb; + struct mlx5_flow_table *fwd_fdb; struct mlx5_flow_handle *rule; void *misc; int i; + fast_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 0); + if (IS_ERR(fast_fdb)) { + rule = ERR_CAST(fast_fdb); + goto err_get_fast; + } + + fwd_fdb = esw_get_prio_table(esw, attr->chain, attr->prio, 1); + if (IS_ERR(fwd_fdb)) { + rule = ERR_CAST(fwd_fdb); + goto err_get_fwd; + } + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; for (i = 0; i < attr->mirror_count; i++) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; @@ -164,7 +223,7 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, dest[i].vport.vhca_id_valid = !!MLX5_CAP_ESW(esw->dev, merged_eswitch); } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[i].ft = esw->fdb_table.offloads.fwd_fdb, + dest[i].ft = fwd_fdb, i++; misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); @@ -187,25 +246,57 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw, spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS; - rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fast_fdb, spec, &flow_act, dest, i); + rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); - if (!IS_ERR(rule)) - esw->offloads.num_flows++; + if (IS_ERR(rule)) + goto add_err; + esw->offloads.num_flows++; + + return rule; +add_err: + esw_put_prio_table(esw, attr->chain, attr->prio, 1); +err_get_fwd: + esw_put_prio_table(esw, attr->chain, attr->prio, 0); +err_get_fast: return rule; } +static void +__mlx5_eswitch_del_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *attr, + bool fwd_rule) +{ + bool mirror = (attr->mirror_count > 0); + + mlx5_del_flow_rules(rule); + esw->offloads.num_flows--; + + if (fwd_rule) { + esw_put_prio_table(esw, attr->chain, attr->prio, 1); + esw_put_prio_table(esw, attr->chain, attr->prio, 0); + } else { + esw_put_prio_table(esw, attr->chain, attr->prio, !!mirror); + if (attr->dest_chain) + esw_put_prio_table(esw, attr->dest_chain, 1, 0); + } +} + void mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_handle *rule, struct mlx5_esw_flow_attr *attr) { - struct mlx5_fc *counter = NULL; + __mlx5_eswitch_del_rule(esw, rule, attr, false); +} - counter = mlx5_flow_rule_counter(rule); - mlx5_del_flow_rules(rule); - mlx5_fc_destroy(esw->dev, counter); - esw->offloads.num_flows--; +void +mlx5_eswitch_del_fwd_rule(struct mlx5_eswitch *esw, + struct mlx5_flow_handle *rule, + struct mlx5_esw_flow_attr *attr) +{ + __mlx5_eswitch_del_rule(esw, rule, attr, true); } static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) @@ -294,7 +385,8 @@ int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); - fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + fwd = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) 
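/*
 * Illustrative sketch, not part of the patch: a single-threaded model of the
 * per-(chain, prio, level) reference counting behind the lazily created FDB
 * tables. A get() references the requested level and every earlier level, a
 * put() walks back down and "destroys" a level when its count reaches zero,
 * so each add path is balanced by a symmetric put on the delete path. The
 * names are invented for the example.
 */
#include <assert.h>
#include <stdio.h>

#define MODEL_LEVELS 2

static int refs[MODEL_LEVELS];

static void model_get_prio_table(int level)
{
	for (int l = level; l >= 0; l--)
		refs[l]++;		/* table is created on first reference */
}

static void model_put_prio_table(int level)
{
	for (int l = level; l >= 0; l--) {
		if (--refs[l] == 0)
			printf("level %d table destroyed\n", l);
	}
}

int main(void)
{
	/* a mirrored rule needs both the fast (level 0) and fwd (level 1) tables */
	model_get_prio_table(0);	/* fast path table */
	model_get_prio_table(1);	/* fwd table, also references level 0 */
	assert(refs[0] == 2 && refs[1] == 1);

	/* deleting the rule releases in the same order as the offload path */
	model_put_prio_table(1);
	model_put_prio_table(0);
	assert(refs[0] == 0 && refs[1] == 0);
	return 0;
}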
&& + !attr->dest_chain); err = esw_add_vlan_action_check(attr, push, pop, fwd); if (err) @@ -501,74 +593,170 @@ out: #define ESW_OFFLOADS_NUM_GROUPS 4 -static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw) +/* Firmware currently has 4 pool of 4 sizes that it supports (ESW_POOLS), + * and a virtual memory region of 16M (ESW_SIZE), this region is duplicated + * for each flow table pool. We can allocate up to 16M of each pool, + * and we keep track of how much we used via put/get_sz_to_pool. + * Firmware doesn't report any of this for now. + * ESW_POOL is expected to be sorted from large to small + */ +#define ESW_SIZE (16 * 1024 * 1024) +const unsigned int ESW_POOLS[4] = { 4 * 1024 * 1024, 1 * 1024 * 1024, + 64 * 1024, 4 * 1024 }; + +static int +get_sz_from_pool(struct mlx5_eswitch *esw) +{ + int sz = 0, i; + + for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) { + if (esw->fdb_table.offloads.fdb_left[i]) { + --esw->fdb_table.offloads.fdb_left[i]; + sz = ESW_POOLS[i]; + break; + } + } + + return sz; +} + +static void +put_sz_to_pool(struct mlx5_eswitch *esw, int sz) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) { + if (sz >= ESW_POOLS[i]) { + ++esw->fdb_table.offloads.fdb_left[i]; + break; + } + } +} + +static struct mlx5_flow_table * +create_next_size_table(struct mlx5_eswitch *esw, + struct mlx5_flow_namespace *ns, + u16 table_prio, + int level, + u32 flags) +{ + struct mlx5_flow_table *fdb; + int sz; + + sz = get_sz_from_pool(esw); + if (!sz) + return ERR_PTR(-ENOSPC); + + fdb = mlx5_create_auto_grouped_flow_table(ns, + table_prio, + sz, + ESW_OFFLOADS_NUM_GROUPS, + level, + flags); + if (IS_ERR(fdb)) { + esw_warn(esw->dev, "Failed to create FDB Table err %d (table prio: %d, level: %d, size: %d)\n", + (int)PTR_ERR(fdb), table_prio, level, sz); + put_sz_to_pool(esw, sz); + } + + return fdb; +} + +static struct mlx5_flow_table * +esw_get_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level) { struct mlx5_core_dev *dev = esw->dev; - struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; - int esw_size, err = 0; + struct mlx5_flow_namespace *ns; + int table_prio, l = 0; u32 flags = 0; - u32 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | - MLX5_CAP_GEN(dev, max_flow_counter_15_0); - root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); - if (!root_ns) { - esw_warn(dev, "Failed to get FDB flow namespace\n"); - err = -EOPNOTSUPP; - goto out_namespace; - } + if (chain == FDB_SLOW_PATH_CHAIN) + return esw->fdb_table.offloads.slow_fdb; - esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n", - MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), - max_flow_counter, ESW_OFFLOADS_NUM_GROUPS); + mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock); - esw_size = min_t(int, max_flow_counter * ESW_OFFLOADS_NUM_GROUPS, - 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + fdb = fdb_prio_table(esw, chain, prio, level).fdb; + if (fdb) { + /* take ref on earlier levels as well */ + while (level >= 0) + fdb_prio_table(esw, chain, prio, level--).num_rules++; + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); + return fdb; + } - if (mlx5_esw_has_fwd_fdb(dev)) - esw_size >>= 1; + ns = mlx5_get_fdb_sub_ns(dev, chain); + if (!ns) { + esw_warn(dev, "Failed to get FDB sub namespace\n"); + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); + return ERR_PTR(-EOPNOTSUPP); + } if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN; + 
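/*
 * Illustrative sketch, not part of the patch: a standalone model of the
 * table-size pool bookkeeping. The pool sizes are kept sorted from large to
 * small; an allocation takes the largest size that still has budget left,
 * and a free returns the budget to the first pool entry not larger than the
 * freed table. The numbers mirror ESW_SIZE/ESW_POOLS, but the function and
 * variable names are invented for the example.
 */
#include <assert.h>
#include <stdio.h>

#define MODEL_ESW_SIZE (16 * 1024 * 1024)

static const unsigned int model_pools[] = {
	4 * 1024 * 1024, 1 * 1024 * 1024, 64 * 1024, 4 * 1024 };
#define MODEL_NUM_POOLS (sizeof(model_pools) / sizeof(model_pools[0]))

static int pool_left[MODEL_NUM_POOLS];

static unsigned int model_get_sz_from_pool(void)
{
	for (unsigned int i = 0; i < MODEL_NUM_POOLS; i++) {
		if (pool_left[i]) {
			pool_left[i]--;
			return model_pools[i];
		}
	}
	return 0;	/* the driver returns -ENOSPC in this case */
}

static void model_put_sz_to_pool(unsigned int sz)
{
	for (unsigned int i = 0; i < MODEL_NUM_POOLS; i++) {
		if (sz >= model_pools[i]) {
			pool_left[i]++;
			return;
		}
	}
}

int main(void)
{
	/* each pool gets a 16M budget worth of tables of its own size */
	for (unsigned int i = 0; i < MODEL_NUM_POOLS; i++)
		pool_left[i] = MODEL_ESW_SIZE / model_pools[i];

	unsigned int sz = model_get_sz_from_pool();

	assert(sz == 4 * 1024 * 1024);	/* the largest pool is drained first */
	model_put_sz_to_pool(sz);
	assert(pool_left[0] == 4);	/* 16M / 4M entries restored */

	printf("pool entries left: %d %d %d %d\n", pool_left[0], pool_left[1],
	       pool_left[2], pool_left[3]);
	return 0;
}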
flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); - fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, - esw_size, - ESW_OFFLOADS_NUM_GROUPS, 0, - flags); - if (IS_ERR(fdb)) { - err = PTR_ERR(fdb); - esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err); - goto out_namespace; - } - esw->fdb_table.offloads.fast_fdb = fdb; + table_prio = (chain * FDB_MAX_PRIO) + prio - 1; + + /* create earlier levels for correct fs_core lookup when + * connecting tables + */ + for (l = 0; l <= level; l++) { + if (fdb_prio_table(esw, chain, prio, l).fdb) { + fdb_prio_table(esw, chain, prio, l).num_rules++; + continue; + } - if (!mlx5_esw_has_fwd_fdb(dev)) - goto out_namespace; + fdb = create_next_size_table(esw, ns, table_prio, l, flags); + if (IS_ERR(fdb)) { + l--; + goto err_create_fdb; + } - fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, - esw_size, - ESW_OFFLOADS_NUM_GROUPS, 1, - flags); - if (IS_ERR(fdb)) { - err = PTR_ERR(fdb); - esw_warn(dev, "Failed to create fwd table err %d\n", err); - goto out_ft; + fdb_prio_table(esw, chain, prio, l).fdb = fdb; + fdb_prio_table(esw, chain, prio, l).num_rules = 1; } - esw->fdb_table.offloads.fwd_fdb = fdb; - return err; + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); + return fdb; -out_ft: - mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb); -out_namespace: - return err; +err_create_fdb: + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); + if (l >= 0) + esw_put_prio_table(esw, chain, prio, l); + + return fdb; +} + +static void +esw_put_prio_table(struct mlx5_eswitch *esw, u32 chain, u16 prio, int level) +{ + int l; + + if (chain == FDB_SLOW_PATH_CHAIN) + return; + + mutex_lock(&esw->fdb_table.offloads.fdb_prio_lock); + + for (l = level; l >= 0; l--) { + if (--(fdb_prio_table(esw, chain, prio, l).num_rules) > 0) + continue; + + put_sz_to_pool(esw, fdb_prio_table(esw, chain, prio, l).fdb->max_fte); + mlx5_destroy_flow_table(fdb_prio_table(esw, chain, prio, l).fdb); + fdb_prio_table(esw, chain, prio, l).fdb = NULL; + } + + mutex_unlock(&esw->fdb_table.offloads.fdb_prio_lock); } -static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw) +static void esw_destroy_offloads_fast_fdb_tables(struct mlx5_eswitch *esw) { - if (mlx5_esw_has_fwd_fdb(esw->dev)) - mlx5_destroy_flow_table(esw->fdb_table.offloads.fwd_fdb); - mlx5_destroy_flow_table(esw->fdb_table.offloads.fast_fdb); + /* If lazy creation isn't supported, deref the fast path tables */ + if (!(esw->fdb_table.flags & ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED)) { + esw_put_prio_table(esw, 0, 1, 1); + esw_put_prio_table(esw, 0, 1, 0); + } } #define MAX_PF_SQ 256 @@ -579,12 +767,13 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; + u32 *flow_group_in, max_flow_counter; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; - int table_size, ix, err = 0; + int table_size, ix, err = 0, i; struct mlx5_flow_group *g; + u32 flags = 0, fdb_max; void *match_criteria; - u32 *flow_group_in; u8 *dmac; esw_debug(esw->dev, "Create offloads FDB Tables\n"); @@ -599,12 +788,29 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) goto ns_err; } - err = esw_create_offloads_fast_fdb_table(esw); - if (err) - goto fast_fdb_err; + max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, 
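/*
 * Illustrative sketch, not part of the patch: how a (chain, prio) pair is
 * flattened into the single fs_core priority space of the chained FDB
 * namespace. Each chain owns FDB_MAX_PRIO consecutive priorities, and prio
 * is 1-based, hence the "- 1". The helper name is invented for the example.
 */
#include <assert.h>
#include <stdio.h>

#define MODEL_FDB_MAX_PRIO 16

static unsigned int model_table_prio(unsigned int chain, unsigned int prio)
{
	return chain * MODEL_FDB_MAX_PRIO + prio - 1;
}

int main(void)
{
	assert(model_table_prio(0, 1) == 0);	/* first prio of chain 0 */
	assert(model_table_prio(0, 16) == 15);	/* last prio of chain 0 */
	assert(model_table_prio(1, 1) == 16);	/* chain 1 starts right after */
	assert(model_table_prio(2, 3) == 34);

	printf("chain 2, prio 3 -> table_prio %u\n", model_table_prio(2, 3));
	return 0;
}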
max_flow_counter_15_0); + fdb_max = 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size); + + esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d), groups(%d), max flow table size(2^%d))\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), + max_flow_counter, ESW_OFFLOADS_NUM_GROUPS, + fdb_max); + + for (i = 0; i < ARRAY_SIZE(ESW_POOLS); i++) + esw->fdb_table.offloads.fdb_left[i] = + ESW_POOLS[i] <= fdb_max ? ESW_SIZE / ESW_POOLS[i] : 0; table_size = nvports * MAX_SQ_NVPORTS + MAX_PF_SQ + 2; + /* create the slow path fdb with encap set, so further table instances + * can be created at run time while VFs are probed if the FW allows that. + */ + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) + flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | + MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); + + ft_attr.flags = flags; ft_attr.max_fte = table_size; ft_attr.prio = FDB_SLOW_PATH; @@ -616,6 +822,18 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) } esw->fdb_table.offloads.slow_fdb = fdb; + /* If lazy creation isn't supported, open the fast path tables now */ + if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, multi_fdb_encap) && + esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) { + esw->fdb_table.flags &= ~ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + esw_warn(dev, "Lazy creation of flow tables isn't supported, ignoring priorities\n"); + esw_get_prio_table(esw, 0, 1, 0); + esw_get_prio_table(esw, 0, 1, 1); + } else { + esw_debug(dev, "Lazy creation of flow tables supported, deferring table opening\n"); + esw->fdb_table.flags |= ESW_FDB_CHAINS_AND_PRIOS_SUPPORTED; + } + /* create send-to-vport group */ memset(flow_group_in, 0, inlen); MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -663,6 +881,7 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) if (err) goto miss_rule_err; + esw->nvports = nvports; kvfree(flow_group_in); return 0; @@ -671,10 +890,9 @@ miss_rule_err: miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: + esw_destroy_offloads_fast_fdb_tables(esw); mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); slow_fdb_err: - esw_destroy_offloads_fast_fdb_table(esw); -fast_fdb_err: ns_err: kvfree(flow_group_in); return err; @@ -682,7 +900,7 @@ ns_err: static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) { - if (!esw->fdb_table.offloads.fast_fdb) + if (!esw->fdb_table.offloads.slow_fdb) return; esw_debug(esw->dev, "Destroy offloads FDB Tables\n"); @@ -692,7 +910,7 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); - esw_destroy_offloads_fast_fdb_table(esw); + esw_destroy_offloads_fast_fdb_tables(esw); } static int esw_create_offloads_table(struct mlx5_eswitch *esw) @@ -775,10 +993,10 @@ static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw) } struct mlx5_flow_handle * -mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, + struct mlx5_flow_destination *dest) { struct mlx5_flow_act flow_act = {0}; - struct mlx5_flow_destination dest = {}; struct mlx5_flow_handle *flow_rule; struct mlx5_flow_spec *spec; void *misc; @@ -796,12 +1014,10 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); 
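/*
 * Illustrative sketch, not part of the patch: how the per-pool budget is
 * derived from the firmware's maximum flow-table size. Pools larger than
 * what the device can create get no budget at all; the rest may each carve
 * up to 16M worth of tables of their own size. The log_max_ft_size value
 * below is an assumed example, not a real device capability read.
 */
#include <stdio.h>

#define MODEL_ESW_SIZE (16 * 1024 * 1024)

static const unsigned int model_pools[] = {
	4 * 1024 * 1024, 1 * 1024 * 1024, 64 * 1024, 4 * 1024 };

int main(void)
{
	unsigned int log_max_ft_size = 21;	/* assumed: 2M-entry max table */
	unsigned int fdb_max = 1u << log_max_ft_size;

	for (unsigned int i = 0; i < 4; i++) {
		unsigned int left = model_pools[i] <= fdb_max ?
				    MODEL_ESW_SIZE / model_pools[i] : 0;

		printf("pool %7u -> %u tables\n", model_pools[i], left);
	}
	/* prints: 4194304 -> 0, 1048576 -> 16, 65536 -> 256, 4096 -> 4096 */
	return 0;
}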
spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; - dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dest.tir_num = tirn; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, - &flow_act, &dest, 1); + &flow_act, dest, 1); if (IS_ERR(flow_rule)) { esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule)); goto out; @@ -812,29 +1028,35 @@ out: return flow_rule; } -static int esw_offloads_start(struct mlx5_eswitch *esw) +static int esw_offloads_start(struct mlx5_eswitch *esw, + struct netlink_ext_ack *extack) { int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; if (esw->mode != SRIOV_LEGACY) { - esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n"); + NL_SET_ERR_MSG_MOD(extack, + "Can't set offloads mode, SRIOV legacy not enabled"); return -EINVAL; } mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); if (err) { - esw_warn(esw->dev, "Failed setting eswitch to offloads, err %d\n", err); + NL_SET_ERR_MSG_MOD(extack, + "Failed setting eswitch to offloads"); err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); - if (err1) - esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err1); + if (err1) { + NL_SET_ERR_MSG_MOD(extack, + "Failed setting eswitch back to legacy"); + } } if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) { if (mlx5_eswitch_inline_mode_get(esw, num_vfs, &esw->offloads.inline_mode)) { esw->offloads.inline_mode = MLX5_INLINE_MODE_L2; - esw_warn(esw->dev, "Inline mode is different between vports\n"); + NL_SET_ERR_MSG_MOD(extack, + "Inline mode is different between vports"); } } return err; @@ -945,6 +1167,8 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) { int err; + mutex_init(&esw->fdb_table.offloads.fdb_prio_lock); + err = esw_create_offloads_fdb_tables(esw, nvports); if (err) return err; @@ -975,17 +1199,20 @@ create_ft_err: return err; } -static int esw_offloads_stop(struct mlx5_eswitch *esw) +static int esw_offloads_stop(struct mlx5_eswitch *esw, + struct netlink_ext_ack *extack) { int err, err1, num_vfs = esw->dev->priv.sriov.num_vfs; mlx5_eswitch_disable_sriov(esw); err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); if (err) { - esw_warn(esw->dev, "Failed setting eswitch to legacy, err %d\n", err); + NL_SET_ERR_MSG_MOD(extack, "Failed setting eswitch to legacy"); err1 = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); - if (err1) - esw_warn(esw->dev, "Failed setting eswitch back to offloads, err %d\n", err); + if (err1) { + NL_SET_ERR_MSG_MOD(extack, + "Failed setting eswitch back to offloads"); + } } /* enable back PF RoCE */ @@ -1094,7 +1321,8 @@ static int mlx5_devlink_eswitch_check(struct devlink *devlink) return 0; } -int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); u16 cur_mlx5_mode, mlx5_mode = 0; @@ -1113,9 +1341,9 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) return 0; if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) - return esw_offloads_start(dev->priv.eswitch); + return esw_offloads_start(dev->priv.eswitch, extack); else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) - return esw_offloads_stop(dev->priv.eswitch); + return esw_offloads_stop(dev->priv.eswitch, extack); else return -EINVAL; } @@ -1132,7 +1360,8 @@ int 
mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) return esw_mode_to_devlink(dev->priv.eswitch->mode, mode); } -int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode) +int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; @@ -1149,14 +1378,15 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode) return 0; /* fall through */ case MLX5_CAP_INLINE_MODE_L2: - esw_warn(dev, "Inline mode can't be set\n"); + NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set"); return -EOPNOTSUPP; case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT: break; } if (esw->offloads.num_flows > 0) { - esw_warn(dev, "Can't set inline mode when flows are configured\n"); + NL_SET_ERR_MSG_MOD(extack, + "Can't set inline mode when flows are configured"); return -EOPNOTSUPP; } @@ -1167,8 +1397,8 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode) for (vport = 1; vport < esw->enabled_vports; vport++) { err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode); if (err) { - esw_warn(dev, "Failed to set min inline on vport %d\n", - vport); + NL_SET_ERR_MSG_MOD(extack, + "Failed to set min inline on vport"); goto revert_inline_mode; } } @@ -1234,7 +1464,8 @@ out: return 0; } -int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap) +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap, + struct netlink_ext_ack *extack) { struct mlx5_core_dev *dev = devlink_priv(devlink); struct mlx5_eswitch *esw = dev->priv.eswitch; @@ -1245,7 +1476,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap) return err; if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && - (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) || + (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) || !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) return -EOPNOTSUPP; @@ -1261,19 +1492,24 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap) return 0; if (esw->offloads.num_flows > 0) { - esw_warn(dev, "Can't set encapsulation when flows are configured\n"); + NL_SET_ERR_MSG_MOD(extack, + "Can't set encapsulation when flows are configured"); return -EOPNOTSUPP; } - esw_destroy_offloads_fast_fdb_table(esw); + esw_destroy_offloads_fdb_tables(esw); esw->offloads.encap = encap; - err = esw_create_offloads_fast_fdb_table(esw); + + err = esw_create_offloads_fdb_tables(esw, esw->nvports); + if (err) { - esw_warn(esw->dev, "Failed re-creating fast FDB table, err %d\n", err); + NL_SET_ERR_MSG_MOD(extack, + "Failed re-creating fast FDB table"); esw->offloads.encap = !encap; - (void)esw_create_offloads_fast_fdb_table(esw); + (void)esw_create_offloads_fdb_tables(esw, esw->nvports); } + return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index b8ee9101c506..515e3d6de051 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -649,7 +649,7 @@ static bool mlx5_is_fpga_egress_ipsec_rule(struct mlx5_core_dev *dev, (match_criteria_enable & ~(MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS)) || (flow_act->action & ~(MLX5_FLOW_CONTEXT_ACTION_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_ALLOW)) || - flow_act->has_flow_tag) + (flow_act->flags & FLOW_ACT_HAS_TAG)) return false; return true; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 8e01f818021b..08a891f9aade 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -152,7 +152,8 @@ static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *next_ft, unsigned int *table_id, u32 flags) { - int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN); + int en_encap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT); + int en_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP); u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0}; u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0}; int err; @@ -169,9 +170,9 @@ static int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, } MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en, - en_encap_decap); - MLX5_SET(create_flow_table_in, in, flow_table_context.encap_en, - en_encap_decap); + en_decap); + MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en, + en_encap); switch (op_mod) { case FS_FT_OP_MOD_NORMAL: @@ -343,7 +344,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, flow_tag, fte->action.flow_tag); MLX5_SET(flow_context, in_flow_context, action, fte->action.action); - MLX5_SET(flow_context, in_flow_context, encap_id, fte->action.encap_id); + MLX5_SET(flow_context, in_flow_context, packet_reformat_id, + fte->action.reformat_id); MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->action.modify_id); @@ -417,7 +419,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, continue; MLX5_SET(flow_counter_list, in_dests, flow_counter_id, - dst->dest_attr.counter->id); + dst->dest_attr.counter_id); in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); list_size++; } @@ -594,62 +596,78 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, *bytes = MLX5_GET64(traffic_counter, stats, octets); } -int mlx5_encap_alloc(struct mlx5_core_dev *dev, - int header_type, - size_t size, - void *encap_header, - u32 *encap_id) +int mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev, + int reformat_type, + size_t size, + void *reformat_data, + enum mlx5_flow_namespace_type namespace, + u32 *packet_reformat_id) { - int max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size); - u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)]; - void *encap_header_in; - void *header; + u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)]; + void *packet_reformat_context_in; + int max_encap_size; + void *reformat; int inlen; int err; u32 *in; + if (namespace == MLX5_FLOW_NAMESPACE_FDB) + max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size); + else + max_encap_size = MLX5_CAP_FLOWTABLE(dev, max_encap_header_size); + if (size > max_encap_size) { mlx5_core_warn(dev, "encap size %zd too big, max supported is %d\n", size, max_encap_size); return -EINVAL; } - in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + size, + in = kzalloc(MLX5_ST_SZ_BYTES(alloc_packet_reformat_context_in) + size, GFP_KERNEL); if (!in) return -ENOMEM; - encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, encap_header); - header = MLX5_ADDR_OF(encap_header_in, encap_header_in, encap_header); - inlen = header - (void *)in + size; + packet_reformat_context_in = MLX5_ADDR_OF(alloc_packet_reformat_context_in, + in, packet_reformat_context); + reformat = MLX5_ADDR_OF(packet_reformat_context_in, + packet_reformat_context_in, + reformat_data); + inlen = reformat - (void *)in + size; memset(in, 0, inlen); - MLX5_SET(alloc_encap_header_in, in, opcode, - 
MLX5_CMD_OP_ALLOC_ENCAP_HEADER); - MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size); - MLX5_SET(encap_header_in, encap_header_in, header_type, header_type); - memcpy(header, encap_header, size); + MLX5_SET(alloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(packet_reformat_context_in, packet_reformat_context_in, + reformat_data_size, size); + MLX5_SET(packet_reformat_context_in, packet_reformat_context_in, + reformat_type, reformat_type); + memcpy(reformat, reformat_data, size); memset(out, 0, sizeof(out)); err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); - *encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id); + *packet_reformat_id = MLX5_GET(alloc_packet_reformat_context_out, + out, packet_reformat_id); kfree(in); return err; } +EXPORT_SYMBOL(mlx5_packet_reformat_alloc); -void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id) +void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev, + u32 packet_reformat_id) { - u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)]; - u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)]; + u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)]; memset(in, 0, sizeof(in)); - MLX5_SET(dealloc_encap_header_in, in, opcode, - MLX5_CMD_OP_DEALLOC_ENCAP_HEADER); - MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id); + MLX5_SET(dealloc_packet_reformat_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT); + MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id, + packet_reformat_id); mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_packet_reformat_dealloc); int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, u8 namespace, u8 num_actions, @@ -667,9 +685,14 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, table_type = FS_FT_FDB; break; case MLX5_FLOW_NAMESPACE_KERNEL: + case MLX5_FLOW_NAMESPACE_BYPASS: max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions); table_type = FS_FT_NIC_RX; break; + case MLX5_FLOW_NAMESPACE_EGRESS: + max_actions = MLX5_CAP_FLOWTABLE_NIC_TX(dev, max_modify_header_actions); + table_type = FS_FT_NIC_TX; + break; default: return -EOPNOTSUPP; } @@ -702,6 +725,7 @@ int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, kfree(in); return err; } +EXPORT_SYMBOL(mlx5_modify_header_alloc); void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id) { @@ -716,6 +740,7 @@ void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id) mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } +EXPORT_SYMBOL(mlx5_modify_header_dealloc); static const struct mlx5_flow_cmds mlx5_flow_cmds = { .create_flow_table = mlx5_cmd_create_flow_table, @@ -760,8 +785,8 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ case FS_FT_FDB: case FS_FT_SNIFFER_RX: case FS_FT_SNIFFER_TX: - return mlx5_fs_cmd_get_fw_cmds(); case FS_FT_NIC_TX: + return mlx5_fs_cmd_get_fw_cmds(); default: return mlx5_fs_cmd_get_stub_cmds(); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 37d114c668b7..9d73eb955f75 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -40,6 +40,7 @@ #include "diag/fs_tracepoint.h" #include "accel/ipsec.h" #include "fpga/ipsec.h" +#include "eswitch.h" #define INIT_TREE_NODE_ARRAY_SIZE(...) 
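/*
 * Illustrative sketch, not part of the patch: the packet-reformat allocation
 * command carries the reformat data inline, so its input length is the
 * offset of the data within the command buffer plus the data size. The
 * struct below is invented to model that layout; the real command is built
 * with the MLX5_ST_SZ_*/MLX5_ADDR_OF macros over the firmware interface.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct model_reformat_cmd {
	uint32_t opcode;
	uint32_t reformat_type;
	uint32_t reformat_data_size;
	uint8_t  reformat_data[];	/* flexible array, variable length */
};

int main(void)
{
	const uint8_t header[] = { 0xde, 0xad, 0xbe, 0xef };
	size_t inlen = offsetof(struct model_reformat_cmd, reformat_data) +
		       sizeof(header);
	struct model_reformat_cmd *cmd = calloc(1, inlen);

	if (!cmd)
		return 1;
	cmd->reformat_data_size = sizeof(header);
	memcpy(cmd->reformat_data, header, sizeof(header));
	printf("command length: %zu bytes\n", inlen);
	free(cmd);
	return 0;
}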
(sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) @@ -76,6 +77,14 @@ FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), \ FS_CAP(flow_table_properties_nic_receive.flow_table_modify)) +#define FS_CHAINING_CAPS_EGRESS \ + FS_REQUIRED_CAPS( \ + FS_CAP(flow_table_properties_nic_transmit.flow_modify_en), \ + FS_CAP(flow_table_properties_nic_transmit.modify_root), \ + FS_CAP(flow_table_properties_nic_transmit \ + .identified_miss_table_mode), \ + FS_CAP(flow_table_properties_nic_transmit.flow_table_modify)) + #define LEFTOVERS_NUM_LEVELS 1 #define LEFTOVERS_NUM_PRIOS 1 @@ -151,6 +160,17 @@ static struct init_tree_node { } }; +static struct init_tree_node egress_root_fs = { + .type = FS_TYPE_NAMESPACE, + .ar_size = 1, + .children = (struct init_tree_node[]) { + ADD_PRIO(0, MLX5_BY_PASS_NUM_PRIOS, 0, + FS_CHAINING_CAPS_EGRESS, + ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, + BY_PASS_PRIO_NUM_LEVELS))), + } +}; + enum fs_i_lock_class { FS_LOCK_GRANDPARENT, FS_LOCK_PARENT, @@ -694,7 +714,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root, struct fs_node *iter = list_entry(start, struct fs_node, list); struct mlx5_flow_table *ft = NULL; - if (!root) + if (!root || root->type == FS_TYPE_PRIO_CHAINS) return NULL; list_for_each_advance_continue(iter, &root->children, reverse) { @@ -1388,7 +1408,7 @@ static bool check_conflicting_actions(u32 action1, u32 action2) return false; if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_ENCAP | + MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT | MLX5_FLOW_CONTEXT_ACTION_DECAP | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_VLAN_POP | @@ -1408,7 +1428,7 @@ static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act return -EEXIST; } - if (flow_act->has_flow_tag && + if ((flow_act->flags & FLOW_ACT_HAS_TAG) && fte->action.flow_tag != flow_act->flow_tag) { mlx5_core_warn(get_dev(&fte->node), "FTE flow tag %u already exists with different flow tag %u\n", @@ -1455,29 +1475,8 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, return handle; } -struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handle) +static bool counter_is_valid(u32 action) { - struct mlx5_flow_rule *dst; - struct fs_fte *fte; - - fs_get_obj(fte, handle->rule[0]->node.parent); - - fs_for_each_dst(dst, fte) { - if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) - return dst->dest_attr.counter; - } - - return NULL; -} - -static bool counter_is_valid(struct mlx5_fc *counter, u32 action) -{ - if (!(action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) - return !counter; - - if (!counter) - return false; - return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)); } @@ -1487,7 +1486,7 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, struct mlx5_flow_table *ft) { if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)) - return counter_is_valid(dest->counter, action); + return counter_is_valid(action); if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) return true; @@ -1629,6 +1628,8 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, search_again_locked: version = matched_fgs_get_version(match_head); + if (flow_act->flags & FLOW_ACT_NO_APPEND) + goto skip_search; /* Try to find a fg that already contains a matching fte */ list_for_each_entry(iter, match_head, list) { struct fs_fte *fte_tmp; @@ -1645,6 +1646,11 @@ search_again_locked: return rule; } +skip_search: + /* No group 
with matching fte found, or we skipped the search. + * Try to add a new fte to any matching fg. + */ + /* Check the ft version, for case that new flow group * was added while the fgs weren't locked */ @@ -1975,12 +1981,24 @@ void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) fg->id); } +struct mlx5_flow_namespace *mlx5_get_fdb_sub_ns(struct mlx5_core_dev *dev, + int n) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + + if (!steering || !steering->fdb_sub_ns) + return NULL; + + return steering->fdb_sub_ns[n]; +} +EXPORT_SYMBOL(mlx5_get_fdb_sub_ns); + struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type) { struct mlx5_flow_steering *steering = dev->priv.steering; struct mlx5_flow_root_namespace *root_ns; - int prio; + int prio = 0; struct fs_prio *fs_prio; struct mlx5_flow_namespace *ns; @@ -1988,40 +2006,29 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, return NULL; switch (type) { - case MLX5_FLOW_NAMESPACE_BYPASS: - case MLX5_FLOW_NAMESPACE_LAG: - case MLX5_FLOW_NAMESPACE_OFFLOADS: - case MLX5_FLOW_NAMESPACE_ETHTOOL: - case MLX5_FLOW_NAMESPACE_KERNEL: - case MLX5_FLOW_NAMESPACE_LEFTOVERS: - case MLX5_FLOW_NAMESPACE_ANCHOR: - prio = type; - break; case MLX5_FLOW_NAMESPACE_FDB: if (steering->fdb_root_ns) return &steering->fdb_root_ns->ns; - else - return NULL; + return NULL; case MLX5_FLOW_NAMESPACE_SNIFFER_RX: if (steering->sniffer_rx_root_ns) return &steering->sniffer_rx_root_ns->ns; - else - return NULL; + return NULL; case MLX5_FLOW_NAMESPACE_SNIFFER_TX: if (steering->sniffer_tx_root_ns) return &steering->sniffer_tx_root_ns->ns; - else - return NULL; - case MLX5_FLOW_NAMESPACE_EGRESS: - if (steering->egress_root_ns) - return &steering->egress_root_ns->ns; - else - return NULL; - default: return NULL; + default: + break; + } + + if (type == MLX5_FLOW_NAMESPACE_EGRESS) { + root_ns = steering->egress_root_ns; + } else { /* Must be NIC RX */ + root_ns = steering->root_ns; + prio = type; } - root_ns = steering->root_ns; if (!root_ns) return NULL; @@ -2064,8 +2071,10 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d } } -static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, - unsigned int prio, int num_levels) +static struct fs_prio *_fs_create_prio(struct mlx5_flow_namespace *ns, + unsigned int prio, + int num_levels, + enum fs_node_type type) { struct fs_prio *fs_prio; @@ -2073,7 +2082,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, if (!fs_prio) return ERR_PTR(-ENOMEM); - fs_prio->node.type = FS_TYPE_PRIO; + fs_prio->node.type = type; tree_init_node(&fs_prio->node, NULL, del_sw_prio); tree_add_node(&fs_prio->node, &ns->node); fs_prio->num_levels = num_levels; @@ -2083,6 +2092,19 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, return fs_prio; } +static struct fs_prio *fs_create_prio_chained(struct mlx5_flow_namespace *ns, + unsigned int prio, + int num_levels) +{ + return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO_CHAINS); +} + +static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, + unsigned int prio, int num_levels) +{ + return _fs_create_prio(ns, prio, num_levels, FS_TYPE_PRIO); +} + static struct mlx5_flow_namespace *fs_init_namespace(struct mlx5_flow_namespace *ns) { @@ -2387,6 +2409,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev) cleanup_egress_acls_root_ns(dev); cleanup_ingress_acls_root_ns(dev); cleanup_root_ns(steering->fdb_root_ns); + 
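/*
 * Illustrative sketch, not part of the patch: what FLOW_ACT_NO_APPEND means
 * for rule insertion. Without the flag, a rule whose match duplicates an
 * existing entry has its destination appended to that entry; with the flag
 * the existing-entry search is skipped and a separate entry is always
 * created. The structures and names are invented for the example.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_ENTRIES 8

struct model_fte {
	unsigned int match;	/* stand-in for the real match value */
	unsigned int ndests;
};

static struct model_fte table[MAX_ENTRIES];
static unsigned int nentries;

static void model_add_rule(unsigned int match, bool no_append)
{
	if (!no_append) {
		for (unsigned int i = 0; i < nentries; i++) {
			if (table[i].match == match) {
				table[i].ndests++;	/* append to existing FTE */
				return;
			}
		}
	}
	table[nentries].match = match;
	table[nentries].ndests = 1;
	nentries++;
}

int main(void)
{
	model_add_rule(42, false);
	model_add_rule(42, false);	/* merged into the first entry */
	assert(nentries == 1 && table[0].ndests == 2);

	model_add_rule(42, true);	/* NO_APPEND: a new standalone entry */
	assert(nentries == 2);
	printf("entries: %u\n", nentries);
	return 0;
}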
steering->fdb_root_ns = NULL; + kfree(steering->fdb_sub_ns); + steering->fdb_sub_ns = NULL; cleanup_root_ns(steering->sniffer_rx_root_ns); cleanup_root_ns(steering->sniffer_tx_root_ns); cleanup_root_ns(steering->egress_root_ns); @@ -2432,27 +2457,64 @@ static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering) static int init_fdb_root_ns(struct mlx5_flow_steering *steering) { - struct fs_prio *prio; + struct mlx5_flow_namespace *ns; + struct fs_prio *maj_prio; + struct fs_prio *min_prio; + int levels; + int chain; + int prio; + int err; steering->fdb_root_ns = create_root_ns(steering, FS_FT_FDB); if (!steering->fdb_root_ns) return -ENOMEM; - prio = fs_create_prio(&steering->fdb_root_ns->ns, 0, 2); - if (IS_ERR(prio)) + steering->fdb_sub_ns = kzalloc(sizeof(steering->fdb_sub_ns) * + (FDB_MAX_CHAIN + 1), GFP_KERNEL); + if (!steering->fdb_sub_ns) + return -ENOMEM; + + levels = 2 * FDB_MAX_PRIO * (FDB_MAX_CHAIN + 1); + maj_prio = fs_create_prio_chained(&steering->fdb_root_ns->ns, 0, + levels); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); goto out_err; + } + + for (chain = 0; chain <= FDB_MAX_CHAIN; chain++) { + ns = fs_create_namespace(maj_prio); + if (IS_ERR(ns)) { + err = PTR_ERR(ns); + goto out_err; + } + + for (prio = 0; prio < FDB_MAX_PRIO * (chain + 1); prio++) { + min_prio = fs_create_prio(ns, prio, 2); + if (IS_ERR(min_prio)) { + err = PTR_ERR(min_prio); + goto out_err; + } + } + + steering->fdb_sub_ns[chain] = ns; + } - prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1); - if (IS_ERR(prio)) + maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, 1, 1); + if (IS_ERR(maj_prio)) { + err = PTR_ERR(maj_prio); goto out_err; + } set_prio_attrs(steering->fdb_root_ns); return 0; out_err: cleanup_root_ns(steering->fdb_root_ns); + kfree(steering->fdb_sub_ns); + steering->fdb_sub_ns = NULL; steering->fdb_root_ns = NULL; - return PTR_ERR(prio); + return err; } static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport) @@ -2537,16 +2599,23 @@ cleanup_root_ns: static int init_egress_root_ns(struct mlx5_flow_steering *steering) { - struct fs_prio *prio; + int err; steering->egress_root_ns = create_root_ns(steering, FS_FT_NIC_TX); if (!steering->egress_root_ns) return -ENOMEM; - /* create 1 prio*/ - prio = fs_create_prio(&steering->egress_root_ns->ns, 0, 1); - return PTR_ERR_OR_ZERO(prio); + err = init_root_tree(steering, &egress_root_fs, + &steering->egress_root_ns->ns.node); + if (err) + goto cleanup; + set_prio_attrs(steering->egress_root_ns); + return 0; +cleanup: + cleanup_root_ns(steering->egress_root_ns); + steering->egress_root_ns = NULL; + return err; } int mlx5_init_fs(struct mlx5_core_dev *dev) @@ -2614,7 +2683,7 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) goto err; } - if (MLX5_IPSEC_DEV(dev)) { + if (MLX5_IPSEC_DEV(dev) || MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) { err = init_egress_root_ns(steering); if (err) goto err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 32070e5d993d..b51ad217da32 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -36,10 +36,23 @@ #include <linux/refcount.h> #include <linux/mlx5/fs.h> #include <linux/rhashtable.h> +#include <linux/llist.h> + +/* FS_TYPE_PRIO_CHAINS is a PRIO that will have namespaces only, + * and those are in parallel to one another when going over them to connect + * a new flow table. 
Meaning the last flow table in a TYPE_PRIO prio in one + * parallel namespace will not automatically connect to the first flow table + * found in any prio in any next namespace, but skip the entire containing + * TYPE_PRIO_CHAINS prio. + * + * This is used to implement tc chains, each chain of prios is a different + * namespace inside a containing TYPE_PRIO_CHAINS prio. + */ enum fs_node_type { FS_TYPE_NAMESPACE, FS_TYPE_PRIO, + FS_TYPE_PRIO_CHAINS, FS_TYPE_FLOW_TABLE, FS_TYPE_FLOW_GROUP, FS_TYPE_FLOW_ENTRY, @@ -72,6 +85,7 @@ struct mlx5_flow_steering { struct kmem_cache *ftes_cache; struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_root_namespace *fdb_root_ns; + struct mlx5_flow_namespace **fdb_sub_ns; struct mlx5_flow_root_namespace **esw_egress_root_ns; struct mlx5_flow_root_namespace **esw_ingress_root_ns; struct mlx5_flow_root_namespace *sniffer_tx_root_ns; @@ -138,8 +152,9 @@ struct mlx5_fc_cache { }; struct mlx5_fc { - struct rb_node node; struct list_head list; + struct llist_node addlist; + struct llist_node dellist; /* last{packets,bytes} members are used when calculating the delta since * last reading @@ -148,7 +163,6 @@ struct mlx5_fc { u64 lastbytes; u32 id; - bool deleted; bool aging; struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 58af6be13dfa..32accd6b041b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -52,11 +52,13 @@ * access to counter list: * - create (user context) * - mlx5_fc_create() only adds to an addlist to be used by - * mlx5_fc_stats_query_work(). addlist is protected by a spinlock. + * mlx5_fc_stats_query_work(). addlist is a lockless single linked list + * that doesn't require any additional synchronization when adding single + * node. * - spawn thread to do the actual destroy * * - destroy (user context) - * - mark a counter as deleted + * - add a counter to lockless dellist * - spawn thread to do the actual del * * - dump (user context) @@ -71,36 +73,55 @@ * elapsed, the thread will actually query the hardware. */ -static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter) +static struct list_head *mlx5_fc_counters_lookup_next(struct mlx5_core_dev *dev, + u32 id) { - struct rb_node **new = &root->rb_node; - struct rb_node *parent = NULL; - - while (*new) { - struct mlx5_fc *this = rb_entry(*new, struct mlx5_fc, node); - int result = counter->id - this->id; - - parent = *new; - if (result < 0) - new = &((*new)->rb_left); - else - new = &((*new)->rb_right); - } + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + unsigned long next_id = (unsigned long)id + 1; + struct mlx5_fc *counter; + + rcu_read_lock(); + /* skip counters that are in idr, but not yet in counters list */ + while ((counter = idr_get_next_ul(&fc_stats->counters_idr, + &next_id)) != NULL && + list_empty(&counter->list)) + next_id++; + rcu_read_unlock(); + + return counter ? &counter->list : &fc_stats->counters; +} + +static void mlx5_fc_stats_insert(struct mlx5_core_dev *dev, + struct mlx5_fc *counter) +{ + struct list_head *next = mlx5_fc_counters_lookup_next(dev, counter->id); + + list_add_tail(&counter->list, next); +} + +static void mlx5_fc_stats_remove(struct mlx5_core_dev *dev, + struct mlx5_fc *counter) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; - /* Add new node and rebalance tree. 
*/ - rb_link_node(&counter->node, parent, new); - rb_insert_color(&counter->node, root); + list_del(&counter->list); + + spin_lock(&fc_stats->counters_idr_lock); + WARN_ON(!idr_remove(&fc_stats->counters_idr, counter->id)); + spin_unlock(&fc_stats->counters_idr_lock); } -/* The function returns the last node that was queried so the caller +/* The function returns the last counter that was queried so the caller * function can continue calling it till all counters are queried. */ -static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev, +static struct mlx5_fc *mlx5_fc_stats_query(struct mlx5_core_dev *dev, struct mlx5_fc *first, u32 last_id) { + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc *counter = NULL; struct mlx5_cmd_fc_bulk *b; - struct rb_node *node = NULL; + bool more = false; u32 afirst_id; int num; int err; @@ -130,14 +151,16 @@ static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev, goto out; } - for (node = &first->node; node; node = rb_next(node)) { - struct mlx5_fc *counter = rb_entry(node, struct mlx5_fc, node); + counter = first; + list_for_each_entry_from(counter, &fc_stats->counters, list) { struct mlx5_fc_cache *c = &counter->cache; u64 packets; u64 bytes; - if (counter->id > last_id) + if (counter->id > last_id) { + more = true; break; + } mlx5_cmd_fc_bulk_get(dev, b, counter->id, &packets, &bytes); @@ -153,7 +176,14 @@ static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev, out: mlx5_cmd_fc_bulk_free(b); - return node; + return more ? counter : NULL; +} + +static void mlx5_free_fc(struct mlx5_core_dev *dev, + struct mlx5_fc *counter) +{ + mlx5_cmd_fc_free(dev, counter->id); + kfree(counter); } static void mlx5_fc_stats_work(struct work_struct *work) @@ -161,52 +191,36 @@ static void mlx5_fc_stats_work(struct work_struct *work) struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, priv.fc_stats.work.work); struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + /* Take dellist first to ensure that counters cannot be deleted before + * they are inserted. 
+ */ + struct llist_node *dellist = llist_del_all(&fc_stats->dellist); + struct llist_node *addlist = llist_del_all(&fc_stats->addlist); + struct mlx5_fc *counter = NULL, *last = NULL, *tmp; unsigned long now = jiffies; - struct mlx5_fc *counter = NULL; - struct mlx5_fc *last = NULL; - struct rb_node *node; - LIST_HEAD(tmplist); - spin_lock(&fc_stats->addlist_lock); - - list_splice_tail_init(&fc_stats->addlist, &tmplist); - - if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters)) + if (addlist || !list_empty(&fc_stats->counters)) queue_delayed_work(fc_stats->wq, &fc_stats->work, fc_stats->sampling_interval); - spin_unlock(&fc_stats->addlist_lock); - - list_for_each_entry(counter, &tmplist, list) - mlx5_fc_stats_insert(&fc_stats->counters, counter); + llist_for_each_entry(counter, addlist, addlist) + mlx5_fc_stats_insert(dev, counter); - node = rb_first(&fc_stats->counters); - while (node) { - counter = rb_entry(node, struct mlx5_fc, node); + llist_for_each_entry_safe(counter, tmp, dellist, dellist) { + mlx5_fc_stats_remove(dev, counter); - node = rb_next(node); - - if (counter->deleted) { - rb_erase(&counter->node, &fc_stats->counters); - - mlx5_cmd_fc_free(dev, counter->id); - - kfree(counter); - continue; - } - - last = counter; + mlx5_free_fc(dev, counter); } - if (time_before(now, fc_stats->next_query) || !last) + if (time_before(now, fc_stats->next_query) || + list_empty(&fc_stats->counters)) return; + last = list_last_entry(&fc_stats->counters, struct mlx5_fc, list); - node = rb_first(&fc_stats->counters); - while (node) { - counter = rb_entry(node, struct mlx5_fc, node); - - node = mlx5_fc_stats_query(dev, counter, last->id); - } + counter = list_first_entry(&fc_stats->counters, struct mlx5_fc, + list); + while (counter) + counter = mlx5_fc_stats_query(dev, counter, last->id); fc_stats->next_query = now + fc_stats->sampling_interval; } @@ -220,24 +234,38 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) counter = kzalloc(sizeof(*counter), GFP_KERNEL); if (!counter) return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&counter->list); err = mlx5_cmd_fc_alloc(dev, &counter->id); if (err) goto err_out; if (aging) { + u32 id = counter->id; + counter->cache.lastuse = jiffies; counter->aging = true; - spin_lock(&fc_stats->addlist_lock); - list_add(&counter->list, &fc_stats->addlist); - spin_unlock(&fc_stats->addlist_lock); + idr_preload(GFP_KERNEL); + spin_lock(&fc_stats->counters_idr_lock); + + err = idr_alloc_u32(&fc_stats->counters_idr, counter, &id, id, + GFP_NOWAIT); + + spin_unlock(&fc_stats->counters_idr_lock); + idr_preload_end(); + if (err) + goto err_out_alloc; + + llist_add(&counter->addlist, &fc_stats->addlist); mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); } return counter; +err_out_alloc: + mlx5_cmd_fc_free(dev, counter->id); err_out: kfree(counter); @@ -245,6 +273,12 @@ err_out: } EXPORT_SYMBOL(mlx5_fc_create); +u32 mlx5_fc_id(struct mlx5_fc *counter) +{ + return counter->id; +} +EXPORT_SYMBOL(mlx5_fc_id); + void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter) { struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; @@ -253,13 +287,12 @@ void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter) return; if (counter->aging) { - counter->deleted = true; + llist_add(&counter->dellist, &fc_stats->dellist); mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); return; } - mlx5_cmd_fc_free(dev, counter->id); - kfree(counter); + mlx5_free_fc(dev, counter); } EXPORT_SYMBOL(mlx5_fc_destroy); @@ -267,9 +300,11 @@ int 
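/*
 * Illustrative sketch, not part of the patch: a single-threaded model of the
 * reworked flow-counter bookkeeping. Create and destroy only push counters
 * onto add/del staging lists (lockless llists in the driver); the periodic
 * worker splices them and keeps the main list sorted by counter id so the
 * bulk-query loop can walk consecutive ids. The idr lookup that finds the
 * insertion point is modeled here by a plain sorted insert.
 */
#include <stdio.h>
#include <stdlib.h>

struct model_fc {
	unsigned int id;
	struct model_fc *next;
};

static struct model_fc *counters;	/* main list, kept sorted by id */

static void model_stats_insert(struct model_fc *fc)
{
	struct model_fc **pos = &counters;

	while (*pos && (*pos)->id < fc->id)
		pos = &(*pos)->next;	/* first counter with a larger id */
	fc->next = *pos;
	*pos = fc;
}

int main(void)
{
	unsigned int ids[] = { 7, 3, 11, 5 };

	for (unsigned int i = 0; i < 4; i++) {
		struct model_fc *fc = calloc(1, sizeof(*fc));

		if (!fc)
			return 1;
		fc->id = ids[i];
		model_stats_insert(fc);	/* worker-side insertion */
	}
	for (struct model_fc *fc = counters; fc; fc = fc->next)
		printf("%u ", fc->id);	/* prints: 3 5 7 11 */
	printf("\n");

	while (counters) {
		struct model_fc *next = counters->next;

		free(counters);
		counters = next;
	}
	return 0;
}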
mlx5_init_fc_stats(struct mlx5_core_dev *dev) { struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; - fc_stats->counters = RB_ROOT; - INIT_LIST_HEAD(&fc_stats->addlist); - spin_lock_init(&fc_stats->addlist_lock); + spin_lock_init(&fc_stats->counters_idr_lock); + idr_init(&fc_stats->counters_idr); + INIT_LIST_HEAD(&fc_stats->counters); + init_llist_head(&fc_stats->addlist); + init_llist_head(&fc_stats->dellist); fc_stats->wq = create_singlethread_workqueue("mlx5_fc"); if (!fc_stats->wq) @@ -284,34 +319,22 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev) void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) { struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct llist_node *tmplist; struct mlx5_fc *counter; struct mlx5_fc *tmp; - struct rb_node *node; cancel_delayed_work_sync(&dev->priv.fc_stats.work); destroy_workqueue(dev->priv.fc_stats.wq); dev->priv.fc_stats.wq = NULL; - list_for_each_entry_safe(counter, tmp, &fc_stats->addlist, list) { - list_del(&counter->list); - - mlx5_cmd_fc_free(dev, counter->id); + idr_destroy(&fc_stats->counters_idr); - kfree(counter); - } - - node = rb_first(&fc_stats->counters); - while (node) { - counter = rb_entry(node, struct mlx5_fc, node); - - node = rb_next(node); + tmplist = llist_del_all(&fc_stats->addlist); + llist_for_each_entry_safe(counter, tmp, tmplist, addlist) + mlx5_free_fc(dev, counter); - rb_erase(&counter->node, &fc_stats->counters); - - mlx5_cmd_fc_free(dev, counter->id); - - kfree(counter); - } + list_for_each_entry_safe(counter, tmp, &fc_stats->counters, list) + mlx5_free_fc(dev, counter); } int mlx5_fc_query(struct mlx5_core_dev *dev, struct mlx5_fc *counter, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 41ad24f0de2c..1ab6f7e3bec6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -250,7 +250,7 @@ int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev) if (ret) return ret; - force_state = MLX5_GET(teardown_hca_out, out, force_state); + force_state = MLX5_GET(teardown_hca_out, out, state); if (force_state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) { mlx5_core_warn(dev, "teardown with force mode failed, doing normal teardown\n"); return -EIO; @@ -259,6 +259,54 @@ int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev) return 0; } +#define MLX5_FAST_TEARDOWN_WAIT_MS 3000 +int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev) +{ + unsigned long end, delay_ms = MLX5_FAST_TEARDOWN_WAIT_MS; + u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0}; + int state; + int ret; + + if (!MLX5_CAP_GEN(dev, fast_teardown)) { + mlx5_core_dbg(dev, "fast teardown is not supported in the firmware\n"); + return -EOPNOTSUPP; + } + + MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA); + MLX5_SET(teardown_hca_in, in, profile, + MLX5_TEARDOWN_HCA_IN_PROFILE_PREPARE_FAST_TEARDOWN); + + ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (ret) + return ret; + + state = MLX5_GET(teardown_hca_out, out, state); + if (state == MLX5_TEARDOWN_HCA_OUT_FORCE_STATE_FAIL) { + mlx5_core_warn(dev, "teardown with fast mode failed\n"); + return -EIO; + } + + mlx5_set_nic_state(dev, MLX5_NIC_IFC_DISABLED); + + /* Loop until device state turns to disable */ + end = jiffies + msecs_to_jiffies(delay_ms); + do { + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) + break; + + cond_resched(); + } while (!time_after(jiffies, end)); + + if (mlx5_get_nic_state(dev) != 
MLX5_NIC_IFC_DISABLED) { + dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n", + mlx5_get_nic_state(dev), delay_ms); + return -EIO; + } + + return 0; +} + enum mlxsw_reg_mcc_instruction { MLX5_REG_MCC_INSTRUCTION_LOCK_UPDATE_HANDLE = 0x01, MLX5_REG_MCC_INSTRUCTION_RELEASE_UPDATE_HANDLE = 0x02, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 9f39aeca863f..43118de8ee99 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -59,22 +59,25 @@ enum { }; enum { - MLX5_NIC_IFC_FULL = 0, - MLX5_NIC_IFC_DISABLED = 1, - MLX5_NIC_IFC_NO_DRAM_NIC = 2, - MLX5_NIC_IFC_INVALID = 3 -}; - -enum { MLX5_DROP_NEW_HEALTH_WORK, MLX5_DROP_NEW_RECOVERY_WORK, }; -static u8 get_nic_state(struct mlx5_core_dev *dev) +u8 mlx5_get_nic_state(struct mlx5_core_dev *dev) { return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 3; } +void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state) +{ + u32 cur_cmdq_addr_l_sz; + + cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz); + iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) | + state << MLX5_NIC_IFC_OFFSET, + &dev->iseg->cmdq_addr_l_sz); +} + static void trigger_cmd_completions(struct mlx5_core_dev *dev) { unsigned long flags; @@ -103,7 +106,7 @@ static int in_fatal(struct mlx5_core_dev *dev) struct mlx5_core_health *health = &dev->priv.health; struct health_buffer __iomem *h = health->health; - if (get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) + if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) return 1; if (ioread32be(&h->fw_ver) == 0xffffffff) @@ -133,7 +136,7 @@ unlock: static void mlx5_handle_bad_state(struct mlx5_core_dev *dev) { - u8 nic_interface = get_nic_state(dev); + u8 nic_interface = mlx5_get_nic_state(dev); switch (nic_interface) { case MLX5_NIC_IFC_FULL: @@ -168,7 +171,7 @@ static void health_recover(struct work_struct *work) priv = container_of(health, struct mlx5_priv, health); dev = container_of(priv, struct mlx5_core_dev, priv); - nic_state = get_nic_state(dev); + nic_state = mlx5_get_nic_state(dev); if (nic_state == MLX5_NIC_IFC_INVALID) { dev_err(&dev->pdev->dev, "health recovery flow aborted since the nic state is invalid\n"); return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index e3797a44e074..b59953daf8b4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -45,6 +45,7 @@ static int mlx5i_change_mtu(struct net_device *netdev, int new_mtu); static const struct net_device_ops mlx5i_netdev_ops = { .ndo_open = mlx5i_open, .ndo_stop = mlx5i_close, + .ndo_get_stats64 = mlx5i_get_stats, .ndo_init = mlx5i_dev_init, .ndo_uninit = mlx5i_dev_cleanup, .ndo_change_mtu = mlx5i_change_mtu, @@ -70,26 +71,25 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, } /* Called directly after IPoIB netdevice was created to initialize SW structs */ -void mlx5i_init(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv) +int mlx5i_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); u16 max_mtu; + int err; - /* priv init */ - priv->mdev = mdev; - priv->netdev = netdev; - priv->profile = profile; - priv->ppriv = ppriv; - mutex_init(&priv->state_lock); + err = mlx5e_netdev_init(netdev, priv, 
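
mlx5_cmd_fast_teardown_hca() above waits for the NIC interface to report disabled by polling against a jiffies deadline rather than sleeping on an event. A hypothetical wrapper showing just that bounded-poll shape (the state accessors and MLX5_NIC_IFC_DISABLED come from the patch itself):

	static int mlx5_wait_nic_disabled(struct mlx5_core_dev *dev,
					  unsigned long timeout_ms)
	{
		unsigned long end = jiffies + msecs_to_jiffies(timeout_ms);

		do {
			if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
				return 0;
			cond_resched();		/* poll politely, no lock held */
		} while (time_before(jiffies, end));

		return -ETIMEDOUT;
	}
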
mdev, profile, ppriv); + if (err) + return err; mlx5_query_port_max_mtu(mdev, &max_mtu, 1); netdev->mtu = max_mtu; mlx5e_build_nic_params(mdev, &priv->channels.params, - profile->max_nch(mdev), netdev->mtu); + mlx5e_get_netdev_max_channels(netdev), + netdev->mtu); mlx5i_build_nic_params(mdev, &priv->channels.params); mlx5e_timestamp_init(priv); @@ -106,12 +106,56 @@ void mlx5i_init(struct mlx5_core_dev *mdev, netdev->netdev_ops = &mlx5i_netdev_ops; netdev->ethtool_ops = &mlx5i_ethtool_ops; + + return 0; } /* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */ -static void mlx5i_cleanup(struct mlx5e_priv *priv) +void mlx5i_cleanup(struct mlx5e_priv *priv) +{ + mlx5e_netdev_cleanup(priv->netdev, priv); +} + +static void mlx5i_grp_sw_update_stats(struct mlx5e_priv *priv) +{ + int max_nch = mlx5e_get_netdev_max_channels(priv->netdev); + struct mlx5e_sw_stats s = { 0 }; + int i, j; + + for (i = 0; i < max_nch; i++) { + struct mlx5e_channel_stats *channel_stats; + struct mlx5e_rq_stats *rq_stats; + + channel_stats = &priv->channel_stats[i]; + rq_stats = &channel_stats->rq; + + s.rx_packets += rq_stats->packets; + s.rx_bytes += rq_stats->bytes; + + for (j = 0; j < priv->max_opened_tc; j++) { + struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j]; + + s.tx_packets += sq_stats->packets; + s.tx_bytes += sq_stats->bytes; + s.tx_queue_dropped += sq_stats->dropped; + } + } + + memcpy(&priv->stats.sw, &s, sizeof(s)); +} + +void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { - /* Do nothing .. */ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5e_sw_stats *sstats = &priv->stats.sw; + + mlx5i_grp_sw_update_stats(priv); + + stats->rx_packets = sstats->rx_packets; + stats->rx_bytes = sstats->rx_bytes; + stats->tx_packets = sstats->tx_packets; + stats->tx_bytes = sstats->tx_bytes; + stats->tx_dropped = sstats->tx_queue_dropped; } int mlx5i_init_underlay_qp(struct mlx5e_priv *priv) @@ -306,17 +350,26 @@ static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) static int mlx5i_init_rx(struct mlx5e_priv *priv) { + struct mlx5_core_dev *mdev = priv->mdev; int err; + mlx5e_create_q_counters(priv); + + err = mlx5e_open_drop_rq(priv, &priv->drop_rq); + if (err) { + mlx5_core_err(mdev, "open drop rq failed, %d\n", err); + goto err_destroy_q_counters; + } + err = mlx5e_create_indirect_rqt(priv); if (err) - return err; + goto err_close_drop_rq; err = mlx5e_create_direct_rqts(priv); if (err) goto err_destroy_indirect_rqts; - err = mlx5e_create_indirect_tirs(priv); + err = mlx5e_create_indirect_tirs(priv, true); if (err) goto err_destroy_direct_rqts; @@ -333,11 +386,15 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) err_destroy_direct_tirs: mlx5e_destroy_direct_tirs(priv); err_destroy_indirect_tirs: - mlx5e_destroy_indirect_tirs(priv); + mlx5e_destroy_indirect_tirs(priv, true); err_destroy_direct_rqts: mlx5e_destroy_direct_rqts(priv); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); +err_close_drop_rq: + mlx5e_close_drop_rq(&priv->drop_rq); +err_destroy_q_counters: + mlx5e_destroy_q_counters(priv); return err; } @@ -345,9 +402,11 @@ static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) { mlx5i_destroy_flow_steering(priv); mlx5e_destroy_direct_tirs(priv); - mlx5e_destroy_indirect_tirs(priv); + mlx5e_destroy_indirect_tirs(priv, true); mlx5e_destroy_direct_rqts(priv); mlx5e_destroy_rqt(priv, &priv->indir_rqt); + mlx5e_close_drop_rq(&priv->drop_rq); + mlx5e_destroy_q_counters(priv); } static const struct mlx5e_profile 
mlx5i_nic_profile = { @@ -360,7 +419,6 @@ static const struct mlx5e_profile mlx5i_nic_profile = { .enable = NULL, /* mlx5i_enable */ .disable = NULL, /* mlx5i_disable */ .update_stats = NULL, /* mlx5i_update_stats */ - .max_nch = mlx5e_get_max_num_channels, .update_carrier = NULL, /* no HW update in IB link */ .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ @@ -592,7 +650,6 @@ static void mlx5_rdma_netdev_free(struct net_device *netdev) mlx5e_detach_netdev(priv); profile->cleanup(priv); - destroy_workqueue(priv->wq); if (!ipriv->sub_interface) { mlx5i_pkey_qpn_ht_cleanup(netdev); @@ -600,58 +657,37 @@ static void mlx5_rdma_netdev_free(struct net_device *netdev) } } -struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, - struct ib_device *ibdev, - const char *name, - void (*setup)(struct net_device *)) +static bool mlx5_is_sub_interface(struct mlx5_core_dev *mdev) +{ + return mdev->mlx5e_res.pdn != 0; +} + +static const struct mlx5e_profile *mlx5_get_profile(struct mlx5_core_dev *mdev) +{ + if (mlx5_is_sub_interface(mdev)) + return mlx5i_pkey_get_profile(); + return &mlx5i_nic_profile; +} + +static int mlx5_rdma_setup_rn(struct ib_device *ibdev, u8 port_num, + struct net_device *netdev, void *param) { - const struct mlx5e_profile *profile; - struct net_device *netdev; + struct mlx5_core_dev *mdev = (struct mlx5_core_dev *)param; + const struct mlx5e_profile *prof = mlx5_get_profile(mdev); struct mlx5i_priv *ipriv; struct mlx5e_priv *epriv; struct rdma_netdev *rn; - bool sub_interface; - int nch; int err; - if (mlx5i_check_required_hca_cap(mdev)) { - mlx5_core_warn(mdev, "Accelerated mode is not supported\n"); - return ERR_PTR(-EOPNOTSUPP); - } - - /* TODO: Need to find a better way to check if child device*/ - sub_interface = (mdev->mlx5e_res.pdn != 0); - - if (sub_interface) - profile = mlx5i_pkey_get_profile(); - else - profile = &mlx5i_nic_profile; - - nch = profile->max_nch(mdev); - - netdev = alloc_netdev_mqs(sizeof(struct mlx5i_priv) + sizeof(struct mlx5e_priv), - name, NET_NAME_UNKNOWN, - setup, - nch * MLX5E_MAX_NUM_TC, - nch); - if (!netdev) { - mlx5_core_warn(mdev, "alloc_netdev_mqs failed\n"); - return NULL; - } - ipriv = netdev_priv(netdev); epriv = mlx5i_epriv(netdev); - epriv->wq = create_singlethread_workqueue("mlx5i"); - if (!epriv->wq) - goto err_free_netdev; - - ipriv->sub_interface = sub_interface; + ipriv->sub_interface = mlx5_is_sub_interface(mdev); if (!ipriv->sub_interface) { err = mlx5i_pkey_qpn_ht_init(netdev); if (err) { mlx5_core_warn(mdev, "allocate qpn_to_netdev ht failed\n"); - goto destroy_wq; + return err; } /* This should only be called once per mdev */ @@ -660,7 +696,7 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, goto destroy_ht; } - profile->init(mdev, netdev, profile, ipriv); + prof->init(mdev, netdev, prof, ipriv); mlx5e_attach_netdev(epriv); netif_carrier_off(netdev); @@ -676,15 +712,35 @@ struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, netdev->priv_destructor = mlx5_rdma_netdev_free; netdev->needs_free_netdev = 1; - return netdev; + return 0; destroy_ht: mlx5i_pkey_qpn_ht_cleanup(netdev); -destroy_wq: - destroy_workqueue(epriv->wq); -err_free_netdev: - free_netdev(netdev); + return err; +} - return NULL; +int mlx5_rdma_rn_get_params(struct mlx5_core_dev *mdev, + struct ib_device *device, + struct rdma_netdev_alloc_params *params) +{ + int nch; + int rc; + + rc = mlx5i_check_required_hca_cap(mdev); + if (rc) + return rc; + + 
nch = mlx5e_get_max_num_channels(mdev); + + *params = (struct rdma_netdev_alloc_params){ + .sizeof_priv = sizeof(struct mlx5i_priv) + + sizeof(struct mlx5e_priv), + .txqs = nch * MLX5E_MAX_NUM_TC, + .rxqs = nch, + .param = mdev, + .initialize_rdma_netdev = mlx5_rdma_setup_rn, + }; + + return 0; } -EXPORT_SYMBOL(mlx5_rdma_netdev_alloc); +EXPORT_SYMBOL(mlx5_rdma_rn_get_params); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h index 0982c579ec74..9165ca567047 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h @@ -84,10 +84,11 @@ void mlx5i_dev_cleanup(struct net_device *dev); int mlx5i_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd); /* Parent profile functions */ -void mlx5i_init(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv); +int mlx5i_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv); +void mlx5i_cleanup(struct mlx5e_priv *priv); /* Get child interface nic profile */ const struct mlx5e_profile *mlx5i_pkey_get_profile(void); @@ -120,6 +121,7 @@ static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq, netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_av *av, u32 dqpn, u32 dqkey); void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); +void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); #endif /* CONFIG_MLX5_CORE_IPOIB */ #endif /* __MLX5E_IPOB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c index 54a188f41f90..b491b8f5fd6b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c @@ -146,6 +146,7 @@ static const struct net_device_ops mlx5i_pkey_netdev_ops = { .ndo_open = mlx5i_pkey_open, .ndo_stop = mlx5i_pkey_close, .ndo_init = mlx5i_pkey_dev_init, + .ndo_get_stats64 = mlx5i_get_stats, .ndo_uninit = mlx5i_pkey_dev_cleanup, .ndo_change_mtu = mlx5i_pkey_change_mtu, .ndo_do_ioctl = mlx5i_pkey_ioctl, @@ -274,14 +275,17 @@ static int mlx5i_pkey_change_mtu(struct net_device *netdev, int new_mtu) } /* Called directly after IPoIB netdevice was created to initialize SW structs */ -static void mlx5i_pkey_init(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv) +static int mlx5i_pkey_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); + int err; - mlx5i_init(mdev, netdev, profile, ppriv); + err = mlx5i_init(mdev, netdev, profile, ppriv); + if (err) + return err; /* Override parent ndo */ netdev->netdev_ops = &mlx5i_pkey_netdev_ops; @@ -291,12 +295,14 @@ static void mlx5i_pkey_init(struct mlx5_core_dev *mdev, /* Use dummy rqs */ priv->channels.params.log_rq_mtu_frames = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; + + return 0; } /* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */ static void mlx5i_pkey_cleanup(struct mlx5e_priv *priv) { - /* Do nothing .. 
*/ + mlx5i_cleanup(priv); } static int mlx5i_pkey_init_tx(struct mlx5e_priv *priv) @@ -345,7 +351,6 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = { .enable = NULL, .disable = NULL, .update_stats = NULL, - .max_nch = mlx5e_get_max_num_channels, .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ .max_tc = MLX5I_MAX_NUM_TC, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 3f767cde4c1d..0d90b1b4a3d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -111,10 +111,10 @@ static void mlx5_pps_out(struct work_struct *work) for (i = 0; i < clock->ptp_info.n_pins; i++) { u64 tstart; - write_lock_irqsave(&clock->lock, flags); + write_seqlock_irqsave(&clock->lock, flags); tstart = clock->pps_info.start[i]; clock->pps_info.start[i] = 0; - write_unlock_irqrestore(&clock->lock, flags); + write_sequnlock_irqrestore(&clock->lock, flags); if (!tstart) continue; @@ -132,10 +132,10 @@ static void mlx5_timestamp_overflow(struct work_struct *work) overflow_work); unsigned long flags; - write_lock_irqsave(&clock->lock, flags); + write_seqlock_irqsave(&clock->lock, flags); timecounter_read(&clock->tc); mlx5_update_clock_info_page(clock->mdev); - write_unlock_irqrestore(&clock->lock, flags); + write_sequnlock_irqrestore(&clock->lock, flags); schedule_delayed_work(&clock->overflow_work, clock->overflow_period); } @@ -147,10 +147,10 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp, u64 ns = timespec64_to_ns(ts); unsigned long flags; - write_lock_irqsave(&clock->lock, flags); + write_seqlock_irqsave(&clock->lock, flags); timecounter_init(&clock->tc, &clock->cycles, ns); mlx5_update_clock_info_page(clock->mdev); - write_unlock_irqrestore(&clock->lock, flags); + write_sequnlock_irqrestore(&clock->lock, flags); return 0; } @@ -162,9 +162,9 @@ static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) u64 ns; unsigned long flags; - write_lock_irqsave(&clock->lock, flags); + write_seqlock_irqsave(&clock->lock, flags); ns = timecounter_read(&clock->tc); - write_unlock_irqrestore(&clock->lock, flags); + write_sequnlock_irqrestore(&clock->lock, flags); *ts = ns_to_timespec64(ns); @@ -177,10 +177,10 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) ptp_info); unsigned long flags; - write_lock_irqsave(&clock->lock, flags); + write_seqlock_irqsave(&clock->lock, flags); timecounter_adjtime(&clock->tc, delta); mlx5_update_clock_info_page(clock->mdev); - write_unlock_irqrestore(&clock->lock, flags); + write_sequnlock_irqrestore(&clock->lock, flags); return 0; } @@ -203,12 +203,12 @@ static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta) adj *= delta; diff = div_u64(adj, 1000000000ULL); - write_lock_irqsave(&clock->lock, flags); + write_seqlock_irqsave(&clock->lock, flags); timecounter_read(&clock->tc); clock->cycles.mult = neg_adj ? 
clock->nominal_c_mult - diff : clock->nominal_c_mult + diff; mlx5_update_clock_info_page(clock->mdev); - write_unlock_irqrestore(&clock->lock, flags); + write_sequnlock_irqrestore(&clock->lock, flags); return 0; } @@ -307,12 +307,12 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, ts.tv_nsec = rq->perout.start.nsec; ns = timespec64_to_ns(&ts); cycles_now = mlx5_read_internal_timer(mdev); - write_lock_irqsave(&clock->lock, flags); + write_seqlock_irqsave(&clock->lock, flags); nsec_now = timecounter_cyc2time(&clock->tc, cycles_now); nsec_delta = ns - nsec_now; cycles_delta = div64_u64(nsec_delta << clock->cycles.shift, clock->cycles.mult); - write_unlock_irqrestore(&clock->lock, flags); + write_sequnlock_irqrestore(&clock->lock, flags); time_stamp = cycles_now + cycles_delta; field_select = MLX5_MTPPS_FS_PIN_MODE | MLX5_MTPPS_FS_PATTERN | @@ -471,14 +471,14 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev, ts.tv_sec += 1; ts.tv_nsec = 0; ns = timespec64_to_ns(&ts); - write_lock_irqsave(&clock->lock, flags); + write_seqlock_irqsave(&clock->lock, flags); nsec_now = timecounter_cyc2time(&clock->tc, cycles_now); nsec_delta = ns - nsec_now; cycles_delta = div64_u64(nsec_delta << clock->cycles.shift, clock->cycles.mult); clock->pps_info.start[pin] = cycles_now + cycles_delta; schedule_work(&clock->pps_info.out_work); - write_unlock_irqrestore(&clock->lock, flags); + write_sequnlock_irqrestore(&clock->lock, flags); break; default: mlx5_core_err(mdev, " Unhandled event\n"); @@ -498,7 +498,7 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n"); return; } - rwlock_init(&clock->lock); + seqlock_init(&clock->lock); clock->cycles.read = read_internal_timer; clock->cycles.shift = MLX5_CYCLES_SHIFT; clock->cycles.mult = clocksource_khz2mult(dev_freq, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h index 02e2e4575e4f..263cb6e2aeee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -46,11 +46,13 @@ static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, u64 timestamp) { + unsigned int seq; u64 nsec; - read_lock(&clock->lock); - nsec = timecounter_cyc2time(&clock->tc, timestamp); - read_unlock(&clock->lock); + do { + seq = read_seqbegin(&clock->lock); + nsec = timecounter_cyc2time(&clock->tc, timestamp); + } while (read_seqretry(&clock->lock, seq)); return ns_to_ktime(nsec); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index b5e9f664fc66..28132c7dc05f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1594,12 +1594,17 @@ static const struct pci_error_handlers mlx5_err_handler = { static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) { - int ret; + bool fast_teardown = false, force_teardown = false; + int ret = 1; + + fast_teardown = MLX5_CAP_GEN(dev, fast_teardown); + force_teardown = MLX5_CAP_GEN(dev, force_teardown); + + mlx5_core_dbg(dev, "force teardown firmware support=%d\n", force_teardown); + mlx5_core_dbg(dev, "fast teardown firmware support=%d\n", fast_teardown); - if (!MLX5_CAP_GEN(dev, force_teardown)) { - mlx5_core_dbg(dev, "force teardown is not supported in the firmware\n"); + if (!fast_teardown && !force_teardown) return -EOPNOTSUPP; - } 
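
The clock.c/clock.h hunks convert the timecounter lock from an rwlock to a seqlock: writers still serialise with each other and disable interrupts, but readers take no lock at all and simply retry if a write raced with them, which suits the per-packet timestamp path. A self-contained sketch of the pattern (clk_lock and clk_base are placeholders):

	#include <linux/seqlock.h>

	static seqlock_t clk_lock;	/* seqlock_init(&clk_lock) during setup */
	static u64 clk_base;		/* written rarely, read in the fast path */

	static void clk_write(u64 v)
	{
		unsigned long flags;

		write_seqlock_irqsave(&clk_lock, flags);
		clk_base = v;
		write_sequnlock_irqrestore(&clk_lock, flags);
	}

	static u64 clk_read(void)
	{
		unsigned int seq;
		u64 v;

		do {
			seq = read_seqbegin(&clk_lock);
			v = clk_base;
		} while (read_seqretry(&clk_lock, seq));	/* retry if a writer interleaved */

		return v;
	}
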
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { mlx5_core_dbg(dev, "Device in internal error state, giving up\n"); @@ -1612,13 +1617,19 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) mlx5_drain_health_wq(dev); mlx5_stop_health_poll(dev, false); + ret = mlx5_cmd_fast_teardown_hca(dev); + if (!ret) + goto succeed; + ret = mlx5_cmd_force_teardown_hca(dev); - if (ret) { - mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret); - mlx5_start_health_poll(dev); - return ret; - } + if (!ret) + goto succeed; + + mlx5_core_dbg(dev, "Firmware couldn't do fast unload error: %d\n", ret); + mlx5_start_health_poll(dev); + return ret; +succeed: mlx5_enter_error_state(dev, true); /* Some platforms requiring freeing the IRQ's in the shutdown diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index b4134fa0bba3..0594d0961cb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -39,6 +39,7 @@ #include <linux/if_link.h> #include <linux/firmware.h> #include <linux/mlx5/cq.h> +#include <linux/mlx5/fs.h> #define DRIVER_NAME "mlx5_core" #define DRIVER_VERSION "5.0-0" @@ -95,6 +96,8 @@ int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id); int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev); int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev); +int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev); + void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_core_page_fault(struct mlx5_core_dev *dev, @@ -169,17 +172,6 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev); void mlx5_dev_list_lock(void); void mlx5_dev_list_unlock(void); int mlx5_dev_list_trylock(void); -int mlx5_encap_alloc(struct mlx5_core_dev *dev, - int header_type, - size_t size, - void *encap_header, - u32 *encap_id); -void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id); - -int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, - u8 namespace, u8 num_actions, - void *modify_actions, u32 *modify_header_id); -void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id); bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv); @@ -214,4 +206,14 @@ int mlx5_lag_allow(struct mlx5_core_dev *dev); int mlx5_lag_forbid(struct mlx5_core_dev *dev); void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol); + +enum { + MLX5_NIC_IFC_FULL = 0, + MLX5_NIC_IFC_DISABLED = 1, + MLX5_NIC_IFC_NO_DRAM_NIC = 2, + MLX5_NIC_IFC_INVALID = 3 +}; + +u8 mlx5_get_nic_state(struct mlx5_core_dev *dev); +void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state); #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 4ca07bfb6b14..91b8139a388d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -211,6 +211,7 @@ int mlx5_core_create_dct(struct mlx5_core_dev *dev, } qp->qpn = MLX5_GET(create_dct_out, out, dctn); + qp->uid = MLX5_GET(create_dct_in, in, uid); err = create_resource_common(dev, qp, MLX5_RES_DCT); if (err) goto err_cmd; @@ -219,6 +220,7 @@ int mlx5_core_create_dct(struct mlx5_core_dev *dev, err_cmd: MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT); MLX5_SET(destroy_dct_in, din, dctn, qp->qpn); + MLX5_SET(destroy_dct_in, 
din, uid, qp->uid); mlx5_cmd_exec(dev, (void *)&in, sizeof(din), (void *)&out, sizeof(dout)); return err; @@ -240,6 +242,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, if (err) return err; + qp->uid = MLX5_GET(create_qp_in, in, uid); qp->qpn = MLX5_GET(create_qp_out, out, qpn); mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn); @@ -261,6 +264,7 @@ err_cmd: memset(dout, 0, sizeof(dout)); MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP); MLX5_SET(destroy_qp_in, din, qpn, qp->qpn); + MLX5_SET(destroy_qp_in, din, uid, qp->uid); mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); return err; } @@ -275,6 +279,7 @@ static int mlx5_core_drain_dct(struct mlx5_core_dev *dev, MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT); MLX5_SET(drain_dct_in, in, dctn, qp->qpn); + MLX5_SET(drain_dct_in, in, uid, qp->uid); return mlx5_cmd_exec(dev, (void *)&in, sizeof(in), (void *)&out, sizeof(out)); } @@ -301,6 +306,7 @@ destroy: destroy_resource_common(dev, &dct->mqp); MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT); MLX5_SET(destroy_dct_in, in, dctn, qp->qpn); + MLX5_SET(destroy_dct_in, in, uid, qp->uid); err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in), (void *)&out, sizeof(out)); return err; @@ -320,6 +326,7 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); + MLX5_SET(destroy_qp_in, in, uid, qp->uid); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) return err; @@ -373,7 +380,7 @@ static void mbox_free(struct mbox_info *mbox) static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, u32 opt_param_mask, void *qpc, - struct mbox_info *mbox) + struct mbox_info *mbox, u16 uid) { mbox->out = NULL; mbox->in = NULL; @@ -381,26 +388,32 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, #define MBOX_ALLOC(mbox, typ) \ mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out)) -#define MOD_QP_IN_SET(typ, in, _opcode, _qpn) \ - MLX5_SET(typ##_in, in, opcode, _opcode); \ - MLX5_SET(typ##_in, in, qpn, _qpn) - -#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc) \ - MOD_QP_IN_SET(typ, in, _opcode, _qpn); \ - MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \ - memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, MLX5_ST_SZ_BYTES(qpc)) +#define MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid) \ + do { \ + MLX5_SET(typ##_in, in, opcode, _opcode); \ + MLX5_SET(typ##_in, in, qpn, _qpn); \ + MLX5_SET(typ##_in, in, uid, _uid); \ + } while (0) + +#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc, _uid) \ + do { \ + MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid); \ + MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \ + memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, \ + MLX5_ST_SZ_BYTES(qpc)); \ + } while (0) switch (opcode) { /* 2RST & 2ERR */ case MLX5_CMD_OP_2RST_QP: if (MBOX_ALLOC(mbox, qp_2rst)) return -ENOMEM; - MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn); + MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn, uid); break; case MLX5_CMD_OP_2ERR_QP: if (MBOX_ALLOC(mbox, qp_2err)) return -ENOMEM; - MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn); + MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn, uid); break; /* MODIFY with QPC */ @@ -408,37 +421,37 @@ static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, if (MBOX_ALLOC(mbox, rst2init_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; 
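
The MOD_QP_IN_SET()/MOD_QP_IN_SET_QPC() macros above gained do { ... } while (0) bodies because they now expand to more than one statement. A generic illustration of why multi-statement macros need that wrapper (set_a() and set_b() are placeholders):

	#define SET_BOTH(a, b)			\
		do {				\
			set_a(a);		\
			set_b(b);		\
		} while (0)

	/* Safe as the sole statement of an if/else; without the wrapper only
	 * set_a() would be guarded by the condition and the 'else' would no
	 * longer pair with the 'if'.
	 */
	if (cond)
		SET_BOTH(x, y);
	else
		do_other();
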
case MLX5_CMD_OP_INIT2RTR_QP: if (MBOX_ALLOC(mbox, init2rtr_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_RTR2RTS_QP: if (MBOX_ALLOC(mbox, rtr2rts_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_RTS2RTS_QP: if (MBOX_ALLOC(mbox, rts2rts_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_SQERR2RTS_QP: if (MBOX_ALLOC(mbox, sqerr2rts_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; case MLX5_CMD_OP_INIT2INIT_QP: if (MBOX_ALLOC(mbox, init2init_qp)) return -ENOMEM; MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn, - opt_param_mask, qpc); + opt_param_mask, qpc, uid); break; default: mlx5_core_err(dev, "Unknown transition for modify QP: OP(0x%x) QPN(0x%x)\n", @@ -456,7 +469,7 @@ int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode, int err; err = modify_qp_mbox_alloc(dev, opcode, qp->qpn, - opt_param_mask, qpc, &mbox); + opt_param_mask, qpc, &mbox, qp->uid); if (err) return err; @@ -531,6 +544,17 @@ int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn) } EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc); +static void destroy_rq_tracked(struct mlx5_core_dev *dev, u32 rqn, u16 uid) +{ + u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {}; + + MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ); + MLX5_SET(destroy_rq_in, in, rqn, rqn); + MLX5_SET(destroy_rq_in, in, uid, uid); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *rq) { @@ -541,6 +565,7 @@ int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, if (err) return err; + rq->uid = MLX5_GET(create_rq_in, in, uid); rq->qpn = rqn; err = create_resource_common(dev, rq, MLX5_RES_RQ); if (err) @@ -549,7 +574,7 @@ int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, return 0; err_destroy_rq: - mlx5_core_destroy_rq(dev, rq->qpn); + destroy_rq_tracked(dev, rq->qpn, rq->uid); return err; } @@ -559,10 +584,21 @@ void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *rq) { destroy_resource_common(dev, rq); - mlx5_core_destroy_rq(dev, rq->qpn); + destroy_rq_tracked(dev, rq->qpn, rq->uid); } EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked); +static void destroy_sq_tracked(struct mlx5_core_dev *dev, u32 sqn, u16 uid) +{ + u32 in[MLX5_ST_SZ_DW(destroy_sq_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {}; + + MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ); + MLX5_SET(destroy_sq_in, in, sqn, sqn); + MLX5_SET(destroy_sq_in, in, uid, uid); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, struct mlx5_core_qp *sq) { @@ -573,6 +609,7 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, if (err) return err; + sq->uid = MLX5_GET(create_sq_in, in, uid); sq->qpn = sqn; err = create_resource_common(dev, sq, MLX5_RES_SQ); if (err) @@ -581,7 +618,7 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, return 0; err_destroy_sq: - mlx5_core_destroy_sq(dev, sq->qpn); + 
destroy_sq_tracked(dev, sq->qpn, sq->uid); return err; } @@ -591,7 +628,7 @@ void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *sq) { destroy_resource_common(dev, sq); - mlx5_core_destroy_sq(dev, sq->qpn); + destroy_sq_tracked(dev, sq->qpn, sq->uid); } EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index 23cc337a96c9..6a6fc9be01e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -73,7 +73,7 @@ static int get_pas_size(struct mlx5_srq_attr *in) u32 rq_sz = 1 << (log_srq_size + 4 + log_rq_stride); u32 page_size = 1 << log_page_size; u32 rq_sz_po = rq_sz + (page_offset * po_quanta); - u32 rq_num_pas = (rq_sz_po + page_size - 1) / page_size; + u32 rq_num_pas = DIV_ROUND_UP(rq_sz_po, page_size); return rq_num_pas * sizeof(u64); } @@ -166,6 +166,7 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, if (!create_in) return -ENOMEM; + MLX5_SET(create_srq_in, create_in, uid, in->uid); srqc = MLX5_ADDR_OF(create_srq_in, create_in, srq_context_entry); pas = MLX5_ADDR_OF(create_srq_in, create_in, pas); @@ -178,8 +179,10 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, err = mlx5_cmd_exec(dev, create_in, inlen, create_out, sizeof(create_out)); kvfree(create_in); - if (!err) + if (!err) { srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); + srq->uid = in->uid; + } return err; } @@ -193,6 +196,7 @@ static int destroy_srq_cmd(struct mlx5_core_dev *dev, MLX5_SET(destroy_srq_in, srq_in, opcode, MLX5_CMD_OP_DESTROY_SRQ); MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); + MLX5_SET(destroy_srq_in, srq_in, uid, srq->uid); return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), srq_out, sizeof(srq_out)); @@ -208,6 +212,7 @@ static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(arm_rq_in, srq_in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_SRQ); MLX5_SET(arm_rq_in, srq_in, srq_number, srq->srqn); MLX5_SET(arm_rq_in, srq_in, lwm, lwm); + MLX5_SET(arm_rq_in, srq_in, uid, srq->uid); return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), srq_out, sizeof(srq_out)); @@ -260,6 +265,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, if (!create_in) return -ENOMEM; + MLX5_SET(create_xrc_srq_in, create_in, uid, in->uid); xrc_srqc = MLX5_ADDR_OF(create_xrc_srq_in, create_in, xrc_srq_context_entry); pas = MLX5_ADDR_OF(create_xrc_srq_in, create_in, pas); @@ -277,6 +283,7 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, goto out; srq->srqn = MLX5_GET(create_xrc_srq_out, create_out, xrc_srqn); + srq->uid = in->uid; out: kvfree(create_in); return err; @@ -291,6 +298,7 @@ static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); + MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, uid, srq->uid); return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, sizeof(xrcsrq_out)); @@ -306,6 +314,7 @@ static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); + MLX5_SET(arm_xrc_srq_in, xrcsrq_in, uid, srq->uid); return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, sizeof(xrcsrq_out)); @@ -365,10 +374,13 @@ static 
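
The uid changes running through the QP, RQ and SQ hunks all follow one pattern: the user-context id is captured from the create command's input mailbox, stored on the object, and echoed back in every later command on that object so firmware can associate the operation with the context that created it. A condensed sketch with placeholder command and field names (MLX5_GET/MLX5_SET/mlx5_cmd_exec are the existing helpers):

	/* at create time: remember which user context issued the command */
	obj->uid = MLX5_GET(create_obj_in, in, uid);

	/* on any later command against the object: echo the same uid */
	MLX5_SET(destroy_obj_in, din, opcode, MLX5_CMD_OP_DESTROY_OBJ);
	MLX5_SET(destroy_obj_in, din, obj_id, obj->id);
	MLX5_SET(destroy_obj_in, din, uid, obj->uid);
	mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
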
int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, wq = MLX5_ADDR_OF(rmpc, rmpc, wq); MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); + MLX5_SET(create_rmp_in, create_in, uid, in->uid); set_wq(wq, in); memcpy(MLX5_ADDR_OF(rmpc, rmpc, wq.pas), in->pas, pas_size); err = mlx5_core_create_rmp(dev, create_in, inlen, &srq->srqn); + if (!err) + srq->uid = in->uid; kvfree(create_in); return err; @@ -377,7 +389,13 @@ static int create_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, static int destroy_rmp_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) { - return mlx5_core_destroy_rmp(dev, srq->srqn); + u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {}; + u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {}; + + MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); + MLX5_SET(destroy_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(destroy_rmp_in, in, uid, srq->uid); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } static int arm_rmp_cmd(struct mlx5_core_dev *dev, @@ -400,6 +418,7 @@ static int arm_rmp_cmd(struct mlx5_core_dev *dev, MLX5_SET(modify_rmp_in, in, rmp_state, MLX5_RMPC_STATE_RDY); MLX5_SET(modify_rmp_in, in, rmpn, srq->srqn); + MLX5_SET(modify_rmp_in, in, uid, srq->uid); MLX5_SET(wq, wq, lwm, lwm); MLX5_SET(rmp_bitmask, bitmask, lwm, 1); MLX5_SET(rmpc, rmpc, state, MLX5_RMPC_STATE_RDY); @@ -469,11 +488,14 @@ static int create_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(xrqc, xrqc, user_index, in->user_index); MLX5_SET(xrqc, xrqc, cqn, in->cqn); MLX5_SET(create_xrq_in, create_in, opcode, MLX5_CMD_OP_CREATE_XRQ); + MLX5_SET(create_xrq_in, create_in, uid, in->uid); err = mlx5_cmd_exec(dev, create_in, inlen, create_out, sizeof(create_out)); kvfree(create_in); - if (!err) + if (!err) { srq->srqn = MLX5_GET(create_xrq_out, create_out, xrqn); + srq->uid = in->uid; + } return err; } @@ -485,6 +507,7 @@ static int destroy_xrq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) MLX5_SET(destroy_xrq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRQ); MLX5_SET(destroy_xrq_in, in, xrqn, srq->srqn); + MLX5_SET(destroy_xrq_in, in, uid, srq->uid); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -500,6 +523,7 @@ static int arm_xrq_cmd(struct mlx5_core_dev *dev, MLX5_SET(arm_rq_in, in, op_mod, MLX5_ARM_RQ_IN_OP_MOD_XRQ); MLX5_SET(arm_rq_in, in, srq_number, srq->srqn); MLX5_SET(arm_rq_in, in, lwm, lwm); + MLX5_SET(arm_rq_in, in, uid, srq->uid); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index b02af317c125..cfbea66b4879 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1201,3 +1201,12 @@ int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev) return err; } EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport); + +u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) +{ + if (!mdev->sys_image_guid) + mlx5_query_nic_vport_system_image_guid(mdev, &mdev->sys_image_guid); + + return mdev->sys_image_guid; +} +EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index ddca327e8950..2dcbf1ebfd6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -49,54 +49,37 @@ u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq) return (u32)wq->fbc.sz_m1 + 
1; } -static u32 mlx5_wq_cyc_get_byte_size(struct mlx5_wq_cyc *wq) +static u32 wq_get_byte_sz(u8 log_sz, u8 log_stride) { - return mlx5_wq_cyc_get_size(wq) << wq->fbc.log_stride; -} - -static u32 mlx5_wq_qp_get_byte_size(struct mlx5_wq_qp *wq) -{ - return mlx5_wq_cyc_get_byte_size(&wq->rq) + - mlx5_wq_cyc_get_byte_size(&wq->sq); -} - -static u32 mlx5_cqwq_get_byte_size(struct mlx5_cqwq *wq) -{ - return mlx5_cqwq_get_size(wq) << wq->fbc.log_stride; -} - -static u32 mlx5_wq_ll_get_byte_size(struct mlx5_wq_ll *wq) -{ - return mlx5_wq_ll_get_size(wq) << wq->fbc.log_stride; + return ((u32)1 << log_sz) << log_stride; } int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_cyc *wq, struct mlx5_wq_ctrl *wq_ctrl) { + u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride); + u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz); struct mlx5_frag_buf_ctrl *fbc = &wq->fbc; int err; - mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride), - MLX5_GET(wq, wqc, log_wq_sz), - fbc); - wq->sz = wq->fbc.sz_m1 + 1; - err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); return err; } - err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq), + wq->db = wq_ctrl->db.db; + + err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride), &wq_ctrl->buf, param->buf_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err); goto err_db_free; } - fbc->frag_buf = wq_ctrl->buf; - wq->db = wq_ctrl->db.db; + mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc); + wq->sz = mlx5_wq_cyc_get_size(wq); wq_ctrl->mdev = mdev; @@ -108,46 +91,19 @@ err_db_free: return err; } -static void mlx5_qp_set_frag_buf(struct mlx5_frag_buf *buf, - struct mlx5_wq_qp *qp) -{ - struct mlx5_frag_buf_ctrl *sq_fbc; - struct mlx5_frag_buf *rqb, *sqb; - - rqb = &qp->rq.fbc.frag_buf; - *rqb = *buf; - rqb->size = mlx5_wq_cyc_get_byte_size(&qp->rq); - rqb->npages = DIV_ROUND_UP(rqb->size, PAGE_SIZE); - - sq_fbc = &qp->sq.fbc; - sqb = &sq_fbc->frag_buf; - *sqb = *buf; - sqb->size = mlx5_wq_cyc_get_byte_size(&qp->sq); - sqb->npages = DIV_ROUND_UP(sqb->size, PAGE_SIZE); - sqb->frags += rqb->npages; /* first part is for the rq */ - if (sq_fbc->strides_offset) - sqb->frags--; -} - int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *qpc, struct mlx5_wq_qp *wq, struct mlx5_wq_ctrl *wq_ctrl) { - u16 sq_strides_offset; - u32 rq_pg_remainder; - int err; + u8 log_rq_stride = MLX5_GET(qpc, qpc, log_rq_stride) + 4; + u8 log_rq_sz = MLX5_GET(qpc, qpc, log_rq_size); + u8 log_sq_stride = ilog2(MLX5_SEND_WQE_BB); + u8 log_sq_sz = MLX5_GET(qpc, qpc, log_sq_size); - mlx5_fill_fbc(MLX5_GET(qpc, qpc, log_rq_stride) + 4, - MLX5_GET(qpc, qpc, log_rq_size), - &wq->rq.fbc); + u32 rq_byte_size; + int err; - rq_pg_remainder = mlx5_wq_cyc_get_byte_size(&wq->rq) % PAGE_SIZE; - sq_strides_offset = rq_pg_remainder / MLX5_SEND_WQE_BB; - mlx5_fill_fbc_offset(ilog2(MLX5_SEND_WQE_BB), - MLX5_GET(qpc, qpc, log_sq_size), - sq_strides_offset, - &wq->sq.fbc); err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { @@ -155,14 +111,32 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, return err; } - err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_qp_get_byte_size(wq), + err = mlx5_frag_buf_alloc_node(mdev, + wq_get_byte_sz(log_rq_sz, log_rq_stride) + + wq_get_byte_sz(log_sq_sz, log_sq_stride), &wq_ctrl->buf, 
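
wq_get_byte_sz() above folds the old per-type byte-size helpers into a single formula: buffer bytes = (1 << log_sz) << log_stride. As a worked example, a cyclic WQ with log_wq_sz = 10 (1024 entries) and log_wq_stride = 6 (64-byte strides) needs (1 << 10) << 6 = 65536 bytes, i.e. 16 pages with 4 KiB pages, which is exactly the size handed to mlx5_frag_buf_alloc_node().
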
param->buf_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err); goto err_db_free; } - mlx5_qp_set_frag_buf(&wq_ctrl->buf, wq); + mlx5_init_fbc(wq_ctrl->buf.frags, log_rq_stride, log_rq_sz, &wq->rq.fbc); + + rq_byte_size = wq_get_byte_sz(log_rq_sz, log_rq_stride); + + if (rq_byte_size < PAGE_SIZE) { + /* SQ starts within the same page of the RQ */ + u16 sq_strides_offset = rq_byte_size / MLX5_SEND_WQE_BB; + + mlx5_init_fbc_offset(wq_ctrl->buf.frags, + log_sq_stride, log_sq_sz, sq_strides_offset, + &wq->sq.fbc); + } else { + u16 rq_npages = rq_byte_size >> PAGE_SHIFT; + + mlx5_init_fbc(wq_ctrl->buf.frags + rq_npages, + log_sq_stride, log_sq_sz, &wq->sq.fbc); + } wq->rq.db = &wq_ctrl->db.db[MLX5_RCV_DBR]; wq->sq.db = &wq_ctrl->db.db[MLX5_SND_DBR]; @@ -181,17 +155,19 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *cqc, struct mlx5_cqwq *wq, struct mlx5_wq_ctrl *wq_ctrl) { + u8 log_wq_stride = MLX5_GET(cqc, cqc, cqe_sz) + 6; + u8 log_wq_sz = MLX5_GET(cqc, cqc, log_cq_size); int err; - mlx5_core_init_cq_frag_buf(&wq->fbc, cqc); - err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); return err; } - err = mlx5_frag_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq), + wq->db = wq_ctrl->db.db; + + err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride), &wq_ctrl->buf, param->buf_numa_node); if (err) { @@ -200,8 +176,7 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, goto err_db_free; } - wq->fbc.frag_buf = wq_ctrl->buf; - wq->db = wq_ctrl->db.db; + mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, &wq->fbc); wq_ctrl->mdev = mdev; @@ -217,30 +192,29 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, void *wqc, struct mlx5_wq_ll *wq, struct mlx5_wq_ctrl *wq_ctrl) { + u8 log_wq_stride = MLX5_GET(wq, wqc, log_wq_stride); + u8 log_wq_sz = MLX5_GET(wq, wqc, log_wq_sz); struct mlx5_frag_buf_ctrl *fbc = &wq->fbc; struct mlx5_wqe_srq_next_seg *next_seg; int err; int i; - mlx5_fill_fbc(MLX5_GET(wq, wqc, log_wq_stride), - MLX5_GET(wq, wqc, log_wq_sz), - fbc); - err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); return err; } - err = mlx5_frag_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq), + wq->db = wq_ctrl->db.db; + + err = mlx5_frag_buf_alloc_node(mdev, wq_get_byte_sz(log_wq_sz, log_wq_stride), &wq_ctrl->buf, param->buf_numa_node); if (err) { mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n", err); goto err_db_free; } - wq->fbc.frag_buf = wq_ctrl->buf; - wq->db = wq_ctrl->db.db; + mlx5_init_fbc(wq_ctrl->buf.frags, log_wq_stride, log_wq_sz, fbc); for (i = 0; i < fbc->sz_m1; i++) { next_seg = mlx5_wq_ll_get_wqe(wq, i); diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 68fa44a41485..1f77e97e2d7a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -27,7 +27,8 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_acl_flex_keys.o \ spectrum1_mr_tcam.o spectrum2_mr_tcam.o \ spectrum_mr_tcam.o spectrum_mr.o \ - spectrum_qdisc.o spectrum_span.o + spectrum_qdisc.o spectrum_span.o \ + spectrum_nve.o spectrum_nve_vxlan.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += 
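
The rewritten mlx5_wq_qp_create() places the SQ control structure according to how much of a page the RQ consumes. For example, with 4 KiB pages, log_rq_stride = 6 (64-byte strides) and log_rq_size = 4 give an RQ of 1024 bytes, smaller than a page, so the SQ shares the first fragment and starts at stride offset 1024 / MLX5_SEND_WQE_BB = 16; with log_rq_size = 7 the RQ occupies 8192 bytes, exactly two pages, so the SQ's fragment list simply starts at frags + 2 with no stride offset.
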
spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h index 83f452b7ccbb..bb99f6d41fe0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h +++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h @@ -221,7 +221,7 @@ MLXSW_ITEM32(pci, eqe, event_type, 0x0C, 24, 8); MLXSW_ITEM32(pci, eqe, event_sub_type, 0x0C, 16, 8); /* pci_eqe_cqn - * Completion Queue that triggeret this EQE. + * Completion Queue that triggered this EQE. */ MLXSW_ITEM32(pci, eqe, cqn, 0x0C, 8, 7); diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 6e8b619b769b..32cb6718bb17 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -295,6 +295,7 @@ enum mlxsw_reg_sfd_rec_type { MLXSW_REG_SFD_REC_TYPE_UNICAST = 0x0, MLXSW_REG_SFD_REC_TYPE_UNICAST_LAG = 0x1, MLXSW_REG_SFD_REC_TYPE_MULTICAST = 0x2, + MLXSW_REG_SFD_REC_TYPE_UNICAST_TUNNEL = 0xC, }; /* reg_sfd_rec_type @@ -525,6 +526,61 @@ mlxsw_reg_sfd_mc_pack(char *payload, int rec_index, mlxsw_reg_sfd_mc_mid_set(payload, rec_index, mid); } +/* reg_sfd_uc_tunnel_uip_msb + * When protocol is IPv4, the most significant byte of the underlay IPv4 + * destination IP. + * When protocol is IPv6, reserved. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_tunnel_uip_msb, MLXSW_REG_SFD_BASE_LEN, 24, + 8, MLXSW_REG_SFD_REC_LEN, 0x08, false); + +/* reg_sfd_uc_tunnel_fid + * Filtering ID. + * Access: Index + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_tunnel_fid, MLXSW_REG_SFD_BASE_LEN, 0, 16, + MLXSW_REG_SFD_REC_LEN, 0x08, false); + +enum mlxsw_reg_sfd_uc_tunnel_protocol { + MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV4, + MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV6, +}; + +/* reg_sfd_uc_tunnel_protocol + * IP protocol. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_tunnel_protocol, MLXSW_REG_SFD_BASE_LEN, 27, + 1, MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +/* reg_sfd_uc_tunnel_uip_lsb + * When protocol is IPv4, the least significant bytes of the underlay + * IPv4 destination IP. + * When protocol is IPv6, pointer to the underlay IPv6 destination IP + * which is configured by RIPS. 
+ * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, sfd, uc_tunnel_uip_lsb, MLXSW_REG_SFD_BASE_LEN, 0, + 24, MLXSW_REG_SFD_REC_LEN, 0x0C, false); + +static inline void +mlxsw_reg_sfd_uc_tunnel_pack(char *payload, int rec_index, + enum mlxsw_reg_sfd_rec_policy policy, + const char *mac, u16 fid, + enum mlxsw_reg_sfd_rec_action action, u32 uip, + enum mlxsw_reg_sfd_uc_tunnel_protocol proto) +{ + mlxsw_reg_sfd_rec_pack(payload, rec_index, + MLXSW_REG_SFD_REC_TYPE_UNICAST_TUNNEL, mac, + action); + mlxsw_reg_sfd_rec_policy_set(payload, rec_index, policy); + mlxsw_reg_sfd_uc_tunnel_uip_msb_set(payload, rec_index, uip >> 24); + mlxsw_reg_sfd_uc_tunnel_uip_lsb_set(payload, rec_index, uip); + mlxsw_reg_sfd_uc_tunnel_fid_set(payload, rec_index, fid); + mlxsw_reg_sfd_uc_tunnel_protocol_set(payload, rec_index, proto); +} + /* SFN - Switch FDB Notification Register * ------------------------------------------- * The switch provides notifications on newly learned FDB entries and @@ -1069,6 +1125,8 @@ enum mlxsw_reg_sfdf_flush_type { MLXSW_REG_SFDF_FLUSH_PER_PORT_AND_FID, MLXSW_REG_SFDF_FLUSH_PER_LAG, MLXSW_REG_SFDF_FLUSH_PER_LAG_AND_FID, + MLXSW_REG_SFDF_FLUSH_PER_NVE, + MLXSW_REG_SFDF_FLUSH_PER_NVE_AND_FID, }; /* reg_sfdf_flush_type @@ -1079,6 +1137,10 @@ enum mlxsw_reg_sfdf_flush_type { * 3 - All FID dynamic entries pointing to port are flushed. * 4 - All dynamic entries pointing to LAG are flushed. * 5 - All FID dynamic entries pointing to LAG are flushed. + * 6 - All entries of type "Unicast Tunnel" or "Multicast Tunnel" are + * flushed. + * 7 - All entries of type "Unicast Tunnel" or "Multicast Tunnel" are + * flushed, per FID. * Access: RW */ MLXSW_ITEM32(reg, sfdf, flush_type, 0x04, 28, 4); @@ -1315,12 +1377,19 @@ MLXSW_ITEM32(reg, slcr, type, 0x00, 0, 4); */ MLXSW_ITEM32(reg, slcr, lag_hash, 0x04, 0, 20); -static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash) +/* reg_slcr_seed + * LAG seed value. The seed is the same for all ports. + * Access: RW + */ +MLXSW_ITEM32(reg, slcr, seed, 0x08, 0, 32); + +static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash, u32 seed) { MLXSW_REG_ZERO(slcr, payload); mlxsw_reg_slcr_pp_set(payload, MLXSW_REG_SLCR_PP_GLOBAL); mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_CRC); mlxsw_reg_slcr_lag_hash_set(payload, lag_hash); + mlxsw_reg_slcr_seed_set(payload, seed); } /* SLCOR - Switch LAG Collector Register @@ -8279,6 +8348,508 @@ static inline void mlxsw_reg_mgpc_pack(char *payload, u32 counter_index, mlxsw_reg_mgpc_opcode_set(payload, opcode); } +/* MPRS - Monitoring Parsing State Register + * ---------------------------------------- + * The MPRS register is used for setting up the parsing for hash, + * policy-engine and routing. + */ +#define MLXSW_REG_MPRS_ID 0x9083 +#define MLXSW_REG_MPRS_LEN 0x14 + +MLXSW_REG_DEFINE(mprs, MLXSW_REG_MPRS_ID, MLXSW_REG_MPRS_LEN); + +/* reg_mprs_parsing_depth + * Minimum parsing depth. + * Need to enlarge parsing depth according to L3, MPLS, tunnels, ACL + * rules, traps, hash, etc. Default is 96 bytes. Reserved when SwitchX-2. + * Access: RW + */ +MLXSW_ITEM32(reg, mprs, parsing_depth, 0x00, 0, 16); + +/* reg_mprs_parsing_en + * Parsing enable. + * Bit 0 - Enable parsing of NVE of types VxLAN, VxLAN-GPE, GENEVE and + * NVGRE. Default is enabled. Reserved when SwitchX-2. + * Access: RW + */ +MLXSW_ITEM32(reg, mprs, parsing_en, 0x04, 0, 16); + +/* reg_mprs_vxlan_udp_dport + * VxLAN UDP destination port. + * Used for identifying VxLAN packets and for dport field in + * encapsulation. Default is 4789. 
+ * Access: RW + */ +MLXSW_ITEM32(reg, mprs, vxlan_udp_dport, 0x10, 0, 16); + +static inline void mlxsw_reg_mprs_pack(char *payload, u16 parsing_depth, + u16 vxlan_udp_dport) +{ + MLXSW_REG_ZERO(mprs, payload); + mlxsw_reg_mprs_parsing_depth_set(payload, parsing_depth); + mlxsw_reg_mprs_parsing_en_set(payload, true); + mlxsw_reg_mprs_vxlan_udp_dport_set(payload, vxlan_udp_dport); +} + +/* TNGCR - Tunneling NVE General Configuration Register + * ---------------------------------------------------- + * The TNGCR register is used for setting up the NVE Tunneling configuration. + */ +#define MLXSW_REG_TNGCR_ID 0xA001 +#define MLXSW_REG_TNGCR_LEN 0x44 + +MLXSW_REG_DEFINE(tngcr, MLXSW_REG_TNGCR_ID, MLXSW_REG_TNGCR_LEN); + +enum mlxsw_reg_tngcr_type { + MLXSW_REG_TNGCR_TYPE_VXLAN, + MLXSW_REG_TNGCR_TYPE_VXLAN_GPE, + MLXSW_REG_TNGCR_TYPE_GENEVE, + MLXSW_REG_TNGCR_TYPE_NVGRE, +}; + +/* reg_tngcr_type + * Tunnel type for encapsulation and decapsulation. The types are mutually + * exclusive. + * Note: For Spectrum the NVE parsing must be enabled in MPRS. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, type, 0x00, 0, 4); + +/* reg_tngcr_nve_valid + * The VTEP is valid. Allows adding FDB entries for tunnel encapsulation. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_valid, 0x04, 31, 1); + +/* reg_tngcr_nve_ttl_uc + * The TTL for NVE tunnel encapsulation underlay unicast packets. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_ttl_uc, 0x04, 0, 8); + +/* reg_tngcr_nve_ttl_mc + * The TTL for NVE tunnel encapsulation underlay multicast packets. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_ttl_mc, 0x08, 0, 8); + +enum { + /* Do not copy flow label. Calculate flow label using nve_flh. */ + MLXSW_REG_TNGCR_FL_NO_COPY, + /* Copy flow label from inner packet if packet is IPv6 and + * encapsulation is by IPv6. Otherwise, calculate flow label using + * nve_flh. + */ + MLXSW_REG_TNGCR_FL_COPY, +}; + +/* reg_tngcr_nve_flc + * For NVE tunnel encapsulation: Flow label copy from inner packet. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_flc, 0x0C, 25, 1); + +enum { + /* Flow label is static. In Spectrum this means '0'. Spectrum-2 + * uses {nve_fl_prefix, nve_fl_suffix}. + */ + MLXSW_REG_TNGCR_FL_NO_HASH, + /* 8 LSBs of the flow label are calculated from ECMP hash of the + * inner packet. 12 MSBs are configured by nve_fl_prefix. + */ + MLXSW_REG_TNGCR_FL_HASH, +}; + +/* reg_tngcr_nve_flh + * NVE flow label hash. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_flh, 0x0C, 24, 1); + +/* reg_tngcr_nve_fl_prefix + * NVE flow label prefix. Constant 12 MSBs of the flow label. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_fl_prefix, 0x0C, 8, 12); + +/* reg_tngcr_nve_fl_suffix + * NVE flow label suffix. Constant 8 LSBs of the flow label. + * Reserved when nve_flh=1 and for Spectrum. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_fl_suffix, 0x0C, 0, 8); + +enum { + /* Source UDP port is fixed (default '0') */ + MLXSW_REG_TNGCR_UDP_SPORT_NO_HASH, + /* Source UDP port is calculated based on hash */ + MLXSW_REG_TNGCR_UDP_SPORT_HASH, +}; + +/* reg_tngcr_nve_udp_sport_type + * NVE UDP source port type. + * Spectrum uses LAG hash (SLCRv2). Spectrum-2 uses ECMP hash (RECRv2). + * When the source UDP port is calculated based on hash, then the 8 LSBs + * are calculated from hash the 8 MSBs are configured by + * nve_udp_sport_prefix. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_udp_sport_type, 0x10, 24, 1); + +/* reg_tngcr_nve_udp_sport_prefix + * NVE UDP source port prefix. 
Constant 8 MSBs of the UDP source port. + * Reserved when NVE type is NVGRE. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_udp_sport_prefix, 0x10, 8, 8); + +/* reg_tngcr_nve_group_size_mc + * The amount of sequential linked lists of MC entries. The first linked + * list is configured by SFD.underlay_mc_ptr. + * Valid values: 1, 2, 4, 8, 16, 32, 64 + * The linked list are configured by TNUMT. + * The hash is set by LAG hash. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_group_size_mc, 0x18, 0, 8); + +/* reg_tngcr_nve_group_size_flood + * The amount of sequential linked lists of flooding entries. The first + * linked list is configured by SFMR.nve_tunnel_flood_ptr + * Valid values: 1, 2, 4, 8, 16, 32, 64 + * The linked list are configured by TNUMT. + * The hash is set by LAG hash. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, nve_group_size_flood, 0x1C, 0, 8); + +/* reg_tngcr_learn_enable + * During decapsulation, whether to learn from NVE port. + * Reserved when Spectrum-2. See TNPC. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, learn_enable, 0x20, 31, 1); + +/* reg_tngcr_underlay_virtual_router + * Underlay virtual router. + * Reserved when Spectrum-2. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, underlay_virtual_router, 0x20, 0, 16); + +/* reg_tngcr_underlay_rif + * Underlay ingress router interface. RIF type should be loopback generic. + * Reserved when Spectrum. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, underlay_rif, 0x24, 0, 16); + +/* reg_tngcr_usipv4 + * Underlay source IPv4 address of the NVE. + * Access: RW + */ +MLXSW_ITEM32(reg, tngcr, usipv4, 0x28, 0, 32); + +/* reg_tngcr_usipv6 + * Underlay source IPv6 address of the NVE. For Spectrum, must not be + * modified under traffic of NVE tunneling encapsulation. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, tngcr, usipv6, 0x30, 16); + +static inline void mlxsw_reg_tngcr_pack(char *payload, + enum mlxsw_reg_tngcr_type type, + bool valid, u8 ttl) +{ + MLXSW_REG_ZERO(tngcr, payload); + mlxsw_reg_tngcr_type_set(payload, type); + mlxsw_reg_tngcr_nve_valid_set(payload, valid); + mlxsw_reg_tngcr_nve_ttl_uc_set(payload, ttl); + mlxsw_reg_tngcr_nve_ttl_mc_set(payload, ttl); + mlxsw_reg_tngcr_nve_flc_set(payload, MLXSW_REG_TNGCR_FL_NO_COPY); + mlxsw_reg_tngcr_nve_flh_set(payload, 0); + mlxsw_reg_tngcr_nve_udp_sport_type_set(payload, + MLXSW_REG_TNGCR_UDP_SPORT_HASH); + mlxsw_reg_tngcr_nve_udp_sport_prefix_set(payload, 0); + mlxsw_reg_tngcr_nve_group_size_mc_set(payload, 1); + mlxsw_reg_tngcr_nve_group_size_flood_set(payload, 1); +} + +/* TNUMT - Tunneling NVE Underlay Multicast Table Register + * ------------------------------------------------------- + * The TNUMT register is for building the underlay MC table. It is used + * for MC, flooding and BC traffic into the NVE tunnel. + */ +#define MLXSW_REG_TNUMT_ID 0xA003 +#define MLXSW_REG_TNUMT_LEN 0x20 + +MLXSW_REG_DEFINE(tnumt, MLXSW_REG_TNUMT_ID, MLXSW_REG_TNUMT_LEN); + +enum mlxsw_reg_tnumt_record_type { + MLXSW_REG_TNUMT_RECORD_TYPE_IPV4, + MLXSW_REG_TNUMT_RECORD_TYPE_IPV6, + MLXSW_REG_TNUMT_RECORD_TYPE_LABEL, +}; + +/* reg_tnumt_record_type + * Record type. + * Access: RW + */ +MLXSW_ITEM32(reg, tnumt, record_type, 0x00, 28, 4); + +enum mlxsw_reg_tnumt_tunnel_port { + MLXSW_REG_TNUMT_TUNNEL_PORT_NVE, + MLXSW_REG_TNUMT_TUNNEL_PORT_VPLS, + MLXSW_REG_TNUMT_TUNNEL_FLEX_TUNNEL0, + MLXSW_REG_TNUMT_TUNNEL_FLEX_TUNNEL1, +}; + +/* reg_tnumt_tunnel_port + * Tunnel port. 
+ * Access: RW + */ +MLXSW_ITEM32(reg, tnumt, tunnel_port, 0x00, 24, 4); + +/* reg_tnumt_underlay_mc_ptr + * Index to the underlay multicast table. + * For Spectrum the index is to the KVD linear. + * Access: Index + */ +MLXSW_ITEM32(reg, tnumt, underlay_mc_ptr, 0x00, 0, 24); + +/* reg_tnumt_vnext + * The next_underlay_mc_ptr is valid. + * Access: RW + */ +MLXSW_ITEM32(reg, tnumt, vnext, 0x04, 31, 1); + +/* reg_tnumt_next_underlay_mc_ptr + * The next index to the underlay multicast table. + * Access: RW + */ +MLXSW_ITEM32(reg, tnumt, next_underlay_mc_ptr, 0x04, 0, 24); + +/* reg_tnumt_record_size + * Number of IP addresses in the record. + * Range is 1..cap_max_nve_mc_entries_ipv{4,6} + * Access: RW + */ +MLXSW_ITEM32(reg, tnumt, record_size, 0x08, 0, 3); + +/* reg_tnumt_udip + * The underlay IPv4 addresses. udip[i] is reserved if i >= size + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, tnumt, udip, 0x0C, 0, 32, 0x04, 0x00, false); + +/* reg_tnumt_udip_ptr + * The pointer to the underlay IPv6 addresses. udip_ptr[i] is reserved if + * i >= size. The IPv6 addresses are configured by RIPS. + * Access: RW + */ +MLXSW_ITEM32_INDEXED(reg, tnumt, udip_ptr, 0x0C, 0, 24, 0x04, 0x00, false); + +static inline void mlxsw_reg_tnumt_pack(char *payload, + enum mlxsw_reg_tnumt_record_type type, + enum mlxsw_reg_tnumt_tunnel_port tport, + u32 underlay_mc_ptr, bool vnext, + u32 next_underlay_mc_ptr, + u8 record_size) +{ + MLXSW_REG_ZERO(tnumt, payload); + mlxsw_reg_tnumt_record_type_set(payload, type); + mlxsw_reg_tnumt_tunnel_port_set(payload, tport); + mlxsw_reg_tnumt_underlay_mc_ptr_set(payload, underlay_mc_ptr); + mlxsw_reg_tnumt_vnext_set(payload, vnext); + mlxsw_reg_tnumt_next_underlay_mc_ptr_set(payload, next_underlay_mc_ptr); + mlxsw_reg_tnumt_record_size_set(payload, record_size); +} + +/* TNQCR - Tunneling NVE QoS Configuration Register + * ------------------------------------------------ + * The TNQCR register configures how QoS is set in encapsulation into the + * underlay network. + */ +#define MLXSW_REG_TNQCR_ID 0xA010 +#define MLXSW_REG_TNQCR_LEN 0x0C + +MLXSW_REG_DEFINE(tnqcr, MLXSW_REG_TNQCR_ID, MLXSW_REG_TNQCR_LEN); + +/* reg_tnqcr_enc_set_dscp + * For encapsulation: How to set DSCP field: + * 0 - Copy the DSCP from the overlay (inner) IP header to the underlay + * (outer) IP header. If there is no IP header, use TNQDR.dscp + * 1 - Set the DSCP field as TNQDR.dscp + * Access: RW + */ +MLXSW_ITEM32(reg, tnqcr, enc_set_dscp, 0x04, 28, 1); + +static inline void mlxsw_reg_tnqcr_pack(char *payload) +{ + MLXSW_REG_ZERO(tnqcr, payload); + mlxsw_reg_tnqcr_enc_set_dscp_set(payload, 0); +} + +/* TNQDR - Tunneling NVE QoS Default Register + * ------------------------------------------ + * The TNQDR register configures the default QoS settings for NVE + * encapsulation. + */ +#define MLXSW_REG_TNQDR_ID 0xA011 +#define MLXSW_REG_TNQDR_LEN 0x08 + +MLXSW_REG_DEFINE(tnqdr, MLXSW_REG_TNQDR_ID, MLXSW_REG_TNQDR_LEN); + +/* reg_tnqdr_local_port + * Local port number (receive port). CPU port is supported. + * Access: Index + */ +MLXSW_ITEM32(reg, tnqdr, local_port, 0x00, 16, 8); + +/* reg_tnqdr_dscp + * For encapsulation, the default DSCP. 
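mlxsw_reg_tnumt_pack() above fills the fixed part of one underlay MC record; the destination IPs are then written through the indexed udip (IPv4) or udip_ptr (IPv6) setters generated by MLXSW_ITEM32_INDEXED. A hedged sketch of a single IPv4 record with two destinations and no continuation record (the helper name, kvdl_index and the addresses are placeholders, and the indexed setter signature is assumed from the item macro convention):

static int example_tnumt_write(struct mlxsw_sp *mlxsw_sp, u32 kvdl_index,
			       __be32 addr1, __be32 addr2)
{
	char tnumt_pl[MLXSW_REG_TNUMT_LEN];

	/* Illustrative only: record_size = 2, vnext = false (no chaining) */
	mlxsw_reg_tnumt_pack(tnumt_pl, MLXSW_REG_TNUMT_RECORD_TYPE_IPV4,
			     MLXSW_REG_TNUMT_TUNNEL_PORT_NVE,
			     kvdl_index, false, 0, 2);
	mlxsw_reg_tnumt_udip_set(tnumt_pl, 0, be32_to_cpu(addr1));
	mlxsw_reg_tnumt_udip_set(tnumt_pl, 1, be32_to_cpu(addr2));
	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnumt), tnumt_pl);
}
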
+ * Access: RW + */ +MLXSW_ITEM32(reg, tnqdr, dscp, 0x04, 0, 6); + +static inline void mlxsw_reg_tnqdr_pack(char *payload, u8 local_port) +{ + MLXSW_REG_ZERO(tnqdr, payload); + mlxsw_reg_tnqdr_local_port_set(payload, local_port); + mlxsw_reg_tnqdr_dscp_set(payload, 0); +} + +/* TNEEM - Tunneling NVE Encapsulation ECN Mapping Register + * -------------------------------------------------------- + * The TNEEM register maps ECN of the IP header at the ingress to the + * encapsulation to the ECN of the underlay network. + */ +#define MLXSW_REG_TNEEM_ID 0xA012 +#define MLXSW_REG_TNEEM_LEN 0x0C + +MLXSW_REG_DEFINE(tneem, MLXSW_REG_TNEEM_ID, MLXSW_REG_TNEEM_LEN); + +/* reg_tneem_overlay_ecn + * ECN of the IP header in the overlay network. + * Access: Index + */ +MLXSW_ITEM32(reg, tneem, overlay_ecn, 0x04, 24, 2); + +/* reg_tneem_underlay_ecn + * ECN of the IP header in the underlay network. + * Access: RW + */ +MLXSW_ITEM32(reg, tneem, underlay_ecn, 0x04, 16, 2); + +static inline void mlxsw_reg_tneem_pack(char *payload, u8 overlay_ecn, + u8 underlay_ecn) +{ + MLXSW_REG_ZERO(tneem, payload); + mlxsw_reg_tneem_overlay_ecn_set(payload, overlay_ecn); + mlxsw_reg_tneem_underlay_ecn_set(payload, underlay_ecn); +} + +/* TNDEM - Tunneling NVE Decapsulation ECN Mapping Register + * -------------------------------------------------------- + * The TNDEM register configures the actions that are done in the + * decapsulation. + */ +#define MLXSW_REG_TNDEM_ID 0xA013 +#define MLXSW_REG_TNDEM_LEN 0x0C + +MLXSW_REG_DEFINE(tndem, MLXSW_REG_TNDEM_ID, MLXSW_REG_TNDEM_LEN); + +/* reg_tndem_underlay_ecn + * ECN field of the IP header in the underlay network. + * Access: Index + */ +MLXSW_ITEM32(reg, tndem, underlay_ecn, 0x04, 24, 2); + +/* reg_tndem_overlay_ecn + * ECN field of the IP header in the overlay network. + * Access: Index + */ +MLXSW_ITEM32(reg, tndem, overlay_ecn, 0x04, 16, 2); + +/* reg_tndem_eip_ecn + * Egress IP ECN. ECN field of the IP header of the packet which goes out + * from the decapsulation. + * Access: RW + */ +MLXSW_ITEM32(reg, tndem, eip_ecn, 0x04, 8, 2); + +/* reg_tndem_trap_en + * Trap enable: + * 0 - No trap due to decap ECN + * 1 - Trap enable with trap_id + * Access: RW + */ +MLXSW_ITEM32(reg, tndem, trap_en, 0x08, 28, 4); + +/* reg_tndem_trap_id + * Trap ID. Either DECAP_ECN0 or DECAP_ECN1. + * Reserved when trap_en is '0'. + * Access: RW + */ +MLXSW_ITEM32(reg, tndem, trap_id, 0x08, 0, 9); + +static inline void mlxsw_reg_tndem_pack(char *payload, u8 underlay_ecn, + u8 overlay_ecn, u8 ecn, bool trap_en, + u16 trap_id) +{ + MLXSW_REG_ZERO(tndem, payload); + mlxsw_reg_tndem_underlay_ecn_set(payload, underlay_ecn); + mlxsw_reg_tndem_overlay_ecn_set(payload, overlay_ecn); + mlxsw_reg_tndem_eip_ecn_set(payload, ecn); + mlxsw_reg_tndem_trap_en_set(payload, trap_en); + mlxsw_reg_tndem_trap_id_set(payload, trap_id); +} + +/* TNPC - Tunnel Port Configuration Register + * ----------------------------------------- + * The TNPC register is used for tunnel port configuration. + * Reserved when Spectrum. + */ +#define MLXSW_REG_TNPC_ID 0xA020 +#define MLXSW_REG_TNPC_LEN 0x18 + +MLXSW_REG_DEFINE(tnpc, MLXSW_REG_TNPC_ID, MLXSW_REG_TNPC_LEN); + +enum mlxsw_reg_tnpc_tunnel_port { + MLXSW_REG_TNPC_TUNNEL_PORT_NVE, + MLXSW_REG_TNPC_TUNNEL_PORT_VPLS, + MLXSW_REG_TNPC_TUNNEL_FLEX_TUNNEL0, + MLXSW_REG_TNPC_TUNNEL_FLEX_TUNNEL1, +}; + +/* reg_tnpc_tunnel_port + * Tunnel port. 
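TNEEM above is indexed by the overlay ECN value, so the whole encapsulation ECN map is programmed with four writes. A hedged sketch that installs a plain copy mapping; the policy the driver actually installs is not part of this diff:

static int example_nve_encap_ecn_init(struct mlxsw_sp *mlxsw_sp)
{
	char tneem_pl[MLXSW_REG_TNEEM_LEN];
	int err;
	u8 ecn;

	for (ecn = 0; ecn < 4; ecn++) {
		/* Illustrative only: copy overlay ECN to the underlay header */
		mlxsw_reg_tneem_pack(tneem_pl, ecn, ecn);
		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tneem),
				      tneem_pl);
		if (err)
			return err;
	}
	return 0;
}
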
+ * Access: Index + */ +MLXSW_ITEM32(reg, tnpc, tunnel_port, 0x00, 0, 4); + +/* reg_tnpc_learn_enable_v6 + * During IPv6 underlay decapsulation, whether to learn from tunnel port. + * Access: RW + */ +MLXSW_ITEM32(reg, tnpc, learn_enable_v6, 0x04, 1, 1); + +/* reg_tnpc_learn_enable_v4 + * During IPv4 underlay decapsulation, whether to learn from tunnel port. + * Access: RW + */ +MLXSW_ITEM32(reg, tnpc, learn_enable_v4, 0x04, 0, 1); + +static inline void mlxsw_reg_tnpc_pack(char *payload, + enum mlxsw_reg_tnpc_tunnel_port tport, + bool learn_enable) +{ + MLXSW_REG_ZERO(tnpc, payload); + mlxsw_reg_tnpc_tunnel_port_set(payload, tport); + mlxsw_reg_tnpc_learn_enable_v4_set(payload, learn_enable); + mlxsw_reg_tnpc_learn_enable_v6_set(payload, learn_enable); +} + /* TIGCR - Tunneling IPinIP General Configuration Register * ------------------------------------------------------- * The TIGCR register is used for setting up the IPinIP Tunnel configuration. @@ -8336,8 +8907,15 @@ MLXSW_ITEM32(reg, sbpr, dir, 0x00, 24, 2); */ MLXSW_ITEM32(reg, sbpr, pool, 0x00, 0, 4); +/* reg_sbpr_infi_size + * Size is infinite. + * Access: RW + */ +MLXSW_ITEM32(reg, sbpr, infi_size, 0x04, 31, 1); + /* reg_sbpr_size * Pool size in buffer cells. + * Reserved when infi_size = 1. * Access: RW */ MLXSW_ITEM32(reg, sbpr, size, 0x04, 0, 24); @@ -8355,13 +8933,15 @@ MLXSW_ITEM32(reg, sbpr, mode, 0x08, 0, 4); static inline void mlxsw_reg_sbpr_pack(char *payload, u8 pool, enum mlxsw_reg_sbxx_dir dir, - enum mlxsw_reg_sbpr_mode mode, u32 size) + enum mlxsw_reg_sbpr_mode mode, u32 size, + bool infi_size) { MLXSW_REG_ZERO(sbpr, payload); mlxsw_reg_sbpr_pool_set(payload, pool); mlxsw_reg_sbpr_dir_set(payload, dir); mlxsw_reg_sbpr_mode_set(payload, mode); mlxsw_reg_sbpr_size_set(payload, size); + mlxsw_reg_sbpr_infi_size_set(payload, infi_size); } /* SBCM - Shared Buffer Class Management Register @@ -8409,6 +8989,12 @@ MLXSW_ITEM32(reg, sbcm, min_buff, 0x18, 0, 24); #define MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN 1 #define MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX 14 +/* reg_sbcm_infi_max + * Max buffer is infinite. + * Access: RW + */ +MLXSW_ITEM32(reg, sbcm, infi_max, 0x1C, 31, 1); + /* reg_sbcm_max_buff * When the pool associated to the port-pg/tclass is configured to * static, Maximum buffer size for the limiter configured in cells. @@ -8418,6 +9004,7 @@ MLXSW_ITEM32(reg, sbcm, min_buff, 0x18, 0, 24); * 0: 0 * i: (1/128)*2^(i-1), for i=1..14 * 0xFF: Infinity + * Reserved when infi_max = 1. 
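As a concrete reading of the dynamic max_buff encoding above: a register value of 8 corresponds to alpha 1, and the maximum value of 14 to alpha 64. A small decode sketch, returning alpha scaled by 128 so it stays in integer arithmetic (illustrative only, not part of the patch):

/* max_buff = 1  -> 1    (alpha = 1/128)
 * max_buff = 8  -> 128  (alpha = 1)
 * max_buff = 14 -> 8192 (alpha = 64)
 */
static u32 example_dyn_max_buff_to_alpha128(u32 max_buff)
{
	if (max_buff < MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN ||
	    max_buff > MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX)
		return 0;
	return 1 << (max_buff - 1);
}
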
* Access: RW */ MLXSW_ITEM32(reg, sbcm, max_buff, 0x1C, 0, 24); @@ -8430,7 +9017,8 @@ MLXSW_ITEM32(reg, sbcm, pool, 0x24, 0, 4); static inline void mlxsw_reg_sbcm_pack(char *payload, u8 local_port, u8 pg_buff, enum mlxsw_reg_sbxx_dir dir, - u32 min_buff, u32 max_buff, u8 pool) + u32 min_buff, u32 max_buff, + bool infi_max, u8 pool) { MLXSW_REG_ZERO(sbcm, payload); mlxsw_reg_sbcm_local_port_set(payload, local_port); @@ -8438,6 +9026,7 @@ static inline void mlxsw_reg_sbcm_pack(char *payload, u8 local_port, u8 pg_buff, mlxsw_reg_sbcm_dir_set(payload, dir); mlxsw_reg_sbcm_min_buff_set(payload, min_buff); mlxsw_reg_sbcm_max_buff_set(payload, max_buff); + mlxsw_reg_sbcm_infi_max_set(payload, infi_max); mlxsw_reg_sbcm_pool_set(payload, pool); } @@ -8810,6 +9399,14 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mcc), MLXSW_REG(mcda), MLXSW_REG(mgpc), + MLXSW_REG(mprs), + MLXSW_REG(tngcr), + MLXSW_REG(tnumt), + MLXSW_REG(tnqcr), + MLXSW_REG(tnqdr), + MLXSW_REG(tneem), + MLXSW_REG(tndem), + MLXSW_REG(tnpc), MLXSW_REG(tigcr), MLXSW_REG(sbpr), MLXSW_REG(sbcm), diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index 79a31de7c825..99b341539870 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -46,6 +46,8 @@ enum mlxsw_res_id { MLXSW_RES_ID_MAX_RIFS, MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES, MLXSW_RES_ID_MAX_LPM_TREES, + MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV4, + MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV6, /* Internal resources. * Determined by the SW, not queried from the HW. @@ -96,6 +98,8 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_MAX_RIFS] = 0x2C02, [MLXSW_RES_ID_MC_ERIF_LIST_ENTRIES] = 0x2C10, [MLXSW_RES_ID_MAX_LPM_TREES] = 0x2C30, + [MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV4] = 0x2E02, + [MLXSW_RES_ID_MAX_NVE_MC_ENTRIES_IPV6] = 0x2E03, }; struct mlxsw_res { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 30bb2c533cec..8a4983adae94 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -21,6 +21,7 @@ #include <linux/dcbnl.h> #include <linux/inetdevice.h> #include <linux/netlink.h> +#include <linux/random.h> #include <net/switchdev.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_mirred.h> @@ -331,7 +332,10 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp) return -EINVAL; } if (MLXSW_SP_FWREV_MINOR_TO_BRANCH(rev->minor) == - MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor)) + MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor) && + (rev->minor > req_rev->minor || + (rev->minor == req_rev->minor && + rev->subminor >= req_rev->subminor))) return 0; dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver\n", @@ -2804,6 +2808,13 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) MLXSW_REG_QEEC_MAS_DIS); if (err) return err; + + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_TC, + i + 8, i, + MLXSW_REG_QEEC_MAS_DIS); + if (err) + return err; } /* Map all priorities to traffic class 0. 
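The mlxsw_sp_fw_rev_validate() change above tightens the check from "same branch" to "same branch and at least the required minor/subminor". Written out as a standalone predicate (a sketch with a made-up name, over the same fields the driver compares):

static bool example_fw_rev_ok(u16 minor, u16 subminor,
			      u16 req_minor, u16 req_subminor)
{
	/* Must be on the branch the driver was validated against */
	if (MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) !=
	    MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_minor))
		return false;
	/* and at least as new as the required minor.subminor */
	return minor > req_minor ||
	       (minor == req_minor && subminor >= req_subminor);
}
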
*/ @@ -2983,6 +2994,13 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_qdiscs_init; } + err = mlxsw_sp_port_nve_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize NVE\n", + mlxsw_sp_port->local_port); + goto err_port_nve_init; + } + mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_get(mlxsw_sp_port, 1); if (IS_ERR(mlxsw_sp_port_vlan)) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create VID 1\n", @@ -3011,6 +3029,8 @@ err_register_netdev: mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_vlan_put(mlxsw_sp_port_vlan); err_port_vlan_get: + mlxsw_sp_port_nve_fini(mlxsw_sp_port); +err_port_nve_init: mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port); err_port_qdiscs_init: mlxsw_sp_port_fids_fini(mlxsw_sp_port); @@ -3050,6 +3070,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_sp->ports[local_port] = NULL; mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_vlan_flush(mlxsw_sp_port); + mlxsw_sp_port_nve_fini(mlxsw_sp_port); mlxsw_sp_tc_qdisc_fini(mlxsw_sp_port); mlxsw_sp_port_fids_fini(mlxsw_sp_port); mlxsw_sp_port_dcb_fini(mlxsw_sp_port); @@ -3459,6 +3480,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false), MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false), MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(DECAP_ECN0, TRAP_TO_CPU, ROUTER_EXP, false), MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, ROUTER_EXP, false), MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, ROUTER_EXP, false), /* PKT Sample trap */ @@ -3472,6 +3494,8 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_MARK(RPF, TRAP_TO_CPU, RPF, false), MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false), MLXSW_SP_RXL_MR_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false), + /* NVE traps */ + MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, ARP, false), }; static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) @@ -3656,8 +3680,10 @@ static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) { char slcr_pl[MLXSW_REG_SLCR_LEN]; + u32 seed; int err; + get_random_bytes(&seed, sizeof(seed)); mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC | MLXSW_REG_SLCR_LAG_HASH_DMAC | MLXSW_REG_SLCR_LAG_HASH_ETHERTYPE | @@ -3666,7 +3692,7 @@ static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) MLXSW_REG_SLCR_LAG_HASH_DIP | MLXSW_REG_SLCR_LAG_HASH_SPORT | MLXSW_REG_SLCR_LAG_HASH_DPORT | - MLXSW_REG_SLCR_LAG_HASH_IPPROTO); + MLXSW_REG_SLCR_LAG_HASH_IPPROTO, seed); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl); if (err) return err; @@ -3779,6 +3805,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_afa_init; } + err = mlxsw_sp_nve_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize NVE\n"); + goto err_nve_init; + } + err = mlxsw_sp_router_init(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to initialize router\n"); @@ -3825,6 +3857,8 @@ err_acl_init: err_netdev_notifier: mlxsw_sp_router_fini(mlxsw_sp); err_router_init: + mlxsw_sp_nve_fini(mlxsw_sp); +err_nve_init: mlxsw_sp_afa_fini(mlxsw_sp); err_afa_init: mlxsw_sp_counter_pool_fini(mlxsw_sp); @@ -3857,6 +3891,7 @@ static int mlxsw_sp1_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->afk_ops = &mlxsw_sp1_afk_ops; mlxsw_sp->mr_tcam_ops = &mlxsw_sp1_mr_tcam_ops; 
mlxsw_sp->acl_tcam_ops = &mlxsw_sp1_acl_tcam_ops; + mlxsw_sp->nve_ops_arr = mlxsw_sp1_nve_ops_arr; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -3871,6 +3906,7 @@ static int mlxsw_sp2_init(struct mlxsw_core *mlxsw_core, mlxsw_sp->afk_ops = &mlxsw_sp2_afk_ops; mlxsw_sp->mr_tcam_ops = &mlxsw_sp2_mr_tcam_ops; mlxsw_sp->acl_tcam_ops = &mlxsw_sp2_acl_tcam_ops; + mlxsw_sp->nve_ops_arr = mlxsw_sp2_nve_ops_arr; return mlxsw_sp_init(mlxsw_core, mlxsw_bus_info); } @@ -3884,6 +3920,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_acl_fini(mlxsw_sp); unregister_netdevice_notifier(&mlxsw_sp->netdevice_nb); mlxsw_sp_router_fini(mlxsw_sp); + mlxsw_sp_nve_fini(mlxsw_sp); mlxsw_sp_afa_fini(mlxsw_sp); mlxsw_sp_counter_pool_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); @@ -4550,6 +4587,41 @@ static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port) mlxsw_sp_port_vp_mode_set(mlxsw_sp_port, false); } +static bool mlxsw_sp_bridge_has_multiple_vxlans(struct net_device *br_dev) +{ + unsigned int num_vxlans = 0; + struct net_device *dev; + struct list_head *iter; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev)) + num_vxlans++; + } + + return num_vxlans > 1; +} + +static bool mlxsw_sp_bridge_vxlan_is_valid(struct net_device *br_dev, + struct netlink_ext_ack *extack) +{ + if (br_multicast_enabled(br_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multicast can not be enabled on a bridge with a VxLAN device"); + return false; + } + + if (br_vlan_enabled(br_dev)) { + NL_SET_ERR_MSG_MOD(extack, "VLAN filtering can not be enabled on a bridge with a VxLAN device"); + return false; + } + + if (mlxsw_sp_bridge_has_multiple_vxlans(br_dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multiple VxLAN devices are not supported in a VLAN-unaware bridge"); + return false; + } + + return true; +} + static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, struct net_device *dev, unsigned long event, void *ptr) @@ -4579,6 +4651,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *lower_dev, } if (!info->linking) break; + if (netif_is_bridge_master(upper_dev) && + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) && + mlxsw_sp_bridge_has_vxlan(upper_dev) && + !mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack)) + return -EOPNOTSUPP; if (netdev_has_any_upper_dev(upper_dev) && (!netif_is_bridge_master(upper_dev) || !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, @@ -4736,6 +4813,11 @@ static int mlxsw_sp_netdevice_port_vlan_event(struct net_device *vlan_dev, } if (!info->linking) break; + if (netif_is_bridge_master(upper_dev) && + !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, upper_dev) && + mlxsw_sp_bridge_has_vxlan(upper_dev) && + !mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack)) + return -EOPNOTSUPP; if (netdev_has_any_upper_dev(upper_dev) && (!netif_is_bridge_master(upper_dev) || !mlxsw_sp_bridge_device_is_offloaded(mlxsw_sp, @@ -4882,6 +4964,63 @@ static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr) return netif_is_l3_master(info->upper_dev); } +static int mlxsw_sp_netdevice_vxlan_event(struct mlxsw_sp *mlxsw_sp, + struct net_device *dev, + unsigned long event, void *ptr) +{ + struct netdev_notifier_changeupper_info *cu_info; + struct netdev_notifier_info *info = ptr; + struct netlink_ext_ack *extack; + struct net_device *upper_dev; + + extack = netdev_notifier_info_to_extack(info); + + switch (event) { + case NETDEV_CHANGEUPPER: + cu_info = container_of(info, + struct netdev_notifier_changeupper_info, + info); + 
upper_dev = cu_info->upper_dev; + if (!netif_is_bridge_master(upper_dev)) + return 0; + if (!mlxsw_sp_lower_get(upper_dev)) + return 0; + if (!mlxsw_sp_bridge_vxlan_is_valid(upper_dev, extack)) + return -EOPNOTSUPP; + if (cu_info->linking) { + if (!netif_running(dev)) + return 0; + return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, + dev, extack); + } else { + mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, upper_dev, dev); + } + break; + case NETDEV_PRE_UP: + upper_dev = netdev_master_upper_dev_get(dev); + if (!upper_dev) + return 0; + if (!netif_is_bridge_master(upper_dev)) + return 0; + if (!mlxsw_sp_lower_get(upper_dev)) + return 0; + return mlxsw_sp_bridge_vxlan_join(mlxsw_sp, upper_dev, dev, + extack); + case NETDEV_DOWN: + upper_dev = netdev_master_upper_dev_get(dev); + if (!upper_dev) + return 0; + if (!netif_is_bridge_master(upper_dev)) + return 0; + if (!mlxsw_sp_lower_get(upper_dev)) + return 0; + mlxsw_sp_bridge_vxlan_leave(mlxsw_sp, upper_dev, dev); + break; + } + + return 0; +} + static int mlxsw_sp_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -4898,6 +5037,8 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *nb, } mlxsw_sp_span_respin(mlxsw_sp); + if (netif_is_vxlan(dev)) + err = mlxsw_sp_netdevice_vxlan_event(mlxsw_sp, dev, event, ptr); if (mlxsw_sp_netdev_is_ipip_ol(mlxsw_sp, dev)) err = mlxsw_sp_netdevice_ipip_ol_event(mlxsw_sp, dev, event, ptr); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 3cdb7aca90b7..0875a79cbe7b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -16,6 +16,7 @@ #include <net/psample.h> #include <net/pkt_cls.h> #include <net/red.h> +#include <net/vxlan.h> #include "port.h" #include "core.h" @@ -55,6 +56,8 @@ enum mlxsw_sp_resource_id { struct mlxsw_sp_port; struct mlxsw_sp_rif; struct mlxsw_sp_span_entry; +enum mlxsw_sp_l3proto; +union mlxsw_sp_l3addr; struct mlxsw_sp_upper { struct net_device *dev; @@ -113,9 +116,11 @@ struct mlxsw_sp_acl; struct mlxsw_sp_counter_pool; struct mlxsw_sp_fid_core; struct mlxsw_sp_kvdl; +struct mlxsw_sp_nve; struct mlxsw_sp_kvdl_ops; struct mlxsw_sp_mr_tcam_ops; struct mlxsw_sp_acl_tcam_ops; +struct mlxsw_sp_nve_ops; struct mlxsw_sp { struct mlxsw_sp_port **ports; @@ -132,6 +137,7 @@ struct mlxsw_sp { struct mlxsw_sp_acl *acl; struct mlxsw_sp_fid_core *fid_core; struct mlxsw_sp_kvdl *kvdl; + struct mlxsw_sp_nve *nve; struct notifier_block netdevice_nb; struct mlxsw_sp_counter_pool *counter_pool; @@ -146,6 +152,7 @@ struct mlxsw_sp { const struct mlxsw_afk_ops *afk_ops; const struct mlxsw_sp_mr_tcam_ops *mr_tcam_ops; const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops; + const struct mlxsw_sp_nve_ops **nve_ops_arr; }; static inline struct mlxsw_sp_upper * @@ -235,6 +242,25 @@ struct mlxsw_sp_port { struct mlxsw_sp_acl_block *eg_acl_block; }; +static inline struct net_device * +mlxsw_sp_bridge_vxlan_dev_find(struct net_device *br_dev) +{ + struct net_device *dev; + struct list_head *iter; + + netdev_for_each_lower_dev(br_dev, dev, iter) { + if (netif_is_vxlan(dev)) + return dev; + } + + return NULL; +} + +static inline bool mlxsw_sp_bridge_has_vxlan(struct net_device *br_dev) +{ + return !!mlxsw_sp_bridge_vxlan_dev_find(br_dev); +} + static inline bool mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port) { @@ -330,6 +356,13 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *br_dev); bool 
mlxsw_sp_bridge_device_is_offloaded(const struct mlxsw_sp *mlxsw_sp, const struct net_device *br_dev); +int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack); +void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct net_device *vxlan_dev); /* spectrum.c */ int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, @@ -383,6 +416,17 @@ static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) #endif /* spectrum_router.c */ +enum mlxsw_sp_l3proto { + MLXSW_SP_L3_PROTO_IPV4, + MLXSW_SP_L3_PROTO_IPV6, +#define MLXSW_SP_L3_PROTO_MAX (MLXSW_SP_L3_PROTO_IPV6 + 1) +}; + +union mlxsw_sp_l3addr { + __be32 addr4; + struct in6_addr addr6; +}; + int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_netdevice_router_port_event(struct net_device *dev); @@ -416,6 +460,19 @@ mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif); void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, struct net_device *dev); +struct mlxsw_sp_rif *mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev); +u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp); +struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif); +int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip, + u32 tunnel_index); +void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip); +int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + u16 *vr_id); /* spectrum_kvdl.c */ enum mlxsw_sp_kvdl_entry_type { @@ -423,6 +480,7 @@ enum mlxsw_sp_kvdl_entry_type { MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET, MLXSW_SP_KVDL_ENTRY_TYPE_PBS, MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR, + MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, }; static inline unsigned int @@ -433,6 +491,7 @@ mlxsw_sp_kvdl_entry_size(enum mlxsw_sp_kvdl_entry_type type) case MLXSW_SP_KVDL_ENTRY_TYPE_ACTSET: /* fall through */ case MLXSW_SP_KVDL_ENTRY_TYPE_PBS: /* fall through */ case MLXSW_SP_KVDL_ENTRY_TYPE_MCRIGR: /* fall through */ + case MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT: /* fall through */ default: return 1; } @@ -662,6 +721,16 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_prio_qopt_offload *p); /* spectrum_fid.c */ +struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp, + __be32 vni); +int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni); +int mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid, + u32 nve_flood_index); +void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid); +bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid); +int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni); +void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid); +bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid); int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid, enum mlxsw_sp_flood_type packet_type, u8 local_port, bool member); @@ -680,6 +749,8 @@ u16 mlxsw_sp_fid_8021q_vid(const struct mlxsw_sp_fid *fid); struct mlxsw_sp_fid *mlxsw_sp_fid_8021q_get(struct mlxsw_sp *mlxsw_sp, u16 vid); struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp 
*mlxsw_sp, int br_ifindex); +struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp, + int br_ifindex); struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp, u16 rif_index); struct mlxsw_sp_fid *mlxsw_sp_fid_dummy_get(struct mlxsw_sp *mlxsw_sp); @@ -725,4 +796,39 @@ extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp1_mr_tcam_ops; /* spectrum2_mr_tcam.c */ extern const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops; +/* spectrum_nve.c */ +enum mlxsw_sp_nve_type { + MLXSW_SP_NVE_TYPE_VXLAN, +}; + +struct mlxsw_sp_nve_params { + enum mlxsw_sp_nve_type type; + __be32 vni; + const struct net_device *dev; +}; + +extern const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[]; +extern const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[]; + +int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr); +void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr); +u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp); +bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp, + u32 tb_id, __be32 addr); +int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_params *params, + struct netlink_ext_ack *extack); +void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid); +int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port); +void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port); +int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp); + #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c index 68c8b148bef2..8d14770766b4 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_kvdl.c @@ -35,6 +35,7 @@ static const struct mlxsw_sp2_kvdl_part_info mlxsw_sp2_kvdl_parts_info[] = { MAX_KVD_ACTION_SETS), MLXSW_SP2_KVDL_PART_INFO(PBS, 0x24, KVD_SIZE, KVD_SIZE), MLXSW_SP2_KVDL_PART_INFO(MCRIGR, 0x26, KVD_SIZE, KVD_SIZE), + MLXSW_SP2_KVDL_PART_INFO(TNUMT, 0x29, KVD_SIZE, KVD_SIZE), }; #define MLXSW_SP2_KVDL_PARTS_INFO_LEN ARRAY_SIZE(mlxsw_sp2_kvdl_parts_info) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 3589432d1643..12c61e0cc570 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -25,28 +25,52 @@ struct mlxsw_cp_sb_occ { struct mlxsw_sp_sb_cm { u32 min_buff; u32 max_buff; - u8 pool; + u16 pool_index; struct mlxsw_cp_sb_occ occ; }; +#define MLXSW_SP_SB_INFI -1U + struct mlxsw_sp_sb_pm { u32 min_buff; u32 max_buff; struct mlxsw_cp_sb_occ occ; }; -#define MLXSW_SP_SB_POOL_COUNT 4 -#define MLXSW_SP_SB_TC_COUNT 8 +struct mlxsw_sp_sb_pool_des { + enum mlxsw_reg_sbxx_dir dir; + u8 pool; +}; + +/* Order ingress pools before egress pools. 
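The per-direction pool arrays are replaced by the flat descriptor table that follows, so a devlink pool_index selects a row directly and the old pool_get()/dir_get() helpers further down become unnecessary. For illustration, the reverse lookup from a (direction, pool) pair to the flat index would be a linear scan of that table (a sketch only, nothing in the patch needs it):

static int example_sb_pool_index_find(enum mlxsw_reg_sbxx_dir dir, u8 pool)
{
	int i;

	for (i = 0; i < MLXSW_SP_SB_POOL_DESS_LEN; i++) {
		if (mlxsw_sp_sb_pool_dess[i].dir == dir &&
		    mlxsw_sp_sb_pool_dess[i].pool == pool)
			return i;
	}
	return -ENOENT;
}
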
*/ +static const struct mlxsw_sp_sb_pool_des mlxsw_sp_sb_pool_dess[] = { + {MLXSW_REG_SBXX_DIR_INGRESS, 0}, + {MLXSW_REG_SBXX_DIR_INGRESS, 1}, + {MLXSW_REG_SBXX_DIR_INGRESS, 2}, + {MLXSW_REG_SBXX_DIR_INGRESS, 3}, + {MLXSW_REG_SBXX_DIR_EGRESS, 0}, + {MLXSW_REG_SBXX_DIR_EGRESS, 1}, + {MLXSW_REG_SBXX_DIR_EGRESS, 2}, + {MLXSW_REG_SBXX_DIR_EGRESS, 3}, + {MLXSW_REG_SBXX_DIR_EGRESS, 15}, +}; + +#define MLXSW_SP_SB_POOL_DESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pool_dess) + +#define MLXSW_SP_SB_ING_TC_COUNT 8 +#define MLXSW_SP_SB_EG_TC_COUNT 16 struct mlxsw_sp_sb_port { - struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT]; - struct mlxsw_sp_sb_pm pms[2][MLXSW_SP_SB_POOL_COUNT]; + struct mlxsw_sp_sb_cm ing_cms[MLXSW_SP_SB_ING_TC_COUNT]; + struct mlxsw_sp_sb_cm eg_cms[MLXSW_SP_SB_EG_TC_COUNT]; + struct mlxsw_sp_sb_pm pms[MLXSW_SP_SB_POOL_DESS_LEN]; }; struct mlxsw_sp_sb { - struct mlxsw_sp_sb_pr prs[2][MLXSW_SP_SB_POOL_COUNT]; + struct mlxsw_sp_sb_pr prs[MLXSW_SP_SB_POOL_DESS_LEN]; struct mlxsw_sp_sb_port *ports; u32 cell_size; + u64 sb_size; }; u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, u32 cells) @@ -60,95 +84,122 @@ u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, u32 bytes) } static struct mlxsw_sp_sb_pr *mlxsw_sp_sb_pr_get(struct mlxsw_sp *mlxsw_sp, - u8 pool, - enum mlxsw_reg_sbxx_dir dir) + u16 pool_index) { - return &mlxsw_sp->sb->prs[dir][pool]; + return &mlxsw_sp->sb->prs[pool_index]; +} + +static bool mlxsw_sp_sb_cm_exists(u8 pg_buff, enum mlxsw_reg_sbxx_dir dir) +{ + if (dir == MLXSW_REG_SBXX_DIR_INGRESS) + return pg_buff < MLXSW_SP_SB_ING_TC_COUNT; + else + return pg_buff < MLXSW_SP_SB_EG_TC_COUNT; } static struct mlxsw_sp_sb_cm *mlxsw_sp_sb_cm_get(struct mlxsw_sp *mlxsw_sp, u8 local_port, u8 pg_buff, enum mlxsw_reg_sbxx_dir dir) { - return &mlxsw_sp->sb->ports[local_port].cms[dir][pg_buff]; + struct mlxsw_sp_sb_port *sb_port = &mlxsw_sp->sb->ports[local_port]; + + WARN_ON(!mlxsw_sp_sb_cm_exists(pg_buff, dir)); + if (dir == MLXSW_REG_SBXX_DIR_INGRESS) + return &sb_port->ing_cms[pg_buff]; + else + return &sb_port->eg_cms[pg_buff]; } static struct mlxsw_sp_sb_pm *mlxsw_sp_sb_pm_get(struct mlxsw_sp *mlxsw_sp, - u8 local_port, u8 pool, - enum mlxsw_reg_sbxx_dir dir) + u8 local_port, u16 pool_index) { - return &mlxsw_sp->sb->ports[local_port].pms[dir][pool]; + return &mlxsw_sp->sb->ports[local_port].pms[pool_index]; } -static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u8 pool, - enum mlxsw_reg_sbxx_dir dir, - enum mlxsw_reg_sbpr_mode mode, u32 size) +static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u16 pool_index, + enum mlxsw_reg_sbpr_mode mode, + u32 size, bool infi_size) { + const struct mlxsw_sp_sb_pool_des *des = + &mlxsw_sp_sb_pool_dess[pool_index]; char sbpr_pl[MLXSW_REG_SBPR_LEN]; struct mlxsw_sp_sb_pr *pr; int err; - mlxsw_reg_sbpr_pack(sbpr_pl, pool, dir, mode, size); + mlxsw_reg_sbpr_pack(sbpr_pl, des->pool, des->dir, mode, + size, infi_size); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpr), sbpr_pl); if (err) return err; - pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + if (infi_size) + size = mlxsw_sp_bytes_cells(mlxsw_sp, mlxsw_sp->sb->sb_size); + pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index); pr->mode = mode; pr->size = size; return 0; } static int mlxsw_sp_sb_cm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port, - u8 pg_buff, enum mlxsw_reg_sbxx_dir dir, - u32 min_buff, u32 max_buff, u8 pool) + u8 pg_buff, u32 min_buff, u32 max_buff, + bool infi_max, u16 pool_index) { + const struct mlxsw_sp_sb_pool_des *des = + 
&mlxsw_sp_sb_pool_dess[pool_index]; char sbcm_pl[MLXSW_REG_SBCM_LEN]; + struct mlxsw_sp_sb_cm *cm; int err; - mlxsw_reg_sbcm_pack(sbcm_pl, local_port, pg_buff, dir, - min_buff, max_buff, pool); + mlxsw_reg_sbcm_pack(sbcm_pl, local_port, pg_buff, des->dir, + min_buff, max_buff, infi_max, des->pool); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbcm), sbcm_pl); if (err) return err; - if (pg_buff < MLXSW_SP_SB_TC_COUNT) { - struct mlxsw_sp_sb_cm *cm; - cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir); + if (mlxsw_sp_sb_cm_exists(pg_buff, des->dir)) { + if (infi_max) + max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, + mlxsw_sp->sb->sb_size); + + cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, + des->dir); cm->min_buff = min_buff; cm->max_buff = max_buff; - cm->pool = pool; + cm->pool_index = pool_index; } return 0; } static int mlxsw_sp_sb_pm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port, - u8 pool, enum mlxsw_reg_sbxx_dir dir, - u32 min_buff, u32 max_buff) + u16 pool_index, u32 min_buff, u32 max_buff) { + const struct mlxsw_sp_sb_pool_des *des = + &mlxsw_sp_sb_pool_dess[pool_index]; char sbpm_pl[MLXSW_REG_SBPM_LEN]; struct mlxsw_sp_sb_pm *pm; int err; - mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, false, + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir, false, min_buff, max_buff); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl); if (err) return err; - pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir); + pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool_index); pm->min_buff = min_buff; pm->max_buff = max_buff; return 0; } static int mlxsw_sp_sb_pm_occ_clear(struct mlxsw_sp *mlxsw_sp, u8 local_port, - u8 pool, enum mlxsw_reg_sbxx_dir dir, - struct list_head *bulk_list) + u16 pool_index, struct list_head *bulk_list) { + const struct mlxsw_sp_sb_pool_des *des = + &mlxsw_sp_sb_pool_dess[pool_index]; char sbpm_pl[MLXSW_REG_SBPM_LEN]; - mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, true, 0, 0); + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir, + true, 0, 0); return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl, bulk_list, NULL, 0); } @@ -163,14 +214,16 @@ static void mlxsw_sp_sb_pm_occ_query_cb(struct mlxsw_core *mlxsw_core, } static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port, - u8 pool, enum mlxsw_reg_sbxx_dir dir, - struct list_head *bulk_list) + u16 pool_index, struct list_head *bulk_list) { + const struct mlxsw_sp_sb_pool_des *des = + &mlxsw_sp_sb_pool_dess[pool_index]; char sbpm_pl[MLXSW_REG_SBPM_LEN]; struct mlxsw_sp_sb_pm *pm; - pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir); - mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, false, 0, 0); + pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool_index); + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, des->pool, des->dir, + false, 0, 0); return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl, bulk_list, mlxsw_sp_sb_pm_occ_query_cb, @@ -254,63 +307,54 @@ static void mlxsw_sp_sb_ports_fini(struct mlxsw_sp *mlxsw_sp) .size = _size, \ } -static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_ingress[] = { +static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs[] = { + /* Ingress pools. 
*/ MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_PR_INGRESS_SIZE), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_PR_INGRESS_MNG_SIZE), -}; - -#define MLXSW_SP_SB_PRS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_ingress) - -static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_egress[] = { + /* Egress pools. */ MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_PR_EGRESS_SIZE), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_STATIC, MLXSW_SP_SB_INFI), }; -#define MLXSW_SP_SB_PRS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_egress) +#define MLXSW_SP_SB_PRS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs) -static int __mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_reg_sbxx_dir dir, - const struct mlxsw_sp_sb_pr *prs, - size_t prs_len) +static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_sb_pr *prs, + size_t prs_len) { int i; int err; for (i = 0; i < prs_len; i++) { - u32 size = mlxsw_sp_bytes_cells(mlxsw_sp, prs[i].size); - - err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, dir, prs[i].mode, size); + u32 size = prs[i].size; + u32 size_cells; + + if (size == MLXSW_SP_SB_INFI) { + err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode, + 0, true); + } else { + size_cells = mlxsw_sp_bytes_cells(mlxsw_sp, size); + err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, prs[i].mode, + size_cells, false); + } if (err) return err; } return 0; } -static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp) -{ - int err; - - err = __mlxsw_sp_sb_prs_init(mlxsw_sp, MLXSW_REG_SBXX_DIR_INGRESS, - mlxsw_sp_sb_prs_ingress, - MLXSW_SP_SB_PRS_INGRESS_LEN); - if (err) - return err; - return __mlxsw_sp_sb_prs_init(mlxsw_sp, MLXSW_REG_SBXX_DIR_EGRESS, - mlxsw_sp_sb_prs_egress, - MLXSW_SP_SB_PRS_EGRESS_LEN); -} - #define MLXSW_SP_SB_CM(_min_buff, _max_buff, _pool) \ { \ .min_buff = _min_buff, \ .max_buff = _max_buff, \ - .pool = _pool, \ + .pool_index = _pool, \ } static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = { @@ -329,38 +373,38 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = { #define MLXSW_SP_SB_CMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_ingress) static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = { - MLXSW_SP_SB_CM(1500, 9, 0), - MLXSW_SP_SB_CM(1500, 9, 0), - MLXSW_SP_SB_CM(1500, 9, 0), - MLXSW_SP_SB_CM(1500, 9, 0), - MLXSW_SP_SB_CM(1500, 9, 0), - MLXSW_SP_SB_CM(1500, 9, 0), - MLXSW_SP_SB_CM(1500, 9, 0), - MLXSW_SP_SB_CM(1500, 9, 0), - MLXSW_SP_SB_CM(0, 140000, 15), - MLXSW_SP_SB_CM(0, 140000, 15), - MLXSW_SP_SB_CM(0, 140000, 15), - MLXSW_SP_SB_CM(0, 140000, 15), - MLXSW_SP_SB_CM(0, 140000, 15), - MLXSW_SP_SB_CM(0, 140000, 15), - MLXSW_SP_SB_CM(0, 140000, 15), - MLXSW_SP_SB_CM(0, 140000, 15), - MLXSW_SP_SB_CM(1, 0xff, 0), + MLXSW_SP_SB_CM(1500, 9, 4), + MLXSW_SP_SB_CM(1500, 9, 4), + MLXSW_SP_SB_CM(1500, 9, 4), + MLXSW_SP_SB_CM(1500, 9, 4), + MLXSW_SP_SB_CM(1500, 9, 4), + MLXSW_SP_SB_CM(1500, 9, 4), + MLXSW_SP_SB_CM(1500, 9, 4), + MLXSW_SP_SB_CM(1500, 9, 4), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(0, MLXSW_SP_SB_INFI, 8), + MLXSW_SP_SB_CM(1, 0xff, 4), }; #define 
MLXSW_SP_SB_CMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_egress) -#define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, 0) +#define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, 4) static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), - MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), - MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), - MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), - MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4), MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 0), + MLXSW_SP_SB_CM(MLXSW_PORT_MAX_MTU, 0, 4), MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, @@ -390,6 +434,14 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { #define MLXSW_SP_CPU_PORT_SB_MCS_LEN \ ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms) +static bool +mlxsw_sp_sb_pool_is_static(struct mlxsw_sp *mlxsw_sp, u16 pool_index) +{ + struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index); + + return pr->mode == MLXSW_REG_SBPR_MODE_STATIC; +} + static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, enum mlxsw_reg_sbxx_dir dir, const struct mlxsw_sp_sb_cm *cms, @@ -401,16 +453,29 @@ static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, for (i = 0; i < cms_len; i++) { const struct mlxsw_sp_sb_cm *cm; u32 min_buff; + u32 max_buff; if (i == 8 && dir == MLXSW_REG_SBXX_DIR_INGRESS) continue; /* PG number 8 does not exist, skip it */ cm = &cms[i]; - /* All pools are initialized using dynamic thresholds, - * therefore 'max_buff' isn't specified in cells. - */ + if (WARN_ON(mlxsw_sp_sb_pool_dess[cm->pool_index].dir != dir)) + continue; + min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, cm->min_buff); - err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, dir, - min_buff, cm->max_buff, cm->pool); + max_buff = cm->max_buff; + if (max_buff == MLXSW_SP_SB_INFI) { + err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, + min_buff, 0, + true, cm->pool_index); + } else { + if (mlxsw_sp_sb_pool_is_static(mlxsw_sp, + cm->pool_index)) + max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, + max_buff); + err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, + min_buff, max_buff, + false, cm->pool_index); + } if (err) return err; } @@ -448,91 +513,74 @@ static int mlxsw_sp_cpu_port_sb_cms_init(struct mlxsw_sp *mlxsw_sp) .max_buff = _max_buff, \ } -static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms_ingress[] = { +static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms[] = { + /* Ingress pools. */ MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX), MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX), -}; - -#define MLXSW_SP_SB_PMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms_ingress) - -static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms_egress[] = { + /* Egress pools. 
*/ MLXSW_SP_SB_PM(0, 7), MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(10000, 90000), }; -#define MLXSW_SP_SB_PMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms_egress) +#define MLXSW_SP_SB_PMS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms) -static int __mlxsw_sp_port_sb_pms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, - enum mlxsw_reg_sbxx_dir dir, - const struct mlxsw_sp_sb_pm *pms, - size_t pms_len) +static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; int i; int err; - for (i = 0; i < pms_len; i++) { - const struct mlxsw_sp_sb_pm *pm; + for (i = 0; i < MLXSW_SP_SB_PMS_LEN; i++) { + const struct mlxsw_sp_sb_pm *pm = &mlxsw_sp_sb_pms[i]; + u32 max_buff; + u32 min_buff; - pm = &pms[i]; - err = mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, i, dir, - pm->min_buff, pm->max_buff); + min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, pm->min_buff); + max_buff = pm->max_buff; + if (mlxsw_sp_sb_pool_is_static(mlxsw_sp, i)) + max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, max_buff); + err = mlxsw_sp_sb_pm_write(mlxsw_sp, mlxsw_sp_port->local_port, + i, min_buff, max_buff); if (err) return err; } return 0; } -static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) -{ - int err; - - err = __mlxsw_sp_port_sb_pms_init(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_port->local_port, - MLXSW_REG_SBXX_DIR_INGRESS, - mlxsw_sp_sb_pms_ingress, - MLXSW_SP_SB_PMS_INGRESS_LEN); - if (err) - return err; - return __mlxsw_sp_port_sb_pms_init(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_port->local_port, - MLXSW_REG_SBXX_DIR_EGRESS, - mlxsw_sp_sb_pms_egress, - MLXSW_SP_SB_PMS_EGRESS_LEN); -} - struct mlxsw_sp_sb_mm { u32 min_buff; u32 max_buff; - u8 pool; + u16 pool_index; }; #define MLXSW_SP_SB_MM(_min_buff, _max_buff, _pool) \ { \ .min_buff = _min_buff, \ .max_buff = _max_buff, \ - .pool = _pool, \ + .pool_index = _pool, \ } static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = { - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), - MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), + MLXSW_SP_SB_MM(0, 6, 4), }; #define MLXSW_SP_SB_MMS_LEN ARRAY_SIZE(mlxsw_sp_sb_mms) @@ -544,16 +592,18 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) int err; for (i = 0; i < MLXSW_SP_SB_MMS_LEN; i++) { + const struct mlxsw_sp_sb_pool_des *des; const struct mlxsw_sp_sb_mm *mc; u32 min_buff; mc = &mlxsw_sp_sb_mms[i]; - /* All pools are initialized using dynamic thresholds, - * therefore 'max_buff' isn't specified in cells. 
+ des = &mlxsw_sp_sb_pool_dess[mc->pool_index]; + /* All pools used by sb_mm's are initialized using dynamic + * thresholds, therefore 'max_buff' isn't specified in cells. */ min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, mc->min_buff); mlxsw_reg_sbmm_pack(sbmm_pl, i, min_buff, mc->max_buff, - mc->pool); + des->pool); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbmm), sbmm_pl); if (err) return err; @@ -561,9 +611,24 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) return 0; } +static void mlxsw_sp_pool_count(u16 *p_ingress_len, u16 *p_egress_len) +{ + int i; + + for (i = 0; i < MLXSW_SP_SB_POOL_DESS_LEN; ++i) + if (mlxsw_sp_sb_pool_dess[i].dir == MLXSW_REG_SBXX_DIR_EGRESS) + goto out; + WARN(1, "No egress pools\n"); + +out: + *p_ingress_len = i; + *p_egress_len = MLXSW_SP_SB_POOL_DESS_LEN - i; +} + int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) { - u64 sb_size; + u16 ing_pool_count; + u16 eg_pool_count; int err; if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, CELL_SIZE)) @@ -571,17 +636,19 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_BUFFER_SIZE)) return -EIO; - sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE); mlxsw_sp->sb = kzalloc(sizeof(*mlxsw_sp->sb), GFP_KERNEL); if (!mlxsw_sp->sb) return -ENOMEM; mlxsw_sp->sb->cell_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, CELL_SIZE); + mlxsw_sp->sb->sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, + MAX_BUFFER_SIZE); err = mlxsw_sp_sb_ports_init(mlxsw_sp); if (err) goto err_sb_ports_init; - err = mlxsw_sp_sb_prs_init(mlxsw_sp); + err = mlxsw_sp_sb_prs_init(mlxsw_sp, mlxsw_sp_sb_prs, + MLXSW_SP_SB_PRS_LEN); if (err) goto err_sb_prs_init; err = mlxsw_sp_cpu_port_sb_cms_init(mlxsw_sp); @@ -590,11 +657,13 @@ int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) err = mlxsw_sp_sb_mms_init(mlxsw_sp); if (err) goto err_sb_mms_init; - err = devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, sb_size, - MLXSW_SP_SB_POOL_COUNT, - MLXSW_SP_SB_POOL_COUNT, - MLXSW_SP_SB_TC_COUNT, - MLXSW_SP_SB_TC_COUNT); + mlxsw_sp_pool_count(&ing_pool_count, &eg_pool_count); + err = devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, + mlxsw_sp->sb->sb_size, + ing_pool_count, + eg_pool_count, + MLXSW_SP_SB_ING_TC_COUNT, + MLXSW_SP_SB_EG_TC_COUNT); if (err) goto err_devlink_sb_register; @@ -632,36 +701,15 @@ int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port) return err; } -static u8 pool_get(u16 pool_index) -{ - return pool_index % MLXSW_SP_SB_POOL_COUNT; -} - -static u16 pool_index_get(u8 pool, enum mlxsw_reg_sbxx_dir dir) -{ - u16 pool_index; - - pool_index = pool; - if (dir == MLXSW_REG_SBXX_DIR_EGRESS) - pool_index += MLXSW_SP_SB_POOL_COUNT; - return pool_index; -} - -static enum mlxsw_reg_sbxx_dir dir_get(u16 pool_index) -{ - return pool_index < MLXSW_SP_SB_POOL_COUNT ? 
- MLXSW_REG_SBXX_DIR_INGRESS : MLXSW_REG_SBXX_DIR_EGRESS; -} - int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, unsigned int sb_index, u16 pool_index, struct devlink_sb_pool_info *pool_info) { + enum mlxsw_reg_sbxx_dir dir = mlxsw_sp_sb_pool_dess[pool_index].dir; struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - u8 pool = pool_get(pool_index); - enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); - struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + struct mlxsw_sp_sb_pr *pr; + pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index); pool_info->pool_type = (enum devlink_sb_pool_type) dir; pool_info->size = mlxsw_sp_cells_bytes(mlxsw_sp, pr->size); pool_info->threshold_type = (enum devlink_sb_threshold_type) pr->mode; @@ -674,34 +722,32 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); u32 pool_size = mlxsw_sp_bytes_cells(mlxsw_sp, size); - u8 pool = pool_get(pool_index); - enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); enum mlxsw_reg_sbpr_mode mode; if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE)) return -EINVAL; mode = (enum mlxsw_reg_sbpr_mode) threshold_type; - return mlxsw_sp_sb_pr_write(mlxsw_sp, pool, dir, mode, pool_size); + return mlxsw_sp_sb_pr_write(mlxsw_sp, pool_index, mode, + pool_size, false); } #define MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET (-2) /* 3->1, 16->14 */ -static u32 mlxsw_sp_sb_threshold_out(struct mlxsw_sp *mlxsw_sp, u8 pool, - enum mlxsw_reg_sbxx_dir dir, u32 max_buff) +static u32 mlxsw_sp_sb_threshold_out(struct mlxsw_sp *mlxsw_sp, u16 pool_index, + u32 max_buff) { - struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index); if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC) return max_buff - MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET; return mlxsw_sp_cells_bytes(mlxsw_sp, max_buff); } -static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u8 pool, - enum mlxsw_reg_sbxx_dir dir, u32 threshold, - u32 *p_max_buff) +static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u16 pool_index, + u32 threshold, u32 *p_max_buff) { - struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool_index); if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC) { int val; @@ -725,12 +771,10 @@ int mlxsw_sp_sb_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, mlxsw_core_port_driver_priv(mlxsw_core_port); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; - u8 pool = pool_get(pool_index); - enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, - pool, dir); + pool_index); - *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, pool, dir, + *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, pool_index, pm->max_buff); return 0; } @@ -743,17 +787,15 @@ int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port, mlxsw_core_port_driver_priv(mlxsw_core_port); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; - u8 pool = pool_get(pool_index); - enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); u32 max_buff; int err; - err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir, + err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool_index, threshold, &max_buff); if (err) return err; - return mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, pool, dir, + return 
mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, pool_index, 0, max_buff); } @@ -771,9 +813,9 @@ int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port, struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir); - *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool, dir, + *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool_index, cm->max_buff); - *p_pool_index = pool_index_get(cm->pool, dir); + *p_pool_index = cm->pool_index; return 0; } @@ -788,24 +830,24 @@ int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, u8 local_port = mlxsw_sp_port->local_port; u8 pg_buff = tc_index; enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type; - u8 pool = pool_get(pool_index); u32 max_buff; int err; - if (dir != dir_get(pool_index)) + if (dir != mlxsw_sp_sb_pool_dess[pool_index].dir) return -EINVAL; - err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir, + err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool_index, threshold, &max_buff); if (err) return err; - return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff, dir, - 0, max_buff, pool); + return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff, + 0, max_buff, false, pool_index); } #define MASKED_COUNT_MAX \ - (MLXSW_REG_SBSR_REC_MAX_COUNT / (MLXSW_SP_SB_TC_COUNT * 2)) + (MLXSW_REG_SBSR_REC_MAX_COUNT / \ + (MLXSW_SP_SB_ING_TC_COUNT + MLXSW_SP_SB_EG_TC_COUNT)) struct mlxsw_sp_sb_sr_occ_query_cb_ctx { u8 masked_count; @@ -831,7 +873,7 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core, local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; - for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++) { cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i, MLXSW_REG_SBXX_DIR_INGRESS); mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++, @@ -845,7 +887,7 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core, local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; - for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + for (i = 0; i < MLXSW_SP_SB_EG_TC_COUNT; i++) { cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i, MLXSW_REG_SBXX_DIR_EGRESS); mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++, @@ -880,23 +922,17 @@ next_batch: local_port_1 = local_port; masked_count = 0; mlxsw_reg_sbsr_pack(sbsr_pl, false); - for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++) mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1); + for (i = 0; i < MLXSW_SP_SB_EG_TC_COUNT; i++) mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1); - } for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); - for (i = 0; i < MLXSW_SP_SB_POOL_COUNT; i++) { - err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i, - MLXSW_REG_SBXX_DIR_INGRESS, - &bulk_list); - if (err) - goto out; + for (i = 0; i < MLXSW_SP_SB_POOL_DESS_LEN; i++) { err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i, - MLXSW_REG_SBXX_DIR_EGRESS, &bulk_list); if (err) goto out; @@ -945,23 +981,17 @@ next_batch: local_port++; masked_count = 0; mlxsw_reg_sbsr_pack(sbsr_pl, true); - for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + for (i = 0; i < MLXSW_SP_SB_ING_TC_COUNT; i++) mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1); + for (i = 0; i < MLXSW_SP_SB_EG_TC_COUNT; i++) 
mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1); - } for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); - for (i = 0; i < MLXSW_SP_SB_POOL_COUNT; i++) { - err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i, - MLXSW_REG_SBXX_DIR_INGRESS, - &bulk_list); - if (err) - goto out; + for (i = 0; i < MLXSW_SP_SB_POOL_DESS_LEN; i++) { err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i, - MLXSW_REG_SBXX_DIR_EGRESS, &bulk_list); if (err) goto out; @@ -994,10 +1024,8 @@ int mlxsw_sp_sb_occ_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, mlxsw_core_port_driver_priv(mlxsw_core_port); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; - u8 pool = pool_get(pool_index); - enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, - pool, dir); + pool_index); *p_cur = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.cur); *p_max = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.max); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index 715d24ff937e..a3db033d7399 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -6,6 +6,7 @@ #include <linux/if_vlan.h> #include <linux/if_bridge.h> #include <linux/netdevice.h> +#include <linux/rhashtable.h> #include <linux/rtnetlink.h> #include "spectrum.h" @@ -14,6 +15,7 @@ struct mlxsw_sp_fid_family; struct mlxsw_sp_fid_core { + struct rhashtable vni_ht; struct mlxsw_sp_fid_family *fid_family_arr[MLXSW_SP_FID_TYPE_MAX]; unsigned int *port_fid_mappings; }; @@ -24,6 +26,12 @@ struct mlxsw_sp_fid { unsigned int ref_count; u16 fid_index; struct mlxsw_sp_fid_family *fid_family; + + struct rhash_head vni_ht_node; + __be32 vni; + u32 nve_flood_index; + u8 vni_valid:1, + nve_flood_index_valid:1; }; struct mlxsw_sp_fid_8021q { @@ -36,6 +44,12 @@ struct mlxsw_sp_fid_8021d { int br_ifindex; }; +static const struct rhashtable_params mlxsw_sp_fid_vni_ht_params = { + .key_len = sizeof_field(struct mlxsw_sp_fid, vni), + .key_offset = offsetof(struct mlxsw_sp_fid, vni), + .head_offset = offsetof(struct mlxsw_sp_fid, vni_ht_node), +}; + struct mlxsw_sp_flood_table { enum mlxsw_sp_flood_type packet_type; enum mlxsw_reg_sfgc_bridge_type bridge_type; @@ -56,6 +70,11 @@ struct mlxsw_sp_fid_ops { struct mlxsw_sp_port *port, u16 vid); void (*port_vid_unmap)(struct mlxsw_sp_fid *fid, struct mlxsw_sp_port *port, u16 vid); + int (*vni_set)(struct mlxsw_sp_fid *fid, __be32 vni); + void (*vni_clear)(struct mlxsw_sp_fid *fid); + int (*nve_flood_index_set)(struct mlxsw_sp_fid *fid, + u32 nve_flood_index); + void (*nve_flood_index_clear)(struct mlxsw_sp_fid *fid); }; struct mlxsw_sp_fid_family { @@ -94,6 +113,117 @@ static const int *mlxsw_sp_packet_type_sfgc_types[] = { [MLXSW_SP_FLOOD_TYPE_MC] = mlxsw_sp_sfgc_mc_packet_types, }; +struct mlxsw_sp_fid *mlxsw_sp_fid_lookup_by_vni(struct mlxsw_sp *mlxsw_sp, + __be32 vni) +{ + struct mlxsw_sp_fid *fid; + + fid = rhashtable_lookup_fast(&mlxsw_sp->fid_core->vni_ht, &vni, + mlxsw_sp_fid_vni_ht_params); + if (fid) + fid->ref_count++; + + return fid; +} + +int mlxsw_sp_fid_vni(const struct mlxsw_sp_fid *fid, __be32 *vni) +{ + if (!fid->vni_valid) + return -EINVAL; + + *vni = fid->vni; + + return 0; +} + +int 
mlxsw_sp_fid_nve_flood_index_set(struct mlxsw_sp_fid *fid, + u32 nve_flood_index) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + const struct mlxsw_sp_fid_ops *ops = fid_family->ops; + int err; + + if (WARN_ON(!ops->nve_flood_index_set || fid->nve_flood_index_valid)) + return -EINVAL; + + err = ops->nve_flood_index_set(fid, nve_flood_index); + if (err) + return err; + + fid->nve_flood_index = nve_flood_index; + fid->nve_flood_index_valid = true; + + return 0; +} + +void mlxsw_sp_fid_nve_flood_index_clear(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + const struct mlxsw_sp_fid_ops *ops = fid_family->ops; + + if (WARN_ON(!ops->nve_flood_index_clear || !fid->nve_flood_index_valid)) + return; + + fid->nve_flood_index_valid = false; + ops->nve_flood_index_clear(fid); +} + +bool mlxsw_sp_fid_nve_flood_index_is_set(const struct mlxsw_sp_fid *fid) +{ + return fid->nve_flood_index_valid; +} + +int mlxsw_sp_fid_vni_set(struct mlxsw_sp_fid *fid, __be32 vni) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + const struct mlxsw_sp_fid_ops *ops = fid_family->ops; + struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; + int err; + + if (WARN_ON(!ops->vni_set || fid->vni_valid)) + return -EINVAL; + + fid->vni = vni; + err = rhashtable_lookup_insert_fast(&mlxsw_sp->fid_core->vni_ht, + &fid->vni_ht_node, + mlxsw_sp_fid_vni_ht_params); + if (err) + return err; + + err = ops->vni_set(fid, vni); + if (err) + goto err_vni_set; + + fid->vni_valid = true; + + return 0; + +err_vni_set: + rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node, + mlxsw_sp_fid_vni_ht_params); + return err; +} + +void mlxsw_sp_fid_vni_clear(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + const struct mlxsw_sp_fid_ops *ops = fid_family->ops; + struct mlxsw_sp *mlxsw_sp = fid_family->mlxsw_sp; + + if (WARN_ON(!ops->vni_clear || !fid->vni_valid)) + return; + + fid->vni_valid = false; + ops->vni_clear(fid); + rhashtable_remove_fast(&mlxsw_sp->fid_core->vni_ht, &fid->vni_ht_node, + mlxsw_sp_fid_vni_ht_params); +} + +bool mlxsw_sp_fid_vni_is_set(const struct mlxsw_sp_fid *fid) +{ + return fid->vni_valid; +} + static const struct mlxsw_sp_flood_table * mlxsw_sp_fid_flood_table_lookup(const struct mlxsw_sp_fid *fid, enum mlxsw_sp_flood_type packet_type) @@ -217,6 +347,21 @@ static int mlxsw_sp_fid_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); } +static int mlxsw_sp_fid_vni_op(struct mlxsw_sp *mlxsw_sp, u16 fid_index, + __be32 vni, bool vni_valid, u32 nve_flood_index, + bool nve_flood_index_valid) +{ + char sfmr_pl[MLXSW_REG_SFMR_LEN]; + + mlxsw_reg_sfmr_pack(sfmr_pl, MLXSW_REG_SFMR_OP_CREATE_FID, fid_index, + 0); + mlxsw_reg_sfmr_vv_set(sfmr_pl, vni_valid); + mlxsw_reg_sfmr_vni_set(sfmr_pl, be32_to_cpu(vni)); + mlxsw_reg_sfmr_vtfp_set(sfmr_pl, nve_flood_index_valid); + mlxsw_reg_sfmr_nve_tunnel_flood_ptr_set(sfmr_pl, nve_flood_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfmr), sfmr_pl); +} + static int mlxsw_sp_fid_vid_map(struct mlxsw_sp *mlxsw_sp, u16 fid_index, u16 vid, bool valid) { @@ -393,6 +538,8 @@ static int mlxsw_sp_fid_8021d_configure(struct mlxsw_sp_fid *fid) static void mlxsw_sp_fid_8021d_deconfigure(struct mlxsw_sp_fid *fid) { + if (fid->vni_valid) + mlxsw_sp_nve_fid_disable(fid->fid_family->mlxsw_sp, fid); mlxsw_sp_fid_op(fid->fid_family->mlxsw_sp, fid->fid_index, 0, false); } @@ -531,6 +678,41 @@ 
mlxsw_sp_fid_8021d_port_vid_unmap(struct mlxsw_sp_fid *fid, mlxsw_sp_port->local_port, vid, false); } +static int mlxsw_sp_fid_8021d_vni_set(struct mlxsw_sp_fid *fid, __be32 vni) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + + return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, vni, + true, fid->nve_flood_index, + fid->nve_flood_index_valid); +} + +static void mlxsw_sp_fid_8021d_vni_clear(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + + mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, 0, false, + fid->nve_flood_index, fid->nve_flood_index_valid); +} + +static int mlxsw_sp_fid_8021d_nve_flood_index_set(struct mlxsw_sp_fid *fid, + u32 nve_flood_index) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + + return mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, + fid->vni, fid->vni_valid, nve_flood_index, + true); +} + +static void mlxsw_sp_fid_8021d_nve_flood_index_clear(struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_fid_family *fid_family = fid->fid_family; + + mlxsw_sp_fid_vni_op(fid_family->mlxsw_sp, fid->fid_index, fid->vni, + fid->vni_valid, 0, false); +} + static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = { .setup = mlxsw_sp_fid_8021d_setup, .configure = mlxsw_sp_fid_8021d_configure, @@ -540,6 +722,10 @@ static const struct mlxsw_sp_fid_ops mlxsw_sp_fid_8021d_ops = { .flood_index = mlxsw_sp_fid_8021d_flood_index, .port_vid_map = mlxsw_sp_fid_8021d_port_vid_map, .port_vid_unmap = mlxsw_sp_fid_8021d_port_vid_unmap, + .vni_set = mlxsw_sp_fid_8021d_vni_set, + .vni_clear = mlxsw_sp_fid_8021d_vni_clear, + .nve_flood_index_set = mlxsw_sp_fid_8021d_nve_flood_index_set, + .nve_flood_index_clear = mlxsw_sp_fid_8021d_nve_flood_index_clear, }; static const struct mlxsw_sp_flood_table mlxsw_sp_fid_8021d_flood_tables[] = { @@ -708,14 +894,12 @@ static const struct mlxsw_sp_fid_family *mlxsw_sp_fid_family_arr[] = { [MLXSW_SP_FID_TYPE_DUMMY] = &mlxsw_sp_fid_dummy_family, }; -static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, - enum mlxsw_sp_fid_type type, - const void *arg) +static struct mlxsw_sp_fid *mlxsw_sp_fid_lookup(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_fid_type type, + const void *arg) { struct mlxsw_sp_fid_family *fid_family; struct mlxsw_sp_fid *fid; - u16 fid_index; - int err; fid_family = mlxsw_sp->fid_core->fid_family_arr[type]; list_for_each_entry(fid, &fid_family->fids_list, list) { @@ -725,6 +909,23 @@ static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, return fid; } + return NULL; +} + +static struct mlxsw_sp_fid *mlxsw_sp_fid_get(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_fid_type type, + const void *arg) +{ + struct mlxsw_sp_fid_family *fid_family; + struct mlxsw_sp_fid *fid; + u16 fid_index; + int err; + + fid = mlxsw_sp_fid_lookup(mlxsw_sp, type, arg); + if (fid) + return fid; + + fid_family = mlxsw_sp->fid_core->fid_family_arr[type]; fid = kzalloc(fid_family->fid_size, GFP_KERNEL); if (!fid) return ERR_PTR(-ENOMEM); @@ -784,6 +985,13 @@ struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_get(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fid_get(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D, &br_ifindex); } +struct mlxsw_sp_fid *mlxsw_sp_fid_8021d_lookup(struct mlxsw_sp *mlxsw_sp, + int br_ifindex) +{ + return mlxsw_sp_fid_lookup(mlxsw_sp, MLXSW_SP_FID_TYPE_8021D, + &br_ifindex); +} + struct mlxsw_sp_fid *mlxsw_sp_fid_rfid_get(struct mlxsw_sp *mlxsw_sp, u16 rif_index) { @@ -918,6 +1126,10 @@ int mlxsw_sp_fids_init(struct mlxsw_sp *mlxsw_sp) return 
-ENOMEM; mlxsw_sp->fid_core = fid_core; + err = rhashtable_init(&fid_core->vni_ht, &mlxsw_sp_fid_vni_ht_params); + if (err) + goto err_rhashtable_init; + fid_core->port_fid_mappings = kcalloc(max_ports, sizeof(unsigned int), GFP_KERNEL); if (!fid_core->port_fid_mappings) { @@ -944,6 +1156,8 @@ err_fid_ops_register: } kfree(fid_core->port_fid_mappings); err_alloc_port_fid_mappings: + rhashtable_destroy(&fid_core->vni_ht); +err_rhashtable_init: kfree(fid_core); return err; } @@ -957,5 +1171,6 @@ void mlxsw_sp_fids_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_fid_family_unregister(mlxsw_sp, fid_core->fid_family_arr[i]); kfree(fid_core->port_fid_mappings); + rhashtable_destroy(&fid_core->vni_ht); kfree(fid_core); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c new file mode 100644 index 000000000000..ad06d9969bc1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.c @@ -0,0 +1,982 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#include <linux/err.h> +#include <linux/gfp.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/slab.h> +#include <net/inet_ecn.h> +#include <net/ipv6.h> + +#include "reg.h" +#include "spectrum.h" +#include "spectrum_nve.h" + +const struct mlxsw_sp_nve_ops *mlxsw_sp1_nve_ops_arr[] = { + [MLXSW_SP_NVE_TYPE_VXLAN] = &mlxsw_sp1_nve_vxlan_ops, +}; + +const struct mlxsw_sp_nve_ops *mlxsw_sp2_nve_ops_arr[] = { + [MLXSW_SP_NVE_TYPE_VXLAN] = &mlxsw_sp2_nve_vxlan_ops, +}; + +struct mlxsw_sp_nve_mc_entry; +struct mlxsw_sp_nve_mc_record; +struct mlxsw_sp_nve_mc_list; + +struct mlxsw_sp_nve_mc_record_ops { + enum mlxsw_reg_tnumt_record_type type; + int (*entry_add)(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr); + void (*entry_del)(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry); + void (*entry_set)(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + char *tnumt_pl, unsigned int entry_index); + bool (*entry_compare)(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr); +}; + +struct mlxsw_sp_nve_mc_list_key { + u16 fid_index; +}; + +struct mlxsw_sp_nve_mc_ipv6_entry { + struct in6_addr addr6; + u32 addr6_kvdl_index; +}; + +struct mlxsw_sp_nve_mc_entry { + union { + __be32 addr4; + struct mlxsw_sp_nve_mc_ipv6_entry ipv6_entry; + }; + u8 valid:1; +}; + +struct mlxsw_sp_nve_mc_record { + struct list_head list; + enum mlxsw_sp_l3proto proto; + unsigned int num_entries; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_nve_mc_list *mc_list; + const struct mlxsw_sp_nve_mc_record_ops *ops; + u32 kvdl_index; + struct mlxsw_sp_nve_mc_entry entries[0]; +}; + +struct mlxsw_sp_nve_mc_list { + struct list_head records_list; + struct rhash_head ht_node; + struct mlxsw_sp_nve_mc_list_key key; +}; + +static const struct rhashtable_params mlxsw_sp_nve_mc_list_ht_params = { + .key_len = sizeof(struct mlxsw_sp_nve_mc_list_key), + .key_offset = offsetof(struct mlxsw_sp_nve_mc_list, key), + .head_offset = offsetof(struct mlxsw_sp_nve_mc_list, ht_node), +}; + +static int +mlxsw_sp_nve_mc_record_ipv4_entry_add(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr 
*addr) +{ + mc_entry->addr4 = addr->addr4; + + return 0; +} + +static void +mlxsw_sp_nve_mc_record_ipv4_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry) +{ +} + +static void +mlxsw_sp_nve_mc_record_ipv4_entry_set(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + char *tnumt_pl, unsigned int entry_index) +{ + u32 udip = be32_to_cpu(mc_entry->addr4); + + mlxsw_reg_tnumt_udip_set(tnumt_pl, entry_index, udip); +} + +static bool +mlxsw_sp_nve_mc_record_ipv4_entry_compare(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + return mc_entry->addr4 == addr->addr4; +} + +static const struct mlxsw_sp_nve_mc_record_ops +mlxsw_sp_nve_mc_record_ipv4_ops = { + .type = MLXSW_REG_TNUMT_RECORD_TYPE_IPV4, + .entry_add = &mlxsw_sp_nve_mc_record_ipv4_entry_add, + .entry_del = &mlxsw_sp_nve_mc_record_ipv4_entry_del, + .entry_set = &mlxsw_sp_nve_mc_record_ipv4_entry_set, + .entry_compare = &mlxsw_sp_nve_mc_record_ipv4_entry_compare, +}; + +static int +mlxsw_sp_nve_mc_record_ipv6_entry_add(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + WARN_ON(1); + + return -EINVAL; +} + +static void +mlxsw_sp_nve_mc_record_ipv6_entry_del(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry) +{ +} + +static void +mlxsw_sp_nve_mc_record_ipv6_entry_set(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + char *tnumt_pl, unsigned int entry_index) +{ + u32 udip_ptr = mc_entry->ipv6_entry.addr6_kvdl_index; + + mlxsw_reg_tnumt_udip_ptr_set(tnumt_pl, entry_index, udip_ptr); +} + +static bool +mlxsw_sp_nve_mc_record_ipv6_entry_compare(const struct mlxsw_sp_nve_mc_record *mc_record, + const struct mlxsw_sp_nve_mc_entry *mc_entry, + const union mlxsw_sp_l3addr *addr) +{ + return ipv6_addr_equal(&mc_entry->ipv6_entry.addr6, &addr->addr6); +} + +static const struct mlxsw_sp_nve_mc_record_ops +mlxsw_sp_nve_mc_record_ipv6_ops = { + .type = MLXSW_REG_TNUMT_RECORD_TYPE_IPV6, + .entry_add = &mlxsw_sp_nve_mc_record_ipv6_entry_add, + .entry_del = &mlxsw_sp_nve_mc_record_ipv6_entry_del, + .entry_set = &mlxsw_sp_nve_mc_record_ipv6_entry_set, + .entry_compare = &mlxsw_sp_nve_mc_record_ipv6_entry_compare, +}; + +static const struct mlxsw_sp_nve_mc_record_ops * +mlxsw_sp_nve_mc_record_ops_arr[] = { + [MLXSW_SP_L3_PROTO_IPV4] = &mlxsw_sp_nve_mc_record_ipv4_ops, + [MLXSW_SP_L3_PROTO_IPV6] = &mlxsw_sp_nve_mc_record_ipv6_ops, +}; + +static struct mlxsw_sp_nve_mc_list * +mlxsw_sp_nve_mc_list_find(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_mc_list_key *key) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + + return rhashtable_lookup_fast(&nve->mc_list_ht, key, + mlxsw_sp_nve_mc_list_ht_params); +} + +static struct mlxsw_sp_nve_mc_list * +mlxsw_sp_nve_mc_list_create(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_mc_list_key *key) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + struct mlxsw_sp_nve_mc_list *mc_list; + int err; + + mc_list = kmalloc(sizeof(*mc_list), GFP_KERNEL); + if (!mc_list) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&mc_list->records_list); + mc_list->key = *key; + + err = rhashtable_insert_fast(&nve->mc_list_ht, &mc_list->ht_node, + mlxsw_sp_nve_mc_list_ht_params); + if (err) + goto err_rhashtable_insert; + + return mc_list; + +err_rhashtable_insert: + 
kfree(mc_list); + return ERR_PTR(err); +} + +static void mlxsw_sp_nve_mc_list_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + + rhashtable_remove_fast(&nve->mc_list_ht, &mc_list->ht_node, + mlxsw_sp_nve_mc_list_ht_params); + WARN_ON(!list_empty(&mc_list->records_list)); + kfree(mc_list); +} + +static struct mlxsw_sp_nve_mc_list * +mlxsw_sp_nve_mc_list_get(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_mc_list_key *key) +{ + struct mlxsw_sp_nve_mc_list *mc_list; + + mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, key); + if (mc_list) + return mc_list; + + return mlxsw_sp_nve_mc_list_create(mlxsw_sp, key); +} + +static void +mlxsw_sp_nve_mc_list_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + if (!list_empty(&mc_list->records_list)) + return; + mlxsw_sp_nve_mc_list_destroy(mlxsw_sp, mc_list); +} + +static struct mlxsw_sp_nve_mc_record * +mlxsw_sp_nve_mc_record_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto) +{ + unsigned int num_max_entries = mlxsw_sp->nve->num_max_mc_entries[proto]; + struct mlxsw_sp_nve_mc_record *mc_record; + int err; + + mc_record = kzalloc(sizeof(*mc_record) + num_max_entries * + sizeof(struct mlxsw_sp_nve_mc_entry), GFP_KERNEL); + if (!mc_record) + return ERR_PTR(-ENOMEM); + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, 1, + &mc_record->kvdl_index); + if (err) + goto err_kvdl_alloc; + + mc_record->ops = mlxsw_sp_nve_mc_record_ops_arr[proto]; + mc_record->mlxsw_sp = mlxsw_sp; + mc_record->mc_list = mc_list; + mc_record->proto = proto; + list_add_tail(&mc_record->list, &mc_list->records_list); + + return mc_record; + +err_kvdl_alloc: + kfree(mc_record); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nve_mc_record_destroy(struct mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp *mlxsw_sp = mc_record->mlxsw_sp; + + list_del(&mc_record->list); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_TNUMT, 1, + mc_record->kvdl_index); + WARN_ON(mc_record->num_entries); + kfree(mc_record); +} + +static struct mlxsw_sp_nve_mc_record * +mlxsw_sp_nve_mc_record_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + list_for_each_entry_reverse(mc_record, &mc_list->records_list, list) { + unsigned int num_entries = mc_record->num_entries; + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + + if (mc_record->proto == proto && + num_entries < nve->num_max_mc_entries[proto]) + return mc_record; + } + + return mlxsw_sp_nve_mc_record_create(mlxsw_sp, mc_list, proto); +} + +static void +mlxsw_sp_nve_mc_record_put(struct mlxsw_sp_nve_mc_record *mc_record) +{ + if (mc_record->num_entries != 0) + return; + + mlxsw_sp_nve_mc_record_destroy(mc_record); +} + +static struct mlxsw_sp_nve_mc_entry * +mlxsw_sp_nve_mc_free_entry_find(struct mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve; + unsigned int num_max_entries; + int i; + + num_max_entries = nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + if (mc_record->entries[i].valid) + continue; + return &mc_record->entries[i]; + } + + return NULL; +} + +static int +mlxsw_sp_nve_mc_record_refresh(struct mlxsw_sp_nve_mc_record *mc_record) +{ + enum mlxsw_reg_tnumt_record_type type = mc_record->ops->type; + struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list; + struct mlxsw_sp 
*mlxsw_sp = mc_record->mlxsw_sp; + char tnumt_pl[MLXSW_REG_TNUMT_LEN]; + unsigned int num_max_entries; + unsigned int num_entries = 0; + u32 next_kvdl_index = 0; + bool next_valid = false; + int i; + + if (!list_is_last(&mc_record->list, &mc_list->records_list)) { + struct mlxsw_sp_nve_mc_record *next_record; + + next_record = list_next_entry(mc_record, list); + next_kvdl_index = next_record->kvdl_index; + next_valid = true; + } + + mlxsw_reg_tnumt_pack(tnumt_pl, type, MLXSW_REG_TNUMT_TUNNEL_PORT_NVE, + mc_record->kvdl_index, next_valid, + next_kvdl_index, mc_record->num_entries); + + num_max_entries = mlxsw_sp->nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + struct mlxsw_sp_nve_mc_entry *mc_entry; + + mc_entry = &mc_record->entries[i]; + if (!mc_entry->valid) + continue; + mc_record->ops->entry_set(mc_record, mc_entry, tnumt_pl, + num_entries++); + } + + WARN_ON(num_entries != mc_record->num_entries); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnumt), tnumt_pl); +} + +static bool +mlxsw_sp_nve_mc_record_is_first(struct mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list; + struct mlxsw_sp_nve_mc_record *first_record; + + first_record = list_first_entry(&mc_list->records_list, + struct mlxsw_sp_nve_mc_record, list); + + return mc_record == first_record; +} + +static struct mlxsw_sp_nve_mc_entry * +mlxsw_sp_nve_mc_entry_find(struct mlxsw_sp_nve_mc_record *mc_record, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve; + unsigned int num_max_entries; + int i; + + num_max_entries = nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + struct mlxsw_sp_nve_mc_entry *mc_entry; + + mc_entry = &mc_record->entries[i]; + if (!mc_entry->valid) + continue; + if (mc_record->ops->entry_compare(mc_record, mc_entry, addr)) + return mc_entry; + } + + return NULL; +} + +static int +mlxsw_sp_nve_mc_record_ip_add(struct mlxsw_sp_nve_mc_record *mc_record, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_entry *mc_entry = NULL; + int err; + + mc_entry = mlxsw_sp_nve_mc_free_entry_find(mc_record); + if (WARN_ON(!mc_entry)) + return -EINVAL; + + err = mc_record->ops->entry_add(mc_record, mc_entry, addr); + if (err) + return err; + mc_record->num_entries++; + mc_entry->valid = true; + + err = mlxsw_sp_nve_mc_record_refresh(mc_record); + if (err) + goto err_record_refresh; + + /* If this is a new record and not the first one, then we need to + * update the next pointer of the previous entry + */ + if (mc_record->num_entries != 1 || + mlxsw_sp_nve_mc_record_is_first(mc_record)) + return 0; + + err = mlxsw_sp_nve_mc_record_refresh(list_prev_entry(mc_record, list)); + if (err) + goto err_prev_record_refresh; + + return 0; + +err_prev_record_refresh: +err_record_refresh: + mc_entry->valid = false; + mc_record->num_entries--; + mc_record->ops->entry_del(mc_record, mc_entry); + return err; +} + +static void +mlxsw_sp_nve_mc_record_entry_del(struct mlxsw_sp_nve_mc_record *mc_record, + struct mlxsw_sp_nve_mc_entry *mc_entry) +{ + struct mlxsw_sp_nve_mc_list *mc_list = mc_record->mc_list; + + mc_entry->valid = false; + mc_record->num_entries--; + + /* When the record continues to exist we only need to invalidate + * the requested entry + */ + if (mc_record->num_entries != 0) { + mlxsw_sp_nve_mc_record_refresh(mc_record); + mc_record->ops->entry_del(mc_record, mc_entry); + return; + } + + /* If the record needs to be deleted, but it is not the first, + * 
then we need to make sure that the previous record no longer + * points to it. Remove deleted record from the list to reflect + * that and then re-add it at the end, so that it could be + * properly removed by the record destruction code + */ + if (!mlxsw_sp_nve_mc_record_is_first(mc_record)) { + struct mlxsw_sp_nve_mc_record *prev_record; + + prev_record = list_prev_entry(mc_record, list); + list_del(&mc_record->list); + mlxsw_sp_nve_mc_record_refresh(prev_record); + list_add_tail(&mc_record->list, &mc_list->records_list); + mc_record->ops->entry_del(mc_record, mc_entry); + return; + } + + /* If the first record needs to be deleted, but the list is not + * singular, then the second record needs to be written in the + * first record's address, as this address is stored as a property + * of the FID + */ + if (mlxsw_sp_nve_mc_record_is_first(mc_record) && + !list_is_singular(&mc_list->records_list)) { + struct mlxsw_sp_nve_mc_record *next_record; + + next_record = list_next_entry(mc_record, list); + swap(mc_record->kvdl_index, next_record->kvdl_index); + mlxsw_sp_nve_mc_record_refresh(next_record); + mc_record->ops->entry_del(mc_record, mc_entry); + return; + } + + /* This is the last case where the last remaining record needs to + * be deleted. Simply delete the entry + */ + mc_record->ops->entry_del(mc_record, mc_entry); +} + +static struct mlxsw_sp_nve_mc_record * +mlxsw_sp_nve_mc_record_find(struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr, + struct mlxsw_sp_nve_mc_entry **mc_entry) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + list_for_each_entry(mc_record, &mc_list->records_list, list) { + if (mc_record->proto != proto) + continue; + + *mc_entry = mlxsw_sp_nve_mc_entry_find(mc_record, addr); + if (*mc_entry) + return mc_record; + } + + return NULL; +} + +static int mlxsw_sp_nve_mc_list_ip_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + int err; + + mc_record = mlxsw_sp_nve_mc_record_get(mlxsw_sp, mc_list, proto); + if (IS_ERR(mc_record)) + return PTR_ERR(mc_record); + + err = mlxsw_sp_nve_mc_record_ip_add(mc_record, addr); + if (err) + goto err_ip_add; + + return 0; + +err_ip_add: + mlxsw_sp_nve_mc_record_put(mc_record); + return err; +} + +static void mlxsw_sp_nve_mc_list_ip_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_mc_list *mc_list, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + struct mlxsw_sp_nve_mc_entry *mc_entry; + + mc_record = mlxsw_sp_nve_mc_record_find(mc_list, proto, addr, + &mc_entry); + if (WARN_ON(!mc_record)) + return; + + mlxsw_sp_nve_mc_record_entry_del(mc_record, mc_entry); + mlxsw_sp_nve_mc_record_put(mc_record); +} + +static int +mlxsw_sp_nve_fid_flood_index_set(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + /* The address of the first record in the list is a property of + * the FID and we never change it. 
It only needs to be set when + * a new list is created + */ + if (mlxsw_sp_fid_nve_flood_index_is_set(fid)) + return 0; + + mc_record = list_first_entry(&mc_list->records_list, + struct mlxsw_sp_nve_mc_record, list); + + return mlxsw_sp_fid_nve_flood_index_set(fid, mc_record->kvdl_index); +} + +static void +mlxsw_sp_nve_fid_flood_index_clear(struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_mc_list *mc_list) +{ + struct mlxsw_sp_nve_mc_record *mc_record; + + /* The address of the first record needs to be invalidated only when + * the last record is about to be removed + */ + if (!list_is_singular(&mc_list->records_list)) + return; + + mc_record = list_first_entry(&mc_list->records_list, + struct mlxsw_sp_nve_mc_record, list); + if (mc_record->num_entries != 1) + return; + + return mlxsw_sp_fid_nve_flood_index_clear(fid); +} + +int mlxsw_sp_nve_flood_ip_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_list_key key = { 0 }; + struct mlxsw_sp_nve_mc_list *mc_list; + int err; + + key.fid_index = mlxsw_sp_fid_index(fid); + mc_list = mlxsw_sp_nve_mc_list_get(mlxsw_sp, &key); + if (IS_ERR(mc_list)) + return PTR_ERR(mc_list); + + err = mlxsw_sp_nve_mc_list_ip_add(mlxsw_sp, mc_list, proto, addr); + if (err) + goto err_add_ip; + + err = mlxsw_sp_nve_fid_flood_index_set(fid, mc_list); + if (err) + goto err_fid_flood_index_set; + + return 0; + +err_fid_flood_index_set: + mlxsw_sp_nve_mc_list_ip_del(mlxsw_sp, mc_list, proto, addr); +err_add_ip: + mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list); + return err; +} + +void mlxsw_sp_nve_flood_ip_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid, + enum mlxsw_sp_l3proto proto, + union mlxsw_sp_l3addr *addr) +{ + struct mlxsw_sp_nve_mc_list_key key = { 0 }; + struct mlxsw_sp_nve_mc_list *mc_list; + + key.fid_index = mlxsw_sp_fid_index(fid); + mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key); + if (WARN_ON(!mc_list)) + return; + + mlxsw_sp_nve_fid_flood_index_clear(fid, mc_list); + mlxsw_sp_nve_mc_list_ip_del(mlxsw_sp, mc_list, proto, addr); + mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list); +} + +static void +mlxsw_sp_nve_mc_record_delete(struct mlxsw_sp_nve_mc_record *mc_record) +{ + struct mlxsw_sp_nve *nve = mc_record->mlxsw_sp->nve; + unsigned int num_max_entries; + int i; + + num_max_entries = nve->num_max_mc_entries[mc_record->proto]; + for (i = 0; i < num_max_entries; i++) { + struct mlxsw_sp_nve_mc_entry *mc_entry = &mc_record->entries[i]; + + if (!mc_entry->valid) + continue; + mlxsw_sp_nve_mc_record_entry_del(mc_record, mc_entry); + } + + WARN_ON(mc_record->num_entries); + mlxsw_sp_nve_mc_record_put(mc_record); +} + +static void mlxsw_sp_nve_flood_ip_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid) +{ + struct mlxsw_sp_nve_mc_record *mc_record, *tmp; + struct mlxsw_sp_nve_mc_list_key key = { 0 }; + struct mlxsw_sp_nve_mc_list *mc_list; + + if (!mlxsw_sp_fid_nve_flood_index_is_set(fid)) + return; + + mlxsw_sp_fid_nve_flood_index_clear(fid); + + key.fid_index = mlxsw_sp_fid_index(fid); + mc_list = mlxsw_sp_nve_mc_list_find(mlxsw_sp, &key); + if (WARN_ON(!mc_list)) + return; + + list_for_each_entry_safe(mc_record, tmp, &mc_list->records_list, list) + mlxsw_sp_nve_mc_record_delete(mc_record); + + WARN_ON(!list_empty(&mc_list->records_list)); + mlxsw_sp_nve_mc_list_put(mlxsw_sp, mc_list); +} + +u32 mlxsw_sp_nve_decap_tunnel_index_get(const struct mlxsw_sp *mlxsw_sp) +{ + WARN_ON(mlxsw_sp->nve->num_nve_tunnels == 0); + + return 
mlxsw_sp->nve->tunnel_index; +} + +bool mlxsw_sp_nve_ipv4_route_is_decap(const struct mlxsw_sp *mlxsw_sp, + u32 tb_id, __be32 addr) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + struct mlxsw_sp_nve_config *config = &nve->config; + + if (nve->num_nve_tunnels && + config->ul_proto == MLXSW_SP_L3_PROTO_IPV4 && + config->ul_sip.addr4 == addr && config->ul_tb_id == tb_id) + return true; + + return false; +} + +static int mlxsw_sp_nve_tunnel_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nve_config *config) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + const struct mlxsw_sp_nve_ops *ops; + int err; + + if (nve->num_nve_tunnels++ != 0) + return 0; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + &nve->tunnel_index); + if (err) + goto err_kvdl_alloc; + + ops = nve->nve_ops_arr[config->type]; + err = ops->init(nve, config); + if (err) + goto err_ops_init; + + return 0; + +err_ops_init: + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + nve->tunnel_index); +err_kvdl_alloc: + nve->num_nve_tunnels--; + return err; +} + +static void mlxsw_sp_nve_tunnel_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + const struct mlxsw_sp_nve_ops *ops; + + ops = nve->nve_ops_arr[nve->config.type]; + + if (mlxsw_sp->nve->num_nve_tunnels == 1) { + ops->fini(nve); + mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1, + nve->tunnel_index); + } + nve->num_nve_tunnels--; +} + +static void mlxsw_sp_nve_fdb_flush_by_fid(struct mlxsw_sp *mlxsw_sp, + u16 fid_index) +{ + char sfdf_pl[MLXSW_REG_SFDF_LEN]; + + mlxsw_reg_sfdf_pack(sfdf_pl, MLXSW_REG_SFDF_FLUSH_PER_NVE_AND_FID); + mlxsw_reg_sfdf_fid_set(sfdf_pl, fid_index); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfdf), sfdf_pl); +} + +int mlxsw_sp_nve_fid_enable(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *fid, + struct mlxsw_sp_nve_params *params, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_nve *nve = mlxsw_sp->nve; + const struct mlxsw_sp_nve_ops *ops; + struct mlxsw_sp_nve_config config; + int err; + + ops = nve->nve_ops_arr[params->type]; + + if (!ops->can_offload(nve, params->dev, extack)) + return -EOPNOTSUPP; + + memset(&config, 0, sizeof(config)); + ops->nve_config(nve, params->dev, &config); + if (nve->num_nve_tunnels && + memcmp(&config, &nve->config, sizeof(config))) { + NL_SET_ERR_MSG_MOD(extack, "Conflicting NVE tunnels configuration"); + return -EOPNOTSUPP; + } + + err = mlxsw_sp_nve_tunnel_init(mlxsw_sp, &config); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to initialize NVE tunnel"); + return err; + } + + err = mlxsw_sp_fid_vni_set(fid, params->vni); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to set VNI on FID"); + goto err_fid_vni_set; + } + + nve->config = config; + + return 0; + +err_fid_vni_set: + mlxsw_sp_nve_tunnel_fini(mlxsw_sp); + return err; +} + +void mlxsw_sp_nve_fid_disable(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fid *fid) +{ + u16 fid_index = mlxsw_sp_fid_index(fid); + + mlxsw_sp_nve_flood_ip_flush(mlxsw_sp, fid); + mlxsw_sp_nve_fdb_flush_by_fid(mlxsw_sp, fid_index); + mlxsw_sp_fid_vni_clear(fid); + mlxsw_sp_nve_tunnel_fini(mlxsw_sp); +} + +int mlxsw_sp_port_nve_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char tnqdr_pl[MLXSW_REG_TNQDR_LEN]; + + mlxsw_reg_tnqdr_pack(tnqdr_pl, mlxsw_sp_port->local_port); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnqdr), tnqdr_pl); +} + +void mlxsw_sp_port_nve_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ +} + +static int 
mlxsw_sp_nve_qos_init(struct mlxsw_sp *mlxsw_sp) +{ + char tnqcr_pl[MLXSW_REG_TNQCR_LEN]; + + mlxsw_reg_tnqcr_pack(tnqcr_pl); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tnqcr), tnqcr_pl); +} + +static int mlxsw_sp_nve_ecn_encap_init(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + /* Iterate over inner ECN values */ + for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) { + u8 outer_ecn = INET_ECN_encapsulate(0, i); + char tneem_pl[MLXSW_REG_TNEEM_LEN]; + int err; + + mlxsw_reg_tneem_pack(tneem_pl, i, outer_ecn); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tneem), + tneem_pl); + if (err) + return err; + } + + return 0; +} + +static int __mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp, + u8 inner_ecn, u8 outer_ecn) +{ + char tndem_pl[MLXSW_REG_TNDEM_LEN]; + bool trap_en, set_ce = false; + u8 new_inner_ecn; + + trap_en = !!__INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce); + new_inner_ecn = set_ce ? INET_ECN_CE : inner_ecn; + + mlxsw_reg_tndem_pack(tndem_pl, outer_ecn, inner_ecn, new_inner_ecn, + trap_en, trap_en ? MLXSW_TRAP_ID_DECAP_ECN0 : 0); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tndem), tndem_pl); +} + +static int mlxsw_sp_nve_ecn_decap_init(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + /* Iterate over inner ECN values */ + for (i = INET_ECN_NOT_ECT; i <= INET_ECN_CE; i++) { + int j; + + /* Iterate over outer ECN values */ + for (j = INET_ECN_NOT_ECT; j <= INET_ECN_CE; j++) { + int err; + + err = __mlxsw_sp_nve_ecn_decap_init(mlxsw_sp, i, j); + if (err) + return err; + } + } + + return 0; +} + +static int mlxsw_sp_nve_ecn_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + err = mlxsw_sp_nve_ecn_encap_init(mlxsw_sp); + if (err) + return err; + + return mlxsw_sp_nve_ecn_decap_init(mlxsw_sp); +} + +static int mlxsw_sp_nve_resources_query(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int max; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV4) || + !MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV6)) + return -EIO; + max = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV4); + mlxsw_sp->nve->num_max_mc_entries[MLXSW_SP_L3_PROTO_IPV4] = max; + max = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_NVE_MC_ENTRIES_IPV6); + mlxsw_sp->nve->num_max_mc_entries[MLXSW_SP_L3_PROTO_IPV6] = max; + + return 0; +} + +int mlxsw_sp_nve_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_nve *nve; + int err; + + nve = kzalloc(sizeof(*mlxsw_sp->nve), GFP_KERNEL); + if (!nve) + return -ENOMEM; + mlxsw_sp->nve = nve; + nve->mlxsw_sp = mlxsw_sp; + nve->nve_ops_arr = mlxsw_sp->nve_ops_arr; + + err = rhashtable_init(&nve->mc_list_ht, + &mlxsw_sp_nve_mc_list_ht_params); + if (err) + goto err_rhashtable_init; + + err = mlxsw_sp_nve_qos_init(mlxsw_sp); + if (err) + goto err_nve_qos_init; + + err = mlxsw_sp_nve_ecn_init(mlxsw_sp); + if (err) + goto err_nve_ecn_init; + + err = mlxsw_sp_nve_resources_query(mlxsw_sp); + if (err) + goto err_nve_resources_query; + + return 0; + +err_nve_resources_query: +err_nve_ecn_init: +err_nve_qos_init: + rhashtable_destroy(&nve->mc_list_ht); +err_rhashtable_init: + mlxsw_sp->nve = NULL; + kfree(nve); + return err; +} + +void mlxsw_sp_nve_fini(struct mlxsw_sp *mlxsw_sp) +{ + WARN_ON(mlxsw_sp->nve->num_nve_tunnels); + rhashtable_destroy(&mlxsw_sp->nve->mc_list_ht); + kfree(mlxsw_sp->nve); + mlxsw_sp->nve = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h new file mode 100644 index 000000000000..4cc3297e13d6 --- /dev/null +++ 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */ +/* Copyright (c) 2018 Mellanox Technologies. All rights reserved */ + +#ifndef _MLXSW_SPECTRUM_NVE_H +#define _MLXSW_SPECTRUM_NVE_H + +#include <linux/netlink.h> +#include <linux/rhashtable.h> + +#include "spectrum.h" + +struct mlxsw_sp_nve_config { + enum mlxsw_sp_nve_type type; + u8 ttl; + u8 learning_en:1; + __be16 udp_dport; + __be32 flowlabel; + u32 ul_tb_id; + enum mlxsw_sp_l3proto ul_proto; + union mlxsw_sp_l3addr ul_sip; +}; + +struct mlxsw_sp_nve { + struct mlxsw_sp_nve_config config; + struct rhashtable mc_list_ht; + struct mlxsw_sp *mlxsw_sp; + const struct mlxsw_sp_nve_ops **nve_ops_arr; + unsigned int num_nve_tunnels; /* Protected by RTNL */ + unsigned int num_max_mc_entries[MLXSW_SP_L3_PROTO_MAX]; + u32 tunnel_index; +}; + +struct mlxsw_sp_nve_ops { + enum mlxsw_sp_nve_type type; + bool (*can_offload)(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct netlink_ext_ack *extack); + void (*nve_config)(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct mlxsw_sp_nve_config *config); + int (*init)(struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_config *config); + void (*fini)(struct mlxsw_sp_nve *nve); +}; + +extern const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops; +extern const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops; + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c new file mode 100644 index 000000000000..d21c7be5b1c9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_nve_vxlan.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 +/* Copyright (c) 2018 Mellanox Technologies. 
All rights reserved */ + +#include <linux/netdevice.h> +#include <linux/netlink.h> +#include <linux/random.h> +#include <net/vxlan.h> + +#include "reg.h" +#include "spectrum_nve.h" + +/* Eth (18B) | IPv6 (40B) | UDP (8B) | VxLAN (8B) | Eth (14B) | IPv6 (40B) + * + * In the worst case - where we have a VLAN tag on the outer Ethernet + * header and IPv6 in overlay and underlay - we need to parse 128 bytes + */ +#define MLXSW_SP_NVE_VXLAN_PARSING_DEPTH 128 +#define MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH 96 + +#define MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS VXLAN_F_UDP_ZERO_CSUM_TX + +static bool mlxsw_sp1_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct netlink_ext_ack *extack) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_config *cfg = &vxlan->cfg; + + if (cfg->saddr.sa.sa_family != AF_INET) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only IPv4 underlay is supported"); + return false; + } + + if (vxlan_addr_multicast(&cfg->remote_ip)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Multicast destination IP is not supported"); + return false; + } + + if (vxlan_addr_any(&cfg->saddr)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Source address must be specified"); + return false; + } + + if (cfg->remote_ifindex) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Local interface is not supported"); + return false; + } + + if (cfg->port_min || cfg->port_max) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Only default UDP source port range is supported"); + return false; + } + + if (cfg->tos != 1) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: TOS must be configured to inherit"); + return false; + } + + if (cfg->flags & VXLAN_F_TTL_INHERIT) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to inherit"); + return false; + } + + if (cfg->flags & VXLAN_F_LEARN) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Learning is not supported"); + return false; + } + + if (!(cfg->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: UDP checksum is not supported"); + return false; + } + + if (cfg->flags & ~MLXSW_SP_NVE_VXLAN_SUPPORTED_FLAGS) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Unsupported flag"); + return false; + } + + if (cfg->ttl == 0) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: TTL must not be configured to 0"); + return false; + } + + if (cfg->label != 0) { + NL_SET_ERR_MSG_MOD(extack, "VxLAN: Flow label must be configured to 0"); + return false; + } + + return true; +} + +static void mlxsw_sp_nve_vxlan_config(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct mlxsw_sp_nve_config *config) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_config *cfg = &vxlan->cfg; + + config->type = MLXSW_SP_NVE_TYPE_VXLAN; + config->ttl = cfg->ttl; + config->flowlabel = cfg->label; + config->learning_en = cfg->flags & VXLAN_F_LEARN ? 
1 : 0; + config->ul_tb_id = RT_TABLE_MAIN; + config->ul_proto = MLXSW_SP_L3_PROTO_IPV4; + config->ul_sip.addr4 = cfg->saddr.sin.sin_addr.s_addr; + config->udp_dport = cfg->dst_port; +} + +static int mlxsw_sp_nve_parsing_set(struct mlxsw_sp *mlxsw_sp, + unsigned int parsing_depth, + __be16 udp_dport) +{ + char mprs_pl[MLXSW_REG_MPRS_LEN]; + + mlxsw_reg_mprs_pack(mprs_pl, parsing_depth, be16_to_cpu(udp_dport)); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mprs), mprs_pl); +} + +static int +mlxsw_sp1_nve_vxlan_config_set(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nve_config *config) +{ + char tngcr_pl[MLXSW_REG_TNGCR_LEN]; + u16 ul_vr_id; + u8 udp_sport; + int err; + + err = mlxsw_sp_router_tb_id_vr_id(mlxsw_sp, config->ul_tb_id, + &ul_vr_id); + if (err) + return err; + + mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, true, + config->ttl); + /* VxLAN driver's default UDP source port range is 32768 (0x8000) + * to 60999 (0xee47). Set the upper 8 bits of the UDP source port + * to a random number between 0x80 and 0xee + */ + get_random_bytes(&udp_sport, sizeof(udp_sport)); + udp_sport = (udp_sport % (0xee - 0x80 + 1)) + 0x80; + mlxsw_reg_tngcr_nve_udp_sport_prefix_set(tngcr_pl, udp_sport); + mlxsw_reg_tngcr_learn_enable_set(tngcr_pl, config->learning_en); + mlxsw_reg_tngcr_underlay_virtual_router_set(tngcr_pl, ul_vr_id); + mlxsw_reg_tngcr_usipv4_set(tngcr_pl, be32_to_cpu(config->ul_sip.addr4)); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); +} + +static void mlxsw_sp1_nve_vxlan_config_clear(struct mlxsw_sp *mlxsw_sp) +{ + char tngcr_pl[MLXSW_REG_TNGCR_LEN]; + + mlxsw_reg_tngcr_pack(tngcr_pl, MLXSW_REG_TNGCR_TYPE_VXLAN, false, 0); + + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tngcr), tngcr_pl); +} + +static int mlxsw_sp1_nve_vxlan_rtdp_set(struct mlxsw_sp *mlxsw_sp, + unsigned int tunnel_index) +{ + char rtdp_pl[MLXSW_REG_RTDP_LEN]; + + mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_NVE, tunnel_index); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl); +} + +static int mlxsw_sp1_nve_vxlan_init(struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_config *config) +{ + struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp; + int err; + + err = mlxsw_sp_nve_parsing_set(mlxsw_sp, + MLXSW_SP_NVE_VXLAN_PARSING_DEPTH, + config->udp_dport); + if (err) + return err; + + err = mlxsw_sp1_nve_vxlan_config_set(mlxsw_sp, config); + if (err) + goto err_config_set; + + err = mlxsw_sp1_nve_vxlan_rtdp_set(mlxsw_sp, nve->tunnel_index); + if (err) + goto err_rtdp_set; + + err = mlxsw_sp_router_nve_promote_decap(mlxsw_sp, config->ul_tb_id, + config->ul_proto, + &config->ul_sip, + nve->tunnel_index); + if (err) + goto err_promote_decap; + + return 0; + +err_promote_decap: +err_rtdp_set: + mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp); +err_config_set: + mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH, + config->udp_dport); + return err; +} + +static void mlxsw_sp1_nve_vxlan_fini(struct mlxsw_sp_nve *nve) +{ + struct mlxsw_sp_nve_config *config = &nve->config; + struct mlxsw_sp *mlxsw_sp = nve->mlxsw_sp; + + mlxsw_sp_router_nve_demote_decap(mlxsw_sp, config->ul_tb_id, + config->ul_proto, &config->ul_sip); + mlxsw_sp1_nve_vxlan_config_clear(mlxsw_sp); + mlxsw_sp_nve_parsing_set(mlxsw_sp, MLXSW_SP_NVE_DEFAULT_PARSING_DEPTH, + config->udp_dport); +} + +const struct mlxsw_sp_nve_ops mlxsw_sp1_nve_vxlan_ops = { + .type = MLXSW_SP_NVE_TYPE_VXLAN, + .can_offload = mlxsw_sp1_nve_vxlan_can_offload, + .nve_config = mlxsw_sp_nve_vxlan_config, + .init 
= mlxsw_sp1_nve_vxlan_init, + .fini = mlxsw_sp1_nve_vxlan_fini, +}; + +static bool mlxsw_sp2_nve_vxlan_can_offload(const struct mlxsw_sp_nve *nve, + const struct net_device *dev, + struct netlink_ext_ack *extack) +{ + return false; +} + +static int mlxsw_sp2_nve_vxlan_init(struct mlxsw_sp_nve *nve, + const struct mlxsw_sp_nve_config *config) +{ + return -EOPNOTSUPP; +} + +static void mlxsw_sp2_nve_vxlan_fini(struct mlxsw_sp_nve *nve) +{ +} + +const struct mlxsw_sp_nve_ops mlxsw_sp2_nve_vxlan_ops = { + .type = MLXSW_SP_NVE_TYPE_VXLAN, + .can_offload = mlxsw_sp2_nve_vxlan_can_offload, + .nve_config = mlxsw_sp_nve_vxlan_config, + .init = mlxsw_sp2_nve_vxlan_init, + .fini = mlxsw_sp2_nve_vxlan_fini, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 2ab9cf25a08a..9e9bb57134f2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -366,6 +366,7 @@ enum mlxsw_sp_fib_entry_type { * encapsulating entries.) */ MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP, + MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP, }; struct mlxsw_sp_nexthop_group; @@ -741,6 +742,19 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, return NULL; } +int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + u16 *vr_id) +{ + struct mlxsw_sp_vr *vr; + + vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id); + if (!vr) + return -ESRCH; + *vr_id = vr->id; + + return 0; +} + static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr, enum mlxsw_sp_l3proto proto) { @@ -1128,6 +1142,52 @@ mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); } +static struct mlxsw_sp_fib_entry * +mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, + enum mlxsw_sp_l3proto proto, + const union mlxsw_sp_l3addr *addr, + enum mlxsw_sp_fib_entry_type type) +{ + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib_node *fib_node; + unsigned char addr_prefix_len; + struct mlxsw_sp_fib *fib; + struct mlxsw_sp_vr *vr; + const void *addrp; + size_t addr_len; + u32 addr4; + + vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id); + if (!vr) + return NULL; + fib = mlxsw_sp_vr_fib(vr, proto); + + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + addr4 = be32_to_cpu(addr->addr4); + addrp = &addr4; + addr_len = 4; + addr_prefix_len = 32; + break; + case MLXSW_SP_L3_PROTO_IPV6: /* fall through */ + default: + WARN_ON(1); + return NULL; + } + + fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len, + addr_prefix_len); + if (!fib_node || list_empty(&fib_node->entry_list)) + return NULL; + + fib_entry = list_first_entry(&fib_node->entry_list, + struct mlxsw_sp_fib_entry, list); + if (fib_entry->type != type) + return NULL; + + return fib_entry; +} + /* Given an IPIP entry, find the corresponding decap route. 
*/ static struct mlxsw_sp_fib_entry * mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, @@ -1765,6 +1825,56 @@ mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp, return 0; } +int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip, + u32 tunnel_index) +{ + enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + struct mlxsw_sp_fib_entry *fib_entry; + int err; + + /* It is valid to create a tunnel with a local IP and only later + * assign this IP address to a local interface + */ + fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id, + ul_proto, ul_sip, + type); + if (!fib_entry) + return 0; + + fib_entry->decap.tunnel_index = tunnel_index; + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + if (err) + goto err_fib_entry_update; + + return 0; + +err_fib_entry_update: + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); + return err; +} + +void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, + enum mlxsw_sp_l3proto ul_proto, + const union mlxsw_sp_l3addr *ul_sip) +{ + enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + struct mlxsw_sp_fib_entry *fib_entry; + + fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id, + ul_proto, ul_sip, + type); + if (!fib_entry) + return; + + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); +} + struct mlxsw_sp_neigh_key { struct neighbour *n; }; @@ -3815,6 +3925,7 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: return !!nh_group->nh_rif; case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: + case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: return true; default: return false; @@ -3848,7 +3959,8 @@ mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) int i; if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || - fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) { + fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP || + fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) { nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD; return; } @@ -4072,6 +4184,18 @@ mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, fib_entry->decap.tunnel_index); } +static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); + mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, + fib_entry->decap.tunnel_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) @@ -4086,6 +4210,8 @@ static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP: + return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op); } return -EINVAL; } @@ -4121,6 +4247,7 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) }; + u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id); struct net_device 
*dev = fen_info->fi->fib_dev; struct mlxsw_sp_ipip_entry *ipip_entry; struct fib_info *fi = fen_info->fi; @@ -4135,6 +4262,15 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, fib_entry, ipip_entry); } + if (mlxsw_sp_nve_ipv4_route_is_decap(mlxsw_sp, tb_id, + dip.addr4)) { + u32 t_index; + + t_index = mlxsw_sp_nve_decap_tunnel_index_get(mlxsw_sp); + fib_entry->decap.tunnel_index = t_index; + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP; + return 0; + } /* fall through */ case RTN_BROADCAST: fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 1a60391daafa..3dbafdeaab2b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -7,17 +7,6 @@ #include "spectrum.h" #include "reg.h" -enum mlxsw_sp_l3proto { - MLXSW_SP_L3_PROTO_IPV4, - MLXSW_SP_L3_PROTO_IPV6, -#define MLXSW_SP_L3_PROTO_MAX (MLXSW_SP_L3_PROTO_IPV6 + 1) -}; - -union mlxsw_sp_l3addr { - __be32 addr4; - struct in6_addr addr6; -}; - struct mlxsw_sp_rif_ipip_lb; struct mlxsw_sp_rif_ipip_lb_config { enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; @@ -35,8 +24,6 @@ struct mlxsw_sp_neigh_entry; struct mlxsw_sp_nexthop; struct mlxsw_sp_ipip_entry; -struct mlxsw_sp_rif *mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev); struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 rif_index); u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); @@ -44,9 +31,7 @@ u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); -u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp); const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif); -struct mlxsw_sp_fid *mlxsw_sp_rif_fid(const struct mlxsw_sp_rif *rif); int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index db715da7bab7..bc60d7a8b49d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -15,9 +15,9 @@ #include <linux/rtnetlink.h> #include <linux/netlink.h> #include <net/switchdev.h> +#include <net/vxlan.h> #include "spectrum_span.h" -#include "spectrum_router.h" #include "spectrum_switchdev.h" #include "spectrum.h" #include "core.h" @@ -84,9 +84,19 @@ struct mlxsw_sp_bridge_ops { void (*port_leave)(struct mlxsw_sp_bridge_device *bridge_device, struct mlxsw_sp_bridge_port *bridge_port, struct mlxsw_sp_port *mlxsw_sp_port); + int (*vxlan_join)(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack); + void (*vxlan_leave)(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev); struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_bridge_device *bridge_device, u16 vid); + struct mlxsw_sp_fid * + (*fid_lookup)(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid); + u16 (*fid_vid)(struct mlxsw_sp_bridge_device *bridge_device, + const struct mlxsw_sp_fid *fid); }; static int @@ -1237,6 +1247,51 @@ static enum mlxsw_reg_sfd_op 
mlxsw_sp_sfd_op(bool adding) MLXSW_REG_SFD_OP_WRITE_REMOVE; } +static int mlxsw_sp_port_fdb_tunnel_uc_op(struct mlxsw_sp *mlxsw_sp, + const char *mac, u16 fid, + enum mlxsw_sp_l3proto proto, + const union mlxsw_sp_l3addr *addr, + bool adding, bool dynamic) +{ + enum mlxsw_reg_sfd_uc_tunnel_protocol sfd_proto; + char *sfd_pl; + u8 num_rec; + u32 uip; + int err; + + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + uip = be32_to_cpu(addr->addr4); + sfd_proto = MLXSW_REG_SFD_UC_TUNNEL_PROTOCOL_IPV4; + break; + case MLXSW_SP_L3_PROTO_IPV6: /* fall through */ + default: + WARN_ON(1); + return -EOPNOTSUPP; + } + + sfd_pl = kmalloc(MLXSW_REG_SFD_LEN, GFP_KERNEL); + if (!sfd_pl) + return -ENOMEM; + + mlxsw_reg_sfd_pack(sfd_pl, mlxsw_sp_sfd_op(adding), 0); + mlxsw_reg_sfd_uc_tunnel_pack(sfd_pl, 0, + mlxsw_sp_sfd_rec_policy(dynamic), mac, fid, + MLXSW_REG_SFD_REC_ACTION_NOP, uip, + sfd_proto); + num_rec = mlxsw_reg_sfd_num_rec_get(sfd_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sfd), sfd_pl); + if (err) + goto out; + + if (num_rec != mlxsw_reg_sfd_num_rec_get(sfd_pl)) + err = -EBUSY; + +out: + kfree(sfd_pl); + return err; +} + static int __mlxsw_sp_port_fdb_uc_op(struct mlxsw_sp *mlxsw_sp, u8 local_port, const char *mac, u16 fid, bool adding, enum mlxsw_reg_sfd_rec_action action, @@ -1950,6 +2005,21 @@ mlxsw_sp_bridge_8021q_port_leave(struct mlxsw_sp_bridge_device *bridge_device, mlxsw_sp_port_pvid_set(mlxsw_sp_port, 1); } +static int +mlxsw_sp_bridge_8021q_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack) +{ + WARN_ON(1); + return -EINVAL; +} + +static void +mlxsw_sp_bridge_8021q_vxlan_leave(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev) +{ +} + static struct mlxsw_sp_fid * mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device, u16 vid) @@ -1959,10 +2029,29 @@ mlxsw_sp_bridge_8021q_fid_get(struct mlxsw_sp_bridge_device *bridge_device, return mlxsw_sp_fid_8021q_get(mlxsw_sp, vid); } +static struct mlxsw_sp_fid * +mlxsw_sp_bridge_8021q_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid) +{ + WARN_ON(1); + return NULL; +} + +static u16 +mlxsw_sp_bridge_8021q_fid_vid(struct mlxsw_sp_bridge_device *bridge_device, + const struct mlxsw_sp_fid *fid) +{ + return mlxsw_sp_fid_8021q_vid(fid); +} + static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021q_ops = { .port_join = mlxsw_sp_bridge_8021q_port_join, .port_leave = mlxsw_sp_bridge_8021q_port_leave, + .vxlan_join = mlxsw_sp_bridge_8021q_vxlan_join, + .vxlan_leave = mlxsw_sp_bridge_8021q_vxlan_leave, .fid_get = mlxsw_sp_bridge_8021q_fid_get, + .fid_lookup = mlxsw_sp_bridge_8021q_fid_lookup, + .fid_vid = mlxsw_sp_bridge_8021q_fid_vid, }; static bool @@ -2026,19 +2115,126 @@ mlxsw_sp_bridge_8021d_port_leave(struct mlxsw_sp_bridge_device *bridge_device, mlxsw_sp_port_vlan_bridge_leave(mlxsw_sp_port_vlan); } +static int +mlxsw_sp_bridge_8021d_vxlan_join(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + struct vxlan_dev *vxlan = netdev_priv(vxlan_dev); + struct mlxsw_sp_nve_params params = { + .type = MLXSW_SP_NVE_TYPE_VXLAN, + .vni = vxlan->cfg.vni, + .dev = vxlan_dev, + }; + struct mlxsw_sp_fid *fid; + int err; + + fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex); + if (!fid) + return -EINVAL; + + if (mlxsw_sp_fid_vni_is_set(fid)) 
+ return -EINVAL; + + err = mlxsw_sp_nve_fid_enable(mlxsw_sp, fid, ¶ms, extack); + if (err) + goto err_nve_fid_enable; + + /* The tunnel port does not hold a reference on the FID. Only + * local ports and the router port + */ + mlxsw_sp_fid_put(fid); + + return 0; + +err_nve_fid_enable: + mlxsw_sp_fid_put(fid); + return err; +} + +static void +mlxsw_sp_bridge_8021d_vxlan_leave(struct mlxsw_sp_bridge_device *bridge_device, + const struct net_device *vxlan_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + struct mlxsw_sp_fid *fid; + + fid = mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex); + if (WARN_ON(!fid)) + return; + + /* If the VxLAN device is down, then the FID does not have a VNI */ + if (!mlxsw_sp_fid_vni_is_set(fid)) + goto out; + + mlxsw_sp_nve_fid_disable(mlxsw_sp, fid); +out: + mlxsw_sp_fid_put(fid); +} + static struct mlxsw_sp_fid * mlxsw_sp_bridge_8021d_fid_get(struct mlxsw_sp_bridge_device *bridge_device, u16 vid) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + struct net_device *vxlan_dev; + struct mlxsw_sp_fid *fid; + int err; + + fid = mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex); + if (IS_ERR(fid)) + return fid; + + if (mlxsw_sp_fid_vni_is_set(fid)) + return fid; + + vxlan_dev = mlxsw_sp_bridge_vxlan_dev_find(bridge_device->dev); + if (!vxlan_dev) + return fid; + + if (!netif_running(vxlan_dev)) + return fid; + + err = mlxsw_sp_bridge_8021d_vxlan_join(bridge_device, vxlan_dev, NULL); + if (err) + goto err_vxlan_join; + + return fid; + +err_vxlan_join: + mlxsw_sp_fid_put(fid); + return ERR_PTR(err); +} + +static struct mlxsw_sp_fid * +mlxsw_sp_bridge_8021d_fid_lookup(struct mlxsw_sp_bridge_device *bridge_device, + u16 vid) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(bridge_device->dev); + + /* The only valid VLAN for a VLAN-unaware bridge is 0 */ + if (vid) + return NULL; - return mlxsw_sp_fid_8021d_get(mlxsw_sp, bridge_device->dev->ifindex); + return mlxsw_sp_fid_8021d_lookup(mlxsw_sp, bridge_device->dev->ifindex); +} + +static u16 +mlxsw_sp_bridge_8021d_fid_vid(struct mlxsw_sp_bridge_device *bridge_device, + const struct mlxsw_sp_fid *fid) +{ + return 0; } static const struct mlxsw_sp_bridge_ops mlxsw_sp_bridge_8021d_ops = { .port_join = mlxsw_sp_bridge_8021d_port_join, .port_leave = mlxsw_sp_bridge_8021d_port_leave, + .vxlan_join = mlxsw_sp_bridge_8021d_vxlan_join, + .vxlan_leave = mlxsw_sp_bridge_8021d_vxlan_leave, .fid_get = mlxsw_sp_bridge_8021d_fid_get, + .fid_lookup = mlxsw_sp_bridge_8021d_fid_lookup, + .fid_vid = mlxsw_sp_bridge_8021d_fid_vid, }; int mlxsw_sp_port_bridge_join(struct mlxsw_sp_port *mlxsw_sp_port, @@ -2088,15 +2284,43 @@ void mlxsw_sp_port_bridge_leave(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_bridge_port_put(mlxsw_sp->bridge, bridge_port); } +int mlxsw_sp_bridge_vxlan_join(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct net_device *vxlan_dev, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_bridge_device *bridge_device; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (WARN_ON(!bridge_device)) + return -EINVAL; + + return bridge_device->ops->vxlan_join(bridge_device, vxlan_dev, extack); +} + +void mlxsw_sp_bridge_vxlan_leave(struct mlxsw_sp *mlxsw_sp, + const struct net_device *br_dev, + const struct net_device *vxlan_dev) +{ + struct mlxsw_sp_bridge_device *bridge_device; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (WARN_ON(!bridge_device)) + return; 
+ + bridge_device->ops->vxlan_leave(bridge_device, vxlan_dev); +} + static void mlxsw_sp_fdb_call_notifiers(enum switchdev_notifier_type type, const char *mac, u16 vid, - struct net_device *dev) + struct net_device *dev, bool offloaded) { struct switchdev_notifier_fdb_info info; info.addr = mac; info.vid = vid; + info.offloaded = offloaded; call_switchdev_notifiers(type, dev, &info.info); } @@ -2148,7 +2372,7 @@ do_fdb_op: if (!do_notification) return; type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE; - mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev); + mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev, adding); return; @@ -2208,7 +2432,7 @@ do_fdb_op: if (!do_notification) return; type = adding ? SWITCHDEV_FDB_ADD_TO_BRIDGE : SWITCHDEV_FDB_DEL_TO_BRIDGE; - mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev); + mlxsw_sp_fdb_call_notifiers(type, mac, vid, bridge_port->dev, adding); return; @@ -2284,12 +2508,127 @@ out: struct mlxsw_sp_switchdev_event_work { struct work_struct work; - struct switchdev_notifier_fdb_info fdb_info; + union { + struct switchdev_notifier_fdb_info fdb_info; + struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info; + }; struct net_device *dev; unsigned long event; }; -static void mlxsw_sp_switchdev_event_work(struct work_struct *work) +static void +mlxsw_sp_switchdev_vxlan_addr_convert(const union vxlan_addr *vxlan_addr, + enum mlxsw_sp_l3proto *proto, + union mlxsw_sp_l3addr *addr) +{ + if (vxlan_addr->sa.sa_family == AF_INET) { + addr->addr4 = vxlan_addr->sin.sin_addr.s_addr; + *proto = MLXSW_SP_L3_PROTO_IPV4; + } else { + addr->addr6 = vxlan_addr->sin6.sin6_addr; + *proto = MLXSW_SP_L3_PROTO_IPV6; + } +} + +static void +mlxsw_sp_switchdev_bridge_vxlan_fdb_event(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_switchdev_event_work * + switchdev_work, + struct mlxsw_sp_fid *fid, __be32 vni) +{ + struct switchdev_notifier_vxlan_fdb_info vxlan_fdb_info; + struct switchdev_notifier_fdb_info *fdb_info; + struct net_device *dev = switchdev_work->dev; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr addr; + int err; + + fdb_info = &switchdev_work->fdb_info; + err = vxlan_fdb_find_uc(dev, fdb_info->addr, vni, &vxlan_fdb_info); + if (err) + return; + + mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info.remote_ip, + &proto, &addr); + + switch (switchdev_work->event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, + vxlan_fdb_info.eth_addr, + mlxsw_sp_fid_index(fid), + proto, &addr, true, false); + if (err) + return; + vxlan_fdb_info.offloaded = true; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info.info); + mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, + vxlan_fdb_info.eth_addr, + fdb_info->vid, dev, true); + break; + case SWITCHDEV_FDB_DEL_TO_DEVICE: + err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, + vxlan_fdb_info.eth_addr, + mlxsw_sp_fid_index(fid), + proto, &addr, false, + false); + vxlan_fdb_info.offloaded = false; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info.info); + break; + } +} + +static void +mlxsw_sp_switchdev_bridge_nve_fdb_event(struct mlxsw_sp_switchdev_event_work * + switchdev_work) +{ + struct mlxsw_sp_bridge_device *bridge_device; + struct net_device *dev = switchdev_work->dev; + struct net_device *br_dev; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_fid *fid; + __be32 vni; + int err; + + if (switchdev_work->event != SWITCHDEV_FDB_ADD_TO_DEVICE && + switchdev_work->event != 
SWITCHDEV_FDB_DEL_TO_DEVICE) + return; + + if (!switchdev_work->fdb_info.added_by_user) + return; + + if (!netif_running(dev)) + return; + br_dev = netdev_master_upper_dev_get(dev); + if (!br_dev) + return; + if (!netif_is_bridge_master(br_dev)) + return; + mlxsw_sp = mlxsw_sp_lower_get(br_dev); + if (!mlxsw_sp) + return; + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return; + + fid = bridge_device->ops->fid_lookup(bridge_device, + switchdev_work->fdb_info.vid); + if (!fid) + return; + + err = mlxsw_sp_fid_vni(fid, &vni); + if (err) + goto out; + + mlxsw_sp_switchdev_bridge_vxlan_fdb_event(mlxsw_sp, switchdev_work, fid, + vni); + +out: + mlxsw_sp_fid_put(fid); +} + +static void mlxsw_sp_switchdev_bridge_fdb_event_work(struct work_struct *work) { struct mlxsw_sp_switchdev_event_work *switchdev_work = container_of(work, struct mlxsw_sp_switchdev_event_work, work); @@ -2299,6 +2638,11 @@ static void mlxsw_sp_switchdev_event_work(struct work_struct *work) int err; rtnl_lock(); + if (netif_is_vxlan(dev)) { + mlxsw_sp_switchdev_bridge_nve_fdb_event(switchdev_work); + goto out; + } + mlxsw_sp_port = mlxsw_sp_port_dev_lower_find(dev); if (!mlxsw_sp_port) goto out; @@ -2313,7 +2657,7 @@ static void mlxsw_sp_switchdev_event_work(struct work_struct *work) break; mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, fdb_info->addr, - fdb_info->vid, dev); + fdb_info->vid, dev, true); break; case SWITCHDEV_FDB_DEL_TO_DEVICE: fdb_info = &switchdev_work->fdb_info; @@ -2338,22 +2682,213 @@ out: dev_put(dev); } +static void +mlxsw_sp_switchdev_vxlan_fdb_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_switchdev_event_work * + switchdev_work) +{ + struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info; + struct mlxsw_sp_bridge_device *bridge_device; + struct net_device *dev = switchdev_work->dev; + u8 all_zeros_mac[ETH_ALEN] = { 0 }; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr addr; + struct net_device *br_dev; + struct mlxsw_sp_fid *fid; + u16 vid; + int err; + + vxlan_fdb_info = &switchdev_work->vxlan_fdb_info; + br_dev = netdev_master_upper_dev_get(dev); + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return; + + fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan_fdb_info->vni); + if (!fid) + return; + + mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip, + &proto, &addr); + + if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) { + err = mlxsw_sp_nve_flood_ip_add(mlxsw_sp, fid, proto, &addr); + if (err) { + mlxsw_sp_fid_put(fid); + return; + } + vxlan_fdb_info->offloaded = true; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info->info); + mlxsw_sp_fid_put(fid); + return; + } + + /* The device has a single FDB table, whereas Linux has two - one + * in the bridge driver and another in the VxLAN driver. 
We only + * program an entry to the device if the MAC points to the VxLAN + * device in the bridge's FDB table + */ + vid = bridge_device->ops->fid_vid(bridge_device, fid); + if (br_fdb_find_port(br_dev, vxlan_fdb_info->eth_addr, vid) != dev) + goto err_br_fdb_find; + + err = mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, vxlan_fdb_info->eth_addr, + mlxsw_sp_fid_index(fid), proto, + &addr, true, false); + if (err) + goto err_fdb_tunnel_uc_op; + vxlan_fdb_info->offloaded = true; + call_switchdev_notifiers(SWITCHDEV_VXLAN_FDB_OFFLOADED, dev, + &vxlan_fdb_info->info); + mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, + vxlan_fdb_info->eth_addr, vid, dev, true); + + mlxsw_sp_fid_put(fid); + + return; + +err_fdb_tunnel_uc_op: +err_br_fdb_find: + mlxsw_sp_fid_put(fid); +} + +static void +mlxsw_sp_switchdev_vxlan_fdb_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_switchdev_event_work * + switchdev_work) +{ + struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info; + struct mlxsw_sp_bridge_device *bridge_device; + struct net_device *dev = switchdev_work->dev; + struct net_device *br_dev = netdev_master_upper_dev_get(dev); + u8 all_zeros_mac[ETH_ALEN] = { 0 }; + enum mlxsw_sp_l3proto proto; + union mlxsw_sp_l3addr addr; + struct mlxsw_sp_fid *fid; + u16 vid; + + vxlan_fdb_info = &switchdev_work->vxlan_fdb_info; + + bridge_device = mlxsw_sp_bridge_device_find(mlxsw_sp->bridge, br_dev); + if (!bridge_device) + return; + + fid = mlxsw_sp_fid_lookup_by_vni(mlxsw_sp, vxlan_fdb_info->vni); + if (!fid) + return; + + mlxsw_sp_switchdev_vxlan_addr_convert(&vxlan_fdb_info->remote_ip, + &proto, &addr); + + if (ether_addr_equal(vxlan_fdb_info->eth_addr, all_zeros_mac)) { + mlxsw_sp_nve_flood_ip_del(mlxsw_sp, fid, proto, &addr); + mlxsw_sp_fid_put(fid); + return; + } + + mlxsw_sp_port_fdb_tunnel_uc_op(mlxsw_sp, vxlan_fdb_info->eth_addr, + mlxsw_sp_fid_index(fid), proto, &addr, + false, false); + vid = bridge_device->ops->fid_vid(bridge_device, fid); + mlxsw_sp_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, + vxlan_fdb_info->eth_addr, vid, dev, false); + + mlxsw_sp_fid_put(fid); +} + +static void mlxsw_sp_switchdev_vxlan_fdb_event_work(struct work_struct *work) +{ + struct mlxsw_sp_switchdev_event_work *switchdev_work = + container_of(work, struct mlxsw_sp_switchdev_event_work, work); + struct net_device *dev = switchdev_work->dev; + struct mlxsw_sp *mlxsw_sp; + struct net_device *br_dev; + + rtnl_lock(); + + if (!netif_running(dev)) + goto out; + br_dev = netdev_master_upper_dev_get(dev); + if (!br_dev) + goto out; + if (!netif_is_bridge_master(br_dev)) + goto out; + mlxsw_sp = mlxsw_sp_lower_get(br_dev); + if (!mlxsw_sp) + goto out; + + switch (switchdev_work->event) { + case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE: + mlxsw_sp_switchdev_vxlan_fdb_add(mlxsw_sp, switchdev_work); + break; + case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE: + mlxsw_sp_switchdev_vxlan_fdb_del(mlxsw_sp, switchdev_work); + break; + } + +out: + rtnl_unlock(); + kfree(switchdev_work); + dev_put(dev); +} + +static int +mlxsw_sp_switchdev_vxlan_work_prepare(struct mlxsw_sp_switchdev_event_work * + switchdev_work, + struct switchdev_notifier_info *info) +{ + struct vxlan_dev *vxlan = netdev_priv(switchdev_work->dev); + struct switchdev_notifier_vxlan_fdb_info *vxlan_fdb_info; + struct vxlan_config *cfg = &vxlan->cfg; + + vxlan_fdb_info = container_of(info, + struct switchdev_notifier_vxlan_fdb_info, + info); + + if (vxlan_fdb_info->remote_port != cfg->dst_port) + return -EOPNOTSUPP; + if (vxlan_fdb_info->remote_vni != cfg->vni) + return -EOPNOTSUPP; + if 
(vxlan_fdb_info->vni != cfg->vni) + return -EOPNOTSUPP; + if (vxlan_fdb_info->remote_ifindex) + return -EOPNOTSUPP; + if (is_multicast_ether_addr(vxlan_fdb_info->eth_addr)) + return -EOPNOTSUPP; + if (vxlan_addr_multicast(&vxlan_fdb_info->remote_ip)) + return -EOPNOTSUPP; + + switchdev_work->vxlan_fdb_info = *vxlan_fdb_info; + + return 0; +} + /* Called under rcu_read_lock() */ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = switchdev_notifier_info_to_dev(ptr); struct mlxsw_sp_switchdev_event_work *switchdev_work; - struct switchdev_notifier_fdb_info *fdb_info = ptr; + struct switchdev_notifier_fdb_info *fdb_info; + struct switchdev_notifier_info *info = ptr; + struct net_device *br_dev; + int err; - if (!mlxsw_sp_port_dev_lower_find_rcu(dev)) + /* Tunnel devices are not our uppers, so check their master instead */ + br_dev = netdev_master_upper_dev_get_rcu(dev); + if (!br_dev) + return NOTIFY_DONE; + if (!netif_is_bridge_master(br_dev)) + return NOTIFY_DONE; + if (!mlxsw_sp_port_dev_lower_find_rcu(br_dev)) return NOTIFY_DONE; switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); if (!switchdev_work) return NOTIFY_BAD; - INIT_WORK(&switchdev_work->work, mlxsw_sp_switchdev_event_work); switchdev_work->dev = dev; switchdev_work->event = event; @@ -2362,6 +2897,11 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, case SWITCHDEV_FDB_DEL_TO_DEVICE: /* fall through */ case SWITCHDEV_FDB_ADD_TO_BRIDGE: /* fall through */ case SWITCHDEV_FDB_DEL_TO_BRIDGE: + fdb_info = container_of(info, + struct switchdev_notifier_fdb_info, + info); + INIT_WORK(&switchdev_work->work, + mlxsw_sp_switchdev_bridge_fdb_event_work); memcpy(&switchdev_work->fdb_info, ptr, sizeof(switchdev_work->fdb_info)); switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC); @@ -2375,6 +2915,16 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, */ dev_hold(dev); break; + case SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE: /* fall through */ + case SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE: + INIT_WORK(&switchdev_work->work, + mlxsw_sp_switchdev_vxlan_fdb_event_work); + err = mlxsw_sp_switchdev_vxlan_work_prepare(switchdev_work, + info); + if (err) + goto err_vxlan_work_prepare; + dev_hold(dev); + break; default: kfree(switchdev_work); return NOTIFY_DONE; @@ -2384,6 +2934,7 @@ static int mlxsw_sp_switchdev_event(struct notifier_block *unused, return NOTIFY_DONE; +err_vxlan_work_prepare: err_addr_alloc: kfree(switchdev_work); return NOTIFY_BAD; diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 53020724c2f6..6f18f4d3322a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -24,6 +24,7 @@ enum { MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34, MLXSW_TRAP_ID_PKT_SAMPLE = 0x38, MLXSW_TRAP_ID_FID_MISS = 0x3D, + MLXSW_TRAP_ID_DECAP_ECN0 = 0x40, MLXSW_TRAP_ID_ARPBC = 0x50, MLXSW_TRAP_ID_ARPUC = 0x51, MLXSW_TRAP_ID_MTUERROR = 0x52, @@ -59,6 +60,7 @@ enum { MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91, MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92, MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1, + MLXSW_TRAP_ID_NVE_ENCAP_ARP = 0xBD, MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6, MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7, MLXSW_TRAP_ID_ACL0 = 0x1C0, |
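
A note on the pattern behind the FDB handling above: the switchdev notifier (mlxsw_sp_switchdev_event) runs in atomic context, so it only validates and copies the notifier payload, takes a reference on the netdevice, and queues a work item; the work functions (mlxsw_sp_switchdev_bridge_fdb_event_work and mlxsw_sp_switchdev_vxlan_fdb_event_work) then do the bridge lookups and hardware programming under rtnl_lock and drop the references. The sketch below is a minimal, self-contained illustration of that copy-then-defer pattern only, under the assumption of a generic switchdev driver; every demo_* name is hypothetical and this is not the mlxsw implementation.

/*
 * Illustrative sketch of the copy-then-defer pattern used by switchdev
 * FDB notifiers: the atomic notifier callback copies the payload and
 * schedules a work item, which programs the hardware under RTNL and
 * releases the references it took.  All demo_* names are hypothetical.
 */
#include <linux/etherdevice.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <net/switchdev.h>

struct demo_switchdev_event_work {
	struct work_struct work;
	struct switchdev_notifier_fdb_info fdb_info;
	struct net_device *dev;
	unsigned long event;
};

static void demo_switchdev_event_work_fn(struct work_struct *work)
{
	struct demo_switchdev_event_work *dw =
		container_of(work, struct demo_switchdev_event_work, work);

	rtnl_lock();
	/* Hardware programming based on dw->event / dw->fdb_info goes here. */
	rtnl_unlock();

	kfree(dw->fdb_info.addr);
	dev_put(dw->dev);
	kfree(dw);
}

/* Runs in atomic context from the switchdev notifier chain. */
static int demo_switchdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = switchdev_notifier_info_to_dev(ptr);
	struct switchdev_notifier_fdb_info *fdb_info = ptr;
	struct demo_switchdev_event_work *dw;

	if (event != SWITCHDEV_FDB_ADD_TO_DEVICE &&
	    event != SWITCHDEV_FDB_DEL_TO_DEVICE)
		return NOTIFY_DONE;

	dw = kzalloc(sizeof(*dw), GFP_ATOMIC);
	if (!dw)
		return NOTIFY_BAD;

	INIT_WORK(&dw->work, demo_switchdev_event_work_fn);
	dw->dev = dev;
	dw->event = event;

	/* The notifier payload is only valid for the duration of the call,
	 * so deep-copy the MAC address before deferring.
	 */
	dw->fdb_info = *fdb_info;
	dw->fdb_info.addr = kmemdup(fdb_info->addr, ETH_ALEN, GFP_ATOMIC);
	if (!dw->fdb_info.addr) {
		kfree(dw);
		return NOTIFY_BAD;
	}

	/* Keep the netdev alive until the work item has run. */
	dev_hold(dev);
	schedule_work(&dw->work);

	return NOTIFY_DONE;
}

The deep copy of fdb_info->addr and the dev_hold() mirror the GFP_ATOMIC allocation and reference-taking visible in the patch above; both exist because the notifier payload and the netdevice are only guaranteed to stay valid until the notifier chain returns, while the actual work happens later in process context.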