diff options
Diffstat (limited to 'drivers/net/ethernet/mellanox')
81 files changed, 7030 insertions, 1754 deletions
diff --git a/drivers/net/ethernet/mellanox/Kconfig b/drivers/net/ethernet/mellanox/Kconfig index 84a200764111..872548cd9431 100644 --- a/drivers/net/ethernet/mellanox/Kconfig +++ b/drivers/net/ethernet/mellanox/Kconfig @@ -5,9 +5,10 @@ config NET_VENDOR_MELLANOX bool "Mellanox devices" default y - depends on PCI + depends on PCI || I2C ---help--- - If you have a network (Ethernet) card belonging to this class, say Y. + If you have a network (Ethernet or RDMA) device belonging to this + class, say Y. Note that the answer to this question doesn't directly affect the kernel: saying N will just cause the configurator to skip all diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index b651c1210555..6dabd983e7e0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -186,7 +186,7 @@ int mlx4_bitmap_init(struct mlx4_bitmap *bitmap, u32 num, u32 mask, bitmap->effective_len = bitmap->avail; spin_lock_init(&bitmap->lock); bitmap->table = kzalloc(BITS_TO_LONGS(bitmap->max) * - sizeof (long), GFP_KERNEL); + sizeof(long), GFP_KERNEL); if (!bitmap->table) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 78b89ceb4f46..6a9086dc1e92 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1958,19 +1958,19 @@ static void mlx4_allocate_port_vpps(struct mlx4_dev *dev, int port) int i; int err; int num_vfs; - u16 availible_vpp; + u16 available_vpp; u8 vpp_param[MLX4_NUM_UP]; struct mlx4_qos_manager *port_qos; struct mlx4_priv *priv = mlx4_priv(dev); - err = mlx4_ALLOCATE_VPP_get(dev, port, &availible_vpp, vpp_param); + err = mlx4_ALLOCATE_VPP_get(dev, port, &available_vpp, vpp_param); if (err) { - mlx4_info(dev, "Failed query availible VPPs\n"); + mlx4_info(dev, "Failed query available VPPs\n"); return; } port_qos = &priv->mfunc.master.qos_ctl[port]; - num_vfs = (availible_vpp / + num_vfs = (available_vpp / bitmap_weight(port_qos->priority_bm, MLX4_NUM_UP)); for (i = 0; i < MLX4_NUM_UP; i++) { @@ -1985,14 +1985,14 @@ static void mlx4_allocate_port_vpps(struct mlx4_dev *dev, int port) } /* Query actual allocated VPP, just to make sure */ - err = mlx4_ALLOCATE_VPP_get(dev, port, &availible_vpp, vpp_param); + err = mlx4_ALLOCATE_VPP_get(dev, port, &available_vpp, vpp_param); if (err) { - mlx4_info(dev, "Failed query availible VPPs\n"); + mlx4_info(dev, "Failed query available VPPs\n"); return; } port_qos->num_of_qos_vfs = num_vfs; - mlx4_dbg(dev, "Port %d Availible VPPs %d\n", port, availible_vpp); + mlx4_dbg(dev, "Port %d Available VPPs %d\n", port, available_vpp); for (i = 0; i < MLX4_NUM_UP; i++) mlx4_dbg(dev, "Port %d UP %d Allocated %d VPPs\n", port, i, @@ -2637,7 +2637,7 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) int err = 0; priv->cmd.context = kmalloc(priv->cmd.max_cmds * - sizeof (struct mlx4_cmd_context), + sizeof(struct mlx4_cmd_context), GFP_KERNEL); if (!priv->cmd.context) return -ENOMEM; @@ -2695,7 +2695,7 @@ struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev) { struct mlx4_cmd_mailbox *mailbox; - mailbox = kmalloc(sizeof *mailbox, GFP_KERNEL); + mailbox = kmalloc(sizeof(*mailbox), GFP_KERNEL); if (!mailbox) return ERR_PTR(-ENOMEM); @@ -2891,7 +2891,7 @@ static int mlx4_set_vport_qos(struct mlx4_priv *priv, int slave, int port, memset(vpp_qos, 0, sizeof(struct mlx4_vport_qos_param) * MLX4_NUM_UP); if (slave > port_qos->num_of_qos_vfs) { - mlx4_info(dev, "No availible VPP resources for this VF\n"); + mlx4_info(dev, "No available VPP resources for this VF\n"); return -EINVAL; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index f849eec21824..1e487acb4667 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -209,12 +209,10 @@ int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) cq->moder_cnt, cq->moder_time); } -int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) +void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq) { mlx4_cq_arm(&cq->mcq, MLX4_CQ_DB_REQ_NOT, priv->mdev->uar_map, &priv->mdev->uar_lock); - - return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 2b0cbca4beb5..686e18de9a97 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -147,7 +147,7 @@ void mlx4_en_update_loopback_state(struct net_device *dev, mutex_unlock(&priv->mdev->state_lock); } -static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) +static void mlx4_en_get_profile(struct mlx4_en_dev *mdev) { struct mlx4_en_profile *params = &mdev->profile; int i; @@ -176,8 +176,6 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev) params->prof[i].rss_rings = 0; params->prof[i].inline_thold = inline_thold; } - - return 0; } static void *mlx4_en_get_netdev(struct mlx4_dev *dev, void *ctx, u8 port) @@ -309,10 +307,7 @@ static void *mlx4_en_add(struct mlx4_dev *dev) } /* Build device profile according to supplied module parameters */ - if (mlx4_en_get_profile(mdev)) { - mlx4_err(mdev, "Bad module parameters, aborting\n"); - goto err_mr; - } + mlx4_en_get_profile(mdev); /* Configure which ports to start according to module parameters */ mdev->port_cnt = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index e3e6d9fa69fd..9c218f1cfc6c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -130,19 +130,20 @@ out: return err; } -static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, - u32 chain_index, __be16 proto, - struct tc_to_netdev *tc) +static int __mlx4_en_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) { - if (tc->type != TC_SETUP_MQPRIO) - return -EINVAL; + struct tc_mqprio_qopt *mqprio = type_data; + + if (type != TC_SETUP_MQPRIO) + return -EOPNOTSUPP; - if (tc->mqprio->num_tc && tc->mqprio->num_tc != MLX4_EN_NUM_UP_HIGH) + if (mqprio->num_tc && mqprio->num_tc != MLX4_EN_NUM_UP_HIGH) return -EINVAL; - tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; - return mlx4_en_alloc_tx_queue_per_tc(dev, tc->mqprio->num_tc); + return mlx4_en_alloc_tx_queue_per_tc(dev, mqprio->num_tc); } #ifdef CONFIG_RFS_ACCEL @@ -732,6 +733,21 @@ static int mlx4_en_replace_mac(struct mlx4_en_priv *priv, int qpn, return __mlx4_replace_mac(dev, priv->port, qpn, new_mac_u64); } +static void mlx4_en_update_user_mac(struct mlx4_en_priv *priv, + unsigned char new_mac[ETH_ALEN + 2]) +{ + struct mlx4_en_dev *mdev = priv->mdev; + int err; + + if (!(mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_USER_MAC_EN)) + return; + + err = mlx4_SET_PORT_user_mac(mdev->dev, priv->port, new_mac); + if (err) + en_err(priv, "Failed to pass user MAC(%pM) to Firmware for port %d, with error %d\n", + new_mac, priv->port, err); +} + static int mlx4_en_do_set_mac(struct mlx4_en_priv *priv, unsigned char new_mac[ETH_ALEN + 2]) { @@ -766,8 +782,12 @@ static int mlx4_en_set_mac(struct net_device *dev, void *addr) mutex_lock(&mdev->state_lock); memcpy(new_mac, saddr->sa_data, ETH_ALEN); err = mlx4_en_do_set_mac(priv, new_mac); - if (!err) - memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN); + if (err) + goto out; + + memcpy(dev->dev_addr, saddr->sa_data, ETH_ALEN); + mlx4_en_update_user_mac(priv, new_mac); +out: mutex_unlock(&mdev->state_lock); return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index 86d2d42d658d..5a47f9669621 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -44,7 +44,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; - memset(context, 0, sizeof *context); + memset(context, 0, sizeof(*context)); context->flags = cpu_to_be32(7 << 16 | rss << MLX4_RSS_QPC_FLAG_OFFSET); context->pd = cpu_to_be32(mdev->priv_pdn); context->mtu_msgmax = 0xff; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index ec24c4057be6..b97a55c827eb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -1056,7 +1056,7 @@ static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn, } qp->event = mlx4_en_sqp_event; - memset(context, 0, sizeof *context); + memset(context, 0, sizeof(*context)); mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0, qpn, ring->cqn, -1, context); context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index a81db2582555..8a32a8f7f9c0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -644,7 +644,7 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, void *fragptr) { struct mlx4_wqe_inline_seg *inl = &tx_desc->inl; - int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl; + int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof(*inl); unsigned int hlen = skb_headlen(skb); if (skb->len <= spc) { diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 07406cf2eacd..6f57c052053e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -259,7 +259,7 @@ int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port) if (!s_slave->active) return 0; - memset(&eqe, 0, sizeof eqe); + memset(&eqe, 0, sizeof(eqe)); eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE; @@ -276,7 +276,7 @@ int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port) /*don't send if we don't have the that slave */ if (dev->persist->num_vfs < slave) return 0; - memset(&eqe, 0, sizeof eqe); + memset(&eqe, 0, sizeof(eqe)); eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; eqe.subtype = MLX4_DEV_PMC_SUBTYPE_GUID_INFO; @@ -295,7 +295,7 @@ int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, /*don't send if we don't have the that slave */ if (dev->persist->num_vfs < slave) return 0; - memset(&eqe, 0, sizeof eqe); + memset(&eqe, 0, sizeof(eqe)); eqe.type = MLX4_EVENT_TYPE_PORT_CHANGE; eqe.subtype = port_subtype_change; @@ -432,7 +432,7 @@ int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr) { struct mlx4_eqe eqe; - memset(&eqe, 0, sizeof eqe); + memset(&eqe, 0, sizeof(eqe)); eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PORT_INFO; @@ -726,7 +726,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) } memcpy(&priv->mfunc.master.comm_arm_bit_vector, eqe->event.comm_channel_arm.bit_vec, - sizeof eqe->event.comm_channel_arm.bit_vec); + sizeof(eqe->event.comm_channel_arm.bit_vec)); queue_work(priv->mfunc.master.comm_wq, &priv->mfunc.master.comm_work); break; @@ -984,15 +984,15 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent, */ npages = PAGE_ALIGN(eq->nent * dev->caps.eqe_size) / PAGE_SIZE; - eq->page_list = kmalloc(npages * sizeof *eq->page_list, - GFP_KERNEL); + eq->page_list = kmalloc_array(npages, sizeof(*eq->page_list), + GFP_KERNEL); if (!eq->page_list) goto err_out; for (i = 0; i < npages; ++i) eq->page_list[i].buf = NULL; - dma_list = kmalloc(npages * sizeof *dma_list, GFP_KERNEL); + dma_list = kmalloc_array(npages, sizeof(*dma_list), GFP_KERNEL); if (!dma_list) goto err_out_free; @@ -1161,7 +1161,7 @@ int mlx4_alloc_eq_table(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); priv->eq_table.eq = kcalloc(dev->caps.num_eqs - dev->caps.reserved_eqs, - sizeof *priv->eq_table.eq, GFP_KERNEL); + sizeof(*priv->eq_table.eq), GFP_KERNEL); if (!priv->eq_table.eq) return -ENOMEM; @@ -1180,7 +1180,7 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) int i; priv->eq_table.uar_map = kcalloc(mlx4_num_eq_uar(dev), - sizeof *priv->eq_table.uar_map, + sizeof(*priv->eq_table.uar_map), GFP_KERNEL); if (!priv->eq_table.uar_map) { err = -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 041c0ed65929..16c09949afd5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -36,6 +36,7 @@ #include <linux/mlx4/cmd.h> #include <linux/module.h> #include <linux/cache.h> +#include <linux/kernel.h> #include "fw.h" #include "icm.h" @@ -57,7 +58,7 @@ MODULE_PARM_DESC(enable_qos, "Enable Enhanced QoS support (default: off)"); do { \ void *__p = (char *) (source) + (offset); \ u64 val; \ - switch (sizeof (dest)) { \ + switch (sizeof(dest)) { \ case 1: (dest) = *(u8 *) __p; break; \ case 2: (dest) = be16_to_cpup(__p); break; \ case 4: (dest) = be32_to_cpup(__p); break; \ @@ -162,6 +163,7 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [35] = "Diag counters per port", [36] = "QinQ VST mode support", [37] = "sl to vl mapping table change event support", + [38] = "user MAC support", }; int i; @@ -679,22 +681,22 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, if (func_cap->flags1 & QUERY_FUNC_CAP_VF_ENABLE_QP0) { MLX4_GET(qkey, outbox, QUERY_FUNC_CAP_PRIV_VF_QKEY_OFFSET); - func_cap->qp0_qkey = qkey; + func_cap->spec_qps.qp0_qkey = qkey; } else { - func_cap->qp0_qkey = 0; + func_cap->spec_qps.qp0_qkey = 0; } MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL); - func_cap->qp0_tunnel_qpn = size & 0xFFFFFF; + func_cap->spec_qps.qp0_tunnel = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_PROXY); - func_cap->qp0_proxy_qpn = size & 0xFFFFFF; + func_cap->spec_qps.qp0_proxy = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_TUNNEL); - func_cap->qp1_tunnel_qpn = size & 0xFFFFFF; + func_cap->spec_qps.qp1_tunnel = size & 0xFFFFFF; MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY); - func_cap->qp1_proxy_qpn = size & 0xFFFFFF; + func_cap->spec_qps.qp1_proxy = size & 0xFFFFFF; if (func_cap->flags1 & QUERY_FUNC_CAP_FLAGS1_NIC_INFO) MLX4_GET(func_cap->phys_port_id, outbox, @@ -778,6 +780,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET 0x52 #define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET 0x55 #define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET 0x56 +#define QUERY_DEV_CAP_USER_MAC_EN_OFFSET 0x5C #define QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET 0x5D #define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET 0x61 #define QUERY_DEV_CAP_RSVD_MCG_OFFSET 0x62 @@ -949,6 +952,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET); dev_cap->max_sq_desc_sz = size; + MLX4_GET(field, outbox, QUERY_DEV_CAP_USER_MAC_EN_OFFSET); + if (field & (1 << 2)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_USER_MAC_EN; MLX4_GET(field, outbox, QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET); if (field & 0x1) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP; @@ -1534,7 +1540,7 @@ int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt) for (i = 0; i < mlx4_icm_size(&iter) >> lg; ++i) { if (virt != -1) { pages[nent * 2] = cpu_to_be64(virt); - virt += 1 << lg; + virt += 1ULL << lg; } pages[nent * 2 + 1] = @@ -2450,14 +2456,14 @@ int mlx4_config_dev_retrieval(struct mlx4_dev *dev, csum_mask = (config_dev.rx_checksum_val >> CONFIG_DEV_RX_CSUM_MODE_PORT1_BIT_OFFSET) & CONFIG_DEV_RX_CSUM_MODE_MASK; - if (csum_mask >= sizeof(config_dev_csum_flags)/sizeof(config_dev_csum_flags[0])) + if (csum_mask >= ARRAY_SIZE(config_dev_csum_flags)) return -EINVAL; params->rx_csum_flags_port_1 = config_dev_csum_flags[csum_mask]; csum_mask = (config_dev.rx_checksum_val >> CONFIG_DEV_RX_CSUM_MODE_PORT2_BIT_OFFSET) & CONFIG_DEV_RX_CSUM_MODE_MASK; - if (csum_mask >= sizeof(config_dev_csum_flags)/sizeof(config_dev_csum_flags[0])) + if (csum_mask >= ARRAY_SIZE(config_dev_csum_flags)) return -EINVAL; params->rx_csum_flags_port_2 = config_dev_csum_flags[csum_mask]; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index b52ba01aa486..cd6399c76bfd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -144,11 +144,7 @@ struct mlx4_func_cap { int max_eq; int reserved_eq; int mcg_quota; - u32 qp0_qkey; - u32 qp0_tunnel_qpn; - u32 qp0_proxy_qpn; - u32 qp1_tunnel_qpn; - u32 qp1_proxy_qpn; + struct mlx4_spec_qps spec_qps; u32 reserved_lkey; u8 physical_port; u8 flags0; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c index 8f2fde0487c4..3a09d7122d3b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.c @@ -65,7 +65,7 @@ struct mlx4_set_port_scheduler_context { /* Granular Qos (per VF) section */ struct mlx4_alloc_vpp_param { - __be32 availible_vpp; + __be32 available_vpp; __be32 vpp_p_up[MLX4_NUM_UP]; }; @@ -157,7 +157,7 @@ int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, EXPORT_SYMBOL(mlx4_SET_PORT_SCHEDULER); int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port, - u16 *availible_vpp, u8 *vpp_p_up) + u16 *available_vpp, u8 *vpp_p_up) { int i; int err; @@ -179,7 +179,7 @@ int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port, goto out; /* Total number of supported VPPs */ - *availible_vpp = (u16)be32_to_cpu(out_param->availible_vpp); + *available_vpp = (u16)be32_to_cpu(out_param->available_vpp); for (i = 0; i < MLX4_NUM_UP; i++) vpp_p_up[i] = (u8)be32_to_cpu(out_param->vpp_p_up[i]); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h index ac1f331878e6..582997577a04 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw_qos.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw_qos.h @@ -84,23 +84,23 @@ int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc); int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw, u8 *pg, u16 *ratelimit); /** - * mlx4_ALLOCATE_VPP_get - Query port VPP availible resources and allocation. - * Before distribution of VPPs to priorities, only availible_vpp is returned. + * mlx4_ALLOCATE_VPP_get - Query port VPP available resources and allocation. + * Before distribution of VPPs to priorities, only available_vpp is returned. * After initialization it returns the distribution of VPPs among priorities. * * @dev: mlx4_dev. * @port: Physical port number. - * @availible_vpp: Pointer to variable where number of availible VPPs is stored + * @available_vpp: Pointer to variable where number of available VPPs is stored * @vpp_p_up: Distribution of VPPs to priorities is stored in this array * * Returns 0 on success or a negative mlx4_core errno code. **/ int mlx4_ALLOCATE_VPP_get(struct mlx4_dev *dev, u8 port, - u16 *availible_vpp, u8 *vpp_p_up); + u16 *available_vpp, u8 *vpp_p_up); /** * mlx4_ALLOCATE_VPP_set - Distribution of VPPs among differnt priorities. * The total number of VPPs assigned to all for a port must not exceed - * the value reported by availible_vpp in mlx4_ALLOCATE_VPP_get. + * the value reported by available_vpp in mlx4_ALLOCATE_VPP_get. * VPP allocation is allowed only after the port type has been set, * and while no QPs are open for this port. * diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 5a7816e7c7b4..a822f7a56bc5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -400,7 +400,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table, obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size; num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk; - table->icm = kcalloc(num_icm, sizeof *table->icm, GFP_KERNEL); + table->icm = kcalloc(num_icm, sizeof(*table->icm), GFP_KERNEL); if (!table->icm) return -ENOMEM; table->virt = virt; diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.h b/drivers/net/ethernet/mellanox/mlx4/icm.h index dee67fa39107..c9169a490557 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.h +++ b/drivers/net/ethernet/mellanox/mlx4/icm.h @@ -39,8 +39,8 @@ #include <linux/mutex.h> #define MLX4_ICM_CHUNK_LEN \ - ((256 - sizeof (struct list_head) - 2 * sizeof (int)) / \ - (sizeof (struct scatterlist))) + ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \ + (sizeof(struct scatterlist))) enum { MLX4_ICM_PAGE_SHIFT = 12, diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index e00f627331cb..2edcce98ab2d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -53,7 +53,7 @@ static void mlx4_add_device(struct mlx4_interface *intf, struct mlx4_priv *priv) { struct mlx4_device_context *dev_ctx; - dev_ctx = kmalloc(sizeof *dev_ctx, GFP_KERNEL); + dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL); if (!dev_ctx) return; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index a594bfd9e095..e61c99ef741d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -34,6 +34,7 @@ */ #include <linux/module.h> +#include <linux/kernel.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/pci.h> @@ -121,7 +122,7 @@ static char mlx4_version[] = DRV_NAME ": Mellanox ConnectX core driver v" DRV_VERSION "\n"; -static struct mlx4_profile default_profile = { +static const struct mlx4_profile default_profile = { .num_qp = 1 << 18, .num_srq = 1 << 16, .rdmarc_per_qp = 1 << 4, @@ -131,7 +132,7 @@ static struct mlx4_profile default_profile = { .num_mtt = 1 << 20, /* It is really num mtt segements */ }; -static struct mlx4_profile low_mem_profile = { +static const struct mlx4_profile low_mem_profile = { .num_qp = 1 << 17, .num_srq = 1 << 6, .rdmarc_per_qp = 1 << 4, @@ -819,38 +820,93 @@ static void slave_adjust_steering_mode(struct mlx4_dev *dev, mlx4_steering_mode_str(dev->caps.steering_mode)); } +static void mlx4_slave_destroy_special_qp_cap(struct mlx4_dev *dev) +{ + kfree(dev->caps.spec_qps); + dev->caps.spec_qps = NULL; +} + +static int mlx4_slave_special_qp_cap(struct mlx4_dev *dev) +{ + struct mlx4_func_cap *func_cap = NULL; + struct mlx4_caps *caps = &dev->caps; + int i, err = 0; + + func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL); + caps->spec_qps = kcalloc(caps->num_ports, sizeof(*caps->spec_qps), GFP_KERNEL); + + if (!func_cap || !caps->spec_qps) { + mlx4_err(dev, "Failed to allocate memory for special qps cap\n"); + err = -ENOMEM; + goto err_mem; + } + + for (i = 1; i <= caps->num_ports; ++i) { + err = mlx4_QUERY_FUNC_CAP(dev, i, func_cap); + if (err) { + mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n", + i, err); + goto err_mem; + } + caps->spec_qps[i - 1] = func_cap->spec_qps; + caps->port_mask[i] = caps->port_type[i]; + caps->phys_port_id[i] = func_cap->phys_port_id; + err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, + &caps->gid_table_len[i], + &caps->pkey_table_len[i]); + if (err) { + mlx4_err(dev, "QUERY_PORT command failed for port %d, aborting (%d)\n", + i, err); + goto err_mem; + } + } + +err_mem: + if (err) + mlx4_slave_destroy_special_qp_cap(dev); + kfree(func_cap); + return err; +} + static int mlx4_slave_cap(struct mlx4_dev *dev) { int err; u32 page_size; - struct mlx4_dev_cap dev_cap; - struct mlx4_func_cap func_cap; - struct mlx4_init_hca_param hca_param; - u8 i; + struct mlx4_dev_cap *dev_cap = NULL; + struct mlx4_func_cap *func_cap = NULL; + struct mlx4_init_hca_param *hca_param = NULL; + + hca_param = kzalloc(sizeof(*hca_param), GFP_KERNEL); + func_cap = kzalloc(sizeof(*func_cap), GFP_KERNEL); + dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL); + if (!hca_param || !func_cap || !dev_cap) { + mlx4_err(dev, "Failed to allocate memory for slave_cap\n"); + err = -ENOMEM; + goto free_mem; + } - memset(&hca_param, 0, sizeof(hca_param)); - err = mlx4_QUERY_HCA(dev, &hca_param); + err = mlx4_QUERY_HCA(dev, hca_param); if (err) { mlx4_err(dev, "QUERY_HCA command failed, aborting\n"); - return err; + goto free_mem; } /* fail if the hca has an unknown global capability * at this time global_caps should be always zeroed */ - if (hca_param.global_caps) { + if (hca_param->global_caps) { mlx4_err(dev, "Unknown hca global capabilities\n"); - return -EINVAL; + err = -EINVAL; + goto free_mem; } - dev->caps.hca_core_clock = hca_param.hca_core_clock; + dev->caps.hca_core_clock = hca_param->hca_core_clock; - memset(&dev_cap, 0, sizeof(dev_cap)); - dev->caps.max_qp_dest_rdma = 1 << hca_param.log_rd_per_qp; - err = mlx4_dev_cap(dev, &dev_cap); + dev->caps.max_qp_dest_rdma = 1 << hca_param->log_rd_per_qp; + err = mlx4_dev_cap(dev, dev_cap); if (err) { mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n"); - return err; + goto free_mem; } err = mlx4_QUERY_FW(dev); @@ -862,21 +918,23 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) if (page_size > PAGE_SIZE) { mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n", page_size, PAGE_SIZE); - return -ENODEV; + err = -ENODEV; + goto free_mem; } /* Set uar_page_shift for VF */ - dev->uar_page_shift = hca_param.uar_page_sz + 12; + dev->uar_page_shift = hca_param->uar_page_sz + 12; /* Make sure the master uar page size is valid */ if (dev->uar_page_shift > PAGE_SHIFT) { mlx4_err(dev, "Invalid configuration: uar page size is larger than system page size\n"); - return -ENODEV; + err = -ENODEV; + goto free_mem; } /* Set reserved_uars based on the uar_page_shift */ - mlx4_set_num_reserved_uars(dev, &dev_cap); + mlx4_set_num_reserved_uars(dev, dev_cap); /* Although uar page size in FW differs from system page size, * upper software layers (mlx4_ib, mlx4_en and part of mlx4_core) @@ -884,34 +942,35 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) */ dev->caps.uar_page_size = PAGE_SIZE; - memset(&func_cap, 0, sizeof(func_cap)); - err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); + err = mlx4_QUERY_FUNC_CAP(dev, 0, func_cap); if (err) { mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d)\n", err); - return err; + goto free_mem; } - if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != + if ((func_cap->pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) != PF_CONTEXT_BEHAVIOUR_MASK) { mlx4_err(dev, "Unknown pf context behaviour %x known flags %x\n", - func_cap.pf_context_behaviour, PF_CONTEXT_BEHAVIOUR_MASK); - return -EINVAL; - } - - dev->caps.num_ports = func_cap.num_ports; - dev->quotas.qp = func_cap.qp_quota; - dev->quotas.srq = func_cap.srq_quota; - dev->quotas.cq = func_cap.cq_quota; - dev->quotas.mpt = func_cap.mpt_quota; - dev->quotas.mtt = func_cap.mtt_quota; - dev->caps.num_qps = 1 << hca_param.log_num_qps; - dev->caps.num_srqs = 1 << hca_param.log_num_srqs; - dev->caps.num_cqs = 1 << hca_param.log_num_cqs; - dev->caps.num_mpts = 1 << hca_param.log_mpt_sz; - dev->caps.num_eqs = func_cap.max_eq; - dev->caps.reserved_eqs = func_cap.reserved_eq; - dev->caps.reserved_lkey = func_cap.reserved_lkey; + func_cap->pf_context_behaviour, + PF_CONTEXT_BEHAVIOUR_MASK); + err = -EINVAL; + goto free_mem; + } + + dev->caps.num_ports = func_cap->num_ports; + dev->quotas.qp = func_cap->qp_quota; + dev->quotas.srq = func_cap->srq_quota; + dev->quotas.cq = func_cap->cq_quota; + dev->quotas.mpt = func_cap->mpt_quota; + dev->quotas.mtt = func_cap->mtt_quota; + dev->caps.num_qps = 1 << hca_param->log_num_qps; + dev->caps.num_srqs = 1 << hca_param->log_num_srqs; + dev->caps.num_cqs = 1 << hca_param->log_num_cqs; + dev->caps.num_mpts = 1 << hca_param->log_mpt_sz; + dev->caps.num_eqs = func_cap->max_eq; + dev->caps.reserved_eqs = func_cap->reserved_eq; + dev->caps.reserved_lkey = func_cap->reserved_lkey; dev->caps.num_pds = MLX4_NUM_PDS; dev->caps.num_mgms = 0; dev->caps.num_amgms = 0; @@ -919,43 +978,16 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) if (dev->caps.num_ports > MLX4_MAX_PORTS) { mlx4_err(dev, "HCA has %d ports, but we only support %d, aborting\n", dev->caps.num_ports, MLX4_MAX_PORTS); - return -ENODEV; + err = -ENODEV; + goto free_mem; } mlx4_replace_zero_macs(dev); - dev->caps.qp0_qkey = kcalloc(dev->caps.num_ports, sizeof(u32), GFP_KERNEL); - dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - - if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || - !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy || - !dev->caps.qp0_qkey) { - err = -ENOMEM; - goto err_mem; - } - - for (i = 1; i <= dev->caps.num_ports; ++i) { - err = mlx4_QUERY_FUNC_CAP(dev, i, &func_cap); - if (err) { - mlx4_err(dev, "QUERY_FUNC_CAP port command failed for port %d, aborting (%d)\n", - i, err); - goto err_mem; - } - dev->caps.qp0_qkey[i - 1] = func_cap.qp0_qkey; - dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; - dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; - dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; - dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; - dev->caps.port_mask[i] = dev->caps.port_type[i]; - dev->caps.phys_port_id[i] = func_cap.phys_port_id; - err = mlx4_get_slave_pkey_gid_tbl_len(dev, i, - &dev->caps.gid_table_len[i], - &dev->caps.pkey_table_len[i]); - if (err) - goto err_mem; + err = mlx4_slave_special_qp_cap(dev); + if (err) { + mlx4_err(dev, "Set special QP caps failed. aborting\n"); + goto free_mem; } if (dev->caps.uar_page_size * (dev->caps.num_uars - @@ -970,7 +1002,7 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) goto err_mem; } - if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { + if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_EQE_ENABLED) { dev->caps.eqe_size = 64; dev->caps.eqe_factor = 1; } else { @@ -978,20 +1010,20 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) dev->caps.eqe_factor = 0; } - if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { + if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_64B_CQE_ENABLED) { dev->caps.cqe_size = 64; dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE; } else { dev->caps.cqe_size = 32; } - if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) { - dev->caps.eqe_size = hca_param.eqe_size; + if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_EQE_STRIDE_ENABLED) { + dev->caps.eqe_size = hca_param->eqe_size; dev->caps.eqe_factor = 0; } - if (hca_param.dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) { - dev->caps.cqe_size = hca_param.cqe_size; + if (hca_param->dev_cap_enabled & MLX4_DEV_CAP_CQE_STRIDE_ENABLED) { + dev->caps.cqe_size = hca_param->cqe_size; /* User still need to know when CQE > 32B */ dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE; } @@ -999,31 +1031,27 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_TS; mlx4_warn(dev, "Timestamping is not supported in slave mode\n"); - slave_adjust_steering_mode(dev, &dev_cap, &hca_param); + dev->caps.flags2 &= ~MLX4_DEV_CAP_FLAG2_USER_MAC_EN; + mlx4_dbg(dev, "User MAC FW update is not supported in slave mode\n"); + + slave_adjust_steering_mode(dev, dev_cap, hca_param); mlx4_dbg(dev, "RSS support for IP fragments is %s\n", - hca_param.rss_ip_frags ? "on" : "off"); + hca_param->rss_ip_frags ? "on" : "off"); - if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP && + if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_BF_RES_QP && dev->caps.bf_reg_size) dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_ETH_BF_QP; - if (func_cap.extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP) + if (func_cap->extra_flags & MLX4_QUERY_FUNC_FLAGS_A0_RES_QP) dev->caps.alloc_res_qp_mask |= MLX4_RESERVE_A0_QP; - return 0; - err_mem: - kfree(dev->caps.qp0_qkey); - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); - dev->caps.qp0_qkey = NULL; - dev->caps.qp0_tunnel = NULL; - dev->caps.qp0_proxy = NULL; - dev->caps.qp1_tunnel = NULL; - dev->caps.qp1_proxy = NULL; - + if (err) + mlx4_slave_destroy_special_qp_cap(dev); +free_mem: + kfree(hca_param); + kfree(func_cap); + kfree(dev_cap); return err; } @@ -2399,7 +2427,7 @@ static int mlx4_init_hca(struct mlx4_dev *dev) dev->caps.rx_checksum_flags_port[2] = params.rx_csum_flags_port_2; } priv->eq_table.inta_pin = adapter.inta_pin; - memcpy(dev->board_id, adapter.board_id, sizeof dev->board_id); + memcpy(dev->board_id, adapter.board_id, sizeof(dev->board_id)); return 0; @@ -2407,13 +2435,8 @@ unmap_bf: unmap_internal_clock(dev); unmap_bf_area(dev); - if (mlx4_is_slave(dev)) { - kfree(dev->caps.qp0_qkey); - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); - } + if (mlx4_is_slave(dev)) + mlx4_slave_destroy_special_qp_cap(dev); err_close: if (mlx4_is_slave(dev)) @@ -2870,7 +2893,7 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) dev->caps.num_eqs - dev->caps.reserved_eqs, MAX_MSIX); - entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL); + entries = kcalloc(nreq, sizeof(*entries), GFP_KERNEL); if (!entries) goto no_msi; @@ -3597,13 +3620,8 @@ err_master_mfunc: mlx4_multi_func_cleanup(dev); } - if (mlx4_is_slave(dev)) { - kfree(dev->caps.qp0_qkey); - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); - } + if (mlx4_is_slave(dev)) + mlx4_slave_destroy_special_qp_cap(dev); err_close: mlx4_close_hca(dev); @@ -3659,7 +3677,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, * per port, we must limit the number of VFs to 63 (since their are * 128 MACs) */ - for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && i < num_vfs_argc; + for (i = 0; i < ARRAY_SIZE(nvfs) && i < num_vfs_argc; total_vfs += nvfs[param_map[num_vfs_argc - 1][i]], i++) { nvfs[param_map[num_vfs_argc - 1][i]] = num_vfs[i]; if (nvfs[i] < 0) { @@ -3668,7 +3686,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, goto err_disable_pdev; } } - for (i = 0; i < sizeof(prb_vf)/sizeof(prb_vf[0]) && i < probe_vfs_argc; + for (i = 0; i < ARRAY_SIZE(prb_vf) && i < probe_vfs_argc; i++) { prb_vf[param_map[probe_vfs_argc - 1][i]] = probe_vf[i]; if (prb_vf[i] < 0 || prb_vf[i] > nvfs[i]) { @@ -3747,11 +3765,11 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, if (total_vfs) { unsigned vfs_offset = 0; - for (i = 0; i < sizeof(nvfs)/sizeof(nvfs[0]) && + for (i = 0; i < ARRAY_SIZE(nvfs) && vfs_offset + nvfs[i] < extended_func_num(pdev); vfs_offset += nvfs[i], i++) ; - if (i == sizeof(nvfs)/sizeof(nvfs[0])) { + if (i == ARRAY_SIZE(nvfs)) { err = -ENODEV; goto err_release_regions; } @@ -3783,7 +3801,6 @@ err_release_regions: err_disable_pdev: mlx4_pci_disable_device(&priv->dev); - pci_set_drvdata(pdev, NULL); return err; } @@ -3944,11 +3961,7 @@ static void mlx4_unload_one(struct pci_dev *pdev) if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); - kfree(dev->caps.qp0_qkey); - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); + mlx4_slave_destroy_special_qp_cap(dev); kfree(dev->dev_vfs); mlx4_clean_dev(dev); @@ -3998,7 +4011,6 @@ static void mlx4_remove_one(struct pci_dev *pdev) devlink_unregister(devlink); kfree(dev->persist); devlink_free(devlink); - pci_set_drvdata(pdev, NULL); } static int restore_current_port_types(struct mlx4_dev *dev, diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 0710b3677464..4c5306dbcf11 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -162,7 +162,7 @@ static int new_steering_entry(struct mlx4_dev *dev, u8 port, return -EINVAL; s_steer = &mlx4_priv(dev)->steer[port - 1]; - new_entry = kzalloc(sizeof *new_entry, GFP_KERNEL); + new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL); if (!new_entry) return -ENOMEM; @@ -175,7 +175,7 @@ static int new_steering_entry(struct mlx4_dev *dev, u8 port, */ pqp = get_promisc_qp(dev, port, steer, qpn); if (pqp) { - dqp = kmalloc(sizeof *dqp, GFP_KERNEL); + dqp = kmalloc(sizeof(*dqp), GFP_KERNEL); if (!dqp) { err = -ENOMEM; goto out_alloc; @@ -274,7 +274,7 @@ static int existing_steering_entry(struct mlx4_dev *dev, u8 port, } /* add the qp as a duplicate on this index */ - dqp = kmalloc(sizeof *dqp, GFP_KERNEL); + dqp = kmalloc(sizeof(*dqp), GFP_KERNEL); if (!dqp) return -ENOMEM; dqp->qpn = qpn; @@ -443,7 +443,7 @@ static int add_promisc_qp(struct mlx4_dev *dev, u8 port, goto out_mutex; } - pqp = kmalloc(sizeof *pqp, GFP_KERNEL); + pqp = kmalloc(sizeof(*pqp), GFP_KERNEL); if (!pqp) { err = -ENOMEM; goto out_mutex; @@ -514,7 +514,7 @@ static int add_promisc_qp(struct mlx4_dev *dev, u8 port, /* add the new qpn to list of promisc qps */ list_add_tail(&pqp->list, &s_steer->promisc_qps[steer]); /* now need to add all the promisc qps to default entry */ - memset(mgm, 0, sizeof *mgm); + memset(mgm, 0, sizeof(*mgm)); members_count = 0; list_for_each_entry(dqp, &s_steer->promisc_qps[steer], list) { if (members_count == dev->caps.num_qp_per_mgm) { @@ -1144,7 +1144,7 @@ int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], index += dev->caps.num_mgms; new_entry = 1; - memset(mgm, 0, sizeof *mgm); + memset(mgm, 0, sizeof(*mgm)); memcpy(mgm->gid, gid, 16); } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 852d00a5b016..c68da1986e51 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -807,6 +807,8 @@ struct mlx4_set_port_general_context { u8 phv_en; u8 reserved6[5]; __be16 user_mtu; + u16 reserved7; + u8 user_mac[6]; }; struct mlx4_set_port_rqp_calc_context { diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index d350b2158104..fdb3ad0cbe54 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -685,7 +685,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq, int cq_idx); void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); -int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); +void mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq); void mlx4_en_tx_irq(struct mlx4_cq *mcq); u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 24282cd017d3..c7c0764991c9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -106,9 +106,9 @@ static int mlx4_buddy_init(struct mlx4_buddy *buddy, int max_order) buddy->max_order = max_order; spin_lock_init(&buddy->lock); - buddy->bits = kcalloc(buddy->max_order + 1, sizeof (long *), + buddy->bits = kcalloc(buddy->max_order + 1, sizeof(long *), GFP_KERNEL); - buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free, + buddy->num_free = kcalloc(buddy->max_order + 1, sizeof(*buddy->num_free), GFP_KERNEL); if (!buddy->bits || !buddy->num_free) goto err_out; @@ -703,13 +703,13 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt, return -ENOMEM; dma_sync_single_for_cpu(&dev->persist->pdev->dev, dma_handle, - npages * sizeof (u64), DMA_TO_DEVICE); + npages * sizeof(u64), DMA_TO_DEVICE); for (i = 0; i < npages; ++i) mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); dma_sync_single_for_device(&dev->persist->pdev->dev, dma_handle, - npages * sizeof (u64), DMA_TO_DEVICE); + npages * sizeof(u64), DMA_TO_DEVICE); return 0; } @@ -1052,7 +1052,7 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages, return -EINVAL; /* All MTTs must fit in the same page */ - if (max_pages * sizeof *fmr->mtts > PAGE_SIZE) + if (max_pages * sizeof(*fmr->mtts) > PAGE_SIZE) return -EINVAL; fmr->page_shift = page_shift; diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 4e36e287d605..3ef3406ff4cb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -52,6 +52,7 @@ #define MLX4_FLAG2_V_IGNORE_FCS_MASK BIT(1) #define MLX4_FLAG2_V_USER_MTU_MASK BIT(5) +#define MLX4_FLAG2_V_USER_MAC_MASK BIT(6) #define MLX4_FLAG_V_MTU_MASK BIT(0) #define MLX4_FLAG_V_PPRX_MASK BIT(1) #define MLX4_FLAG_V_PPTX_MASK BIT(2) @@ -1700,6 +1701,30 @@ int mlx4_SET_PORT_user_mtu(struct mlx4_dev *dev, u8 port, u16 user_mtu) } EXPORT_SYMBOL(mlx4_SET_PORT_user_mtu); +int mlx4_SET_PORT_user_mac(struct mlx4_dev *dev, u8 port, u8 *user_mac) +{ + struct mlx4_cmd_mailbox *mailbox; + struct mlx4_set_port_general_context *context; + u32 in_mod; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + context = mailbox->buf; + context->flags2 |= MLX4_FLAG2_V_USER_MAC_MASK; + memcpy(context->user_mac, user_mac, sizeof(context->user_mac)); + + in_mod = MLX4_SET_PORT_GENERAL << 8 | port; + err = mlx4_cmd(dev, mailbox->dma, in_mod, MLX4_SET_PORT_ETH_OPCODE, + MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL(mlx4_SET_PORT_user_mac); + int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, u8 ignore_fcs_value) { struct mlx4_cmd_mailbox *mailbox; diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 5e5b4475b85e..728a2fb1f5c0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -174,7 +174,7 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, cpu_to_be16(mlx4_qp_roce_entropy(dev, qp->qpn)); *(__be32 *) mailbox->buf = cpu_to_be32(optpar); - memcpy(mailbox->buf + 8, context, sizeof *context); + memcpy(mailbox->buf + 8, context, sizeof(*context)); ((struct mlx4_qp_context *) (mailbox->buf + 8))->local_qpn = cpu_to_be32(qp->qpn); @@ -845,24 +845,21 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) /* In mfunc, calculate proxy and tunnel qp offsets for the PF here, * since the PF does not call mlx4_slave_caps */ - dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); - - if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || - !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { + dev->caps.spec_qps = kcalloc(dev->caps.num_ports, + sizeof(*dev->caps.spec_qps), + GFP_KERNEL); + if (!dev->caps.spec_qps) { err = -ENOMEM; goto err_mem; } for (k = 0; k < dev->caps.num_ports; k++) { - dev->caps.qp0_proxy[k] = dev->phys_caps.base_proxy_sqpn + + dev->caps.spec_qps[k].qp0_proxy = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev) + k; - dev->caps.qp0_tunnel[k] = dev->caps.qp0_proxy[k] + 8 * MLX4_MFUNC_MAX; - dev->caps.qp1_proxy[k] = dev->phys_caps.base_proxy_sqpn + + dev->caps.spec_qps[k].qp0_tunnel = dev->caps.spec_qps[k].qp0_proxy + 8 * MLX4_MFUNC_MAX; + dev->caps.spec_qps[k].qp1_proxy = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k; - dev->caps.qp1_tunnel[k] = dev->caps.qp1_proxy[k] + 8 * MLX4_MFUNC_MAX; + dev->caps.spec_qps[k].qp1_tunnel = dev->caps.spec_qps[k].qp1_proxy + 8 * MLX4_MFUNC_MAX; } } @@ -874,12 +871,8 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) return err; err_mem: - kfree(dev->caps.qp0_tunnel); - kfree(dev->caps.qp0_proxy); - kfree(dev->caps.qp1_tunnel); - kfree(dev->caps.qp1_proxy); - dev->caps.qp0_tunnel = dev->caps.qp0_proxy = - dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; + kfree(dev->caps.spec_qps); + dev->caps.spec_qps = NULL; mlx4_cleanup_qp_zones(dev); return err; } @@ -908,7 +901,7 @@ int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp, MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (!err) - memcpy(context, mailbox->buf + 8, sizeof *context); + memcpy(context, mailbox->buf + 8, sizeof(*context)); mlx4_free_cmd_mailbox(dev, mailbox); return err; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 215e21c3dc8a..fabb53379727 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1040,7 +1040,7 @@ static struct res_common *alloc_qp_tr(int id) { struct res_qp *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1058,7 +1058,7 @@ static struct res_common *alloc_mtt_tr(int id, int order) { struct res_mtt *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1074,7 +1074,7 @@ static struct res_common *alloc_mpt_tr(int id, int key) { struct res_mpt *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1089,7 +1089,7 @@ static struct res_common *alloc_eq_tr(int id) { struct res_eq *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1103,7 +1103,7 @@ static struct res_common *alloc_cq_tr(int id) { struct res_cq *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1118,7 +1118,7 @@ static struct res_common *alloc_srq_tr(int id) { struct res_srq *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1133,7 +1133,7 @@ static struct res_common *alloc_counter_tr(int id, int port) { struct res_counter *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1148,7 +1148,7 @@ static struct res_common *alloc_xrcdn_tr(int id) { struct res_xrcdn *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1162,7 +1162,7 @@ static struct res_common *alloc_fs_rule_tr(u64 id, int qpn) { struct res_fs_rule *ret; - ret = kzalloc(sizeof *ret, GFP_KERNEL); + ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -1274,7 +1274,7 @@ static int add_res_range(struct mlx4_dev *dev, int slave, u64 base, int count, struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker; struct rb_root *root = &tracker->res_tree[type]; - res_arr = kzalloc(count * sizeof *res_arr, GFP_KERNEL); + res_arr = kcalloc(count, sizeof(*res_arr), GFP_KERNEL); if (!res_arr) return -ENOMEM; @@ -2027,7 +2027,7 @@ static int mac_add_to_slave(struct mlx4_dev *dev, int slave, u64 mac, int port, if (mlx4_grant_resource(dev, slave, RES_MAC, 1, port)) return -EINVAL; - res = kzalloc(sizeof *res, GFP_KERNEL); + res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) { mlx4_release_resource(dev, slave, RES_MAC, 1, port); return -ENOMEM; @@ -4020,7 +4020,7 @@ static int add_mcg_res(struct mlx4_dev *dev, int slave, struct res_qp *rqp, struct res_gid *res; int err; - res = kzalloc(sizeof *res, GFP_KERNEL); + res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 5aee05992f27..fdaef00465d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -34,6 +34,27 @@ config MLX5_CORE_EN ---help--- Ethernet support in Mellanox Technologies ConnectX-4 NIC. +config MLX5_MPFS + bool "Mellanox Technologies MLX5 MPFS support" + depends on MLX5_CORE_EN + default y + ---help--- + Mellanox Technologies Ethernet Multi-Physical Function Switch (MPFS) + support in ConnectX NIC. MPFs is required for when multi-PF configuration + is enabled to allow passing user configured unicast MAC addresses to the + requesting PF. + +config MLX5_ESWITCH + bool "Mellanox Technologies MLX5 SRIOV E-Switch support" + depends on MLX5_CORE_EN + default y + ---help--- + Mellanox Technologies Ethernet SRIOV E-Switch support in ConnectX NIC. + E-Switch provides internal SRIOV packet steering and switching for the + enabled VFs and PF in two available modes: + Legacy SRIOV mode (L2 mac vlan steering based). + Switchdev mode (eswitch offloads). + config MLX5_CORE_EN_DCB bool "Data Center Bridging (DCB) Support" default y diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9d17e4e76d3a..87a3099808f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -4,17 +4,21 @@ subdir-ccflags-y += -I$(src) mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ - fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o + fs_counters.o rl.o lag.o dev.o wq.o lib/gid.o \ + diag/fs_tracepoint.o mlx5_core-$(CONFIG_MLX5_ACCEL) += accel/ipsec.o mlx5_core-$(CONFIG_MLX5_FPGA) += fpga/cmd.o fpga/core.o fpga/conn.o fpga/sdk.o \ fpga/ipsec.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += eswitch.o eswitch_offloads.o \ - en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ - en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ - en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o en_selftest.o +mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ + en_tx.o en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ + en_arfs.o en_fs_ethtool.o en_selftest.o + +mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o + +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o en_rep.o en_tc.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o @@ -22,3 +26,5 @@ mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \ en_accel/ipsec_stats.o + +CFLAGS_tracepoint.o := -I$(src) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c index 3c95f7f53802..47239bf7bf43 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c @@ -258,6 +258,7 @@ EXPORT_SYMBOL_GPL(mlx5_db_alloc); void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db) { u32 db_per_page = PAGE_SIZE / cache_line_size(); + mutex_lock(&dev->priv.pgdir_mutex); __set_bit(db->index, db->u.pgdir->bitmap); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 1acbb721f38d..1fffdebbc9e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -802,7 +802,6 @@ static void cmd_work_handler(struct work_struct *work) bool poll_cmd = ent->polling; int alloc_ret; - sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); if (!ent->page_queue) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index a62f4b6a21a5..ff60cf7342ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -45,11 +45,70 @@ struct mlx5_device_context { unsigned long state; }; +struct mlx5_delayed_event { + struct list_head list; + struct mlx5_core_dev *dev; + enum mlx5_dev_event event; + unsigned long param; +}; + enum { MLX5_INTERFACE_ADDED, MLX5_INTERFACE_ATTACHED, }; +static void add_delayed_event(struct mlx5_priv *priv, + struct mlx5_core_dev *dev, + enum mlx5_dev_event event, + unsigned long param) +{ + struct mlx5_delayed_event *delayed_event; + + delayed_event = kzalloc(sizeof(*delayed_event), GFP_ATOMIC); + if (!delayed_event) { + mlx5_core_err(dev, "event %d is missed\n", event); + return; + } + + mlx5_core_dbg(dev, "Accumulating event %d\n", event); + delayed_event->dev = dev; + delayed_event->event = event; + delayed_event->param = param; + list_add_tail(&delayed_event->list, &priv->waiting_events_list); +} + +static void fire_delayed_event_locked(struct mlx5_device_context *dev_ctx, + struct mlx5_core_dev *dev, + struct mlx5_priv *priv) +{ + struct mlx5_delayed_event *de; + struct mlx5_delayed_event *n; + + /* stop delaying events */ + priv->is_accum_events = false; + + /* fire all accumulated events before new event comes */ + list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) { + dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param); + list_del(&de->list); + kfree(de); + } +} + +static void cleanup_delayed_evets(struct mlx5_priv *priv) +{ + struct mlx5_delayed_event *de; + struct mlx5_delayed_event *n; + + spin_lock_irq(&priv->ctx_lock); + priv->is_accum_events = false; + list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) { + list_del(&de->list); + kfree(de); + } + spin_unlock_irq(&priv->ctx_lock); +} + void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) { struct mlx5_device_context *dev_ctx; @@ -63,6 +122,12 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) return; dev_ctx->intf = intf; + /* accumulating events that can come after mlx5_ib calls to + * ib_register_device, till adding that interface to the events list. + */ + + priv->is_accum_events = true; + dev_ctx->context = intf->add(dev); set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); if (intf->attach) @@ -71,6 +136,9 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) if (dev_ctx->context) { spin_lock_irq(&priv->ctx_lock); list_add_tail(&dev_ctx->list, &priv->ctx_list); + + fire_delayed_event_locked(dev_ctx, dev, priv); + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING if (dev_ctx->intf->pfault) { if (priv->pfault) { @@ -84,6 +152,8 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) spin_unlock_irq(&priv->ctx_lock); } else { kfree(dev_ctx); + /* delete all accumulated events */ + cleanup_delayed_evets(priv); } } @@ -341,6 +411,9 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, spin_lock_irqsave(&priv->ctx_lock, flags); + if (priv->is_accum_events) + add_delayed_event(priv, dev, event, param); + list_for_each_entry(dev_ctx, &priv->ctx_list, list) if (dev_ctx->intf->event) dev_ctx->intf->event(dev, dev_ctx->context, event, param); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile new file mode 100644 index 000000000000..d8e17110f25d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/Makefile @@ -0,0 +1 @@ +subdir-ccflags-y += -I$(src)/.. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c new file mode 100644 index 000000000000..0be4575b58a2 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define CREATE_TRACE_POINTS + +#include "fs_tracepoint.h" +#include <linux/stringify.h> + +#define DECLARE_MASK_VAL(type, name) struct {type m; type v; } name +#define MASK_VAL(type, spec, name, mask, val, fld) \ + DECLARE_MASK_VAL(type, name) = \ + {.m = MLX5_GET(spec, mask, fld),\ + .v = MLX5_GET(spec, val, fld)} +#define MASK_VAL_BE(type, spec, name, mask, val, fld) \ + DECLARE_MASK_VAL(type, name) = \ + {.m = MLX5_GET_BE(type, spec, mask, fld),\ + .v = MLX5_GET_BE(type, spec, val, fld)} +#define GET_MASKED_VAL(name) (name.m & name.v) + +#define GET_MASK_VAL(name, type, mask, val, fld) \ + (name.m = MLX5_GET(type, mask, fld), \ + name.v = MLX5_GET(type, val, fld), \ + name.m & name.v) +#define PRINT_MASKED_VAL(name, p, format) { \ + if (name.m) \ + trace_seq_printf(p, __stringify(name) "=" format " ", name.v); \ + } +#define PRINT_MASKED_VALP(name, cast, p, format) { \ + if (name.m) \ + trace_seq_printf(p, __stringify(name) "=" format " ", \ + (cast)&name.v);\ + } + +static void print_lyr_2_4_hdrs(struct trace_seq *p, + const u32 *mask, const u32 *value) +{ +#define MASK_VAL_L2(type, name, fld) \ + MASK_VAL(type, fte_match_set_lyr_2_4, name, mask, value, fld) + DECLARE_MASK_VAL(u64, smac) = { + .m = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, mask, smac_15_0), + .v = MLX5_GET(fte_match_set_lyr_2_4, value, smac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, value, smac_15_0)}; + DECLARE_MASK_VAL(u64, dmac) = { + .m = MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, mask, dmac_15_0), + .v = MLX5_GET(fte_match_set_lyr_2_4, value, dmac_47_16) << 16 | + MLX5_GET(fte_match_set_lyr_2_4, value, dmac_15_0)}; + MASK_VAL_L2(u16, ethertype, ethertype); + + PRINT_MASKED_VALP(smac, u8 *, p, "%pM"); + PRINT_MASKED_VALP(dmac, u8 *, p, "%pM"); + PRINT_MASKED_VAL(ethertype, p, "%04x"); + + if (ethertype.m == 0xffff) { + if (ethertype.v == ETH_P_IP) { +#define MASK_VAL_L2_BE(type, name, fld) \ + MASK_VAL_BE(type, fte_match_set_lyr_2_4, name, mask, value, fld) + MASK_VAL_L2_BE(u32, src_ipv4, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MASK_VAL_L2_BE(u32, dst_ipv4, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + + PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p, + "%pI4"); + PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p, + "%pI4"); + } else if (ethertype.v == ETH_P_IPV6) { + static const struct in6_addr full_ones = { + .in6_u.u6_addr32 = {htonl(0xffffffff), + htonl(0xffffffff), + htonl(0xffffffff), + htonl(0xffffffff)}, + }; + DECLARE_MASK_VAL(struct in6_addr, src_ipv6); + DECLARE_MASK_VAL(struct in6_addr, dst_ipv6); + + memcpy(src_ipv6.m.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(src_ipv6.m)); + memcpy(dst_ipv6.m.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(dst_ipv6.m)); + memcpy(src_ipv6.v.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, value, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + sizeof(src_ipv6.v)); + memcpy(dst_ipv6.v.in6_u.u6_addr8, + MLX5_ADDR_OF(fte_match_set_lyr_2_4, value, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + sizeof(dst_ipv6.v)); + + if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones))) + trace_seq_printf(p, "src_ipv6=%pI6 ", + &src_ipv6.v); + if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones))) + trace_seq_printf(p, "dst_ipv6=%pI6 ", + &dst_ipv6.v); + } + } + +#define PRINT_MASKED_VAL_L2(type, name, fld, p, format) {\ + MASK_VAL_L2(type, name, fld); \ + PRINT_MASKED_VAL(name, p, format); \ +} + + PRINT_MASKED_VAL_L2(u8, ip_protocol, ip_protocol, p, "%02x"); + PRINT_MASKED_VAL_L2(u16, tcp_flags, tcp_flags, p, "%x"); + PRINT_MASKED_VAL_L2(u16, tcp_sport, tcp_sport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, tcp_dport, tcp_dport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, udp_sport, udp_sport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, udp_dport, udp_dport, p, "%u"); + PRINT_MASKED_VAL_L2(u16, first_vid, first_vid, p, "%04x"); + PRINT_MASKED_VAL_L2(u8, first_prio, first_prio, p, "%x"); + PRINT_MASKED_VAL_L2(u8, first_cfi, first_cfi, p, "%d"); + PRINT_MASKED_VAL_L2(u8, ip_dscp, ip_dscp, p, "%02x"); + PRINT_MASKED_VAL_L2(u8, ip_ecn, ip_ecn, p, "%x"); + PRINT_MASKED_VAL_L2(u8, cvlan_tag, cvlan_tag, p, "%d"); + PRINT_MASKED_VAL_L2(u8, svlan_tag, svlan_tag, p, "%d"); + PRINT_MASKED_VAL_L2(u8, frag, frag, p, "%d"); +} + +static void print_misc_parameters_hdrs(struct trace_seq *p, + const u32 *mask, const u32 *value) +{ +#define MASK_VAL_MISC(type, name, fld) \ + MASK_VAL(type, fte_match_set_misc, name, mask, value, fld) +#define PRINT_MASKED_VAL_MISC(type, name, fld, p, format) {\ + MASK_VAL_MISC(type, name, fld); \ + PRINT_MASKED_VAL(name, p, format); \ +} + DECLARE_MASK_VAL(u64, gre_key) = { + .m = MLX5_GET(fte_match_set_misc, mask, gre_key_h) << 8 | + MLX5_GET(fte_match_set_misc, mask, gre_key_l), + .v = MLX5_GET(fte_match_set_misc, value, gre_key_h) << 8 | + MLX5_GET(fte_match_set_misc, value, gre_key_l)}; + + PRINT_MASKED_VAL(gre_key, p, "%llu"); + PRINT_MASKED_VAL_MISC(u32, source_sqn, source_sqn, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, source_port, source_port, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_prio, outer_second_prio, + p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_cfi, outer_second_cfi, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, outer_second_vid, outer_second_vid, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_prio, inner_second_prio, + p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_cfi, inner_second_cfi, p, "%u"); + PRINT_MASKED_VAL_MISC(u16, inner_second_vid, inner_second_vid, p, "%u"); + + PRINT_MASKED_VAL_MISC(u8, outer_second_cvlan_tag, + outer_second_cvlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_cvlan_tag, + inner_second_cvlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, outer_second_svlan_tag, + outer_second_svlan_tag, p, "%u"); + PRINT_MASKED_VAL_MISC(u8, inner_second_svlan_tag, + inner_second_svlan_tag, p, "%u"); + + PRINT_MASKED_VAL_MISC(u8, gre_protocol, gre_protocol, p, "%u"); + + PRINT_MASKED_VAL_MISC(u32, vxlan_vni, vxlan_vni, p, "%u"); + PRINT_MASKED_VAL_MISC(u32, outer_ipv6_flow_label, outer_ipv6_flow_label, + p, "%x"); + PRINT_MASKED_VAL_MISC(u32, inner_ipv6_flow_label, inner_ipv6_flow_label, + p, "%x"); +} + +const char *parse_fs_hdrs(struct trace_seq *p, + u8 match_criteria_enable, + const u32 *mask_outer, + const u32 *mask_misc, + const u32 *mask_inner, + const u32 *value_outer, + const u32 *value_misc, + const u32 *value_inner) +{ + const char *ret = trace_seq_buffer_ptr(p); + + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) { + trace_seq_printf(p, "[outer] "); + print_lyr_2_4_hdrs(p, mask_outer, value_outer); + } + + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) { + trace_seq_printf(p, "[misc] "); + print_misc_parameters_hdrs(p, mask_misc, value_misc); + } + if (match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) { + trace_seq_printf(p, "[inner] "); + print_lyr_2_4_hdrs(p, mask_inner, value_inner); + } + trace_seq_putc(p, 0); + return ret; +} + +const char *parse_fs_dst(struct trace_seq *p, + const struct mlx5_flow_destination *dst, + u32 counter_id) +{ + const char *ret = trace_seq_buffer_ptr(p); + + switch (dst->type) { + case MLX5_FLOW_DESTINATION_TYPE_VPORT: + trace_seq_printf(p, "vport=%u\n", dst->vport_num); + break; + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + trace_seq_printf(p, "ft=%p\n", dst->ft); + break; + case MLX5_FLOW_DESTINATION_TYPE_TIR: + trace_seq_printf(p, "tir=%u\n", dst->tir_num); + break; + case MLX5_FLOW_DESTINATION_TYPE_COUNTER: + trace_seq_printf(p, "counter_id=%u\n", counter_id); + break; + } + + trace_seq_putc(p, 0); + return ret; +} + +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_fg); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fg); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_set_fte); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_fte); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_add_rule); +EXPORT_TRACEPOINT_SYMBOL(mlx5_fs_del_rule); + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h new file mode 100644 index 000000000000..1e3a6c3e4132 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.h @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#if !defined(_MLX5_FS_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_FS_TP_ + +#include <linux/tracepoint.h> +#include <linux/trace_seq.h> +#include "../fs_core.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#define __parse_fs_hdrs(match_criteria_enable, mouter, mmisc, minner, vouter, \ + vinner, vmisc) \ + parse_fs_hdrs(p, match_criteria_enable, mouter, mmisc, minner, vouter,\ + vinner, vmisc) + +const char *parse_fs_hdrs(struct trace_seq *p, + u8 match_criteria_enable, + const u32 *mask_outer, + const u32 *mask_misc, + const u32 *mask_inner, + const u32 *value_outer, + const u32 *value_misc, + const u32 *value_inner); + +#define __parse_fs_dst(dst, counter_id) \ + parse_fs_dst(p, (const struct mlx5_flow_destination *)dst, counter_id) + +const char *parse_fs_dst(struct trace_seq *p, + const struct mlx5_flow_destination *dst, + u32 counter_id); + +TRACE_EVENT(mlx5_fs_add_fg, + TP_PROTO(const struct mlx5_flow_group *fg), + TP_ARGS(fg), + TP_STRUCT__entry( + __field(const struct mlx5_flow_group *, fg) + __field(const struct mlx5_flow_table *, ft) + __field(u32, start_index) + __field(u32, end_index) + __field(u32, id) + __field(u8, mask_enable) + __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + ), + TP_fast_assign( + __entry->fg = fg; + fs_get_obj(__entry->ft, fg->node.parent); + __entry->start_index = fg->start_index; + __entry->end_index = fg->start_index + fg->max_ftes; + __entry->id = fg->id; + __entry->mask_enable = fg->mask.match_criteria_enable; + memcpy(__entry->mask_outer, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + outer_headers), + sizeof(__entry->mask_outer)); + memcpy(__entry->mask_inner, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + inner_headers), + sizeof(__entry->mask_inner)); + memcpy(__entry->mask_misc, + MLX5_ADDR_OF(fte_match_param, + &fg->mask.match_criteria, + misc_parameters), + sizeof(__entry->mask_misc)); + + ), + TP_printk("fg=%p ft=%p id=%u start=%u end=%u bit_mask=%02x %s\n", + __entry->fg, __entry->ft, __entry->id, + __entry->start_index, __entry->end_index, + __entry->mask_enable, + __parse_fs_hdrs(__entry->mask_enable, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner)) + ); + +TRACE_EVENT(mlx5_fs_del_fg, + TP_PROTO(const struct mlx5_flow_group *fg), + TP_ARGS(fg), + TP_STRUCT__entry( + __field(const struct mlx5_flow_group *, fg) + __field(u32, id) + ), + TP_fast_assign( + __entry->fg = fg; + __entry->id = fg->id; + + ), + TP_printk("fg=%p id=%u\n", + __entry->fg, __entry->id) + ); + +#define ACTION_FLAGS \ + {MLX5_FLOW_CONTEXT_ACTION_ALLOW, "ALLOW"},\ + {MLX5_FLOW_CONTEXT_ACTION_DROP, "DROP"},\ + {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, "FWD"},\ + {MLX5_FLOW_CONTEXT_ACTION_COUNT, "CNT"},\ + {MLX5_FLOW_CONTEXT_ACTION_ENCAP, "ENCAP"},\ + {MLX5_FLOW_CONTEXT_ACTION_DECAP, "DECAP"},\ + {MLX5_FLOW_CONTEXT_ACTION_MOD_HDR, "MOD_HDR"},\ + {MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO, "NEXT_PRIO"} + +TRACE_EVENT(mlx5_fs_set_fte, + TP_PROTO(const struct fs_fte *fte, bool new_fte), + TP_ARGS(fte, new_fte), + TP_STRUCT__entry( + __field(const struct fs_fte *, fte) + __field(const struct mlx5_flow_group *, fg) + __field(u32, group_index) + __field(u32, index) + __field(u32, action) + __field(u32, flow_tag) + __field(u8, mask_enable) + __field(bool, new_fte) + __array(u32, mask_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, mask_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + __array(u32, value_outer, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, value_inner, MLX5_ST_SZ_DW(fte_match_set_lyr_2_4)) + __array(u32, value_misc, MLX5_ST_SZ_DW(fte_match_set_misc)) + ), + TP_fast_assign( + __entry->fte = fte; + __entry->new_fte = new_fte; + fs_get_obj(__entry->fg, fte->node.parent); + __entry->group_index = __entry->fg->id; + __entry->index = fte->index; + __entry->action = fte->action; + __entry->mask_enable = __entry->fg->mask.match_criteria_enable; + __entry->flow_tag = fte->flow_tag; + memcpy(__entry->mask_outer, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + outer_headers), + sizeof(__entry->mask_outer)); + memcpy(__entry->mask_inner, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + inner_headers), + sizeof(__entry->mask_inner)); + memcpy(__entry->mask_misc, + MLX5_ADDR_OF(fte_match_param, + &__entry->fg->mask.match_criteria, + misc_parameters), + sizeof(__entry->mask_misc)); + memcpy(__entry->value_outer, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + outer_headers), + sizeof(__entry->value_outer)); + memcpy(__entry->value_inner, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + inner_headers), + sizeof(__entry->value_inner)); + memcpy(__entry->value_misc, + MLX5_ADDR_OF(fte_match_param, + &fte->val, + misc_parameters), + sizeof(__entry->value_misc)); + + ), + TP_printk("op=%s fte=%p fg=%p index=%u group_index=%u action=<%s> flow_tag=%x %s\n", + __entry->new_fte ? "add" : "set", + __entry->fte, __entry->fg, __entry->index, + __entry->group_index, __print_flags(__entry->action, "|", + ACTION_FLAGS), + __entry->flow_tag, + __parse_fs_hdrs(__entry->mask_enable, + __entry->mask_outer, + __entry->mask_misc, + __entry->mask_inner, + __entry->value_outer, + __entry->value_misc, + __entry->value_inner)) + ); + +TRACE_EVENT(mlx5_fs_del_fte, + TP_PROTO(const struct fs_fte *fte), + TP_ARGS(fte), + TP_STRUCT__entry( + __field(const struct fs_fte *, fte) + __field(u32, index) + ), + TP_fast_assign( + __entry->fte = fte; + __entry->index = fte->index; + + ), + TP_printk("fte=%p index=%u\n", + __entry->fte, __entry->index) + ); + +TRACE_EVENT(mlx5_fs_add_rule, + TP_PROTO(const struct mlx5_flow_rule *rule), + TP_ARGS(rule), + TP_STRUCT__entry( + __field(const struct mlx5_flow_rule *, rule) + __field(const struct fs_fte *, fte) + __field(u32, sw_action) + __field(u32, index) + __field(u32, counter_id) + __array(u8, destination, sizeof(struct mlx5_flow_destination)) + ), + TP_fast_assign( + __entry->rule = rule; + fs_get_obj(__entry->fte, rule->node.parent); + __entry->index = __entry->fte->dests_size - 1; + __entry->sw_action = rule->sw_action; + memcpy(__entry->destination, + &rule->dest_attr, + sizeof(__entry->destination)); + if (rule->dest_attr.type & MLX5_FLOW_DESTINATION_TYPE_COUNTER && + rule->dest_attr.counter) + __entry->counter_id = + rule->dest_attr.counter->id; + ), + TP_printk("rule=%p fte=%p index=%u sw_action=<%s> [dst] %s\n", + __entry->rule, __entry->fte, __entry->index, + __print_flags(__entry->sw_action, "|", ACTION_FLAGS), + __parse_fs_dst(__entry->destination, __entry->counter_id)) + ); + +TRACE_EVENT(mlx5_fs_del_rule, + TP_PROTO(const struct mlx5_flow_rule *rule), + TP_ARGS(rule), + TP_STRUCT__entry( + __field(const struct mlx5_flow_rule *, rule) + __field(const struct fs_fte *, fte) + ), + TP_fast_assign( + __entry->rule = rule; + fs_get_obj(__entry->fte, rule->node.parent); + ), + TP_printk("rule=%p fte=%p\n", + __entry->rule, __entry->fte) + ); +#endif + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ./diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE fs_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 040d1af310b4..cc13d3dbd366 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -291,10 +291,11 @@ struct mlx5e_tstamp { enum { MLX5E_RQ_STATE_ENABLED, - MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, MLX5E_RQ_STATE_AM, }; +#define MLX5E_TEST_BIT(state, nr) (state & BIT(nr)) + struct mlx5e_cq { /* data path - accessed per cqe */ struct mlx5_cqwq wq; @@ -342,7 +343,6 @@ enum { struct mlx5e_sq_wqe_info { u8 opcode; - u8 num_wqebbs; }; struct mlx5e_txqsq { @@ -418,13 +418,8 @@ struct mlx5e_xdpsq { struct mlx5e_icosq { /* data path */ - /* dirtied @completion */ - u16 cc; - /* dirtied @xmit */ u16 pc ____cacheline_aligned_in_smp; - u32 dma_fifo_pc; - u16 prev_cc; struct mlx5e_cq cq; @@ -438,7 +433,6 @@ struct mlx5e_icosq { void __iomem *uar_map; u32 sqn; u16 edge; - struct device *pdev; __be32 mkey_be; unsigned long state; @@ -507,7 +501,7 @@ struct mlx5e_rx_am { /* Adaptive Moderation */ */ #define MLX5E_CACHE_UNIT (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \ MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT) -#define MLX5E_CACHE_SIZE (2 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) +#define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) struct mlx5e_page_cache { u32 head; u32 tail; @@ -516,7 +510,7 @@ struct mlx5e_page_cache { struct mlx5e_rq; typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*); -typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq*, struct mlx5e_rx_wqe*, u16); +typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq); typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16); struct mlx5e_rq { @@ -527,21 +521,26 @@ struct mlx5e_rq { struct { struct mlx5e_wqe_frag_info *frag_info; u32 frag_sz; /* max possible skb frag_sz */ - bool page_reuse; - bool xdp_xmit; + union { + bool page_reuse; + bool xdp_xmit; + }; } wqe; struct { struct mlx5e_mpw_info *info; void *mtt_no_align; + u16 num_strides; + u8 log_stride_sz; + bool umr_in_progress; } mpwqe; }; struct { + u16 headroom; u8 page_order; - u32 wqe_sz; /* wqe data buffer size */ u8 map_dir; /* dma map direction */ } buff; - __be32 mkey_be; + struct mlx5e_channel *channel; struct device *pdev; struct net_device *netdev; struct mlx5e_tstamp *tstamp; @@ -550,12 +549,11 @@ struct mlx5e_rq { struct mlx5e_page_cache page_cache; mlx5e_fp_handle_rx_cqe handle_rx_cqe; - mlx5e_fp_alloc_wqe alloc_wqe; + mlx5e_fp_post_rx_wqes post_wqes; mlx5e_fp_dealloc_wqe dealloc_wqe; unsigned long state; int ix; - u16 rx_headroom; struct mlx5e_rx_am am; /* Adaptive Moderation */ @@ -565,19 +563,13 @@ struct mlx5e_rq { /* control */ struct mlx5_wq_ctrl wq_ctrl; + __be32 mkey_be; u8 wq_type; - u32 mpwqe_stride_sz; - u32 mpwqe_num_strides; u32 rqn; - struct mlx5e_channel *channel; struct mlx5_core_dev *mdev; struct mlx5_core_mkey umr_mkey; } ____cacheline_aligned_in_smp; -enum channel_flags { - MLX5E_CHANNEL_NAPI_SCHED = 1, -}; - struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; @@ -589,7 +581,9 @@ struct mlx5e_channel { struct net_device *netdev; __be32 mkey_be; u8 num_tc; - unsigned long flags; + + /* data path - accessed per napi poll */ + struct irq_desc *irq_desc; /* control */ struct mlx5e_priv *priv; @@ -620,6 +614,12 @@ enum mlx5e_traffic_types { MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY, }; +enum mlx5e_tunnel_types { + MLX5E_TT_IPV4_GRE, + MLX5E_TT_IPV6_GRE, + MLX5E_NUM_TUNNEL_TT, +}; + enum { MLX5E_STATE_ASYNC_EVENTS_ENABLED, MLX5E_STATE_OPENED, @@ -679,6 +679,7 @@ struct mlx5e_l2_table { struct mlx5e_ttc_table { struct mlx5e_flow_table ft; struct mlx5_flow_handle *rules[MLX5E_NUM_TT]; + struct mlx5_flow_handle *tunnel_rules[MLX5E_NUM_TUNNEL_TT]; }; #define ARFS_HASH_SHIFT BITS_PER_BYTE @@ -711,6 +712,7 @@ enum { MLX5E_VLAN_FT_LEVEL = 0, MLX5E_L2_FT_LEVEL, MLX5E_TTC_FT_LEVEL, + MLX5E_INNER_TTC_FT_LEVEL, MLX5E_ARFS_FT_LEVEL }; @@ -736,6 +738,7 @@ struct mlx5e_flow_steering { struct mlx5e_vlan_table vlan; struct mlx5e_l2_table l2; struct mlx5e_ttc_table ttc; + struct mlx5e_ttc_table inner_ttc; struct mlx5e_arfs_tables arfs; }; @@ -769,6 +772,7 @@ struct mlx5e_priv { u32 tisn[MLX5E_MAX_NUM_TC]; struct mlx5e_rqt indir_rqt; struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir inner_indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; u32 tx_rates[MLX5E_MAX_NUM_SQS]; int hard_mtu; @@ -839,11 +843,9 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); -int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq); void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); -void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq); void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); void mlx5e_rx_am(struct mlx5e_rq *rq); @@ -903,7 +905,7 @@ int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, struct mlx5e_redirect_rqt_param rrp); void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, enum mlx5e_traffic_types tt, - void *tirc); + void *tirc, bool inner); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); @@ -922,8 +924,7 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); -void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, - u32 *indirection_rqt, int len, +void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, int num_channels); int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); @@ -932,6 +933,12 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params, u8 rq_type); +static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) +{ + return (MLX5_CAP_ETH(mdev, tunnel_stateless_gre) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version)); +} + static inline struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index f5594014715b..d12e9fc0d76b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -176,7 +176,6 @@ static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) int mlx5e_ethtool_get_sset_count(struct mlx5e_priv *priv, int sset) { - switch (sset) { case ETH_SS_STATS: return NUM_SW_COUNTERS + @@ -207,7 +206,7 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) return mlx5e_ethtool_get_sset_count(priv, sset); } -static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) +static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, u8 *data) { int i, j, tc, prio, idx = 0; unsigned long pfc_combined; @@ -242,10 +241,22 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) strcpy(data + (idx++) * ETH_GSTRING_LEN, pport_phy_statistical_stats_desc[i].format); + for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_eth_ext_stats_desc[i].format); + for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, pcie_perf_stats_desc[i].format); + for (i = 0; i < NUM_PCIE_PERF_COUNTERS64(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stats_desc64[i].format); + + for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pcie_perf_stall_stats_desc[i].format); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) sprintf(data + (idx++) * ETH_GSTRING_LEN, @@ -297,8 +308,7 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) priv->channel_tc2txq[i][tc]); } -void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, - uint32_t stringset, uint8_t *data) +void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, u32 stringset, u8 *data) { int i; @@ -320,8 +330,7 @@ void mlx5e_ethtool_get_strings(struct mlx5e_priv *priv, } } -static void mlx5e_get_strings(struct net_device *dev, - uint32_t stringset, uint8_t *data) +static void mlx5e_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -373,10 +382,22 @@ void mlx5e_ethtool_get_ethtool_stats(struct mlx5e_priv *priv, data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.phy_statistical_counters, pport_phy_statistical_stats_desc, i); + for (i = 0; i < NUM_PPORT_ETH_EXT_COUNTERS(priv); i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.eth_ext_counters, + pport_eth_ext_stats_desc, i); + for (i = 0; i < NUM_PCIE_PERF_COUNTERS(priv); i++) data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, pcie_perf_stats_desc, i); + for (i = 0; i < NUM_PCIE_PERF_COUNTERS64(priv); i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stats_desc64, i); + + for (i = 0; i < NUM_PCIE_PERF_STALL_COUNTERS(priv); i++) + data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters, + pcie_perf_stall_stats_desc, i); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], @@ -642,8 +663,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, new_channels.params = priv->channels.params; new_channels.params.num_channels = count; if (!netif_is_rxfh_configured(priv->netdev)) - mlx5e_build_default_indir_rqt(priv->mdev, - new_channels.params.indirection_rqt, + mlx5e_build_default_indir_rqt(new_channels.params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, count); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { @@ -966,24 +986,27 @@ static u8 get_connector_port(u32 eth_proto, u8 connector_type) if (connector_type && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER) return ptys2connector_type[connector_type]; - if (eth_proto & (MLX5E_PROT_MASK(MLX5E_10GBASE_SR) - | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) - | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) - | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { - return PORT_FIBRE; + if (eth_proto & + (MLX5E_PROT_MASK(MLX5E_10GBASE_SR) | + MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) | + MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) | + MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { + return PORT_FIBRE; } - if (eth_proto & (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) - | MLX5E_PROT_MASK(MLX5E_10GBASE_CR) - | MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) { - return PORT_DA; + if (eth_proto & + (MLX5E_PROT_MASK(MLX5E_40GBASE_CR4) | + MLX5E_PROT_MASK(MLX5E_10GBASE_CR) | + MLX5E_PROT_MASK(MLX5E_100GBASE_CR4))) { + return PORT_DA; } - if (eth_proto & (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) - | MLX5E_PROT_MASK(MLX5E_10GBASE_KR) - | MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) - | MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) { - return PORT_NONE; + if (eth_proto & + (MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) | + MLX5E_PROT_MASK(MLX5E_10GBASE_KR) | + MLX5E_PROT_MASK(MLX5E_40GBASE_KR4) | + MLX5E_PROT_MASK(MLX5E_100GBASE_KR4))) { + return PORT_NONE; } return PORT_OTHER; @@ -1190,9 +1213,18 @@ static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { memset(tirc, 0, ctxlen); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); } + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(tirc, 0, ctxlen); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); + mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in, inlen); + } } static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index dfccb5305e9c..f11fd07ac4dd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -36,6 +36,7 @@ #include <linux/tcp.h> #include <linux/mlx5/fs.h> #include "en.h" +#include "lib/mpfs.h" static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, struct mlx5e_l2_rule *ai, int type); @@ -65,6 +66,7 @@ struct mlx5e_l2_hash_node { struct hlist_node hlist; u8 action; struct mlx5e_l2_rule ai; + bool mpfs; }; static inline int mlx5e_hash_l2(u8 *addr) @@ -362,17 +364,30 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) static void mlx5e_execute_l2_action(struct mlx5e_priv *priv, struct mlx5e_l2_hash_node *hn) { - switch (hn->action) { + u8 action = hn->action; + int l2_err = 0; + + switch (action) { case MLX5E_ACTION_ADD: mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH); + if (!is_multicast_ether_addr(hn->ai.addr)) { + l2_err = mlx5_mpfs_add_mac(priv->mdev, hn->ai.addr); + hn->mpfs = !l2_err; + } hn->action = MLX5E_ACTION_NONE; break; case MLX5E_ACTION_DEL: + if (!is_multicast_ether_addr(hn->ai.addr) && hn->mpfs) + l2_err = mlx5_mpfs_del_mac(priv->mdev, hn->ai.addr); mlx5e_del_l2_flow_rule(priv, &hn->ai); mlx5e_del_l2_from_hash(hn); break; } + + if (l2_err) + netdev_warn(priv->netdev, "MPFS, failed to %s mac %pM, err(%d)\n", + action == MLX5E_ACTION_ADD ? "add" : "del", hn->ai.addr, l2_err); } static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) @@ -593,12 +608,21 @@ static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc) ttc->rules[i] = NULL; } } + + for (i = 0; i < MLX5E_NUM_TUNNEL_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) { + mlx5_del_flow_rules(ttc->tunnel_rules[i]); + ttc->tunnel_rules[i] = NULL; + } + } } -static struct { +struct mlx5e_etype_proto { u16 etype; u8 proto; -} ttc_rules[] = { +}; + +static struct mlx5e_etype_proto ttc_rules[] = { [MLX5E_TT_IPV4_TCP] = { .etype = ETH_P_IP, .proto = IPPROTO_TCP, @@ -645,6 +669,28 @@ static struct { }, }; +static struct mlx5e_etype_proto ttc_tunnel_rules[] = { + [MLX5E_TT_IPV4_GRE] = { + .etype = ETH_P_IP, + .proto = IPPROTO_GRE, + }, + [MLX5E_TT_IPV6_GRE] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_GRE, + }, +}; + +static u8 mlx5e_etype_to_ipv(u16 ethertype) +{ + if (ethertype == ETH_P_IP) + return 4; + + if (ethertype == ETH_P_IPV6) + return 6; + + return 0; +} + static struct mlx5_flow_handle * mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, struct mlx5_flow_table *ft, @@ -652,10 +698,12 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, u16 etype, u8 proto) { + int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; int err = 0; + u8 ipv; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) @@ -666,7 +714,13 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto); } - if (etype) { + + ipv = mlx5e_etype_to_ipv(etype); + if (match_ipv_outer && ipv) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv); + } else if (etype) { spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); @@ -708,6 +762,20 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) goto del_rules; } + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return 0; + + rules = ttc->tunnel_rules; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fs.inner_ttc.ft.t; + for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { + rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, + ttc_tunnel_rules[tt].etype, + ttc_tunnel_rules[tt].proto); + if (IS_ERR(rules[tt])) + goto del_rules; + } + return 0; del_rules: @@ -718,13 +786,23 @@ del_rules: } #define MLX5E_TTC_NUM_GROUPS 3 -#define MLX5E_TTC_GROUP1_SIZE BIT(3) -#define MLX5E_TTC_GROUP2_SIZE BIT(1) -#define MLX5E_TTC_GROUP3_SIZE BIT(0) +#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT) +#define MLX5E_TTC_GROUP2_SIZE BIT(1) +#define MLX5E_TTC_GROUP3_SIZE BIT(0) #define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\ MLX5E_TTC_GROUP2_SIZE +\ MLX5E_TTC_GROUP3_SIZE) -static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc) + +#define MLX5E_INNER_TTC_NUM_GROUPS 3 +#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3) +#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1) +#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0) +#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\ + MLX5E_INNER_TTC_GROUP2_SIZE +\ + MLX5E_INNER_TTC_GROUP3_SIZE) + +static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc, + bool use_ipv) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5e_flow_table *ft = &ttc->ft; @@ -746,7 +824,10 @@ static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc) /* L4 Group */ mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + if (use_ipv) + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version); + else + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_CFG(in, start_flow_index, ix); ix += MLX5E_TTC_GROUP1_SIZE; @@ -787,6 +868,190 @@ err: return err; } +static struct mlx5_flow_handle * +mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv, + struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, + u16 etype, u8 proto) +{ + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + u8 ipv; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + ipv = mlx5e_etype_to_ipv(etype); + if (etype && ipv) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv); + } + + if (proto) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto); + } + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: add rule failed\n", __func__); + } + + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_handle **rules; + struct mlx5e_ttc_table *ttc; + struct mlx5_flow_table *ft; + int err; + int tt; + + ttc = &priv->fs.inner_ttc; + ft = ttc->ft.t; + rules = ttc->rules; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + for (tt = 0; tt < MLX5E_NUM_TT; tt++) { + if (tt == MLX5E_TT_ANY) + dest.tir_num = priv->direct_tir[0].tirn; + else + dest.tir_num = priv->inner_indir_tir[tt].tirn; + + rules[tt] = mlx5e_generate_inner_ttc_rule(priv, ft, &dest, + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rules[tt])) + goto del_rules; + } + + return 0; + +del_rules: + err = PTR_ERR(rules[tt]); + rules[tt] = NULL; + mlx5e_cleanup_ttc_rules(ttc); + return err; +} + +static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_flow_table *ft = &ttc->ft; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_INNER_TTC_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ft->g); + return -ENOMEM; + } + + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_TTC_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* L3 Group */ + MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_TTC_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + /* Any Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_INNER_TTC_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); + + return err; +} + +static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5e_flow_table *ft = &ttc->ft; + int err; + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return 0; + + ft_attr.max_fte = MLX5E_INNER_TTC_TABLE_SIZE; + ft_attr.level = MLX5E_INNER_TTC_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = mlx5e_create_inner_ttc_table_groups(ttc); + if (err) + goto err; + + err = mlx5e_generate_inner_ttc_table_rules(priv); + if (err) + goto err; + + return 0; + +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_ttc_table *ttc = &priv->fs.inner_ttc; + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; + + mlx5e_cleanup_ttc_rules(ttc); + mlx5e_destroy_flow_table(&ttc->ft); +} + void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) { struct mlx5e_ttc_table *ttc = &priv->fs.ttc; @@ -797,6 +1062,7 @@ void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) int mlx5e_create_ttc_table(struct mlx5e_priv *priv) { + bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); struct mlx5e_ttc_table *ttc = &priv->fs.ttc; struct mlx5_flow_table_attr ft_attr = {}; struct mlx5e_flow_table *ft = &ttc->ft; @@ -813,7 +1079,7 @@ int mlx5e_create_ttc_table(struct mlx5e_priv *priv) return err; } - err = mlx5e_create_ttc_table_groups(ttc); + err = mlx5e_create_ttc_table_groups(ttc, match_ipv_outer); if (err) goto err; @@ -1139,11 +1405,18 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } + err = mlx5e_create_inner_ttc_table(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", + err); + goto err_destroy_arfs_tables; + } + err = mlx5e_create_ttc_table(priv); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); - goto err_destroy_arfs_tables; + goto err_destroy_inner_ttc_table; } err = mlx5e_create_l2_table(priv); @@ -1168,6 +1441,8 @@ err_destroy_l2_table: mlx5e_destroy_l2_table(priv); err_destroy_ttc_table: mlx5e_destroy_ttc_table(priv); +err_destroy_inner_ttc_table: + mlx5e_destroy_inner_ttc_table(priv); err_destroy_arfs_tables: mlx5e_arfs_destroy_tables(priv); @@ -1179,6 +1454,7 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) mlx5e_destroy_vlan_table(priv); mlx5e_destroy_l2_table(priv); mlx5e_destroy_ttc_table(priv); + mlx5e_destroy_inner_ttc_table(priv); mlx5e_arfs_destroy_tables(priv); mlx5e_ethtool_cleanup_steering(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d75f3099d164..dfc29720ab77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -213,6 +213,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_cache_full += rq_stats->cache_full; s->rx_cache_empty += rq_stats->cache_empty; s->rx_cache_busy += rq_stats->cache_busy; + s->rx_cache_waive += rq_stats->cache_waive; for (j = 0; j < priv->channels.params.num_tc; j++) { sq_stats = &c->sq[j].stats; @@ -293,6 +294,12 @@ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv, bool full) mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); } + if (MLX5_CAP_PCAM_FEATURE(mdev, rx_buffer_fullness_counters)) { + out = pstats->eth_ext_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + } + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { out = pstats->per_prio_counters[prio]; @@ -593,12 +600,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, } rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; - rq->rx_headroom = params->rq_headroom; + rq->buff.headroom = params->rq_headroom; switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; + rq->post_wqes = mlx5e_post_rx_mpwqes; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe; @@ -615,11 +622,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->mpwqe_stride_sz = BIT(params->mpwqe_log_stride_sz); - rq->mpwqe_num_strides = BIT(params->mpwqe_log_num_strides); + rq->mpwqe.log_stride_sz = params->mpwqe_log_stride_sz; + rq->mpwqe.num_strides = BIT(params->mpwqe_log_num_strides); - rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; - byte_count = rq->buff.wqe_sz; + byte_count = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; err = mlx5e_create_rq_umr_mkey(mdev, rq); if (err) @@ -639,7 +645,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, err = -ENOMEM; goto err_rq_wq_destroy; } - rq->alloc_wqe = mlx5e_alloc_rx_wqe; + rq->post_wqes = mlx5e_post_rx_wqes; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; #ifdef CONFIG_MLX5_EN_IPSEC @@ -655,18 +661,17 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->buff.wqe_sz = params->lro_en ? + byte_count = params->lro_en ? params->lro_wqe_sz : MLX5E_SW2HW_MTU(c->priv, c->netdev->mtu); #ifdef CONFIG_MLX5_EN_IPSEC if (MLX5_IPSEC_DEV(mdev)) - rq->buff.wqe_sz += MLX5E_METADATA_ETHER_LEN; + byte_count += MLX5E_METADATA_ETHER_LEN; #endif rq->wqe.page_reuse = !params->xdp_prog && !params->lro_en; - byte_count = rq->buff.wqe_sz; /* calc the required page order */ - rq->wqe.frag_sz = MLX5_SKB_FRAG_SZ(rq->rx_headroom + byte_count); + rq->wqe.frag_sz = MLX5_SKB_FRAG_SZ(rq->buff.headroom + byte_count); npages = DIV_ROUND_UP(rq->wqe.frag_sz, PAGE_SIZE); rq->buff.page_order = order_base_2(npages); @@ -677,6 +682,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, i) << PAGE_SHIFT; + + wqe->data.addr = cpu_to_be64(dma_offset); + } + wqe->data.byte_count = cpu_to_be32(byte_count); wqe->data.lkey = rq->mkey_be; } @@ -883,7 +894,8 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) u16 wqe_ix; /* UMR WQE (if in progress) is always at wq->head */ - if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + rq->mpwqe.umr_in_progress) mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); while (!mlx5_wq_ll_is_empty(wq)) { @@ -926,7 +938,7 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, goto err_destroy_rq; if (params->rx_am_enabled) - set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); + c->rq.state |= BIT(MLX5E_RQ_STATE_AM); return 0; @@ -946,7 +958,6 @@ static void mlx5e_activate_rq(struct mlx5e_rq *rq) set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; - sq->db.ico_wqe[pi].num_wqebbs = 1; nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &nopwqe->ctrl); } @@ -1048,7 +1059,6 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = c->mdev; int err; - sq->pdev = c->pdev; sq->mkey_be = c->mkey_be; sq->channel = c; sq->uar_map = mdev->mlx5e_res.bfreg.map; @@ -1752,7 +1762,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_cq_moder icocq_moder = {0, 0}; struct net_device *netdev = priv->netdev; struct mlx5e_channel *c; + unsigned int irq; int err; + int eqn; c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix)); if (!c) @@ -1768,6 +1780,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->num_tc = params->num_tc; c->xdp = !!params->xdp_prog; + mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); + c->irq_desc = irq_to_desc(irq); + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq); @@ -2345,9 +2360,10 @@ static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc) void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, enum mlx5e_traffic_types tt, - void *tirc) + void *tirc, bool inner) { - void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); + void *hfso = inner ? MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_inner) : + MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); #define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) @@ -2496,6 +2512,21 @@ free_in: return err; } +static void mlx5e_build_inner_indir_tir_ctx(struct mlx5e_priv *priv, + enum mlx5e_traffic_types tt, + u32 *tirc) +{ + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); + + mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); + + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); + MLX5_SET(tirc, tirc, tunneled_offload_en, 0x1); + + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, true); +} + static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu) { struct mlx5_core_dev *mdev = priv->mdev; @@ -2583,12 +2614,6 @@ static void mlx5e_build_channels_tx_maps(struct mlx5e_priv *priv) } } -static bool mlx5e_is_eswitch_vport_mngr(struct mlx5_core_dev *mdev) -{ - return (MLX5_CAP_GEN(mdev, vport_group_manager) && - MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH); -} - void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) { int num_txqs = priv->channels.num * priv->channels.params.num_tc; @@ -2602,7 +2627,7 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_activate_channels(&priv->channels); netif_tx_start_all_queues(priv->netdev); - if (mlx5e_is_eswitch_vport_mngr(priv->mdev)) + if (MLX5_VPORT_MANAGER(priv->mdev)) mlx5e_add_sqs_fwd_rules(priv); mlx5e_wait_channels_min_rx_wqes(&priv->channels); @@ -2613,7 +2638,7 @@ void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { mlx5e_redirect_rqts_to_drop(priv); - if (mlx5e_is_eswitch_vport_mngr(priv->mdev)) + if (MLX5_VPORT_MANAGER(priv->mdev)) mlx5e_remove_sqs_fwd_rules(priv); /* FIXME: This is a W/A only for tx timeout watch dog false alarm when @@ -2690,6 +2715,8 @@ int mlx5e_open(struct net_device *netdev) mutex_lock(&priv->state_lock); err = mlx5e_open_locked(netdev); + if (!err) + mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_UP); mutex_unlock(&priv->state_lock); return err; @@ -2724,6 +2751,7 @@ int mlx5e_close(struct net_device *netdev) return -ENODEV; mutex_lock(&priv->state_lock); + mlx5_set_port_admin_status(priv->mdev, MLX5_PORT_DOWN); err = mlx5e_close_locked(netdev); mutex_unlock(&priv->state_lock); @@ -2864,7 +2892,7 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); - mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc, false); } static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) @@ -2883,6 +2911,7 @@ int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) struct mlx5e_tir *tir; void *tirc; int inlen; + int i = 0; int err; u32 *in; int tt; @@ -2898,16 +2927,36 @@ int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); mlx5e_build_indir_tir_ctx(priv, tt, tirc); err = mlx5e_create_tir(priv->mdev, tir, in, inlen); - if (err) - goto err_destroy_tirs; + if (err) { + mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err); + goto err_destroy_inner_tirs; + } } + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + goto out; + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) { + memset(in, 0, inlen); + tir = &priv->inner_indir_tir[i]; + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + mlx5e_build_inner_indir_tir_ctx(priv, i, tirc); + err = mlx5e_create_tir(priv->mdev, tir, in, inlen); + if (err) { + mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err); + goto err_destroy_inner_tirs; + } + } + +out: kvfree(in); return 0; -err_destroy_tirs: - mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err); +err_destroy_inner_tirs: + for (i--; i >= 0; i--) + mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); + for (tt--; tt >= 0; tt--) mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]); @@ -2961,6 +3010,12 @@ void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]); + + if (!mlx5e_tunnel_inner_ft_supported(priv->mdev)) + return; + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) + mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]); } void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) @@ -3000,12 +3055,16 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) return 0; } -static int mlx5e_setup_tc(struct net_device *netdev, u8 tc) +static int mlx5e_setup_tc_mqprio(struct net_device *netdev, + struct tc_mqprio_qopt *mqprio) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_channels new_channels = {}; + u8 tc = mqprio->num_tc; int err = 0; + mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + if (tc && tc != MLX5E_MAX_NUM_TC) return -EINVAL; @@ -3029,39 +3088,42 @@ out: return err; } -static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle, - u32 chain_index, __be16 proto, - struct tc_to_netdev *tc) +#ifdef CONFIG_MLX5_ESWITCH +static int mlx5e_setup_tc_cls_flower(struct net_device *dev, + struct tc_cls_flower_offload *cls_flower) { struct mlx5e_priv *priv = netdev_priv(dev); - if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS)) - goto mqprio; + if (!is_classid_clsact_ingress(cls_flower->common.classid) || + cls_flower->common.chain_index) + return -EOPNOTSUPP; - if (chain_index) + switch (cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return mlx5e_configure_flower(priv, cls_flower); + case TC_CLSFLOWER_DESTROY: + return mlx5e_delete_flower(priv, cls_flower); + case TC_CLSFLOWER_STATS: + return mlx5e_stats_flower(priv, cls_flower); + default: return -EOPNOTSUPP; + } +} +#endif - switch (tc->type) { +static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { +#ifdef CONFIG_MLX5_ESWITCH case TC_SETUP_CLSFLOWER: - switch (tc->cls_flower->command) { - case TC_CLSFLOWER_REPLACE: - return mlx5e_configure_flower(priv, proto, tc->cls_flower); - case TC_CLSFLOWER_DESTROY: - return mlx5e_delete_flower(priv, tc->cls_flower); - case TC_CLSFLOWER_STATS: - return mlx5e_stats_flower(priv, tc->cls_flower); - } + return mlx5e_setup_tc_cls_flower(dev, type_data); +#endif + case TC_SETUP_MQPRIO: + return mlx5e_setup_tc_mqprio(dev, type_data); default: return -EOPNOTSUPP; } - -mqprio: - if (tc->type != TC_SETUP_MQPRIO) - return -EINVAL; - - tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; - - return mlx5e_setup_tc(dev, tc->mqprio->num_tc); } static void @@ -3358,6 +3420,7 @@ static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } } +#ifdef CONFIG_MLX5_ESWITCH static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -3460,6 +3523,7 @@ static int mlx5e_get_vf_stats(struct net_device *dev, return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1, vf_stats); } +#endif static void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti) @@ -3489,13 +3553,13 @@ static void mlx5e_del_vxlan_port(struct net_device *netdev, mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 0); } -static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv, - struct sk_buff *skb, - netdev_features_t features) +static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, + struct sk_buff *skb, + netdev_features_t features) { struct udphdr *udph; - u16 proto; - u16 port = 0; + u8 proto; + u16 port; switch (vlan_get_protocol(skb)) { case htons(ETH_P_IP): @@ -3508,14 +3572,17 @@ static netdev_features_t mlx5e_vxlan_features_check(struct mlx5e_priv *priv, goto out; } - if (proto == IPPROTO_UDP) { + switch (proto) { + case IPPROTO_GRE: + return features; + case IPPROTO_UDP: udph = udp_hdr(skb); port = be16_to_cpu(udph->dest); - } - /* Verify if UDP port is being offloaded by HW */ - if (port && mlx5e_vxlan_lookup_port(priv, port)) - return features; + /* Verify if UDP port is being offloaded by HW */ + if (mlx5e_vxlan_lookup_port(priv, port)) + return features; + } out: /* Disable CSUM and GSO if the udp dport is not offloaded by HW */ @@ -3539,7 +3606,7 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, /* Validate if the tunneled packet is being offloaded by HW */ if (skb->encapsulation && (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK)) - return mlx5e_vxlan_features_check(priv, skb, features); + return mlx5e_tunnel_features_check(priv, skb, features); return features; } @@ -3636,7 +3703,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); /* napi_schedule in case we have missed anything */ - set_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); napi_schedule(&c->napi); if (old_prog) @@ -3693,11 +3759,11 @@ static void mlx5e_netpoll(struct net_device *dev) } #endif -static const struct net_device_ops mlx5e_netdev_ops_basic = { +static const struct net_device_ops mlx5e_netdev_ops = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, .ndo_start_xmit = mlx5e_xmit, - .ndo_setup_tc = mlx5e_ndo_setup_tc, + .ndo_setup_tc = mlx5e_setup_tc, .ndo_select_queue = mlx5e_select_queue, .ndo_get_stats64 = mlx5e_get_stats, .ndo_set_rx_mode = mlx5e_set_rx_mode, @@ -3708,6 +3774,9 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_change_mtu = mlx5e_change_mtu, .ndo_do_ioctl = mlx5e_ioctl, .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, + .ndo_udp_tunnel_add = mlx5e_add_vxlan_port, + .ndo_udp_tunnel_del = mlx5e_del_vxlan_port, + .ndo_features_check = mlx5e_features_check, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif @@ -3716,29 +3785,8 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = mlx5e_netpoll, #endif -}; - -static const struct net_device_ops mlx5e_netdev_ops_sriov = { - .ndo_open = mlx5e_open, - .ndo_stop = mlx5e_close, - .ndo_start_xmit = mlx5e_xmit, - .ndo_setup_tc = mlx5e_ndo_setup_tc, - .ndo_select_queue = mlx5e_select_queue, - .ndo_get_stats64 = mlx5e_get_stats, - .ndo_set_rx_mode = mlx5e_set_rx_mode, - .ndo_set_mac_address = mlx5e_set_mac, - .ndo_vlan_rx_add_vid = mlx5e_vlan_rx_add_vid, - .ndo_vlan_rx_kill_vid = mlx5e_vlan_rx_kill_vid, - .ndo_set_features = mlx5e_set_features, - .ndo_change_mtu = mlx5e_change_mtu, - .ndo_do_ioctl = mlx5e_ioctl, - .ndo_udp_tunnel_add = mlx5e_add_vxlan_port, - .ndo_udp_tunnel_del = mlx5e_del_vxlan_port, - .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, - .ndo_features_check = mlx5e_features_check, -#ifdef CONFIG_RFS_ACCEL - .ndo_rx_flow_steer = mlx5e_rx_flow_steer, -#endif +#ifdef CONFIG_MLX5_ESWITCH + /* SRIOV E-Switch NDOs */ .ndo_set_vf_mac = mlx5e_set_vf_mac, .ndo_set_vf_vlan = mlx5e_set_vf_vlan, .ndo_set_vf_spoofchk = mlx5e_set_vf_spoofchk, @@ -3747,13 +3795,9 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, - .ndo_tx_timeout = mlx5e_tx_timeout, - .ndo_xdp = mlx5e_xdp, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = mlx5e_netpoll, -#endif .ndo_has_offload_stats = mlx5e_has_offload_stats, .ndo_get_offload_stats = mlx5e_get_offload_stats, +#endif }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) @@ -3790,8 +3834,7 @@ u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev) 2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/; } -void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, - u32 *indirection_rqt, int len, +void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, int num_channels) { int i; @@ -3934,7 +3977,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, /* RSS */ params->rss_hfunc = ETH_RSS_HASH_XOR; netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key)); - mlx5e_build_default_indir_rqt(mdev, params->indirection_rqt, + mlx5e_build_default_indir_rqt(params->indirection_rqt, MLX5E_INDIR_RQT_SIZE, max_channels); } @@ -3973,9 +4016,11 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) } } +#if IS_ENABLED(CONFIG_NET_SWITCHDEV) && IS_ENABLED(CONFIG_MLX5_ESWITCH) static const struct switchdev_ops mlx5e_switchdev_ops = { .switchdev_port_attr_get = mlx5e_attr_get, }; +#endif static void mlx5e_build_nic_netdev(struct net_device *netdev) { @@ -3986,15 +4031,12 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) SET_NETDEV_DEV(netdev, &mdev->pdev->dev); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) { - netdev->netdev_ops = &mlx5e_netdev_ops_sriov; + netdev->netdev_ops = &mlx5e_netdev_ops; + #ifdef CONFIG_MLX5_CORE_EN_DCB - if (MLX5_CAP_GEN(mdev, qos)) - netdev->dcbnl_ops = &mlx5e_dcbnl_ops; + if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos)) + netdev->dcbnl_ops = &mlx5e_dcbnl_ops; #endif - } else { - netdev->netdev_ops = &mlx5e_netdev_ops_basic; - } netdev->watchdog_timeo = 15 * HZ; @@ -4017,20 +4059,32 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX; netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; - if (mlx5e_vxlan_allowed(mdev)) { - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_PARTIAL; + if (mlx5e_vxlan_allowed(mdev) || MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { + netdev->hw_features |= NETIF_F_GSO_PARTIAL; netdev->hw_enc_features |= NETIF_F_IP_CSUM; netdev->hw_enc_features |= NETIF_F_IPV6_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; netdev->hw_enc_features |= NETIF_F_TSO6; - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM | - NETIF_F_GSO_PARTIAL; + netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL; + } + + if (mlx5e_vxlan_allowed(mdev)) { + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; } + if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre)) { + netdev->hw_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + netdev->gso_partial_features |= NETIF_F_GSO_GRE | + NETIF_F_GSO_GRE_CSUM; + } + mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled); if (fcs_supported) @@ -4066,8 +4120,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) mlx5e_set_netdev_dev_addr(netdev); -#ifdef CONFIG_NET_SWITCHDEV - if (MLX5_CAP_GEN(mdev, vport_group_manager)) +#if IS_ENABLED(CONFIG_NET_SWITCHDEV) && IS_ENABLED(CONFIG_MLX5_ESWITCH) + if (MLX5_VPORT_MANAGER(mdev)) netdev->switchdev_ops = &mlx5e_switchdev_ops; #endif @@ -4199,6 +4253,10 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5e_init_l2_addr(priv); + /* Marking the link as currently not needed by the Driver */ + if (!netif_running(netdev)) + mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN); + /* MTU range: 68 - hw-specific max */ netdev->min_mtu = ETH_MIN_MTU; mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); @@ -4209,7 +4267,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5e_enable_async_events(priv); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) + if (MLX5_VPORT_MANAGER(priv->mdev)) mlx5e_register_vport_reps(priv); if (netdev->reg_state != NETREG_REGISTERED) @@ -4243,7 +4301,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->set_rx_mode_work); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) + if (MLX5_VPORT_MANAGER(priv->mdev)) mlx5e_unregister_vport_reps(priv); mlx5e_disable_async_events(priv); @@ -4416,32 +4474,29 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) static void *mlx5e_add(struct mlx5_core_dev *mdev) { - struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - struct mlx5e_rep_priv *rpriv = NULL; + struct net_device *netdev; + void *rpriv = NULL; void *priv; - int vport; int err; - struct net_device *netdev; err = mlx5e_check_required_hca_cap(mdev); if (err) return NULL; - if (MLX5_CAP_GEN(mdev, vport_group_manager)) { - rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); +#ifdef CONFIG_MLX5_ESWITCH + if (MLX5_VPORT_MANAGER(mdev)) { + rpriv = mlx5e_alloc_nic_rep_priv(mdev); if (!rpriv) { - mlx5_core_warn(mdev, - "Not creating net device, Failed to alloc rep priv data\n"); + mlx5_core_warn(mdev, "Failed to alloc NIC rep priv data\n"); return NULL; } - rpriv->rep = &esw->offloads.vport_reps[0]; } +#endif netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, rpriv); if (!netdev) { mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); - goto err_unregister_reps; + goto err_free_rpriv; } priv = netdev_priv(netdev); @@ -4462,14 +4517,9 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) err_detach: mlx5e_detach(mdev, priv); - err_destroy_netdev: mlx5e_destroy_netdev(priv); - -err_unregister_reps: - for (vport = 1; vport < total_vfs; vport++) - mlx5_eswitch_unregister_vport_rep(esw, vport); - +err_free_rpriv: kfree(rpriv); return NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 45e60be9c277..45e03c427faf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -613,15 +613,18 @@ static int mlx5e_rep_open(struct net_device *dev) struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int err; - err = mlx5e_open(dev); + mutex_lock(&priv->state_lock); + err = mlx5e_open_locked(dev); if (err) - return err; + goto unlock; - err = mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_UP); - if (!err) + if (!mlx5_eswitch_set_vport_state(esw, rep->vport, + MLX5_ESW_VPORT_ADMIN_STATE_UP)) netif_carrier_on(dev); - return 0; +unlock: + mutex_unlock(&priv->state_lock); + return err; } static int mlx5e_rep_close(struct net_device *dev) @@ -630,10 +633,13 @@ static int mlx5e_rep_close(struct net_device *dev) struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int ret; + mutex_lock(&priv->state_lock); (void)mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN); - - return mlx5e_close(dev); + ret = mlx5e_close_locked(dev); + mutex_unlock(&priv->state_lock); + return ret; } static int mlx5e_rep_get_phys_port_name(struct net_device *dev, @@ -651,37 +657,42 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, return 0; } -static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle, - u32 chain_index, __be16 proto, - struct tc_to_netdev *tc) +static int +mlx5e_rep_setup_tc_cls_flower(struct net_device *dev, + struct tc_cls_flower_offload *cls_flower) { struct mlx5e_priv *priv = netdev_priv(dev); - if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS)) + if (!is_classid_clsact_ingress(cls_flower->common.classid) || + cls_flower->common.chain_index) return -EOPNOTSUPP; - if (tc->egress_dev) { + if (cls_flower->egress_dev) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct net_device *uplink_dev = mlx5_eswitch_get_uplink_netdev(esw); - return uplink_dev->netdev_ops->ndo_setup_tc(uplink_dev, handle, - chain_index, - proto, tc); + dev = mlx5_eswitch_get_uplink_netdev(esw); + return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_CLSFLOWER, + cls_flower); } - if (chain_index) + switch (cls_flower->command) { + case TC_CLSFLOWER_REPLACE: + return mlx5e_configure_flower(priv, cls_flower); + case TC_CLSFLOWER_DESTROY: + return mlx5e_delete_flower(priv, cls_flower); + case TC_CLSFLOWER_STATS: + return mlx5e_stats_flower(priv, cls_flower); + default: return -EOPNOTSUPP; + } +} - switch (tc->type) { +static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { case TC_SETUP_CLSFLOWER: - switch (tc->cls_flower->command) { - case TC_CLSFLOWER_REPLACE: - return mlx5e_configure_flower(priv, proto, tc->cls_flower); - case TC_CLSFLOWER_DESTROY: - return mlx5e_delete_flower(priv, tc->cls_flower); - case TC_CLSFLOWER_STATS: - return mlx5e_stats_flower(priv, tc->cls_flower); - } + return mlx5e_rep_setup_tc_cls_flower(dev, type_data); default: return -EOPNOTSUPP; } @@ -773,7 +784,7 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { .ndo_stop = mlx5e_rep_close, .ndo_start_xmit = mlx5e_xmit, .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, - .ndo_setup_tc = mlx5e_rep_ndo_setup_tc, + .ndo_setup_tc = mlx5e_rep_setup_tc, .ndo_get_stats64 = mlx5e_rep_get_stats, .ndo_has_offload_stats = mlx5e_has_offload_stats, .ndo_get_offload_stats = mlx5e_get_offload_stats, @@ -913,7 +924,7 @@ static int mlx5e_get_rep_max_num_channels(struct mlx5_core_dev *mdev) return MLX5E_PORT_REPRESENTOR_NCH; } -static struct mlx5e_profile mlx5e_rep_profile = { +static const struct mlx5e_profile mlx5e_rep_profile = { .init = mlx5e_init_rep, .init_rx = mlx5e_init_rep_rx, .cleanup_rx = mlx5e_cleanup_rep_rx, @@ -1099,3 +1110,16 @@ void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */ mlx5_eswitch_unregister_vport_rep(esw, 0); /* UPLINK PF*/ } + +void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv; + + rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); + if (!rpriv) + return NULL; + + rpriv->rep = &esw->offloads.vport_reps[0]; + return rpriv; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index a0a1a7a1d6c0..5659ed9f51e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -38,6 +38,7 @@ #include "eswitch.h" #include "en.h" +#ifdef CONFIG_MLX5_ESWITCH struct mlx5e_neigh_update_table { struct rhashtable neigh_ht; /* Save the neigh hash entries in a list in addition to the hash table @@ -123,6 +124,7 @@ struct mlx5e_encap_entry { int encap_size; }; +void *mlx5e_alloc_nic_rep_priv(struct mlx5_core_dev *mdev); void mlx5e_register_vport_reps(struct mlx5e_priv *priv); void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv); bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); @@ -141,5 +143,12 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e); void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); +#else /* CONFIG_MLX5_ESWITCH */ +static inline void mlx5e_register_vport_reps(struct mlx5e_priv *priv) {} +static inline void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) {} +static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; } +static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) {} +#endif #endif /* __MLX5E_REP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 7344433259fc..f1dd638384d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -163,7 +163,7 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, static inline bool mlx5e_page_is_reserved(struct page *page) { - return page_is_pfmemalloc(page) || page_to_nid(page) != numa_node_id(); + return page_is_pfmemalloc(page) || page_to_nid(page) != numa_mem_id(); } static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, @@ -177,8 +177,10 @@ static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, return false; } - if (unlikely(page_is_pfmemalloc(dma_info->page))) + if (unlikely(mlx5e_page_is_reserved(dma_info->page))) { + rq->stats.cache_waive++; return false; + } cache->page_cache[cache->tail] = *dma_info; cache->tail = tail_next; @@ -252,7 +254,7 @@ static inline bool mlx5e_page_reuse(struct mlx5e_rq *rq, !mlx5e_page_is_reserved(wi->di.page); } -int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { struct mlx5e_wqe_frag_info *wi = &rq->wqe.frag_info[ix]; @@ -263,8 +265,7 @@ int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) wi->offset = 0; } - wqe->data.addr = cpu_to_be64(wi->di.addr + wi->offset + - rq->rx_headroom); + wqe->data.addr = cpu_to_be64(wi->di.addr + wi->offset + rq->buff.headroom); return 0; } @@ -296,7 +297,7 @@ void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) { - return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER; + return rq->mpwqe.num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER; } static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq, @@ -305,7 +306,7 @@ static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq, u32 page_idx, u32 frag_offset, u32 len) { - unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz); + unsigned int truesize = ALIGN(len, BIT(rq->mpwqe.log_stride_sz)); dma_sync_single_for_cpu(rq->pdev, wi->umr.dma_info[page_idx].addr + frag_offset, @@ -358,7 +359,6 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) /* fill sq edge with nops to avoid wqe wrap around */ while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; - sq->db.ico_wqe[pi].num_wqebbs = 1; mlx5e_post_nop(wq, sq->sqn, &sq->pc); } @@ -369,41 +369,35 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) MLX5_OPCODE_UMR); sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; - sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs; sq->pc += num_wqebbs; mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl); } static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_rx_wqe *wqe, u16 ix) { struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; - u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT; int pg_strides = mlx5e_mpwqe_strides_per_page(rq); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0]; int err; int i; - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; - + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) { err = mlx5e_page_alloc_mapped(rq, dma_info); if (unlikely(err)) goto err_unmap; wi->umr.mtt[i] = cpu_to_be64(dma_info->addr | MLX5_EN_WR); page_ref_add(dma_info->page, pg_strides); - wi->skbs_frags[i] = 0; } + memset(wi->skbs_frags, 0, sizeof(*wi->skbs_frags) * MLX5_MPWRQ_PAGES_PER_WQE); wi->consumed_strides = 0; - wqe->data.addr = cpu_to_be64(dma_offset); return 0; err_unmap: while (--i >= 0) { - struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; - + dma_info--; page_ref_sub(dma_info->page, pg_strides); mlx5e_page_release(rq, dma_info, true); } @@ -414,27 +408,21 @@ err_unmap: void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) { int pg_strides = mlx5e_mpwqe_strides_per_page(rq); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0]; int i; - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; - + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) { page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]); mlx5e_page_release(rq, dma_info, true); } } -void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) +static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); - clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); - - if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) { - mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); - return; - } + rq->mpwqe.umr_in_progress = false; mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); @@ -444,16 +432,18 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) mlx5_wq_ll_update_db_record(wq); } -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { int err; - err = mlx5e_alloc_rx_umr_mpwqe(rq, wqe, ix); - if (unlikely(err)) + err = mlx5e_alloc_rx_umr_mpwqe(rq, ix); + if (unlikely(err)) { + rq->stats.buff_alloc_err++; return err; - set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + } + rq->mpwqe.umr_in_progress = true; mlx5e_post_umr_wqe(rq, ix); - return -EBUSY; + return 0; } void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) @@ -463,94 +453,150 @@ void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) mlx5e_free_rx_mpwqe(rq, wi); } -#define RQ_CANNOT_POST(rq) \ - (!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state) || \ - test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) - bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; + int err; - if (unlikely(RQ_CANNOT_POST(rq))) + if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) return false; - while (!mlx5_wq_ll_is_full(wq)) { + if (mlx5_wq_ll_is_full(wq)) + return false; + + do { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); - int err; - err = rq->alloc_wqe(rq, wqe, wq->head); - if (err == -EBUSY) - return true; + err = mlx5e_alloc_rx_wqe(rq, wqe, wq->head); if (unlikely(err)) { rq->stats.buff_alloc_err++; break; } mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); - } + } while (!mlx5_wq_ll_is_full(wq)); /* ensure wqes are visible to device before updating doorbell record */ dma_wmb(); mlx5_wq_ll_update_db_record(wq); - return !mlx5_wq_ll_is_full(wq); + return !!err; +} + +static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, + struct mlx5e_icosq *sq, + struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; + struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; + + mlx5_cqwq_pop(&cq->wq); + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { + WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", + cqe->op_own); + return; + } + + if (likely(icowi->opcode == MLX5_OPCODE_UMR)) { + mlx5e_post_rx_mpwqe(rq); + return; + } + + if (unlikely(icowi->opcode != MLX5_OPCODE_NOP)) + WARN_ONCE(true, + "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", + icowi->opcode); +} + +static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) +{ + struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); + struct mlx5_cqe64 *cqe; + + if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED))) + return; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (likely(!cqe)) + return; + + /* by design, there's only a single cqe */ + mlx5e_poll_ico_single_cqe(cq, sq, rq, cqe); + + mlx5_cqwq_update_db_record(&cq->wq); +} + +bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = &rq->wq; + + if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) + return false; + + mlx5e_poll_ico_cq(&rq->channel->icosq.cq, rq); + + if (mlx5_wq_ll_is_full(wq)) + return false; + + if (!rq->mpwqe.umr_in_progress) + mlx5e_alloc_rx_mpwqe(rq, wq->head); + + return true; } static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { struct ethhdr *eth = (struct ethhdr *)(skb->data); - struct iphdr *ipv4; - struct ipv6hdr *ipv6; struct tcphdr *tcp; int network_depth = 0; __be16 proto; u16 tot_len; + void *ip_p; u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); - int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) || - (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type)); + u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) || + (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA); skb->mac_len = ETH_HLEN; proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth); - ipv4 = (struct iphdr *)(skb->data + network_depth); - ipv6 = (struct ipv6hdr *)(skb->data + network_depth); tot_len = cqe_bcnt - network_depth; + ip_p = skb->data + network_depth; if (proto == htons(ETH_P_IP)) { - tcp = (struct tcphdr *)(skb->data + network_depth + - sizeof(struct iphdr)); - ipv6 = NULL; - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; - } else { - tcp = (struct tcphdr *)(skb->data + network_depth + - sizeof(struct ipv6hdr)); - ipv4 = NULL; - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; - } - - if (get_cqe_lro_tcppsh(cqe)) - tcp->psh = 1; + struct iphdr *ipv4 = ip_p; - if (tcp_ack) { - tcp->ack = 1; - tcp->ack_seq = cqe->lro_ack_seq_num; - tcp->window = cqe->lro_tcp_win; - } + tcp = ip_p + sizeof(struct iphdr); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; - if (ipv4) { ipv4->ttl = cqe->lro_min_ttl; ipv4->tot_len = cpu_to_be16(tot_len); ipv4->check = 0; ipv4->check = ip_fast_csum((unsigned char *)ipv4, ipv4->ihl); } else { + struct ipv6hdr *ipv6 = ip_p; + + tcp = ip_p + sizeof(struct ipv6hdr); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + ipv6->hop_limit = cqe->lro_min_ttl; ipv6->payload_len = cpu_to_be16(tot_len - sizeof(struct ipv6hdr)); } + + tcp->psh = get_cqe_lro_tcppsh(cqe); + + if (tcp_ack) { + tcp->ack = 1; + tcp->ack_seq = cqe->lro_ack_seq_num; + tcp->window = cqe->lro_tcp_win; + } } static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe, @@ -776,9 +822,9 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt) { struct mlx5e_dma_info *di = &wi->di; + u16 rx_headroom = rq->buff.headroom; struct sk_buff *skb; void *va, *data; - u16 rx_headroom = rq->rx_headroom; bool consumed; u32 frag_size; @@ -857,6 +903,7 @@ wq_ll_pop: &wqe->next.next_wqe_index); } +#ifdef CONFIG_MLX5_ESWITCH void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { struct net_device *netdev = rq->netdev; @@ -901,6 +948,7 @@ wq_ll_pop: mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, &wqe->next.next_wqe_index); } +#endif static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, @@ -909,7 +957,7 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb) { u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); - u32 wqe_offset = stride_ix * rq->mpwqe_stride_sz; + u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; u32 head_offset = wqe_offset & (PAGE_SIZE - 1); u32 page_idx = wqe_offset >> PAGE_SHIFT; u32 head_page_idx = page_idx; @@ -977,7 +1025,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) napi_gro_receive(rq->cq.napi, skb); mpwrq_cqe_out: - if (likely(wi->consumed_strides < rq->mpwqe_num_strides)) + if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) return; mlx5e_free_rx_mpwqe(rq, wi); @@ -987,21 +1035,23 @@ mpwrq_cqe_out: int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); - struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; + struct mlx5e_xdpsq *xdpsq; + struct mlx5_cqe64 *cqe; int work_done = 0; - if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) + if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) return 0; if (cq->decmprs_left) work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget); - for (; work_done < budget; work_done++) { - struct mlx5_cqe64 *cqe = mlx5_cqwq_get_cqe(&cq->wq); + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) + return 0; - if (!cqe) - break; + xdpsq = &rq->xdpsq; + do { if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) { work_done += mlx5e_decompress_cqes_start(rq, cq, @@ -1012,7 +1062,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) mlx5_cqwq_pop(&cq->wq); rq->handle_rx_cqe(rq, cqe); - } + } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); if (xdpsq->db.doorbell) { mlx5e_xmit_xdp_doorbell(xdpsq); @@ -1030,13 +1080,18 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) { struct mlx5e_xdpsq *sq; + struct mlx5_cqe64 *cqe; struct mlx5e_rq *rq; u16 sqcc; int i; sq = container_of(cq, struct mlx5e_xdpsq, cq); - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED))) + return false; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) return false; rq = container_of(sq, struct mlx5e_rq, xdpsq); @@ -1046,15 +1101,11 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) */ sqcc = sq->cc; - for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { - struct mlx5_cqe64 *cqe; + i = 0; + do { u16 wqe_counter; bool last_wqe; - cqe = mlx5_cqwq_get_cqe(&cq->wq); - if (!cqe) - break; - mlx5_cqwq_pop(&cq->wq); wqe_counter = be16_to_cpu(cqe->wqe_counter); @@ -1072,7 +1123,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) /* Recycle RX page */ mlx5e_page_release(rq, di, true); } while (!last_wqe); - } + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); mlx5_cqwq_update_db_record(&cq->wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index e65517eafc58..6d199ffb1c0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -47,7 +47,7 @@ struct counter_desc { char format[ETH_GSTRING_LEN]; - int offset; /* Byte offset */ + size_t offset; /* Byte offset */ }; struct mlx5e_sw_stats { @@ -84,6 +84,7 @@ struct mlx5e_sw_stats { u64 rx_cache_full; u64 rx_cache_empty; u64 rx_cache_busy; + u64 rx_cache_waive; /* Special handling counters */ u64 link_down_events_phy; @@ -123,6 +124,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) }, }; @@ -216,6 +218,12 @@ static const struct counter_desc vport_stats_desc[] = { MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \ counter_set.eth_per_prio_grp_data_layout.c##_high) #define NUM_PPORT_PRIO 8 +#define PPORT_ETH_EXT_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_extended_cntrs_grp_data_layout.c##_high) +#define PPORT_ETH_EXT_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, (pstats)->eth_ext_counters, \ + counter_set.eth_extended_cntrs_grp_data_layout.c##_high) struct mlx5e_pport_stats { __be64 IEEE_802_3_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; @@ -224,6 +232,7 @@ struct mlx5e_pport_stats { __be64 per_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; __be64 phy_statistical_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 eth_ext_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; }; static const struct counter_desc pport_802_3_stats_desc[] = { @@ -290,12 +299,22 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { { "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, }; +static const struct counter_desc pport_eth_ext_stats_desc[] = { + { "rx_buffer_passed_thres_phy", PPORT_ETH_EXT_OFF(rx_buffer_almost_full) }, +}; + #define PCIE_PERF_OFF(c) \ MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c) #define PCIE_PERF_GET(pcie_stats, c) \ MLX5_GET(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ counter_set.pcie_perf_cntrs_grp_data_layout.c) +#define PCIE_PERF_OFF64(c) \ + MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c##_high) +#define PCIE_PERF_GET64(pcie_stats, c) \ + MLX5_GET64(mpcnt_reg, (pcie_stats)->pcie_perf_counters, \ + counter_set.pcie_perf_cntrs_grp_data_layout.c##_high) + struct mlx5e_pcie_stats { __be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)]; }; @@ -305,6 +324,17 @@ static const struct counter_desc pcie_perf_stats_desc[] = { { "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) }, }; +static const struct counter_desc pcie_perf_stats_desc64[] = { + { "outbound_pci_buffer_overflow", PCIE_PERF_OFF64(tx_overflow_buffer_pkt) }, +}; + +static const struct counter_desc pcie_perf_stall_stats_desc[] = { + { "outbound_pci_stalled_rd", PCIE_PERF_OFF(outbound_stalled_reads) }, + { "outbound_pci_stalled_wr", PCIE_PERF_OFF(outbound_stalled_writes) }, + { "outbound_pci_stalled_rd_events", PCIE_PERF_OFF(outbound_stalled_reads_events) }, + { "outbound_pci_stalled_wr_events", PCIE_PERF_OFF(outbound_stalled_writes_events) }, +}; + struct mlx5e_rq_stats { u64 packets; u64 bytes; @@ -326,6 +356,7 @@ struct mlx5e_rq_stats { u64 cache_full; u64 cache_empty; u64 cache_busy; + u64 cache_waive; }; static const struct counter_desc rq_stats_desc[] = { @@ -349,6 +380,7 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) }, }; struct mlx5e_sq_stats { @@ -397,17 +429,29 @@ static const struct counter_desc sq_stats_desc[] = { #define NUM_PCIE_PERF_COUNTERS(priv) \ (ARRAY_SIZE(pcie_perf_stats_desc) * \ MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_performance_group)) +#define NUM_PCIE_PERF_COUNTERS64(priv) \ + (ARRAY_SIZE(pcie_perf_stats_desc64) * \ + MLX5_CAP_MCAM_FEATURE((priv)->mdev, tx_overflow_buffer_pkt)) +#define NUM_PCIE_PERF_STALL_COUNTERS(priv) \ + (ARRAY_SIZE(pcie_perf_stall_stats_desc) * \ + MLX5_CAP_MCAM_FEATURE((priv)->mdev, pcie_outbound_stalled)) #define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \ ARRAY_SIZE(pport_per_prio_traffic_stats_desc) #define NUM_PPORT_PER_PRIO_PFC_COUNTERS \ ARRAY_SIZE(pport_per_prio_pfc_stats_desc) +#define NUM_PPORT_ETH_EXT_COUNTERS(priv) \ + (ARRAY_SIZE(pport_eth_ext_stats_desc) * \ + MLX5_CAP_PCAM_FEATURE((priv)->mdev, rx_buffer_fullness_counters)) #define NUM_PPORT_COUNTERS(priv) (NUM_PPORT_802_3_COUNTERS + \ NUM_PPORT_2863_COUNTERS + \ NUM_PPORT_2819_COUNTERS + \ NUM_PPORT_PHY_STATISTICAL_COUNTERS(priv) + \ NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ - NUM_PPORT_PRIO) -#define NUM_PCIE_COUNTERS(priv) NUM_PCIE_PERF_COUNTERS(priv) + NUM_PPORT_PRIO + \ + NUM_PPORT_ETH_EXT_COUNTERS(priv)) +#define NUM_PCIE_COUNTERS(priv) (NUM_PCIE_PERF_COUNTERS(priv) + \ + NUM_PCIE_PERF_COUNTERS64(priv) +\ + NUM_PCIE_PERF_STALL_COUNTERS(priv)) #define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) #define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 7f282e8f4e7f..da503e6411da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1326,7 +1326,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, LIST_HEAD(actions); int err; - if (tc_no_actions(exts)) + if (!tcf_exts_has_actions(exts)) return -EINVAL; attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; @@ -1837,7 +1837,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, bool encap = false; int err = 0; - if (tc_no_actions(exts)) + if (!tcf_exts_has_actions(exts)) return -EINVAL; memset(attr, 0, sizeof(*attr)); @@ -1937,7 +1937,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return err; } -int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, +int mlx5e_configure_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index ecbe30d808ae..c14c263a739b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -33,12 +33,15 @@ #ifndef __MLX5_EN_TC_H__ #define __MLX5_EN_TC_H__ +#include <net/pkt_cls.h> + #define MLX5E_TC_FLOW_ID_MASK 0x0000ffff +#ifdef CONFIG_MLX5_ESWITCH int mlx5e_tc_init(struct mlx5e_priv *priv); void mlx5e_tc_cleanup(struct mlx5e_priv *priv); -int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, +int mlx5e_configure_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f); int mlx5e_delete_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f); @@ -60,4 +63,10 @@ static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) return atomic_read(&priv->fs.tc.ht.nelems); } +#else /* CONFIG_MLX5_ESWITCH */ +static inline int mlx5e_tc_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_tc_cleanup(struct mlx5e_priv *priv) {} +static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return 0; } +#endif + #endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 31353e5c3c78..fee43e40fa16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -394,6 +394,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) { struct mlx5e_txqsq *sq; + struct mlx5_cqe64 *cqe; u32 dma_fifo_cc; u32 nbytes; u16 npkts; @@ -402,7 +403,11 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) sq = container_of(cq, struct mlx5e_txqsq, cq); - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED))) + return false; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) return false; npkts = 0; @@ -416,15 +421,11 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) /* avoid dirtying sq cache line every cqe */ dma_fifo_cc = sq->dma_fifo_cc; - for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { - struct mlx5_cqe64 *cqe; + i = 0; + do { u16 wqe_counter; bool last_wqe; - cqe = mlx5_cqwq_get_cqe(&cq->wq); - if (!cqe) - break; - mlx5_cqwq_pop(&cq->wq); wqe_counter = be16_to_cpu(cqe->wqe_counter); @@ -467,7 +468,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) sqcc += wi->num_wqebbs; napi_consume_skb(skb, napi_budget); } while (!last_wqe); - } + + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); mlx5_cqwq_update_db_record(&cq->wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 92db28a9ed43..e906b754415c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -30,66 +30,18 @@ * SOFTWARE. */ +#include <linux/irq.h> #include "en.h" -static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, - struct mlx5e_icosq *sq, - struct mlx5_cqe64 *cqe, - u16 *sqcc) +static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) { - struct mlx5_wq_cyc *wq = &sq->wq; - u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; - struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; - struct mlx5e_rq *rq = &sq->channel->rq; - - prefetch(rq); - mlx5_cqwq_pop(&cq->wq); - *sqcc += icowi->num_wqebbs; - - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { - WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", - cqe->op_own); - return; - } - - if (likely(icowi->opcode == MLX5_OPCODE_UMR)) { - mlx5e_post_rx_mpwqe(rq); - return; - } - - if (unlikely(icowi->opcode != MLX5_OPCODE_NOP)) - WARN_ONCE(true, - "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", - icowi->opcode); -} - -static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) -{ - struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); - struct mlx5_cqe64 *cqe; - u16 sqcc; - - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) - return; - - cqe = mlx5_cqwq_get_cqe(&cq->wq); - if (likely(!cqe)) - return; + int current_cpu = smp_processor_id(); + const struct cpumask *aff; + struct irq_data *idata; - /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), - * otherwise a cq overrun may occur - */ - sqcc = sq->cc; - - /* by design, there's only a single cqe */ - mlx5e_poll_ico_single_cqe(cq, sq, cqe, &sqcc); - - mlx5_cqwq_update_db_record(&cq->wq); - - /* ensure cq space is freed before enabling more cqes */ - wmb(); - - sq->cc = sqcc; + idata = irq_desc_get_irq_data(c->irq_desc); + aff = irq_data_get_affinity_mask(idata); + return cpumask_test_cpu(current_cpu, aff); } int mlx5e_napi_poll(struct napi_struct *napi, int budget) @@ -100,8 +52,6 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) int work_done; int i; - clear_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); - for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); @@ -111,25 +61,22 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); busy |= work_done == budget; - mlx5e_poll_ico_cq(&c->icosq.cq); + busy |= c->rq.post_wqes(&c->rq); - busy |= mlx5e_post_rx_wqes(&c->rq); - - if (busy) - return budget; - - napi_complete_done(napi, work_done); + if (busy) { + if (likely(mlx5e_channel_no_affinity_change(c))) + return budget; + if (work_done == budget) + work_done--; + } - /* avoid losing completion event during/after polling cqs */ - if (test_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags)) { - napi_schedule(napi); + if (unlikely(!napi_complete_done(napi, work_done))) return work_done; - } for (i = 0; i < c->num_tc; i++) mlx5e_cq_arm(&c->sq[i].cq); - if (test_bit(MLX5E_RQ_STATE_AM, &c->rq.state)) + if (MLX5E_TEST_BIT(c->rq.state, MLX5E_RQ_STATE_AM)) mlx5e_rx_am(&c->rq); mlx5e_cq_arm(&c->rq.cq); @@ -143,7 +90,6 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq) struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); cq->event_ctr++; - set_bit(MLX5E_CHANNEL_NAPI_SCHED, &cq->channel->flags); napi_schedule(cq->napi); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index edd11ebd392e..fc606bfd1d6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -36,9 +36,7 @@ #include <linux/mlx5/cmd.h> #include "mlx5_core.h" #include "fpga/core.h" -#ifdef CONFIG_MLX5_CORE_EN #include "eswitch.h" -#endif enum { MLX5_EQE_SIZE = sizeof(struct mlx5_eqe), @@ -193,6 +191,7 @@ static void eq_update_ci(struct mlx5_eq *eq, int arm) { __be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2); u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24); + __raw_writel((__force u32)cpu_to_be32(val), addr); /* We still want ordering, just not swabbing, so add a barrier */ mb(); @@ -483,11 +482,9 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr) } break; -#ifdef CONFIG_MLX5_CORE_EN case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE: mlx5_eswitch_vport_event(dev->priv.eswitch, eqe); break; -#endif case MLX5_EVENT_TYPE_PORT_MODULE_EVENT: mlx5_port_module_event(dev, eqe); @@ -702,9 +699,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) u64 async_event_mask = MLX5_ASYNC_EVENT_MASK; int err; - if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH && - MLX5_CAP_GEN(dev, vport_group_manager) && - mlx5_core_is_pf(dev)) + if (MLX5_VPORT_MANAGER(dev)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE); if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 5b41f692acad..c77f4c0c7769 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -46,19 +46,13 @@ enum { MLX5_ACTION_DEL = 2, }; -/* E-Switch UC L2 table hash node */ -struct esw_uc_addr { - struct l2addr_node node; - u32 table_index; - u32 vport; -}; - /* Vport UC/MC hash node */ struct vport_addr { struct l2addr_node node; u8 action; u32 vport; - struct mlx5_flow_handle *flow_rule; /* SRIOV only */ + struct mlx5_flow_handle *flow_rule; + bool mpfs; /* UC MAC was added to MPFs */ /* A flag indicating that mac was added due to mc promiscuous vport */ bool mc_promisc; }; @@ -154,81 +148,6 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in)); } -/* HW L2 Table (MPFS) management */ -static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index, - u8 *mac, u8 vlan_valid, u16 vlan) -{ - u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0}; - u8 *in_mac_addr; - - MLX5_SET(set_l2_table_entry_in, in, opcode, - MLX5_CMD_OP_SET_L2_TABLE_ENTRY); - MLX5_SET(set_l2_table_entry_in, in, table_index, index); - MLX5_SET(set_l2_table_entry_in, in, vlan_valid, vlan_valid); - MLX5_SET(set_l2_table_entry_in, in, vlan, vlan); - - in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); - ether_addr_copy(&in_mac_addr[2], mac); - - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index) -{ - u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {0}; - u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0}; - - MLX5_SET(delete_l2_table_entry_in, in, opcode, - MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); - MLX5_SET(delete_l2_table_entry_in, in, table_index, index); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); -} - -static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix) -{ - int err = 0; - - *ix = find_first_zero_bit(l2_table->bitmap, l2_table->size); - if (*ix >= l2_table->size) - err = -ENOSPC; - else - __set_bit(*ix, l2_table->bitmap); - - return err; -} - -static void free_l2_table_index(struct mlx5_l2_table *l2_table, u32 ix) -{ - __clear_bit(ix, l2_table->bitmap); -} - -static int set_l2_table_entry(struct mlx5_core_dev *dev, u8 *mac, - u8 vlan_valid, u16 vlan, - u32 *index) -{ - struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; - int err; - - err = alloc_l2_table_index(l2_table, index); - if (err) - return err; - - err = set_l2_table_entry_cmd(dev, *index, mac, vlan_valid, vlan); - if (err) - free_l2_table_index(l2_table, *index); - - return err; -} - -static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index) -{ - struct mlx5_l2_table *l2_table = &dev->priv.eswitch->l2_table; - - del_l2_table_entry_cmd(dev, index); - free_l2_table_index(l2_table, index); -} - /* E-Switch FDB */ static struct mlx5_flow_handle * __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, @@ -455,65 +374,60 @@ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw, static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { - struct hlist_head *hash = esw->l2_table.l2_hash; - struct esw_uc_addr *esw_uc; u8 *mac = vaddr->node.addr; u32 vport = vaddr->vport; int err; - esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); - if (esw_uc) { + /* Skip mlx5_mpfs_add_mac for PFs, + * it is already done by the PF netdev in mlx5e_execute_l2_action + */ + if (!vport) + goto fdb_add; + + err = mlx5_mpfs_add_mac(esw->dev, mac); + if (err) { esw_warn(esw->dev, - "Failed to set L2 mac(%pM) for vport(%d), mac is already in use by vport(%d)\n", - mac, vport, esw_uc->vport); - return -EEXIST; + "Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n", + mac, vport, err); + return err; } + vaddr->mpfs = true; - esw_uc = l2addr_hash_add(hash, mac, struct esw_uc_addr, GFP_KERNEL); - if (!esw_uc) - return -ENOMEM; - esw_uc->vport = vport; - - err = set_l2_table_entry(esw->dev, mac, 0, 0, &esw_uc->table_index); - if (err) - goto abort; - +fdb_add: /* SRIOV is enabled: Forward UC MAC to vport */ if (esw->fdb_table.fdb && esw->mode == SRIOV_LEGACY) vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); - esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n", - vport, mac, esw_uc->table_index, vaddr->flow_rule); - return err; -abort: - l2addr_hash_del(esw_uc); - return err; + esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n", + vport, mac, vaddr->flow_rule); + + return 0; } static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { - struct hlist_head *hash = esw->l2_table.l2_hash; - struct esw_uc_addr *esw_uc; u8 *mac = vaddr->node.addr; u32 vport = vaddr->vport; + int err = 0; - esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr); - if (!esw_uc || esw_uc->vport != vport) { - esw_debug(esw->dev, - "MAC(%pM) doesn't belong to vport (%d)\n", - mac, vport); - return -EINVAL; - } - esw_debug(esw->dev, "\tDELETE UC MAC: vport[%d] %pM index:%d fr(%p)\n", - vport, mac, esw_uc->table_index, vaddr->flow_rule); + /* Skip mlx5_mpfs_del_mac for PFs, + * it is already done by the PF netdev in mlx5e_execute_l2_action + */ + if (!vport || !vaddr->mpfs) + goto fdb_del; - del_l2_table_entry(esw->dev, esw_uc->table_index); + err = mlx5_mpfs_del_mac(esw->dev, mac); + if (err) + esw_warn(esw->dev, + "Failed to del L2 table mac(%pM) for vport(%d), err(%d)\n", + mac, vport, err); + vaddr->mpfs = false; +fdb_del: if (vaddr->flow_rule) mlx5_del_flow_rules(vaddr->flow_rule); vaddr->flow_rule = NULL; - l2addr_hash_del(esw_uc); return 0; } @@ -1611,13 +1525,14 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) } /* Public E-Switch API */ +#define ESW_ALLOWED(esw) ((esw) && MLX5_VPORT_MANAGER((esw)->dev)) + int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { int err; int i, enabled_events; - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || - MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + if (!ESW_ALLOWED(esw)) return 0; if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) || @@ -1634,7 +1549,6 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); esw->mode = mode; - esw_disable_vport(esw, 0); if (mode == SRIOV_LEGACY) err = esw_create_legacy_fdb_table(esw, nvfs + 1); @@ -1647,7 +1561,11 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) if (err) esw_warn(esw->dev, "Failed to create eswitch TSAR"); - enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : UC_ADDR_CHANGE; + /* Don't enable vport events when in SRIOV_OFFLOADS mode, since: + * 1. L2 table (MPFS) is programmed by PF/VF representors netdevs set_rx_mode + * 2. FDB/Eswitch is programmed by user space tools + */ + enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0; for (i = 0; i <= nvfs; i++) esw_enable_vport(esw, i, enabled_events); @@ -1656,7 +1574,6 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) return 0; abort: - esw_enable_vport(esw, 0, UC_ADDR_CHANGE); esw->mode = SRIOV_NONE; return err; } @@ -1667,9 +1584,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) int nvports; int i; - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || - MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH || - esw->mode == SRIOV_NONE) + if (!ESW_ALLOWED(esw) || esw->mode == SRIOV_NONE) return; esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n", @@ -1692,44 +1607,21 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_offloads_cleanup(esw, nvports); esw->mode = SRIOV_NONE; - /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ - esw_enable_vport(esw, 0, UC_ADDR_CHANGE); -} - -void mlx5_eswitch_attach(struct mlx5_eswitch *esw) -{ - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || - MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) - return; - - esw_enable_vport(esw, 0, UC_ADDR_CHANGE); - /* VF Vports will be enabled when SRIOV is enabled */ -} - -void mlx5_eswitch_detach(struct mlx5_eswitch *esw) -{ - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || - MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) - return; - - esw_disable_vport(esw, 0); } int mlx5_eswitch_init(struct mlx5_core_dev *dev) { - int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); int total_vports = MLX5_TOTAL_VPORTS(dev); struct mlx5_eswitch *esw; int vport_num; int err; - if (!MLX5_CAP_GEN(dev, vport_group_manager) || - MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + if (!MLX5_VPORT_MANAGER(dev)) return 0; esw_info(dev, - "Total vports %d, l2 table size(%d), per vport: max uc(%d) max mc(%d)\n", - total_vports, l2_table_size, + "Total vports %d, per vport: max uc(%d) max mc(%d)\n", + total_vports, MLX5_MAX_UC_PER_VPORT(dev), MLX5_MAX_MC_PER_VPORT(dev)); @@ -1739,14 +1631,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->dev = dev; - esw->l2_table.bitmap = kcalloc(BITS_TO_LONGS(l2_table_size), - sizeof(uintptr_t), GFP_KERNEL); - if (!esw->l2_table.bitmap) { - err = -ENOMEM; - goto abort; - } - esw->l2_table.size = l2_table_size; - esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { err = -ENOMEM; @@ -1797,7 +1681,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) abort: if (esw->work_queue) destroy_workqueue(esw->work_queue); - kfree(esw->l2_table.bitmap); kfree(esw->vports); kfree(esw->offloads.vport_reps); kfree(esw); @@ -1806,15 +1689,13 @@ abort: void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) { - if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || - MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + if (!esw || !MLX5_VPORT_MANAGER(esw->dev)) return; esw_info(esw->dev, "cleanup\n"); esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); - kfree(esw->l2_table.bitmap); kfree(esw->offloads.vport_reps); kfree(esw->vports); kfree(esw); @@ -1838,8 +1719,6 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) } /* Vport Administration */ -#define ESW_ALLOWED(esw) \ - (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev)) #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 834a33050969..565c8b7a399a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -37,6 +37,15 @@ #include <linux/if_link.h> #include <net/devlink.h> #include <linux/mlx5/device.h> +#include "lib/mpfs.h" + +enum { + SRIOV_NONE, + SRIOV_LEGACY, + SRIOV_OFFLOADS +}; + +#ifdef CONFIG_MLX5_ESWITCH #define MLX5_MAX_UC_PER_VPORT(dev) \ (1 << MLX5_CAP_GEN(dev, log_max_current_uc_list)) @@ -44,9 +53,6 @@ #define MLX5_MAX_MC_PER_VPORT(dev) \ (1 << MLX5_CAP_GEN(dev, log_max_current_mc_list)) -#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) -#define MLX5_L2_ADDR_HASH(addr) (addr[5]) - #define FDB_UPLINK_VPORT 0xffff #define MLX5_MIN_BW_SHARE 1 @@ -54,48 +60,6 @@ #define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ min_t(u32, max_t(u32, (rate) / (divider), MLX5_MIN_BW_SHARE), limit) -/* L2 -mac address based- hash helpers */ -struct l2addr_node { - struct hlist_node hlist; - u8 addr[ETH_ALEN]; -}; - -#define for_each_l2hash_node(hn, tmp, hash, i) \ - for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \ - hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) - -#define l2addr_hash_find(hash, mac, type) ({ \ - int ix = MLX5_L2_ADDR_HASH(mac); \ - bool found = false; \ - type *ptr = NULL; \ - \ - hlist_for_each_entry(ptr, &hash[ix], node.hlist) \ - if (ether_addr_equal(ptr->node.addr, mac)) {\ - found = true; \ - break; \ - } \ - if (!found) \ - ptr = NULL; \ - ptr; \ -}) - -#define l2addr_hash_add(hash, mac, type, gfp) ({ \ - int ix = MLX5_L2_ADDR_HASH(mac); \ - type *ptr = NULL; \ - \ - ptr = kzalloc(sizeof(type), gfp); \ - if (ptr) { \ - ether_addr_copy(ptr->node.addr, mac); \ - hlist_add_head(&ptr->node.hlist, &hash[ix]);\ - } \ - ptr; \ -}) - -#define l2addr_hash_del(ptr) ({ \ - hlist_del(&ptr->node.hlist); \ - kfree(ptr); \ -}) - struct vport_ingress { struct mlx5_flow_table *acl; struct mlx5_flow_group *allow_untagged_spoofchk_grp; @@ -150,12 +114,6 @@ struct mlx5_vport { u16 enabled_events; }; -struct mlx5_l2_table { - struct hlist_head l2_hash[MLX5_L2_ADDR_HASH_SIZE]; - u32 size; - unsigned long *bitmap; -}; - struct mlx5_eswitch_fdb { void *fdb; union { @@ -175,12 +133,6 @@ struct mlx5_eswitch_fdb { }; }; -enum { - SRIOV_NONE, - SRIOV_LEGACY, - SRIOV_OFFLOADS -}; - struct mlx5_esw_sq { struct mlx5_flow_handle *send_to_vport_rule; struct list_head list; @@ -222,7 +174,6 @@ struct esw_mc_addr { /* SRIOV only */ struct mlx5_eswitch { struct mlx5_core_dev *dev; - struct mlx5_l2_table l2_table; struct mlx5_eswitch_fdb fdb_table; struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE]; struct workqueue_struct *work_queue; @@ -250,8 +201,6 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); -void mlx5_eswitch_attach(struct mlx5_eswitch *esw); -void mlx5_eswitch_detach(struct mlx5_eswitch *esw); void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); @@ -345,4 +294,13 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, #define esw_debug(dev, format, ...) \ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) +#else /* CONFIG_MLX5_ESWITCH */ +/* eswitch API stubs */ +static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} +static inline void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) {} +static inline int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { return 0; } +static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) {} +#endif /* CONFIG_MLX5_ESWITCH */ + #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 5bc0593bd76e..d9fd8570b07c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -433,6 +433,8 @@ static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw) struct mlx5_flow_table *fdb = NULL; int esw_size, err = 0; u32 flags = 0; + u32 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) | + MLX5_CAP_GEN(dev, max_flow_counter_15_0); root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); if (!root_ns) { @@ -443,9 +445,9 @@ static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw) esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n", MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size), - MLX5_CAP_GEN(dev, max_flow_counter), ESW_OFFLOADS_NUM_GROUPS); + max_flow_counter, ESW_OFFLOADS_NUM_GROUPS); - esw_size = min_t(int, MLX5_CAP_GEN(dev, max_flow_counter) * ESW_OFFLOADS_NUM_GROUPS, + esw_size = min_t(int, max_flow_counter * ESW_OFFLOADS_NUM_GROUPS, 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index e750f07793b8..e0d0efd903bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -263,7 +263,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->modify_id); in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, match_value); - memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param)); + memcpy(in_match_value, &fte->val, sizeof(fte->val)); in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { @@ -359,7 +359,7 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id) +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id) { u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {0}; u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0}; @@ -374,7 +374,7 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id) return err; } -int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id) +int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id) { u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {0}; u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)] = {0}; @@ -385,7 +385,7 @@ int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id) return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } -int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, +int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, u64 *packets, u64 *bytes) { u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) + @@ -409,14 +409,14 @@ int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, } struct mlx5_cmd_fc_bulk { - u16 id; + u32 id; int num; int outlen; u32 out[0]; }; struct mlx5_cmd_fc_bulk * -mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num) +mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num) { struct mlx5_cmd_fc_bulk *b; int outlen = @@ -453,7 +453,7 @@ mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b) } void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, - struct mlx5_cmd_fc_bulk *b, u16 id, + struct mlx5_cmd_fc_bulk *b, u32 id, u64 *packets, u64 *bytes) { int index = id - b->id; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 0f98a7cf4877..c6d7bdf255b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -74,20 +74,20 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, u32 underlay_qpn); -int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id); -int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id); -int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id); +int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id); +int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id, u64 *packets, u64 *bytes); struct mlx5_cmd_fc_bulk; struct mlx5_cmd_fc_bulk * -mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num); +mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u32 id, int num); void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b); int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b); void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, - struct mlx5_cmd_fc_bulk *b, u16 id, + struct mlx5_cmd_fc_bulk *b, u32 id, u64 *packets, u64 *bytes); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index e8690fe46bf2..5a7bea688ec8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -36,6 +36,7 @@ #include "mlx5_core.h" #include "fs_core.h" #include "fs_cmd.h" +#include "diag/fs_tracepoint.h" #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) @@ -82,8 +83,8 @@ #define ETHTOOL_PRIO_NUM_LEVELS 1 #define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) -/* Vlan, mac, ttc, aRFS */ -#define KERNEL_NIC_PRIO_NUM_LEVELS 4 +/* Vlan, mac, ttc, inner ttc, aRFS */ +#define KERNEL_NIC_PRIO_NUM_LEVELS 5 #define KERNEL_NIC_NUM_PRIOS 1 /* One more level for tc */ #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) @@ -150,6 +151,23 @@ enum fs_i_mutex_lock_class { FS_MUTEX_CHILD }; +static const struct rhashtable_params rhash_fte = { + .key_len = FIELD_SIZEOF(struct fs_fte, val), + .key_offset = offsetof(struct fs_fte, val), + .head_offset = offsetof(struct fs_fte, hash), + .automatic_shrinking = true, + .min_size = 1, +}; + +static const struct rhashtable_params rhash_fg = { + .key_len = FIELD_SIZEOF(struct mlx5_flow_group, mask), + .key_offset = offsetof(struct mlx5_flow_group, mask), + .head_offset = offsetof(struct mlx5_flow_group, hash), + .automatic_shrinking = true, + .min_size = 1, + +}; + static void del_rule(struct fs_node *node); static void del_flow_table(struct fs_node *node); static void del_flow_group(struct fs_node *node); @@ -255,71 +273,77 @@ static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns, return NULL; } -static bool masked_memcmp(void *mask, void *val1, void *val2, size_t size) +static bool check_last_reserved(const u32 *match_criteria) { - unsigned int i; + char *match_criteria_reserved = + MLX5_ADDR_OF(fte_match_param, match_criteria, MLX5_FTE_MATCH_PARAM_RESERVED); - for (i = 0; i < size; i++, mask++, val1++, val2++) - if ((*((u8 *)val1) & (*(u8 *)mask)) != - ((*(u8 *)val2) & (*(u8 *)mask))) - return false; - - return true; + return !match_criteria_reserved[0] && + !memcmp(match_criteria_reserved, match_criteria_reserved + 1, + MLX5_FLD_SZ_BYTES(fte_match_param, + MLX5_FTE_MATCH_PARAM_RESERVED) - 1); } -static bool compare_match_value(struct mlx5_flow_group_mask *mask, - void *fte_param1, void *fte_param2) +static bool check_valid_mask(u8 match_criteria_enable, const u32 *match_criteria) { - if (mask->match_criteria_enable & - 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) { - void *fte_match1 = MLX5_ADDR_OF(fte_match_param, - fte_param1, outer_headers); - void *fte_match2 = MLX5_ADDR_OF(fte_match_param, - fte_param2, outer_headers); - void *fte_mask = MLX5_ADDR_OF(fte_match_param, - mask->match_criteria, outer_headers); + if (match_criteria_enable & ~( + (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS) | + (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) | + (1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS))) + return false; - if (!masked_memcmp(fte_mask, fte_match1, fte_match2, - MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4))) + if (!(match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_OUTER_HEADERS)) { + char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, + match_criteria, outer_headers); + + if (fg_type_mask[0] || + memcmp(fg_type_mask, fg_type_mask + 1, + MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1)) return false; } - if (mask->match_criteria_enable & - 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS) { - void *fte_match1 = MLX5_ADDR_OF(fte_match_param, - fte_param1, misc_parameters); - void *fte_match2 = MLX5_ADDR_OF(fte_match_param, - fte_param2, misc_parameters); - void *fte_mask = MLX5_ADDR_OF(fte_match_param, - mask->match_criteria, misc_parameters); + if (!(match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_MISC_PARAMETERS)) { + char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, + match_criteria, misc_parameters); - if (!masked_memcmp(fte_mask, fte_match1, fte_match2, - MLX5_ST_SZ_BYTES(fte_match_set_misc))) + if (fg_type_mask[0] || + memcmp(fg_type_mask, fg_type_mask + 1, + MLX5_ST_SZ_BYTES(fte_match_set_misc) - 1)) return false; } - if (mask->match_criteria_enable & - 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS) { - void *fte_match1 = MLX5_ADDR_OF(fte_match_param, - fte_param1, inner_headers); - void *fte_match2 = MLX5_ADDR_OF(fte_match_param, - fte_param2, inner_headers); - void *fte_mask = MLX5_ADDR_OF(fte_match_param, - mask->match_criteria, inner_headers); + if (!(match_criteria_enable & + 1 << MLX5_CREATE_FLOW_GROUP_IN_MATCH_CRITERIA_ENABLE_INNER_HEADERS)) { + char *fg_type_mask = MLX5_ADDR_OF(fte_match_param, + match_criteria, inner_headers); - if (!masked_memcmp(fte_mask, fte_match1, fte_match2, - MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4))) + if (fg_type_mask[0] || + memcmp(fg_type_mask, fg_type_mask + 1, + MLX5_ST_SZ_BYTES(fte_match_set_lyr_2_4) - 1)) return false; } - return true; + + return check_last_reserved(match_criteria); } -static bool compare_match_criteria(u8 match_criteria_enable1, - u8 match_criteria_enable2, - void *mask1, void *mask2) +static bool check_valid_spec(const struct mlx5_flow_spec *spec) { - return match_criteria_enable1 == match_criteria_enable2 && - !memcmp(mask1, mask2, MLX5_ST_SZ_BYTES(fte_match_param)); + int i; + + if (!check_valid_mask(spec->match_criteria_enable, spec->match_criteria)) { + pr_warn("mlx5_core: Match criteria given mismatches match_criteria_enable\n"); + return false; + } + + for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++) + if (spec->match_value[i] & ~spec->match_criteria[i]) { + pr_warn("mlx5_core: match_value differs from match_criteria\n"); + return false; + } + + return check_last_reserved(spec->match_value); } static struct mlx5_flow_root_namespace *find_root(struct fs_node *node) @@ -360,6 +384,8 @@ static void del_flow_table(struct fs_node *node) err = mlx5_cmd_destroy_flow_table(dev, ft); if (err) mlx5_core_warn(dev, "flow steering can't destroy ft\n"); + ida_destroy(&ft->fte_allocator); + rhltable_destroy(&ft->fgs_hash); fs_get_obj(prio, ft->node.parent); prio->num_ft--; } @@ -370,22 +396,16 @@ static void del_rule(struct fs_node *node) struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; struct fs_fte *fte; - u32 *match_value; int modify_mask; struct mlx5_core_dev *dev = get_dev(node); - int match_len = MLX5_ST_SZ_BYTES(fte_match_param); int err; bool update_fte = false; - match_value = kvzalloc(match_len, GFP_KERNEL); - if (!match_value) - return; - fs_get_obj(rule, node); fs_get_obj(fte, rule->node.parent); fs_get_obj(fg, fte->node.parent); - memcpy(match_value, fte->val, sizeof(fte->val)); fs_get_obj(ft, fg->node.parent); + trace_mlx5_fs_del_rule(rule); list_del(&rule->node.list); if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { mutex_lock(&rule->dest_attr.ft->lock); @@ -414,7 +434,18 @@ out: "%s can't del rule fg id=%d fte_index=%d\n", __func__, fg->id, fte->index); } - kvfree(match_value); +} + +static void destroy_fte(struct fs_fte *fte, struct mlx5_flow_group *fg) +{ + struct mlx5_flow_table *ft; + int ret; + + ret = rhashtable_remove_fast(&fg->ftes_hash, &fte->hash, rhash_fte); + WARN_ON(ret); + fte->status = 0; + fs_get_obj(ft, fg->node.parent); + ida_simple_remove(&ft->fte_allocator, fte->index); } static void del_fte(struct fs_node *node) @@ -428,6 +459,7 @@ static void del_fte(struct fs_node *node) fs_get_obj(fte, node); fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); + trace_mlx5_fs_del_fte(fte); dev = get_dev(&ft->node); err = mlx5_cmd_delete_fte(dev, ft, @@ -437,8 +469,7 @@ static void del_fte(struct fs_node *node) "flow steering can't delete fte in index %d of flow group id %d\n", fte->index, fg->id); - fte->status = 0; - fg->num_ftes--; + destroy_fte(fte, fg); } static void del_flow_group(struct fs_node *node) @@ -446,14 +477,21 @@ static void del_flow_group(struct fs_node *node) struct mlx5_flow_group *fg; struct mlx5_flow_table *ft; struct mlx5_core_dev *dev; + int err; fs_get_obj(fg, node); fs_get_obj(ft, fg->node.parent); dev = get_dev(&ft->node); + trace_mlx5_fs_del_fg(fg); if (ft->autogroup.active) ft->autogroup.num_groups--; + rhashtable_destroy(&fg->ftes_hash); + err = rhltable_remove(&ft->fgs_hash, + &fg->hash, + rhash_fg); + WARN_ON(err); if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", fg->id, ft->id); @@ -488,10 +526,17 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) u8 match_criteria_enable = MLX5_GET(create_flow_group_in, create_fg_in, match_criteria_enable); + int ret; + fg = kzalloc(sizeof(*fg), GFP_KERNEL); if (!fg) return ERR_PTR(-ENOMEM); + ret = rhashtable_init(&fg->ftes_hash, &rhash_fte); + if (ret) { + kfree(fg); + return ERR_PTR(ret); + } fg->mask.match_criteria_enable = match_criteria_enable; memcpy(&fg->mask.match_criteria, match_criteria, sizeof(fg->mask.match_criteria)); @@ -509,10 +554,17 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft u32 flags) { struct mlx5_flow_table *ft; + int ret; ft = kzalloc(sizeof(*ft), GFP_KERNEL); if (!ft) - return NULL; + return ERR_PTR(-ENOMEM); + + ret = rhltable_init(&ft->fgs_hash, &rhash_fg); + if (ret) { + kfree(ft); + return ERR_PTR(ret); + } ft->level = level; ft->node.type = FS_TYPE_FLOW_TABLE; @@ -523,6 +575,7 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft ft->flags = flags; INIT_LIST_HEAD(&ft->fwd_rules); mutex_init(&ft->lock); + ida_init(&ft->fte_allocator); return ft; } @@ -812,8 +865,8 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa ft_attr->max_fte ? roundup_pow_of_two(ft_attr->max_fte) : 0, root->table_type, op_mod, ft_attr->flags); - if (!ft) { - err = -ENOMEM; + if (IS_ERR(ft)) { + err = PTR_ERR(ft); goto unlock_root; } @@ -839,6 +892,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa destroy_ft: mlx5_cmd_destroy_flow_table(root->dev, ft); free_ft: + ida_destroy(&ft->fte_allocator); kfree(ft); unlock_root: mutex_unlock(&root->chain_lock); @@ -924,11 +978,13 @@ static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table * if (IS_ERR(fg)) return fg; + err = rhltable_insert(&ft->fgs_hash, &fg->hash, rhash_fg); + if (err) + goto err_free_fg; + err = mlx5_cmd_create_flow_group(dev, ft, fg_in, &fg->id); - if (err) { - kfree(fg); - return ERR_PTR(err); - } + if (err) + goto err_remove_fg; if (ft->autogroup.active) ft->autogroup.num_groups++; @@ -938,14 +994,33 @@ static struct mlx5_flow_group *create_flow_group_common(struct mlx5_flow_table * /* Add node to group list */ list_add(&fg->node.list, prev_fg); + trace_mlx5_fs_add_fg(fg); return fg; + +err_remove_fg: + WARN_ON(rhltable_remove(&ft->fgs_hash, + &fg->hash, + rhash_fg)); +err_free_fg: + rhashtable_destroy(&fg->ftes_hash); + kfree(fg); + + return ERR_PTR(err); } struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, u32 *fg_in) { + void *match_criteria = MLX5_ADDR_OF(create_flow_group_in, + fg_in, match_criteria); + u8 match_criteria_enable = MLX5_GET(create_flow_group_in, + fg_in, + match_criteria_enable); struct mlx5_flow_group *fg; + if (!check_valid_mask(match_criteria_enable, match_criteria)) + return ERR_PTR(-EINVAL); + if (ft->autogroup.active) return ERR_PTR(-EPERM); @@ -1102,43 +1177,38 @@ free_handle: return ERR_PTR(err); } -/* Assumed fg is locked */ -static unsigned int get_free_fte_index(struct mlx5_flow_group *fg, - struct list_head **prev) -{ - struct fs_fte *fte; - unsigned int start = fg->start_index; - - if (prev) - *prev = &fg->node.children; - - /* assumed list is sorted by index */ - fs_for_each_fte(fte, fg) { - if (fte->index != start) - return start; - start++; - if (prev) - *prev = &fte->node.list; - } - - return start; -} - -/* prev is output, prev->next = new_fte */ static struct fs_fte *create_fte(struct mlx5_flow_group *fg, u32 *match_value, - struct mlx5_flow_act *flow_act, - struct list_head **prev) + struct mlx5_flow_act *flow_act) { + struct mlx5_flow_table *ft; struct fs_fte *fte; int index; + int ret; + + fs_get_obj(ft, fg->node.parent); + index = ida_simple_get(&ft->fte_allocator, fg->start_index, + fg->start_index + fg->max_ftes, + GFP_KERNEL); + if (index < 0) + return ERR_PTR(index); - index = get_free_fte_index(fg, prev); fte = alloc_fte(flow_act, match_value, index); - if (IS_ERR(fte)) - return fte; + if (IS_ERR(fte)) { + ret = PTR_ERR(fte); + goto err_alloc; + } + ret = rhashtable_insert_fast(&fg->ftes_hash, &fte->hash, rhash_fte); + if (ret) + goto err_hash; return fte; + +err_hash: + kfree(fte); +err_alloc: + ida_simple_remove(&ft->fte_allocator, index); + return ERR_PTR(ret); } static struct mlx5_flow_group *create_autogroup(struct mlx5_flow_table *ft, @@ -1226,79 +1296,104 @@ static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte, return NULL; } +static bool check_conflicting_actions(u32 action1, u32 action2) +{ + u32 xored_actions = action1 ^ action2; + + /* if one rule only wants to count, it's ok */ + if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT || + action2 == MLX5_FLOW_CONTEXT_ACTION_COUNT) + return false; + + if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_ENCAP | + MLX5_FLOW_CONTEXT_ACTION_DECAP)) + return true; + + return false; +} + +static int check_conflicting_ftes(struct fs_fte *fte, const struct mlx5_flow_act *flow_act) +{ + if (check_conflicting_actions(flow_act->action, fte->action)) { + mlx5_core_warn(get_dev(&fte->node), + "Found two FTEs with conflicting actions\n"); + return -EEXIST; + } + + if (fte->flow_tag != flow_act->flow_tag) { + mlx5_core_warn(get_dev(&fte->node), + "FTE flow tag %u already exists with different flow tag %u\n", + fte->flow_tag, + flow_act->flow_tag); + return -EEXIST; + } + + return 0; +} + static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg, u32 *match_value, struct mlx5_flow_act *flow_act, struct mlx5_flow_destination *dest, - int dest_num) + int dest_num, + struct fs_fte *fte) { struct mlx5_flow_handle *handle; struct mlx5_flow_table *ft; - struct list_head *prev; - struct fs_fte *fte; int i; - nested_lock_ref_node(&fg->node, FS_MUTEX_PARENT); - fs_for_each_fte(fte, fg) { + if (fte) { + int old_action; + int ret; + nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); - if (compare_match_value(&fg->mask, match_value, &fte->val) && - (flow_act->action & fte->action)) { - int old_action = fte->action; - - if (fte->flow_tag != flow_act->flow_tag) { - mlx5_core_warn(get_dev(&fte->node), - "FTE flow tag %u already exists with different flow tag %u\n", - fte->flow_tag, - flow_act->flow_tag); - handle = ERR_PTR(-EEXIST); - goto unlock_fte; - } + ret = check_conflicting_ftes(fte, flow_act); + if (ret) { + handle = ERR_PTR(ret); + goto unlock_fte; + } - fte->action |= flow_act->action; - handle = add_rule_fte(fte, fg, dest, dest_num, - old_action != flow_act->action); - if (IS_ERR(handle)) { - fte->action = old_action; - goto unlock_fte; - } else { - goto add_rules; - } + old_action = fte->action; + fte->action |= flow_act->action; + handle = add_rule_fte(fte, fg, dest, dest_num, + old_action != flow_act->action); + if (IS_ERR(handle)) { + fte->action = old_action; + goto unlock_fte; + } else { + trace_mlx5_fs_set_fte(fte, false); + goto add_rules; } - unlock_ref_node(&fte->node); } fs_get_obj(ft, fg->node.parent); - if (fg->num_ftes >= fg->max_ftes) { - handle = ERR_PTR(-ENOSPC); - goto unlock_fg; - } - fte = create_fte(fg, match_value, flow_act, &prev); - if (IS_ERR(fte)) { - handle = (void *)fte; - goto unlock_fg; - } + fte = create_fte(fg, match_value, flow_act); + if (IS_ERR(fte)) + return (void *)fte; tree_init_node(&fte->node, 0, del_fte); nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD); handle = add_rule_fte(fte, fg, dest, dest_num, false); if (IS_ERR(handle)) { unlock_ref_node(&fte->node); + destroy_fte(fte, fg); kfree(fte); - goto unlock_fg; + return handle; } - fg->num_ftes++; - tree_add_node(&fte->node, &fg->node); - list_add(&fte->node.list, prev); + /* fte list isn't sorted */ + list_add_tail(&fte->node.list, &fg->node.children); + trace_mlx5_fs_set_fte(fte, true); add_rules: for (i = 0; i < handle->num_rules; i++) { - if (atomic_read(&handle->rule[i]->node.refcount) == 1) + if (atomic_read(&handle->rule[i]->node.refcount) == 1) { tree_add_node(&handle->rule[i]->node, &fte->node); + trace_mlx5_fs_add_rule(handle->rule[i]); + } } unlock_fte: unlock_ref_node(&fte->node); -unlock_fg: - unlock_ref_node(&fg->node); return handle; } @@ -1347,6 +1442,96 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest, } static struct mlx5_flow_handle * +try_add_to_existing_fg(struct mlx5_flow_table *ft, + struct mlx5_flow_spec *spec, + struct mlx5_flow_act *flow_act, + struct mlx5_flow_destination *dest, + int dest_num) +{ + struct mlx5_flow_group *g; + struct mlx5_flow_handle *rule = ERR_PTR(-ENOENT); + struct rhlist_head *tmp, *list; + struct match_list { + struct list_head list; + struct mlx5_flow_group *g; + } match_list, *iter; + LIST_HEAD(match_head); + + rcu_read_lock(); + /* Collect all fgs which has a matching match_criteria */ + list = rhltable_lookup(&ft->fgs_hash, spec, rhash_fg); + rhl_for_each_entry_rcu(g, tmp, list, hash) { + struct match_list *curr_match; + + if (likely(list_empty(&match_head))) { + match_list.g = g; + list_add_tail(&match_list.list, &match_head); + continue; + } + curr_match = kmalloc(sizeof(*curr_match), GFP_ATOMIC); + + if (!curr_match) { + rcu_read_unlock(); + rule = ERR_PTR(-ENOMEM); + goto free_list; + } + curr_match->g = g; + list_add_tail(&curr_match->list, &match_head); + } + rcu_read_unlock(); + + /* Try to find a fg that already contains a matching fte */ + list_for_each_entry(iter, &match_head, list) { + struct fs_fte *fte; + + g = iter->g; + nested_lock_ref_node(&g->node, FS_MUTEX_PARENT); + fte = rhashtable_lookup_fast(&g->ftes_hash, spec->match_value, + rhash_fte); + if (fte) { + rule = add_rule_fg(g, spec->match_value, + flow_act, dest, dest_num, fte); + unlock_ref_node(&g->node); + goto free_list; + } + unlock_ref_node(&g->node); + } + + /* No group with matching fte found. Try to add a new fte to any + * matching fg. + */ + list_for_each_entry(iter, &match_head, list) { + g = iter->g; + + nested_lock_ref_node(&g->node, FS_MUTEX_PARENT); + rule = add_rule_fg(g, spec->match_value, + flow_act, dest, dest_num, NULL); + if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) { + unlock_ref_node(&g->node); + goto free_list; + } + unlock_ref_node(&g->node); + } + +free_list: + if (!list_empty(&match_head)) { + struct match_list *match_tmp; + + /* The most common case is having one FG. Since we want to + * optimize this case, we save the first on the stack. + * Therefore, no need to free it. + */ + list_del(&list_first_entry(&match_head, typeof(*iter), list)->list); + list_for_each_entry_safe(iter, match_tmp, &match_head, list) { + list_del(&iter->list); + kfree(iter); + } + } + + return rule; +} + +static struct mlx5_flow_handle * _mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_spec *spec, struct mlx5_flow_act *flow_act, @@ -1358,22 +1543,18 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, struct mlx5_flow_handle *rule; int i; + if (!check_valid_spec(spec)) + return ERR_PTR(-EINVAL); + for (i = 0; i < dest_num; i++) { if (!dest_is_valid(&dest[i], flow_act->action, ft)) return ERR_PTR(-EINVAL); } nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); - fs_for_each_fg(g, ft) - if (compare_match_criteria(g->mask.match_criteria_enable, - spec->match_criteria_enable, - g->mask.match_criteria, - spec->match_criteria)) { - rule = add_rule_fg(g, spec->match_value, - flow_act, dest, dest_num); - if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC) - goto unlock; - } + rule = try_add_to_existing_fg(ft, spec, flow_act, dest, dest_num); + if (!IS_ERR(rule)) + goto unlock; g = create_autogroup(ft, spec->match_criteria_enable, spec->match_criteria); @@ -1382,7 +1563,8 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, goto unlock; } - rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num); + rule = add_rule_fg(g, spec->match_value, flow_act, dest, + dest_num, NULL); if (IS_ERR(rule)) { /* Remove assumes refcount > 0 and autogroup creates a group * with a refcount = 0. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 990acee6fb09..5509a752f98e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -34,6 +34,7 @@ #define _MLX5_FS_CORE_ #include <linux/mlx5/fs.h> +#include <linux/rhashtable.h> enum fs_node_type { FS_TYPE_NAMESPACE, @@ -118,6 +119,8 @@ struct mlx5_flow_table { /* FWD rules that point on this flow table */ struct list_head fwd_rules; u32 flags; + struct ida fte_allocator; + struct rhltable fgs_hash; }; struct mlx5_fc_cache { @@ -136,17 +139,29 @@ struct mlx5_fc { u64 lastpackets; u64 lastbytes; - u16 id; + u32 id; bool deleted; bool aging; struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; }; +#define MLX5_FTE_MATCH_PARAM_RESERVED reserved_at_600 +/* Calculate the fte_match_param length and without the reserved length. + * Make sure the reserved field is the last. + */ +#define MLX5_ST_SZ_DW_MATCH_PARAM \ + ((MLX5_BYTE_OFF(fte_match_param, MLX5_FTE_MATCH_PARAM_RESERVED) / sizeof(u32)) + \ + BUILD_BUG_ON_ZERO(MLX5_ST_SZ_BYTES(fte_match_param) != \ + MLX5_FLD_SZ_BYTES(fte_match_param, \ + MLX5_FTE_MATCH_PARAM_RESERVED) +\ + MLX5_BYTE_OFF(fte_match_param, \ + MLX5_FTE_MATCH_PARAM_RESERVED))) + /* Type of children is mlx5_flow_rule */ struct fs_fte { struct fs_node node; - u32 val[MLX5_ST_SZ_DW(fte_match_param)]; + u32 val[MLX5_ST_SZ_DW_MATCH_PARAM]; u32 dests_size; u32 flow_tag; u32 index; @@ -155,6 +170,7 @@ struct fs_fte { u32 modify_id; enum fs_fte_status status; struct mlx5_fc *counter; + struct rhash_head hash; }; /* Type of children is mlx5_flow_table/namespace */ @@ -174,7 +190,7 @@ struct mlx5_flow_namespace { struct mlx5_flow_group_mask { u8 match_criteria_enable; - u32 match_criteria[MLX5_ST_SZ_DW(fte_match_param)]; + u32 match_criteria[MLX5_ST_SZ_DW_MATCH_PARAM]; }; /* Type of children is fs_fte */ @@ -183,8 +199,9 @@ struct mlx5_flow_group { struct mlx5_flow_group_mask mask; u32 start_index; u32 max_ftes; - u32 num_ftes; u32 id; + struct rhashtable ftes_hash; + struct rhlist_head hash; }; struct mlx5_flow_root_namespace { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 6507d8acc54d..89d1f8650033 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -38,6 +38,8 @@ #include "fs_cmd.h" #define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) +/* Max number of counters to query in bulk read is 32K */ +#define MLX5_SW_MAX_COUNTERS_BULK BIT(15) /* locking scheme: * @@ -90,16 +92,21 @@ static void mlx5_fc_stats_insert(struct rb_root *root, struct mlx5_fc *counter) rb_insert_color(&counter->node, root); } +/* The function returns the last node that was queried so the caller + * function can continue calling it till all counters are queried. + */ static struct rb_node *mlx5_fc_stats_query(struct mlx5_core_dev *dev, struct mlx5_fc *first, - u16 last_id) + u32 last_id) { struct mlx5_cmd_fc_bulk *b; struct rb_node *node = NULL; - u16 afirst_id; + u32 afirst_id; int num; int err; - int max_bulk = 1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk); + + int max_bulk = min_t(int, MLX5_SW_MAX_COUNTERS_BULK, + (1 << MLX5_CAP_GEN(dev, log_max_flow_counter_bulk))); /* first id must be aligned to 4 when using bulk query */ afirst_id = first->id & ~0x3; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c index eb04e97d8765..43c126c63955 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ethtool.c @@ -39,10 +39,11 @@ static void mlx5i_get_drvinfo(struct net_device *dev, struct mlx5e_priv *priv = mlx5i_epriv(dev); mlx5e_ethtool_get_drvinfo(priv, drvinfo); + strlcpy(drvinfo->driver, DRIVER_NAME "[ib_ipoib]", + sizeof(drvinfo->driver)); } -static void mlx5i_get_strings(struct net_device *dev, - uint32_t stringset, uint8_t *data) +static void mlx5i_get_strings(struct net_device *dev, u32 stringset, u8 *data) { struct mlx5e_priv *priv = mlx5i_epriv(dev); @@ -129,17 +130,123 @@ static int mlx5i_flash_device(struct net_device *netdev, return mlx5e_ethtool_flash_device(priv, flash); } +enum mlx5_ptys_width { + MLX5_PTYS_WIDTH_1X = 1 << 0, + MLX5_PTYS_WIDTH_2X = 1 << 1, + MLX5_PTYS_WIDTH_4X = 1 << 2, + MLX5_PTYS_WIDTH_8X = 1 << 3, + MLX5_PTYS_WIDTH_12X = 1 << 4, +}; + +static inline int mlx5_ptys_width_enum_to_int(enum mlx5_ptys_width width) +{ + switch (width) { + case MLX5_PTYS_WIDTH_1X: return 1; + case MLX5_PTYS_WIDTH_2X: return 2; + case MLX5_PTYS_WIDTH_4X: return 4; + case MLX5_PTYS_WIDTH_8X: return 8; + case MLX5_PTYS_WIDTH_12X: return 12; + default: return -1; + } +} + +enum mlx5_ptys_rate { + MLX5_PTYS_RATE_SDR = 1 << 0, + MLX5_PTYS_RATE_DDR = 1 << 1, + MLX5_PTYS_RATE_QDR = 1 << 2, + MLX5_PTYS_RATE_FDR10 = 1 << 3, + MLX5_PTYS_RATE_FDR = 1 << 4, + MLX5_PTYS_RATE_EDR = 1 << 5, + MLX5_PTYS_RATE_HDR = 1 << 6, +}; + +static inline int mlx5_ptys_rate_enum_to_int(enum mlx5_ptys_rate rate) +{ + switch (rate) { + case MLX5_PTYS_RATE_SDR: return 2500; + case MLX5_PTYS_RATE_DDR: return 5000; + case MLX5_PTYS_RATE_QDR: + case MLX5_PTYS_RATE_FDR10: return 10000; + case MLX5_PTYS_RATE_FDR: return 14000; + case MLX5_PTYS_RATE_EDR: return 25000; + case MLX5_PTYS_RATE_HDR: return 50000; + default: return -1; + } +} + +static int mlx5i_get_port_settings(struct net_device *netdev, + u16 *ib_link_width_oper, u16 *ib_proto_oper) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; + int ret; + + ret = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_IB, 1); + if (ret) + return ret; + + *ib_link_width_oper = MLX5_GET(ptys_reg, out, ib_link_width_oper); + *ib_proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper); + + return 0; +} + +static int mlx5i_get_speed_settings(u16 ib_link_width_oper, u16 ib_proto_oper) +{ + int rate, width; + + rate = mlx5_ptys_rate_enum_to_int(ib_proto_oper); + if (rate < 0) + return -EINVAL; + width = mlx5_ptys_width_enum_to_int(ib_link_width_oper); + if (width < 0) + return -EINVAL; + + return rate * width; +} + +static int mlx5i_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + u16 ib_link_width_oper; + u16 ib_proto_oper; + int speed, ret; + + ret = mlx5i_get_port_settings(netdev, &ib_link_width_oper, &ib_proto_oper); + if (ret) + return ret; + + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); + + speed = mlx5i_get_speed_settings(ib_link_width_oper, ib_proto_oper); + if (speed < 0) + return -EINVAL; + + link_ksettings->base.duplex = DUPLEX_FULL; + link_ksettings->base.port = PORT_OTHER; + + link_ksettings->base.autoneg = AUTONEG_DISABLE; + + link_ksettings->base.speed = speed; + + return 0; +} + const struct ethtool_ops mlx5i_ethtool_ops = { - .get_drvinfo = mlx5i_get_drvinfo, - .get_strings = mlx5i_get_strings, - .get_sset_count = mlx5i_get_sset_count, - .get_ethtool_stats = mlx5i_get_ethtool_stats, - .get_ringparam = mlx5i_get_ringparam, - .set_ringparam = mlx5i_set_ringparam, - .flash_device = mlx5i_flash_device, - .get_channels = mlx5i_get_channels, - .set_channels = mlx5i_set_channels, - .get_coalesce = mlx5i_get_coalesce, - .set_coalesce = mlx5i_set_coalesce, - .get_ts_info = mlx5i_get_ts_info, + .get_drvinfo = mlx5i_get_drvinfo, + .get_strings = mlx5i_get_strings, + .get_sset_count = mlx5i_get_sset_count, + .get_ethtool_stats = mlx5i_get_ethtool_stats, + .get_ringparam = mlx5i_get_ringparam, + .set_ringparam = mlx5i_set_ringparam, + .flash_device = mlx5i_flash_device, + .get_channels = mlx5i_get_channels, + .set_channels = mlx5i_set_channels, + .get_coalesce = mlx5i_get_coalesce, + .set_coalesce = mlx5i_set_coalesce, + .get_ts_info = mlx5i_get_ts_info, + .get_link_ksettings = mlx5i_get_link_ksettings, + .get_link = ethtool_op_get_link, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c new file mode 100644 index 000000000000..7cb67122e8b5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/etherdevice.h> +#include <linux/mlx5/driver.h> +#include <linux/mlx5/mlx5_ifc.h> +#include "mlx5_core.h" +#include "lib/mpfs.h" + +/* HW L2 Table (MPFS) management */ +static int set_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index, u8 *mac) +{ + u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0}; + u8 *in_mac_addr; + + MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY); + MLX5_SET(set_l2_table_entry_in, in, table_index, index); + + in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); + ether_addr_copy(&in_mac_addr[2], mac); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index) +{ + u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0}; + + MLX5_SET(delete_l2_table_entry_in, in, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + MLX5_SET(delete_l2_table_entry_in, in, table_index, index); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +/* UC L2 table hash node */ +struct l2table_node { + struct l2addr_node node; + u32 index; /* index in HW l2 table */ +}; + +struct mlx5_mpfs { + struct hlist_head hash[MLX5_L2_ADDR_HASH_SIZE]; + struct mutex lock; /* Synchronize l2 table access */ + u32 size; + unsigned long *bitmap; +}; + +static int alloc_l2table_index(struct mlx5_mpfs *l2table, u32 *ix) +{ + int err = 0; + + *ix = find_first_zero_bit(l2table->bitmap, l2table->size); + if (*ix >= l2table->size) + err = -ENOSPC; + else + __set_bit(*ix, l2table->bitmap); + + return err; +} + +static void free_l2table_index(struct mlx5_mpfs *l2table, u32 ix) +{ + __clear_bit(ix, l2table->bitmap); +} + +int mlx5_mpfs_init(struct mlx5_core_dev *dev) +{ + int l2table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); + struct mlx5_mpfs *mpfs; + + if (!MLX5_VPORT_MANAGER(dev)) + return 0; + + mpfs = kzalloc(sizeof(*mpfs), GFP_KERNEL); + if (!mpfs) + return -ENOMEM; + + mutex_init(&mpfs->lock); + mpfs->size = l2table_size; + mpfs->bitmap = kcalloc(BITS_TO_LONGS(l2table_size), + sizeof(uintptr_t), GFP_KERNEL); + if (!mpfs->bitmap) { + kfree(mpfs); + return -ENOMEM; + } + + dev->priv.mpfs = mpfs; + return 0; +} + +void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + + if (!MLX5_VPORT_MANAGER(dev)) + return; + + WARN_ON(!hlist_empty(mpfs->hash)); + kfree(mpfs->bitmap); + kfree(mpfs); +} + +int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + struct l2table_node *l2addr; + u32 index; + int err; + + if (!MLX5_VPORT_MANAGER(dev)) + return 0; + + mutex_lock(&mpfs->lock); + + l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node); + if (l2addr) { + err = -EEXIST; + goto abort; + } + + err = alloc_l2table_index(mpfs, &index); + if (err) + goto abort; + + l2addr = l2addr_hash_add(mpfs->hash, mac, struct l2table_node, GFP_KERNEL); + if (!l2addr) { + free_l2table_index(mpfs, index); + err = -ENOMEM; + goto abort; + } + + l2addr->index = index; + err = set_l2table_entry_cmd(dev, index, mac); + if (err) { + l2addr_hash_del(l2addr); + free_l2table_index(mpfs, index); + } + + mlx5_core_dbg(dev, "MPFS mac added %pM, index (%d)\n", mac, index); +abort: + mutex_unlock(&mpfs->lock); + return err; +} + +int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) +{ + struct mlx5_mpfs *mpfs = dev->priv.mpfs; + struct l2table_node *l2addr; + int err = 0; + u32 index; + + if (!MLX5_VPORT_MANAGER(dev)) + return 0; + + mutex_lock(&mpfs->lock); + + l2addr = l2addr_hash_find(mpfs->hash, mac, struct l2table_node); + if (!l2addr) { + err = -ENOENT; + goto unlock; + } + + index = l2addr->index; + del_l2table_entry_cmd(dev, index); + l2addr_hash_del(l2addr); + free_l2table_index(mpfs, index); + mlx5_core_dbg(dev, "MPFS mac deleted %pM, index (%d)\n", mac, index); +unlock: + mutex_unlock(&mpfs->lock); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h new file mode 100644 index 000000000000..4a7b2c3203a7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2017, Mellanox Technologies, Ltd. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5_MPFS_H__ +#define __MLX5_MPFS_H__ + +#include <linux/if_ether.h> +#include <linux/mlx5/device.h> + +/* L2 -mac address based- hash helpers */ +#define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) +#define MLX5_L2_ADDR_HASH(addr) (addr[5]) + +struct l2addr_node { + struct hlist_node hlist; + u8 addr[ETH_ALEN]; +}; + +#define for_each_l2hash_node(hn, tmp, hash, i) \ + for (i = 0; i < MLX5_L2_ADDR_HASH_SIZE; i++) \ + hlist_for_each_entry_safe(hn, tmp, &(hash)[i], hlist) + +#define l2addr_hash_find(hash, mac, type) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + bool found = false; \ + type *ptr = NULL; \ + \ + hlist_for_each_entry(ptr, &(hash)[ix], node.hlist) \ + if (ether_addr_equal(ptr->node.addr, mac)) {\ + found = true; \ + break; \ + } \ + if (!found) \ + ptr = NULL; \ + ptr; \ +}) + +#define l2addr_hash_add(hash, mac, type, gfp) ({ \ + int ix = MLX5_L2_ADDR_HASH(mac); \ + type *ptr = NULL; \ + \ + ptr = kzalloc(sizeof(type), gfp); \ + if (ptr) { \ + ether_addr_copy(ptr->node.addr, mac); \ + hlist_add_head(&ptr->node.hlist, &(hash)[ix]);\ + } \ + ptr; \ +}) + +#define l2addr_hash_del(ptr) ({ \ + hlist_del(&(ptr)->node.hlist); \ + kfree(ptr); \ +}) + +#ifdef CONFIG_MLX5_MPFS +int mlx5_mpfs_init(struct mlx5_core_dev *dev); +void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev); +int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac); +int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac); +#else /* #ifndef CONFIG_MLX5_MPFS */ +static inline int mlx5_mpfs_init(struct mlx5_core_dev *dev) { return 0; } +static inline void mlx5_mpfs_cleanup(struct mlx5_core_dev *dev) {} +static inline int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } +static inline int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u8 *mac) { return 0; } +#endif +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 8c4b45ef539c..0d2c8dcd6eae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -54,9 +54,8 @@ #include <net/devlink.h> #include "mlx5_core.h" #include "fs_core.h" -#ifdef CONFIG_MLX5_CORE_EN +#include "lib/mpfs.h" #include "eswitch.h" -#endif #include "lib/mlx5.h" #include "fpga/core.h" #include "accel/ipsec.h" @@ -788,7 +787,6 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) return -EOPNOTSUPP; } - static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { struct pci_dev *pdev = dev->pdev; @@ -897,13 +895,17 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_tables_cleanup; } -#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_mpfs_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init l2 table %d\n", err); + goto err_rl_cleanup; + } + err = mlx5_eswitch_init(dev); if (err) { dev_err(&pdev->dev, "Failed to init eswitch %d\n", err); - goto err_rl_cleanup; + goto err_mpfs_cleanup; } -#endif err = mlx5_sriov_init(dev); if (err) { @@ -922,13 +924,11 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) err_sriov_cleanup: mlx5_sriov_cleanup(dev); err_eswitch_cleanup: -#ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); - +err_mpfs_cleanup: + mlx5_mpfs_cleanup(dev); err_rl_cleanup: -#endif mlx5_cleanup_rl_table(dev); - err_tables_cleanup: mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); @@ -946,9 +946,8 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) { mlx5_fpga_cleanup(dev); mlx5_sriov_cleanup(dev); -#ifdef CONFIG_MLX5_CORE_EN mlx5_eswitch_cleanup(dev->priv.eswitch); -#endif + mlx5_mpfs_cleanup(dev); mlx5_cleanup_rl_table(dev); mlx5_cleanup_reserved_gids(dev); mlx5_cleanup_mkey_table(dev); @@ -1106,10 +1105,6 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_fs; } -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_attach(dev->priv.eswitch); -#endif - err = mlx5_sriov_attach(dev); if (err) { dev_err(&pdev->dev, "sriov init failed %d\n", err); @@ -1152,9 +1147,6 @@ err_fpga_start: mlx5_sriov_detach(dev); err_sriov: -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_detach(dev->priv.eswitch); -#endif mlx5_cleanup_fs(dev); err_fs: @@ -1225,9 +1217,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_fpga_device_stop(dev); mlx5_sriov_detach(dev); -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_detach(dev->priv.eswitch); -#endif mlx5_cleanup_fs(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); @@ -1258,7 +1247,7 @@ struct mlx5_core_event_handler { }; static const struct devlink_ops mlx5_devlink_ops = { -#ifdef CONFIG_MLX5_CORE_EN +#ifdef CONFIG_MLX5_ESWITCH .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set, @@ -1298,6 +1287,9 @@ static int init_one(struct pci_dev *pdev, mutex_init(&dev->pci_status_mutex); mutex_init(&dev->intf_state_mutex); + INIT_LIST_HEAD(&priv->waiting_events_list); + priv->is_accum_events = false; + #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING err = init_srcu_struct(&priv->pfault_srcu); if (err) { @@ -1352,7 +1344,6 @@ clean_srcu: cleanup_srcu_struct(&priv->pfault_srcu); clean_dev: #endif - pci_set_drvdata(pdev, NULL); devlink_free(devlink); return err; @@ -1379,7 +1370,6 @@ static void remove_one(struct pci_dev *pdev) #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING cleanup_srcu_struct(&priv->pfault_srcu); #endif - pci_set_drvdata(pdev, NULL); devlink_free(devlink); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 01d637dac533..b7c2900b75f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -43,6 +43,10 @@ #define DRIVER_VERSION "5.0-0" #define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev)) +#define MLX5_VPORT_MANAGER(mdev) \ + (MLX5_CAP_GEN(mdev, vport_group_manager) && \ + (MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && \ + mlx5_core_is_pf(mdev)) extern uint mlx5_core_debug_mask; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index 28d8472b36f1..6c48e9959b65 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -34,9 +34,7 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/vport.h> #include "mlx5_core.h" -#ifdef CONFIG_MLX5_CORE_EN #include "eswitch.h" -#endif bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) { @@ -90,14 +88,12 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) return -EBUSY; } -#ifdef CONFIG_MLX5_CORE_EN err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); if (err) { mlx5_core_warn(dev, "failed to enable eswitch SRIOV (%d)\n", err); return err; } -#endif for (vf = 0; vf < num_vfs; vf++) { err = mlx5_core_enable_hca(dev, vf + 1); @@ -117,7 +113,6 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) } } mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf); - } return 0; @@ -130,11 +125,7 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) int vf; if (!sriov->enabled_vfs) -#ifdef CONFIG_MLX5_CORE_EN - goto disable_sriov_resources; -#else - return; -#endif + goto out; for (vf = 0; vf < sriov->num_vfs; vf++) { if (!sriov->vfs_ctx[vf].enabled) @@ -148,10 +139,8 @@ static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) sriov->enabled_vfs--; } -#ifdef CONFIG_MLX5_CORE_EN -disable_sriov_resources: +out: mlx5_eswitch_disable_sriov(dev->priv.eswitch); -#endif if (mlx5_wait_for_vf_pages(dev)) mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 695adff89d71..d56eea310509 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -75,6 +75,7 @@ config MLXSW_SPECTRUM depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q depends on PSAMPLE || PSAMPLE=n depends on BRIDGE || BRIDGE=n + depends on IPV6 || IPV6=n select PARMAN select MLXFW default m diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 62fc42f396bb..891ff418bb5e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -16,8 +16,9 @@ mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_switchdev.o spectrum_router.o \ spectrum_kvdl.o spectrum_acl_tcam.o \ spectrum_acl.o spectrum_flower.o \ - spectrum_cnt.o spectrum_dpipe.o \ - spectrum_fid.o + spectrum_cnt.o spectrum_fid.o \ + spectrum_ipip.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o +mlxsw_spectrum-$(CONFIG_NET_DEVLINK) += spectrum_dpipe.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o mlxsw_minimal-objs := minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index affe84eb4bff..9d5e7cf288be 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -667,7 +667,7 @@ static int mlxsw_emad_reg_access(struct mlxsw_core *mlxsw_core, int err; dev_dbg(mlxsw_core->bus_info->dev, "EMAD reg access (tid=%llx,reg_id=%x(%s),type=%s)\n", - trans->tid, reg->id, mlxsw_reg_id_str(reg->id), + tid, reg->id, mlxsw_reg_id_str(reg->id), mlxsw_core_reg_access_type_str(type)); skb = mlxsw_emad_alloc(mlxsw_core, reg->len); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h index 9807ef814e42..f6963b0b4a55 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -57,6 +57,9 @@ enum mlxsw_afk_element { MLXSW_AFK_ELEMENT_VID, MLXSW_AFK_ELEMENT_PCP, MLXSW_AFK_ELEMENT_TCP_FLAGS, + MLXSW_AFK_ELEMENT_IP_TTL_, + MLXSW_AFK_ELEMENT_IP_ECN, + MLXSW_AFK_ELEMENT_IP_DSCP, MLXSW_AFK_ELEMENT_MAX, }; @@ -104,6 +107,9 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12), MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3), MLXSW_AFK_ELEMENT_INFO_U32(TCP_FLAGS, 0x10, 23, 9), + MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x14, 0, 8), + MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x14, 9, 2), + MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x14, 11, 6), MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x18, 0, 32), MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x1C, 0, 32), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x18, 8), diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 1bd34d9a7b9e..cc27c5de5a1d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5,6 +5,7 @@ * Copyright (c) 2015 Elad Raz <eladr@mellanox.com> * Copyright (c) 2015-2017 Jiri Pirko <jiri@mellanox.com> * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com> + * Copyright (c) 2017 Petr Machata <petrm@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -3679,15 +3680,17 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP, MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP, MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP, - MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4, + MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP, MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF, MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP, - MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS, + MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS, MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP, MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE, MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME, MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP, MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT, + MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD, + MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND, }; /* reg_htgt_trap_group @@ -3952,10 +3955,12 @@ MLXSW_ITEM32(reg, rgcr, pcp_rw, 0x18, 16, 2); */ MLXSW_ITEM32(reg, rgcr, activity_dis, 0x20, 0, 8); -static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en) +static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en, + bool ipv6_en) { MLXSW_REG_ZERO(rgcr, payload); mlxsw_reg_rgcr_ipv4_en_set(payload, ipv4_en); + mlxsw_reg_rgcr_ipv6_en_set(payload, ipv6_en); } /* RITR - Router Interface Table Register @@ -3988,16 +3993,18 @@ MLXSW_ITEM32(reg, ritr, ipv4, 0x00, 29, 1); MLXSW_ITEM32(reg, ritr, ipv6, 0x00, 28, 1); enum mlxsw_reg_ritr_if_type { + /* VLAN interface. */ MLXSW_REG_RITR_VLAN_IF, + /* FID interface. */ MLXSW_REG_RITR_FID_IF, + /* Sub-port interface. */ MLXSW_REG_RITR_SP_IF, + /* Loopback Interface. */ + MLXSW_REG_RITR_LOOPBACK_IF, }; /* reg_ritr_type - * Router interface type. - * 0 - VLAN interface. - * 1 - FID interface. - * 2 - Sub-port interface. + * Router interface type as per enum mlxsw_reg_ritr_if_type. * Access: RW */ MLXSW_ITEM32(reg, ritr, type, 0x00, 23, 3); @@ -4125,6 +4132,67 @@ MLXSW_ITEM32(reg, ritr, sp_if_system_port, 0x08, 0, 16); */ MLXSW_ITEM32(reg, ritr, sp_if_vid, 0x18, 0, 12); +/* Loopback Interface */ + +enum mlxsw_reg_ritr_loopback_protocol { + /* IPinIP IPv4 underlay Unicast */ + MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4, + /* IPinIP IPv6 underlay Unicast */ + MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV6, +}; + +/* reg_ritr_loopback_protocol + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_protocol, 0x08, 28, 4); + +enum mlxsw_reg_ritr_loopback_ipip_type { + /* Tunnel is IPinIP. */ + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_IP, + /* Tunnel is GRE, no key. */ + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP, + /* Tunnel is GRE, with a key. */ + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP, +}; + +/* reg_ritr_loopback_ipip_type + * Encapsulation type. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_type, 0x10, 24, 4); + +enum mlxsw_reg_ritr_loopback_ipip_options { + /* The key is defined by gre_key. */ + MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET, +}; + +/* reg_ritr_loopback_ipip_options + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_options, 0x10, 20, 4); + +/* reg_ritr_loopback_ipip_uvr + * Underlay Virtual Router ID. + * Range is 0..cap_max_virtual_routers-1. + * Reserved for Spectrum-2. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_uvr, 0x10, 0, 16); + +/* reg_ritr_loopback_ipip_usip* + * Encapsulation Underlay source IP. + * Access: RW + */ +MLXSW_ITEM_BUF(reg, ritr, loopback_ipip_usip6, 0x18, 16); +MLXSW_ITEM32(reg, ritr, loopback_ipip_usip4, 0x24, 0, 32); + +/* reg_ritr_loopback_ipip_gre_key + * GRE Key. + * Reserved when ipip_type is not IP_IN_GRE_KEY_IN_IP. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, loopback_ipip_gre_key, 0x28, 0, 32); + /* Shared between ingress/egress */ enum mlxsw_reg_ritr_counter_set_type { /* No Count. */ @@ -4195,24 +4263,54 @@ static inline void mlxsw_reg_ritr_sp_if_pack(char *payload, bool lag, static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, enum mlxsw_reg_ritr_if_type type, - u16 rif, u16 vr_id, u16 mtu, - const char *mac) + u16 rif, u16 vr_id, u16 mtu) { bool op = enable ? MLXSW_REG_RITR_RIF_CREATE : MLXSW_REG_RITR_RIF_DEL; MLXSW_REG_ZERO(ritr, payload); mlxsw_reg_ritr_enable_set(payload, enable); mlxsw_reg_ritr_ipv4_set(payload, 1); + mlxsw_reg_ritr_ipv6_set(payload, 1); mlxsw_reg_ritr_type_set(payload, type); mlxsw_reg_ritr_op_set(payload, op); mlxsw_reg_ritr_rif_set(payload, rif); mlxsw_reg_ritr_ipv4_fe_set(payload, 1); + mlxsw_reg_ritr_ipv6_fe_set(payload, 1); mlxsw_reg_ritr_lb_en_set(payload, 1); mlxsw_reg_ritr_virtual_router_set(payload, vr_id); mlxsw_reg_ritr_mtu_set(payload, mtu); +} + +static inline void mlxsw_reg_ritr_mac_pack(char *payload, const char *mac) +{ mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac); } +static inline void +mlxsw_reg_ritr_loopback_ipip_common_pack(char *payload, + enum mlxsw_reg_ritr_loopback_ipip_type ipip_type, + enum mlxsw_reg_ritr_loopback_ipip_options options, + u16 uvr_id, u32 gre_key) +{ + mlxsw_reg_ritr_loopback_ipip_type_set(payload, ipip_type); + mlxsw_reg_ritr_loopback_ipip_options_set(payload, options); + mlxsw_reg_ritr_loopback_ipip_uvr_set(payload, uvr_id); + mlxsw_reg_ritr_loopback_ipip_gre_key_set(payload, gre_key); +} + +static inline void +mlxsw_reg_ritr_loopback_ipip4_pack(char *payload, + enum mlxsw_reg_ritr_loopback_ipip_type ipip_type, + enum mlxsw_reg_ritr_loopback_ipip_options options, + u16 uvr_id, u32 usip, u32 gre_key) +{ + mlxsw_reg_ritr_loopback_protocol_set(payload, + MLXSW_REG_RITR_LOOPBACK_PROTOCOL_IPIP_IPV4); + mlxsw_reg_ritr_loopback_ipip_common_pack(payload, ipip_type, options, + uvr_id, gre_key); + mlxsw_reg_ritr_loopback_ipip_usip4_set(payload, usip); +} + /* RATR - Router Adjacency Table Register * -------------------------------------- * The RATR register is used to configure the Router Adjacency (next-hop) @@ -4268,6 +4366,38 @@ MLXSW_ITEM32(reg, ratr, v, 0x00, 24, 1); */ MLXSW_ITEM32(reg, ratr, a, 0x00, 16, 1); +enum mlxsw_reg_ratr_type { + /* Ethernet */ + MLXSW_REG_RATR_TYPE_ETHERNET, + /* IPoIB Unicast without GRH. + * Reserved for Spectrum. + */ + MLXSW_REG_RATR_TYPE_IPOIB_UC, + /* IPoIB Unicast with GRH. Supported only in table 0 (Ethernet unicast + * adjacency). + * Reserved for Spectrum. + */ + MLXSW_REG_RATR_TYPE_IPOIB_UC_W_GRH, + /* IPoIB Multicast. + * Reserved for Spectrum. + */ + MLXSW_REG_RATR_TYPE_IPOIB_MC, + /* MPLS. + * Reserved for SwitchX/-2. + */ + MLXSW_REG_RATR_TYPE_MPLS, + /* IPinIP Encap. + * Reserved for SwitchX/-2. + */ + MLXSW_REG_RATR_TYPE_IPIP, +}; + +/* reg_ratr_type + * Adjacency entry type. + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, type, 0x04, 28, 4); + /* reg_ratr_adjacency_index_low * Bits 15:0 of index into the adjacency table. * For SwitchX and SwitchX-2, the adjacency table is linear and @@ -4297,17 +4427,17 @@ enum mlxsw_reg_ratr_trap_action { */ MLXSW_ITEM32(reg, ratr, trap_action, 0x0C, 28, 4); -enum mlxsw_reg_ratr_trap_id { - MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0 = 0, - MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1 = 1, -}; - /* reg_ratr_adjacency_index_high * Bits 23:16 of the adjacency_index. * Access: Index */ MLXSW_ITEM32(reg, ratr, adjacency_index_high, 0x0C, 16, 8); +enum mlxsw_reg_ratr_trap_id { + MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS0, + MLXSW_REG_RATR_TRAP_ID_RTR_EGRESS1, +}; + /* reg_ratr_trap_id * Trap ID to be reported to CPU. * Trap-ID is RTR_EGRESS0 or RTR_EGRESS1. @@ -4322,14 +4452,44 @@ MLXSW_ITEM32(reg, ratr, trap_id, 0x0C, 0, 8); */ MLXSW_ITEM_BUF(reg, ratr, eth_destination_mac, 0x12, 6); +enum mlxsw_reg_ratr_ipip_type { + /* IPv4, address set by mlxsw_reg_ratr_ipip_ipv4_udip. */ + MLXSW_REG_RATR_IPIP_TYPE_IPV4, + /* IPv6, address set by mlxsw_reg_ratr_ipip_ipv6_ptr. */ + MLXSW_REG_RATR_IPIP_TYPE_IPV6, +}; + +/* reg_ratr_ipip_type + * Underlay destination ip type. + * Note: the type field must match the protocol of the router interface. + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, ipip_type, 0x10, 16, 4); + +/* reg_ratr_ipip_ipv4_udip + * Underlay ipv4 dip. + * Reserved when ipip_type is IPv6. + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, ipip_ipv4_udip, 0x18, 0, 32); + +/* reg_ratr_ipip_ipv6_ptr + * Pointer to IPv6 underlay destination ip address. + * For Spectrum: Pointer to KVD linear space. + * Access: RW + */ +MLXSW_ITEM32(reg, ratr, ipip_ipv6_ptr, 0x1C, 0, 24); + static inline void mlxsw_reg_ratr_pack(char *payload, enum mlxsw_reg_ratr_op op, bool valid, + enum mlxsw_reg_ratr_type type, u32 adjacency_index, u16 egress_rif) { MLXSW_REG_ZERO(ratr, payload); mlxsw_reg_ratr_op_set(payload, op); mlxsw_reg_ratr_v_set(payload, valid); + mlxsw_reg_ratr_type_set(payload, type); mlxsw_reg_ratr_adjacency_index_low_set(payload, adjacency_index); mlxsw_reg_ratr_adjacency_index_high_set(payload, adjacency_index >> 16); mlxsw_reg_ratr_egress_router_interface_set(payload, egress_rif); @@ -4341,6 +4501,12 @@ static inline void mlxsw_reg_ratr_eth_entry_pack(char *payload, mlxsw_reg_ratr_eth_destination_mac_memcpy_to(payload, dest_mac); } +static inline void mlxsw_reg_ratr_ipip4_entry_pack(char *payload, u32 ipv4_udip) +{ + mlxsw_reg_ratr_ipip_type_set(payload, MLXSW_REG_RATR_IPIP_TYPE_IPV4); + mlxsw_reg_ratr_ipip_ipv4_udip_set(payload, ipv4_udip); +} + /* RICNT - Router Interface Counter Register * ----------------------------------------- * The RICNT register retrieves per port performance counters @@ -4712,12 +4878,13 @@ MLXSW_ITEM32(reg, ralue, prefix_len, 0x08, 0, 8); /* reg_ralue_dip* * The prefix of the route or of the marker that the object of the LPM * is compared with. The most significant bits of the dip are the prefix. - * The list significant bits must be '0' if the prefix_len is smaller + * The least significant bits must be '0' if the prefix_len is smaller * than 128 for IPv6 or smaller than 32 for IPv4. * IPv4 address uses bits dip[31:0] and bits dip[127:32] are reserved. * Access: Index */ MLXSW_ITEM32(reg, ralue, dip4, 0x18, 0, 32); +MLXSW_ITEM_BUF(reg, ralue, dip6, 0x0C, 16); enum mlxsw_reg_ralue_entry_type { MLXSW_REG_RALUE_ENTRY_TYPE_MARKER_ENTRY = 1, @@ -4806,7 +4973,7 @@ MLXSW_ITEM32(reg, ralue, ecmp_size, 0x28, 0, 13); */ MLXSW_ITEM32(reg, ralue, local_erif, 0x24, 0, 16); -/* reg_ralue_v +/* reg_ralue_ip2me_v * Valid bit for the tunnel_ptr field. * If valid = 0 then trap to CPU as IP2ME trap ID. * If valid = 1 and the packet format allows NVE or IPinIP tunnel @@ -4816,15 +4983,15 @@ MLXSW_ITEM32(reg, ralue, local_erif, 0x24, 0, 16); * Only relevant in case of IP2ME action. * Access: RW */ -MLXSW_ITEM32(reg, ralue, v, 0x24, 31, 1); +MLXSW_ITEM32(reg, ralue, ip2me_v, 0x24, 31, 1); -/* reg_ralue_tunnel_ptr +/* reg_ralue_ip2me_tunnel_ptr * Tunnel Pointer for NVE or IPinIP tunnel decapsulation. * For Spectrum, pointer to KVD Linear. * Only relevant in case of IP2ME action. * Access: RW */ -MLXSW_ITEM32(reg, ralue, tunnel_ptr, 0x24, 0, 24); +MLXSW_ITEM32(reg, ralue, ip2me_tunnel_ptr, 0x24, 0, 24); static inline void mlxsw_reg_ralue_pack(char *payload, enum mlxsw_reg_ralxx_protocol protocol, @@ -4851,6 +5018,16 @@ static inline void mlxsw_reg_ralue_pack4(char *payload, mlxsw_reg_ralue_dip4_set(payload, dip); } +static inline void mlxsw_reg_ralue_pack6(char *payload, + enum mlxsw_reg_ralxx_protocol protocol, + enum mlxsw_reg_ralue_op op, + u16 virtual_router, u8 prefix_len, + const void *dip) +{ + mlxsw_reg_ralue_pack(payload, protocol, op, virtual_router, prefix_len); + mlxsw_reg_ralue_dip6_memcpy_to(payload, dip); +} + static inline void mlxsw_reg_ralue_act_remote_pack(char *payload, enum mlxsw_reg_ralue_trap_action trap_action, @@ -4883,6 +5060,15 @@ mlxsw_reg_ralue_act_ip2me_pack(char *payload) MLXSW_REG_RALUE_ACTION_TYPE_IP2ME); } +static inline void +mlxsw_reg_ralue_act_ip2me_tun_pack(char *payload, u32 tunnel_ptr) +{ + mlxsw_reg_ralue_action_type_set(payload, + MLXSW_REG_RALUE_ACTION_TYPE_IP2ME); + mlxsw_reg_ralue_ip2me_v_set(payload, 1); + mlxsw_reg_ralue_ip2me_tunnel_ptr_set(payload, tunnel_ptr); +} + /* RAUHT - Router Algorithmic LPM Unicast Host Table Register * ---------------------------------------------------------- * The RAUHT register is used to configure and query the Unicast Host table in @@ -4954,6 +5140,7 @@ MLXSW_ITEM32(reg, rauht, rif, 0x00, 0, 16); * Access: Index */ MLXSW_ITEM32(reg, rauht, dip4, 0x1C, 0x0, 32); +MLXSW_ITEM_BUF(reg, rauht, dip6, 0x10, 16); enum mlxsw_reg_rauht_trap_action { MLXSW_REG_RAUHT_TRAP_ACTION_NOP, @@ -4982,6 +5169,15 @@ enum mlxsw_reg_rauht_trap_id { */ MLXSW_ITEM32(reg, rauht, trap_id, 0x60, 0, 9); +enum mlxsw_reg_flow_counter_set_type { + /* No count */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_NO_COUNT = 0x00, + /* Count packets and bytes */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, + /* Count only packets */ + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS = 0x05, +}; + /* reg_rauht_counter_set_type * Counter set type for flow counters * Access: RW @@ -5018,6 +5214,23 @@ static inline void mlxsw_reg_rauht_pack4(char *payload, mlxsw_reg_rauht_dip4_set(payload, dip); } +static inline void mlxsw_reg_rauht_pack6(char *payload, + enum mlxsw_reg_rauht_op op, u16 rif, + const char *mac, const char *dip) +{ + mlxsw_reg_rauht_pack(payload, op, rif, mac); + mlxsw_reg_rauht_type_set(payload, MLXSW_REG_RAUHT_TYPE_IPV6); + mlxsw_reg_rauht_dip6_memcpy_to(payload, dip); +} + +static inline void mlxsw_reg_rauht_pack_counter(char *payload, + u64 counter_index) +{ + mlxsw_reg_rauht_counter_index_set(payload, counter_index); + mlxsw_reg_rauht_counter_set_type_set(payload, + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES); +} + /* RALEU - Router Algorithmic LPM ECMP Update Register * --------------------------------------------------- * The register enables updating the ECMP section in the action for multiple @@ -5216,6 +5429,30 @@ MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0, MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv4_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, 0, 32, MLXSW_REG_RAUHTD_IPV4_ENT_LEN, 0x04, false); +#define MLXSW_REG_RAUHTD_IPV6_ENT_LEN 0x20 + +/* reg_rauhtd_ipv6_ent_a + * Activity. Set for new entries. Set if a packet lookup has hit on the + * specific entry. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_a, MLXSW_REG_RAUHTD_BASE_LEN, 16, 1, + MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv6_ent_rif + * Router interface. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, rauhtd, ipv6_ent_rif, MLXSW_REG_RAUHTD_BASE_LEN, 0, + 16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x00, false); + +/* reg_rauhtd_ipv6_ent_dip + * Destination IPv6 address. + * Access: RO + */ +MLXSW_ITEM_BUF_INDEXED(reg, rauhtd, ipv6_ent_dip, MLXSW_REG_RAUHTD_BASE_LEN, + 16, MLXSW_REG_RAUHTD_IPV6_ENT_LEN, 0x10); + static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload, int ent_index, u16 *p_rif, u32 *p_dip) @@ -5224,6 +5461,141 @@ static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload, *p_dip = mlxsw_reg_rauhtd_ipv4_ent_dip_get(payload, ent_index); } +static inline void mlxsw_reg_rauhtd_ent_ipv6_unpack(char *payload, + int rec_index, u16 *p_rif, + char *p_dip) +{ + *p_rif = mlxsw_reg_rauhtd_ipv6_ent_rif_get(payload, rec_index); + mlxsw_reg_rauhtd_ipv6_ent_dip_memcpy_from(payload, rec_index, p_dip); +} + +/* RTDP - Routing Tunnel Decap Properties Register + * ----------------------------------------------- + * The RTDP register is used for configuring the tunnel decap properties of NVE + * and IPinIP. + */ +#define MLXSW_REG_RTDP_ID 0x8020 +#define MLXSW_REG_RTDP_LEN 0x44 + +MLXSW_REG_DEFINE(rtdp, MLXSW_REG_RTDP_ID, MLXSW_REG_RTDP_LEN); + +enum mlxsw_reg_rtdp_type { + MLXSW_REG_RTDP_TYPE_NVE, + MLXSW_REG_RTDP_TYPE_IPIP, +}; + +/* reg_rtdp_type + * Type of the RTDP entry as per enum mlxsw_reg_rtdp_type. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, type, 0x00, 28, 4); + +/* reg_rtdp_tunnel_index + * Index to the Decap entry. + * For Spectrum, Index to KVD Linear. + * Access: Index + */ +MLXSW_ITEM32(reg, rtdp, tunnel_index, 0x00, 0, 24); + +/* IPinIP */ + +/* reg_rtdp_ipip_irif + * Ingress Router Interface for the overlay router + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_irif, 0x04, 16, 16); + +enum mlxsw_reg_rtdp_ipip_sip_check { + /* No sip checks. */ + MLXSW_REG_RTDP_IPIP_SIP_CHECK_NO, + /* Filter packet if underlay is not IPv4 or if underlay SIP does not + * equal ipv4_usip. + */ + MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4, + /* Filter packet if underlay is not IPv6 or if underlay SIP does not + * equal ipv6_usip. + */ + MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6 = 3, +}; + +/* reg_rtdp_ipip_sip_check + * SIP check to perform. If decapsulation failed due to these configurations + * then trap_id is IPIP_DECAP_ERROR. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_sip_check, 0x04, 0, 3); + +/* If set, allow decapsulation of IPinIP (without GRE). */ +#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_IPIP BIT(0) +/* If set, allow decapsulation of IPinGREinIP without a key. */ +#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE BIT(1) +/* If set, allow decapsulation of IPinGREinIP with a key. */ +#define MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY BIT(2) + +/* reg_rtdp_ipip_type_check + * Flags as per MLXSW_REG_RTDP_IPIP_TYPE_CHECK_*. If decapsulation failed due to + * these configurations then trap_id is IPIP_DECAP_ERROR. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_type_check, 0x08, 24, 3); + +/* reg_rtdp_ipip_gre_key_check + * Whether GRE key should be checked. When check is enabled: + * - A packet received as IPinIP (without GRE) will always pass. + * - A packet received as IPinGREinIP without a key will not pass the check. + * - A packet received as IPinGREinIP with a key will pass the check only if the + * key in the packet is equal to expected_gre_key. + * If decapsulation failed due to GRE key then trap_id is IPIP_DECAP_ERROR. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_gre_key_check, 0x08, 23, 1); + +/* reg_rtdp_ipip_ipv4_usip + * Underlay IPv4 address for ipv4 source address check. + * Reserved when sip_check is not '1'. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_ipv4_usip, 0x0C, 0, 32); + +/* reg_rtdp_ipip_ipv6_usip_ptr + * This field is valid when sip_check is "sipv6 check explicitly". This is a + * pointer to the IPv6 DIP which is configured by RIPS. For Spectrum, the index + * is to the KVD linear. + * Reserved when sip_check is not MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV6. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_ipv6_usip_ptr, 0x10, 0, 24); + +/* reg_rtdp_ipip_expected_gre_key + * GRE key for checking. + * Reserved when gre_key_check is '0'. + * Access: RW + */ +MLXSW_ITEM32(reg, rtdp, ipip_expected_gre_key, 0x14, 0, 32); + +static inline void mlxsw_reg_rtdp_pack(char *payload, + enum mlxsw_reg_rtdp_type type, + u32 tunnel_index) +{ + MLXSW_REG_ZERO(rtdp, payload); + mlxsw_reg_rtdp_type_set(payload, type); + mlxsw_reg_rtdp_tunnel_index_set(payload, tunnel_index); +} + +static inline void +mlxsw_reg_rtdp_ipip4_pack(char *payload, u16 irif, + enum mlxsw_reg_rtdp_ipip_sip_check sip_check, + unsigned int type_check, bool gre_key_check, + u32 ipv4_usip, u32 expected_gre_key) +{ + mlxsw_reg_rtdp_ipip_irif_set(payload, irif); + mlxsw_reg_rtdp_ipip_sip_check_set(payload, sip_check); + mlxsw_reg_rtdp_ipip_type_check_set(payload, type_check); + mlxsw_reg_rtdp_ipip_gre_key_check_set(payload, gre_key_check); + mlxsw_reg_rtdp_ipip_ipv4_usip_set(payload, ipv4_usip); + mlxsw_reg_rtdp_ipip_expected_gre_key_set(payload, expected_gre_key); +} + /* MFCR - Management Fan Control Register * -------------------------------------- * This register controls the settings of the Fan Speed PWM mechanism. @@ -5982,15 +6354,6 @@ static inline void mlxsw_reg_mpsc_pack(char *payload, u8 local_port, bool e, MLXSW_REG_DEFINE(mgpc, MLXSW_REG_MGPC_ID, MLXSW_REG_MGPC_LEN); -enum mlxsw_reg_mgpc_counter_set_type { - /* No count */ - MLXSW_REG_MGPC_COUNTER_SET_TYPE_NO_COUT = 0x00, - /* Count packets and bytes */ - MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, - /* Count only packets */ - MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS = 0x05, -}; - /* reg_mgpc_counter_set_type * Counter set type. * Access: OP @@ -6030,7 +6393,7 @@ MLXSW_ITEM64(reg, mgpc, packet_counter, 0x10, 0, 64); static inline void mlxsw_reg_mgpc_pack(char *payload, u32 counter_index, enum mlxsw_reg_mgpc_opcode opcode, - enum mlxsw_reg_mgpc_counter_set_type set_type) + enum mlxsw_reg_flow_counter_set_type set_type) { MLXSW_REG_ZERO(mgpc, payload); mlxsw_reg_mgpc_counter_index_set(payload, counter_index); @@ -6494,6 +6857,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(rgcr), MLXSW_REG(ritr), MLXSW_REG(ratr), + MLXSW_REG(rtdp), MLXSW_REG(ricnt), MLXSW_REG(ralta), MLXSW_REG(ralst), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index c6a3e61b53bd..ed7cd6c48019 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -58,6 +58,7 @@ #include <net/tc_act/tc_mirred.h> #include <net/netevent.h> #include <net/tc_act/tc_sample.h> +#include <net/addrconf.h> #include "spectrum.h" #include "pci.h" @@ -381,12 +382,14 @@ int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, int err; mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_NOP, - MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES); + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl); if (err) return err; - *packets = mlxsw_reg_mgpc_packet_counter_get(mgpc_pl); - *bytes = mlxsw_reg_mgpc_byte_counter_get(mgpc_pl); + if (packets) + *packets = mlxsw_reg_mgpc_packet_counter_get(mgpc_pl); + if (bytes) + *bytes = mlxsw_reg_mgpc_byte_counter_get(mgpc_pl); return 0; } @@ -396,7 +399,7 @@ static int mlxsw_sp_flow_counter_clear(struct mlxsw_sp *mlxsw_sp, char mgpc_pl[MLXSW_REG_MGPC_LEN]; mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_CLEAR, - MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES); + MLXSW_REG_FLOW_COUNTER_SET_TYPE_PACKETS_BYTES); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl); } @@ -1616,16 +1619,16 @@ mlxsw_sp_port_del_cls_matchall_sample(struct mlxsw_sp_port *mlxsw_sp_port) } static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, - __be16 protocol, - struct tc_cls_matchall_offload *cls, + struct tc_cls_matchall_offload *f, bool ingress) { struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry; + __be16 protocol = f->common.protocol; const struct tc_action *a; LIST_HEAD(actions); int err; - if (!tc_single_action(cls->exts)) { + if (!tcf_exts_has_one_action(f->exts)) { netdev_err(mlxsw_sp_port->dev, "only singular actions are supported\n"); return -EOPNOTSUPP; } @@ -1633,9 +1636,9 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL); if (!mall_tc_entry) return -ENOMEM; - mall_tc_entry->cookie = cls->cookie; + mall_tc_entry->cookie = f->cookie; - tcf_exts_to_list(cls->exts, &actions); + tcf_exts_to_list(f->exts, &actions); a = list_first_entry(&actions, struct tc_action, list); if (is_tcf_mirred_egress_mirror(a) && protocol == htons(ETH_P_ALL)) { @@ -1647,7 +1650,7 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, mirror, a, ingress); } else if (is_tcf_sample(a) && protocol == htons(ETH_P_ALL)) { mall_tc_entry->type = MLXSW_SP_PORT_MALL_SAMPLE; - err = mlxsw_sp_port_add_cls_matchall_sample(mlxsw_sp_port, cls, + err = mlxsw_sp_port_add_cls_matchall_sample(mlxsw_sp_port, f, a, ingress); } else { err = -EOPNOTSUPP; @@ -1665,12 +1668,12 @@ err_add_action: } static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, - struct tc_cls_matchall_offload *cls) + struct tc_cls_matchall_offload *f) { struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry; mall_tc_entry = mlxsw_sp_port_mall_tc_entry_find(mlxsw_sp_port, - cls->cookie); + f->cookie); if (!mall_tc_entry) { netdev_dbg(mlxsw_sp_port->dev, "tc entry not found on port\n"); return; @@ -1692,49 +1695,72 @@ static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, kfree(mall_tc_entry); } -static int mlxsw_sp_setup_tc(struct net_device *dev, u32 handle, - u32 chain_index, __be16 proto, - struct tc_to_netdev *tc) +static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_cls_matchall_offload *f) { - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS); + bool ingress; - if (chain_index) + if (is_classid_clsact_ingress(f->common.classid)) + ingress = true; + else if (is_classid_clsact_egress(f->common.classid)) + ingress = false; + else return -EOPNOTSUPP; - switch (tc->type) { - case TC_SETUP_MATCHALL: - switch (tc->cls_mall->command) { - case TC_CLSMATCHALL_REPLACE: - return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port, - proto, - tc->cls_mall, - ingress); - case TC_CLSMATCHALL_DESTROY: - mlxsw_sp_port_del_cls_matchall(mlxsw_sp_port, - tc->cls_mall); - return 0; - default: - return -EOPNOTSUPP; - } - case TC_SETUP_CLSFLOWER: - switch (tc->cls_flower->command) { - case TC_CLSFLOWER_REPLACE: - return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress, - proto, tc->cls_flower); - case TC_CLSFLOWER_DESTROY: - mlxsw_sp_flower_destroy(mlxsw_sp_port, ingress, - tc->cls_flower); - return 0; - case TC_CLSFLOWER_STATS: - return mlxsw_sp_flower_stats(mlxsw_sp_port, ingress, - tc->cls_flower); - default: - return -EOPNOTSUPP; - } + if (f->common.chain_index) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_CLSMATCHALL_REPLACE: + return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port, f, + ingress); + case TC_CLSMATCHALL_DESTROY: + mlxsw_sp_port_del_cls_matchall(mlxsw_sp_port, f); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int +mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_cls_flower_offload *f) +{ + bool ingress; + + if (is_classid_clsact_ingress(f->common.classid)) + ingress = true; + else if (is_classid_clsact_egress(f->common.classid)) + ingress = false; + else + return -EOPNOTSUPP; + + switch (f->command) { + case TC_CLSFLOWER_REPLACE: + return mlxsw_sp_flower_replace(mlxsw_sp_port, ingress, f); + case TC_CLSFLOWER_DESTROY: + mlxsw_sp_flower_destroy(mlxsw_sp_port, ingress, f); + return 0; + case TC_CLSFLOWER_STATS: + return mlxsw_sp_flower_stats(mlxsw_sp_port, ingress, f); + default: + return -EOPNOTSUPP; } +} - return -EOPNOTSUPP; +static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + switch (type) { + case TC_SETUP_CLSMATCHALL: + return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data); + case TC_SETUP_CLSFLOWER: + return mlxsw_sp_setup_tc_cls_flower(mlxsw_sp_port, type_data); + default: + return -EOPNOTSUPP; + } } static const struct net_device_ops mlxsw_sp_port_netdev_ops = { @@ -3333,15 +3359,48 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, ARP, false), MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, ARP, false), MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, IP2ME, false), + MLXSW_SP_RXL_MARK(IPV6_MLDV12_LISTENER_QUERY, MIRROR_TO_CPU, IPV6_MLD, + false), + MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD, + false), + MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_DONE, TRAP_TO_CPU, IPV6_MLD, + false), + MLXSW_SP_RXL_NO_MARK(IPV6_MLDV2_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD, + false), /* L3 traps */ - MLXSW_SP_RXL_NO_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_NO_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_NO_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(OSPF, TRAP_TO_CPU, OSPF, false), - MLXSW_SP_RXL_NO_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false), - MLXSW_SP_RXL_NO_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false), - MLXSW_SP_RXL_NO_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, ARP_MISS, false), - MLXSW_SP_RXL_NO_MARK(BGP_IPV4, TRAP_TO_CPU, BGP_IPV4, false), + MLXSW_SP_RXL_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false), + MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP, + false), + MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_SRC, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPV6_ALL_NODES_LINK, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPV6_ALL_ROUTERS_LINK, TRAP_TO_CPU, ROUTER_EXP, + false), + MLXSW_SP_RXL_MARK(IPV4_OSPF, TRAP_TO_CPU, OSPF, false), + MLXSW_SP_RXL_MARK(IPV6_OSPF, TRAP_TO_CPU, OSPF, false), + MLXSW_SP_RXL_MARK(IPV6_DHCP, TRAP_TO_CPU, DHCP, false), + MLXSW_SP_RXL_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false), + MLXSW_SP_RXL_MARK(IPV4_BGP, TRAP_TO_CPU, BGP, false), + MLXSW_SP_RXL_MARK(IPV6_BGP, TRAP_TO_CPU, BGP, false), + MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_SOLICITATION, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_SOLICITATION, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND, + false), + MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, TRAP_TO_CPU, IPV6_ND, false), + MLXSW_SP_RXL_MARK(IPV6_MC_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, + false), + MLXSW_SP_RXL_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, HOST_MISS, false), + MLXSW_SP_RXL_MARK(HOST_MISS_IPV6, TRAP_TO_CPU, HOST_MISS, false), + MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false), /* PKT Sample trap */ MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU, false, SP_IP2ME, DISCARD), @@ -3376,15 +3435,17 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) burst_size = 7; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD: rate = 16 * 1024; burst_size = 10; break; - case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP: - case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: rate = 1024; burst_size = 7; break; @@ -3433,21 +3494,23 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) priority = 5; tc = 5; break; - case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP: priority = 4; tc = 4; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD: priority = 3; tc = 3; break; case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND: priority = 2; tc = 2; break; - case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_HOST_MISS: case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP: case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE: priority = 1; @@ -3694,7 +3757,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) mlxsw_sp_fids_fini(mlxsw_sp); } -static struct mlxsw_config_profile mlxsw_sp_config_profile = { +static const struct mlxsw_config_profile mlxsw_sp_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, .used_max_mid = 1, @@ -4363,6 +4426,10 @@ static struct notifier_block mlxsw_sp_inetaddr_nb __read_mostly = { .priority = 10, /* Must be called before FIB notifier block */ }; +static struct notifier_block mlxsw_sp_inet6addr_nb __read_mostly = { + .notifier_call = mlxsw_sp_inet6addr_event, +}; + static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = { .notifier_call = mlxsw_sp_router_netevent_event, }; @@ -4383,6 +4450,7 @@ static int __init mlxsw_sp_module_init(void) register_netdevice_notifier(&mlxsw_sp_netdevice_nb); register_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); + register_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); register_netevent_notifier(&mlxsw_sp_router_netevent_nb); err = mlxsw_core_driver_register(&mlxsw_sp_driver); @@ -4399,6 +4467,7 @@ err_pci_driver_register: mlxsw_core_driver_unregister(&mlxsw_sp_driver); err_core_driver_register: unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); + unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); return err; @@ -4409,6 +4478,7 @@ static void __exit mlxsw_sp_module_exit(void) mlxsw_pci_driver_unregister(&mlxsw_sp_pci_driver); mlxsw_core_driver_unregister(&mlxsw_sp_driver); unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb); + unregister_inet6addr_notifier(&mlxsw_sp_inet6addr_nb); unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb); unregister_netdevice_notifier(&mlxsw_sp_netdevice_nb); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 5ef98d4d0ab6..84ce83acdc19 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -77,6 +77,7 @@ enum mlxsw_sp_rif_type { MLXSW_SP_RIF_TYPE_SUBPORT, MLXSW_SP_RIF_TYPE_VLAN, MLXSW_SP_RIF_TYPE_FID, + MLXSW_SP_RIF_TYPE_IPIP_LB, /* IP-in-IP loopback. */ MLXSW_SP_RIF_TYPE_MAX, }; @@ -384,6 +385,8 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, int mlxsw_sp_netdevice_router_port_event(struct net_device *dev); int mlxsw_sp_inetaddr_event(struct notifier_block *unused, unsigned long event, void *ptr); +int mlxsw_sp_inet6addr_event(struct notifier_block *unused, + unsigned long event, void *ptr); int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, struct netdev_notifier_changeupper_info *info); void @@ -415,6 +418,7 @@ struct mlxsw_sp_acl_profile_ops { int (*ruleset_bind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, struct net_device *dev, bool ingress); void (*ruleset_unbind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv); + u16 (*ruleset_group_id)(void *ruleset_priv); size_t rule_priv_size; int (*rule_add)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, void *rule_priv, @@ -438,11 +442,16 @@ struct mlxsw_sp_acl_ruleset; /* spectrum_acl.c */ struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl); struct mlxsw_sp_acl_ruleset * -mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, - struct net_device *dev, bool ingress, +mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp, struct net_device *dev, + bool ingress, u32 chain_index, + enum mlxsw_sp_acl_profile profile); +struct mlxsw_sp_acl_ruleset * +mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, struct net_device *dev, + bool ingress, u32 chain_index, enum mlxsw_sp_acl_profile profile); void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ruleset *ruleset); +u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset); struct mlxsw_sp_acl_rule_info * mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl); @@ -506,7 +515,7 @@ extern const struct mlxsw_sp_acl_ops mlxsw_sp_acl_tcam_ops; /* spectrum_flower.c */ int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, - __be16 protocol, struct tc_cls_flower_offload *f); + struct tc_cls_flower_offload *f); void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, struct tc_cls_flower_offload *f); int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 01a1501b56ca..4b2455e3e079 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -74,6 +74,7 @@ struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl) struct mlxsw_sp_acl_ruleset_ht_key { struct net_device *dev; /* dev this ruleset is bound to */ bool ingress; + u32 chain_index; const struct mlxsw_sp_acl_profile_ops *ops; }; @@ -163,7 +164,8 @@ static void mlxsw_sp_acl_ruleset_destroy(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ruleset *ruleset, - struct net_device *dev, bool ingress) + struct net_device *dev, bool ingress, + u32 chain_index) { const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; struct mlxsw_sp_acl *acl = mlxsw_sp->acl; @@ -171,13 +173,20 @@ static int mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp, ruleset->ht_key.dev = dev; ruleset->ht_key.ingress = ingress; + ruleset->ht_key.chain_index = chain_index; err = rhashtable_insert_fast(&acl->ruleset_ht, &ruleset->ht_node, mlxsw_sp_acl_ruleset_ht_params); if (err) return err; - err = ops->ruleset_bind(mlxsw_sp, ruleset->priv, dev, ingress); - if (err) - goto err_ops_ruleset_bind; + if (!ruleset->ht_key.chain_index) { + /* We only need ruleset with chain index 0, the implicit one, + * to be directly bound to device. The rest of the rulesets + * are bound by "Goto action set". + */ + err = ops->ruleset_bind(mlxsw_sp, ruleset->priv, dev, ingress); + if (err) + goto err_ops_ruleset_bind; + } return 0; err_ops_ruleset_bind: @@ -192,7 +201,8 @@ static void mlxsw_sp_acl_ruleset_unbind(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; struct mlxsw_sp_acl *acl = mlxsw_sp->acl; - ops->ruleset_unbind(mlxsw_sp, ruleset->priv); + if (!ruleset->ht_key.chain_index) + ops->ruleset_unbind(mlxsw_sp, ruleset->priv); rhashtable_remove_fast(&acl->ruleset_ht, &ruleset->ht_node, mlxsw_sp_acl_ruleset_ht_params); } @@ -211,14 +221,48 @@ static void mlxsw_sp_acl_ruleset_ref_dec(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_ruleset_destroy(mlxsw_sp, ruleset); } +static struct mlxsw_sp_acl_ruleset * +__mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp_acl *acl, struct net_device *dev, + bool ingress, u32 chain_index, + const struct mlxsw_sp_acl_profile_ops *ops) +{ + struct mlxsw_sp_acl_ruleset_ht_key ht_key; + + memset(&ht_key, 0, sizeof(ht_key)); + ht_key.dev = dev; + ht_key.ingress = ingress; + ht_key.chain_index = chain_index; + ht_key.ops = ops; + return rhashtable_lookup_fast(&acl->ruleset_ht, &ht_key, + mlxsw_sp_acl_ruleset_ht_params); +} + struct mlxsw_sp_acl_ruleset * -mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, - struct net_device *dev, bool ingress, +mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp, struct net_device *dev, + bool ingress, u32 chain_index, + enum mlxsw_sp_acl_profile profile) +{ + const struct mlxsw_sp_acl_profile_ops *ops; + struct mlxsw_sp_acl *acl = mlxsw_sp->acl; + struct mlxsw_sp_acl_ruleset *ruleset; + + ops = acl->ops->profile_ops(mlxsw_sp, profile); + if (!ops) + return ERR_PTR(-EINVAL); + ruleset = __mlxsw_sp_acl_ruleset_lookup(acl, dev, ingress, + chain_index, ops); + if (!ruleset) + return ERR_PTR(-ENOENT); + return ruleset; +} + +struct mlxsw_sp_acl_ruleset * +mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, struct net_device *dev, + bool ingress, u32 chain_index, enum mlxsw_sp_acl_profile profile) { const struct mlxsw_sp_acl_profile_ops *ops; struct mlxsw_sp_acl *acl = mlxsw_sp->acl; - struct mlxsw_sp_acl_ruleset_ht_key ht_key; struct mlxsw_sp_acl_ruleset *ruleset; int err; @@ -226,12 +270,8 @@ mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, if (!ops) return ERR_PTR(-EINVAL); - memset(&ht_key, 0, sizeof(ht_key)); - ht_key.dev = dev; - ht_key.ingress = ingress; - ht_key.ops = ops; - ruleset = rhashtable_lookup_fast(&acl->ruleset_ht, &ht_key, - mlxsw_sp_acl_ruleset_ht_params); + ruleset = __mlxsw_sp_acl_ruleset_lookup(acl, dev, ingress, + chain_index, ops); if (ruleset) { mlxsw_sp_acl_ruleset_ref_inc(ruleset); return ruleset; @@ -239,7 +279,8 @@ mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp, ruleset = mlxsw_sp_acl_ruleset_create(mlxsw_sp, ops); if (IS_ERR(ruleset)) return ruleset; - err = mlxsw_sp_acl_ruleset_bind(mlxsw_sp, ruleset, dev, ingress); + err = mlxsw_sp_acl_ruleset_bind(mlxsw_sp, ruleset, dev, + ingress, chain_index); if (err) goto err_ruleset_bind; return ruleset; @@ -255,6 +296,13 @@ void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset); } +u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset) +{ + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + + return ops->ruleset_group_id(ruleset->priv); +} + static int mlxsw_sp_acl_rulei_counter_alloc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei) @@ -369,7 +417,7 @@ int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, local_port = mlxsw_sp_port->local_port; in_port = false; } else { - /* If out_dev is NULL, the called wants to + /* If out_dev is NULL, the caller wants to * set forward to ingress port. */ local_port = 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h index 85d5001a5818..fb8031828454 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h @@ -70,6 +70,9 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = { static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4[] = { MLXSW_AFK_ELEMENT_INST_U32(SRC_IP4, 0x00, 0, 32), + MLXSW_AFK_ELEMENT_INST_U32(IP_ECN, 0x04, 4, 2), + MLXSW_AFK_ELEMENT_INST_U32(IP_TTL_, 0x04, 24, 8), + MLXSW_AFK_ELEMENT_INST_U32(IP_DSCP, 0x08, 0, 6), MLXSW_AFK_ELEMENT_INST_U32(TCP_FLAGS, 0x08, 8, 9), /* TCP_CONTROL+TCP_ECN */ }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 61a10f166f97..50b40de1fb91 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -295,6 +295,12 @@ mlxsw_sp_acl_tcam_group_unbind(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbt), ppbt_pl); } +static u16 +mlxsw_sp_acl_tcam_group_id(struct mlxsw_sp_acl_tcam_group *group) +{ + return group->id; +} + static unsigned int mlxsw_sp_acl_tcam_region_prio(struct mlxsw_sp_acl_tcam_region *region) { @@ -984,6 +990,9 @@ static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = { MLXSW_AFK_ELEMENT_VID, MLXSW_AFK_ELEMENT_PCP, MLXSW_AFK_ELEMENT_TCP_FLAGS, + MLXSW_AFK_ELEMENT_IP_TTL_, + MLXSW_AFK_ELEMENT_IP_ECN, + MLXSW_AFK_ELEMENT_IP_DSCP, }; static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv6[] = { @@ -1060,6 +1069,14 @@ mlxsw_sp_acl_tcam_flower_ruleset_unbind(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_tcam_group_unbind(mlxsw_sp, &ruleset->group); } +static u16 +mlxsw_sp_acl_tcam_flower_ruleset_group_id(void *ruleset_priv) +{ + struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv; + + return mlxsw_sp_acl_tcam_group_id(&ruleset->group); +} + static int mlxsw_sp_acl_tcam_flower_rule_add(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv, void *rule_priv, @@ -1096,6 +1113,7 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { .ruleset_del = mlxsw_sp_acl_tcam_flower_ruleset_del, .ruleset_bind = mlxsw_sp_acl_tcam_flower_ruleset_bind, .ruleset_unbind = mlxsw_sp_acl_tcam_flower_ruleset_unbind, + .ruleset_group_id = mlxsw_sp_acl_tcam_flower_ruleset_group_id, .rule_priv_size = sizeof(struct mlxsw_sp_acl_tcam_flower_rule), .rule_add = mlxsw_sp_acl_tcam_flower_rule_add, .rule_del = mlxsw_sp_acl_tcam_flower_rule_del, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index af2c65a3fd9f..51e6846da72b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -74,6 +74,9 @@ static struct devlink_dpipe_header mlxsw_sp_dpipe_header_metadata = { static struct devlink_dpipe_header *mlxsw_dpipe_headers[] = { &mlxsw_sp_dpipe_header_metadata, + &devlink_dpipe_header_ethernet, + &devlink_dpipe_header_ipv4, + &devlink_dpipe_header_ipv6, }; static struct devlink_dpipe_headers mlxsw_sp_dpipe_headers = { @@ -114,26 +117,6 @@ static int mlxsw_sp_dpipe_table_erif_matches_dump(void *priv, return devlink_dpipe_match_put(skb, &match); } -static void mlxsw_sp_erif_entry_clear(struct devlink_dpipe_entry *entry) -{ - unsigned int value_count, value_index; - struct devlink_dpipe_value *value; - - value = entry->action_values; - value_count = entry->action_values_count; - for (value_index = 0; value_index < value_count; value_index++) { - kfree(value[value_index].value); - kfree(value[value_index].mask); - } - - value = entry->match_values; - value_count = entry->match_values_count; - for (value_index = 0; value_index < value_count; value_index++) { - kfree(value[value_index].value); - kfree(value[value_index].mask); - } -} - static void mlxsw_sp_erif_match_action_prepare(struct devlink_dpipe_match *match, struct devlink_dpipe_action *action) @@ -215,8 +198,8 @@ static int mlxsw_sp_erif_entry_get(struct mlxsw_sp *mlxsw_sp, } static int -mlxsw_sp_table_erif_entries_dump(void *priv, bool counters_enabled, - struct devlink_dpipe_dump_ctx *dump_ctx) +mlxsw_sp_dpipe_table_erif_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) { struct devlink_dpipe_value match_value, action_value; struct devlink_dpipe_action action = {0}; @@ -270,16 +253,16 @@ start_again: goto start_again; rtnl_unlock(); - mlxsw_sp_erif_entry_clear(&entry); + devlink_dpipe_entry_clear(&entry); return 0; err_entry_append: err_entry_get: rtnl_unlock(); - mlxsw_sp_erif_entry_clear(&entry); + devlink_dpipe_entry_clear(&entry); return err; } -static int mlxsw_sp_table_erif_counters_update(void *priv, bool enable) +static int mlxsw_sp_dpipe_table_erif_counters_update(void *priv, bool enable) { struct mlxsw_sp *mlxsw_sp = priv; int i; @@ -301,24 +284,29 @@ static int mlxsw_sp_table_erif_counters_update(void *priv, bool enable) return 0; } +static u64 mlxsw_sp_dpipe_table_erif_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); +} + static struct devlink_dpipe_table_ops mlxsw_sp_erif_ops = { .matches_dump = mlxsw_sp_dpipe_table_erif_matches_dump, .actions_dump = mlxsw_sp_dpipe_table_erif_actions_dump, - .entries_dump = mlxsw_sp_table_erif_entries_dump, - .counters_set_update = mlxsw_sp_table_erif_counters_update, + .entries_dump = mlxsw_sp_dpipe_table_erif_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_erif_counters_update, + .size_get = mlxsw_sp_dpipe_table_erif_size_get, }; static int mlxsw_sp_dpipe_erif_table_init(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); - u64 table_size; - table_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); return devlink_dpipe_table_register(devlink, MLXSW_SP_DPIPE_TABLE_NAME_ERIF, &mlxsw_sp_erif_ops, - mlxsw_sp, table_size, - false); + mlxsw_sp, false); } static void mlxsw_sp_dpipe_erif_table_fini(struct mlxsw_sp *mlxsw_sp) @@ -328,6 +316,516 @@ static void mlxsw_sp_dpipe_erif_table_fini(struct mlxsw_sp *mlxsw_sp) devlink_dpipe_table_unregister(devlink, MLXSW_SP_DPIPE_TABLE_NAME_ERIF); } +static int mlxsw_sp_dpipe_table_host_matches_dump(struct sk_buff *skb, int type) +{ + struct devlink_dpipe_match match = {0}; + int err; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; + + err = devlink_dpipe_match_put(skb, &match); + if (err) + return err; + + switch (type) { + case AF_INET: + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &devlink_dpipe_header_ipv4; + match.field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP; + break; + case AF_INET6: + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &devlink_dpipe_header_ipv6; + match.field_id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP; + break; + default: + WARN_ON(1); + return -EINVAL; + } + + return devlink_dpipe_match_put(skb, &match); +} + +static int +mlxsw_sp_dpipe_table_host4_matches_dump(void *priv, struct sk_buff *skb) +{ + return mlxsw_sp_dpipe_table_host_matches_dump(skb, AF_INET); +} + +static int +mlxsw_sp_dpipe_table_host_actions_dump(void *priv, struct sk_buff *skb) +{ + struct devlink_dpipe_action action = {0}; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &devlink_dpipe_header_ethernet; + action.field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC; + + return devlink_dpipe_action_put(skb, &action); +} + +enum mlxsw_sp_dpipe_table_host_match { + MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF, + MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP, + MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT, +}; + +static void +mlxsw_sp_dpipe_table_host_match_action_prepare(struct devlink_dpipe_match *matches, + struct devlink_dpipe_action *action, + int type) +{ + struct devlink_dpipe_match *match; + + match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; + + match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + switch (type) { + case AF_INET: + match->header = &devlink_dpipe_header_ipv4; + match->field_id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP; + break; + case AF_INET6: + match->header = &devlink_dpipe_header_ipv6; + match->field_id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP; + break; + default: + WARN_ON(1); + return; + } + + action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action->header = &devlink_dpipe_header_ethernet; + action->field_id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC; +} + +static int +mlxsw_sp_dpipe_table_host_entry_prepare(struct devlink_dpipe_entry *entry, + struct devlink_dpipe_value *match_values, + struct devlink_dpipe_match *matches, + struct devlink_dpipe_value *action_value, + struct devlink_dpipe_action *action, + int type) +{ + struct devlink_dpipe_value *match_value; + struct devlink_dpipe_match *match; + + entry->match_values = match_values; + entry->match_values_count = MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT; + + entry->action_values = action_value; + entry->action_values_count = 1; + + match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF]; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + match = &matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; + match_value = &match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; + + match_value->match = match; + switch (type) { + case AF_INET: + match_value->value_size = sizeof(u32); + break; + case AF_INET6: + match_value->value_size = sizeof(struct in6_addr); + break; + default: + WARN_ON(1); + return -EINVAL; + } + + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + action_value->action = action; + action_value->value_size = sizeof(u64); + action_value->value = kmalloc(action_value->value_size, GFP_KERNEL); + if (!action_value->value) + return -ENOMEM; + + return 0; +} + +static void +__mlxsw_sp_dpipe_table_host_entry_fill(struct devlink_dpipe_entry *entry, + struct mlxsw_sp_rif *rif, + unsigned char *ha, void *dip) +{ + struct devlink_dpipe_value *value; + u32 *rif_value; + u8 *ha_value; + + /* Set Match RIF index */ + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_RIF]; + + rif_value = value->value; + *rif_value = mlxsw_sp_rif_index(rif); + value->mapping_value = mlxsw_sp_rif_dev_ifindex(rif); + value->mapping_valid = true; + + /* Set Match DIP */ + value = &entry->match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_DIP]; + memcpy(value->value, dip, value->value_size); + + /* Set Action DMAC */ + value = entry->action_values; + ha_value = value->value; + ether_addr_copy(ha_value, ha); +} + +static void +mlxsw_sp_dpipe_table_host4_entry_fill(struct devlink_dpipe_entry *entry, + struct mlxsw_sp_neigh_entry *neigh_entry, + struct mlxsw_sp_rif *rif) +{ + unsigned char *ha; + u32 dip; + + ha = mlxsw_sp_neigh_entry_ha(neigh_entry); + dip = mlxsw_sp_neigh4_entry_dip(neigh_entry); + __mlxsw_sp_dpipe_table_host_entry_fill(entry, rif, ha, &dip); +} + +static void +mlxsw_sp_dpipe_table_host6_entry_fill(struct devlink_dpipe_entry *entry, + struct mlxsw_sp_neigh_entry *neigh_entry, + struct mlxsw_sp_rif *rif) +{ + struct in6_addr *dip; + unsigned char *ha; + + ha = mlxsw_sp_neigh_entry_ha(neigh_entry); + dip = mlxsw_sp_neigh6_entry_dip(neigh_entry); + + __mlxsw_sp_dpipe_table_host_entry_fill(entry, rif, ha, dip); +} + +static void +mlxsw_sp_dpipe_table_host_entry_fill(struct mlxsw_sp *mlxsw_sp, + struct devlink_dpipe_entry *entry, + struct mlxsw_sp_neigh_entry *neigh_entry, + struct mlxsw_sp_rif *rif, + int type) +{ + int err; + + switch (type) { + case AF_INET: + mlxsw_sp_dpipe_table_host4_entry_fill(entry, neigh_entry, rif); + break; + case AF_INET6: + mlxsw_sp_dpipe_table_host6_entry_fill(entry, neigh_entry, rif); + break; + default: + WARN_ON(1); + return; + } + + err = mlxsw_sp_neigh_counter_get(mlxsw_sp, neigh_entry, + &entry->counter); + if (!err) + entry->counter_valid = true; +} + +static int +mlxsw_sp_dpipe_table_host_entries_get(struct mlxsw_sp *mlxsw_sp, + struct devlink_dpipe_entry *entry, + bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx, + int type) +{ + int rif_neigh_count = 0; + int rif_neigh_skip = 0; + int neigh_count = 0; + int rif_count; + int i, j; + int err; + + rtnl_lock(); + i = 0; + rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); +start_again: + err = devlink_dpipe_entry_ctx_prepare(dump_ctx); + if (err) + goto err_ctx_prepare; + j = 0; + rif_neigh_skip = rif_neigh_count; + for (; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { + struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); + struct mlxsw_sp_neigh_entry *neigh_entry; + + if (!rif) + continue; + + rif_neigh_count = 0; + mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) { + int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry); + + if (neigh_type != type) + continue; + + if (neigh_type == AF_INET6 && + mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + continue; + + if (rif_neigh_count < rif_neigh_skip) + goto skip; + + mlxsw_sp_dpipe_table_host_entry_fill(mlxsw_sp, entry, + neigh_entry, rif, + type); + entry->index = neigh_count; + err = devlink_dpipe_entry_ctx_append(dump_ctx, entry); + if (err) { + if (err == -EMSGSIZE) { + if (!j) + goto err_entry_append; + else + goto out; + } + goto err_entry_append; + } + neigh_count++; + j++; +skip: + rif_neigh_count++; + } + rif_neigh_skip = 0; + } +out: + devlink_dpipe_entry_ctx_close(dump_ctx); + if (i != rif_count) + goto start_again; + + rtnl_unlock(); + return 0; + +err_ctx_prepare: +err_entry_append: + rtnl_unlock(); + return err; +} + +static int +mlxsw_sp_dpipe_table_host_entries_dump(struct mlxsw_sp *mlxsw_sp, + bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx, + int type) +{ + struct devlink_dpipe_value match_values[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT]; + struct devlink_dpipe_match matches[MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT]; + struct devlink_dpipe_value action_value; + struct devlink_dpipe_action action = {0}; + struct devlink_dpipe_entry entry = {0}; + int err; + + memset(matches, 0, MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT * + sizeof(matches[0])); + memset(match_values, 0, MLXSW_SP_DPIPE_TABLE_HOST_MATCH_COUNT * + sizeof(match_values[0])); + memset(&action_value, 0, sizeof(action_value)); + + mlxsw_sp_dpipe_table_host_match_action_prepare(matches, &action, type); + err = mlxsw_sp_dpipe_table_host_entry_prepare(&entry, match_values, + matches, &action_value, + &action, type); + if (err) + goto out; + + err = mlxsw_sp_dpipe_table_host_entries_get(mlxsw_sp, &entry, + counters_enabled, dump_ctx, + type); +out: + devlink_dpipe_entry_clear(&entry); + return err; +} + +static int +mlxsw_sp_dpipe_table_host4_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_entries_dump(mlxsw_sp, + counters_enabled, + dump_ctx, AF_INET); +} + +static void +mlxsw_sp_dpipe_table_host_counters_update(struct mlxsw_sp *mlxsw_sp, + bool enable, int type) +{ + int i; + + rtnl_lock(); + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { + struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); + struct mlxsw_sp_neigh_entry *neigh_entry; + + if (!rif) + continue; + mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) { + int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry); + + if (neigh_type != type) + continue; + + if (neigh_type == AF_INET6 && + mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + continue; + + mlxsw_sp_neigh_entry_counter_update(mlxsw_sp, + neigh_entry, + enable); + } + } + rtnl_unlock(); +} + +static int mlxsw_sp_dpipe_table_host4_counters_update(void *priv, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_dpipe_table_host_counters_update(mlxsw_sp, enable, AF_INET); + return 0; +} + +static u64 +mlxsw_sp_dpipe_table_host_size_get(struct mlxsw_sp *mlxsw_sp, int type) +{ + u64 size = 0; + int i; + + rtnl_lock(); + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { + struct mlxsw_sp_rif *rif = mlxsw_sp_rif_by_index(mlxsw_sp, i); + struct mlxsw_sp_neigh_entry *neigh_entry; + + if (!rif) + continue; + mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) { + int neigh_type = mlxsw_sp_neigh_entry_type(neigh_entry); + + if (neigh_type != type) + continue; + + if (neigh_type == AF_INET6 && + mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + continue; + + size++; + } + } + rtnl_unlock(); + + return size; +} + +static u64 mlxsw_sp_dpipe_table_host4_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_size_get(mlxsw_sp, AF_INET); +} + +static struct devlink_dpipe_table_ops mlxsw_sp_host4_ops = { + .matches_dump = mlxsw_sp_dpipe_table_host4_matches_dump, + .actions_dump = mlxsw_sp_dpipe_table_host_actions_dump, + .entries_dump = mlxsw_sp_dpipe_table_host4_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_host4_counters_update, + .size_get = mlxsw_sp_dpipe_table_host4_size_get, +}; + +static int mlxsw_sp_dpipe_host4_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + return devlink_dpipe_table_register(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST4, + &mlxsw_sp_host4_ops, + mlxsw_sp, false); +} + +static void mlxsw_sp_dpipe_host4_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devlink_dpipe_table_unregister(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST4); +} + +static int +mlxsw_sp_dpipe_table_host6_matches_dump(void *priv, struct sk_buff *skb) +{ + return mlxsw_sp_dpipe_table_host_matches_dump(skb, AF_INET6); +} + +static int +mlxsw_sp_dpipe_table_host6_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_entries_dump(mlxsw_sp, + counters_enabled, + dump_ctx, AF_INET6); +} + +static int mlxsw_sp_dpipe_table_host6_counters_update(void *priv, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + mlxsw_sp_dpipe_table_host_counters_update(mlxsw_sp, enable, AF_INET6); + return 0; +} + +static u64 mlxsw_sp_dpipe_table_host6_size_get(void *priv) +{ + struct mlxsw_sp *mlxsw_sp = priv; + + return mlxsw_sp_dpipe_table_host_size_get(mlxsw_sp, AF_INET6); +} + +static struct devlink_dpipe_table_ops mlxsw_sp_host6_ops = { + .matches_dump = mlxsw_sp_dpipe_table_host6_matches_dump, + .actions_dump = mlxsw_sp_dpipe_table_host_actions_dump, + .entries_dump = mlxsw_sp_dpipe_table_host6_entries_dump, + .counters_set_update = mlxsw_sp_dpipe_table_host6_counters_update, + .size_get = mlxsw_sp_dpipe_table_host6_size_get, +}; + +static int mlxsw_sp_dpipe_host6_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + return devlink_dpipe_table_register(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST6, + &mlxsw_sp_host6_ops, + mlxsw_sp, false); +} + +static void mlxsw_sp_dpipe_host6_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devlink_dpipe_table_unregister(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_HOST6); +} + int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); @@ -339,10 +837,22 @@ int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) return err; err = mlxsw_sp_dpipe_erif_table_init(mlxsw_sp); if (err) - goto err_erif_register; + goto err_erif_table_init; + + err = mlxsw_sp_dpipe_host4_table_init(mlxsw_sp); + if (err) + goto err_host4_table_init; + + err = mlxsw_sp_dpipe_host6_table_init(mlxsw_sp); + if (err) + goto err_host6_table_init; return 0; -err_erif_register: +err_host6_table_init: + mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); +err_host4_table_init: + mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp); +err_erif_table_init: devlink_dpipe_headers_unregister(priv_to_devlink(mlxsw_sp->core)); return err; } @@ -351,6 +861,8 @@ void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) { struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + mlxsw_sp_dpipe_host6_table_fini(mlxsw_sp); + mlxsw_sp_dpipe_host4_table_fini(mlxsw_sp); mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp); devlink_dpipe_headers_unregister(devlink); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h index d2089298cba3..283fde4e6783 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h @@ -35,9 +35,26 @@ #ifndef _MLXSW_PIPELINE_H_ #define _MLXSW_PIPELINE_H_ +#if IS_ENABLED(CONFIG_NET_DEVLINK) + int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp); +#else + +static inline int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) +{ + return 0; +} + +static inline void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) +{ +} + +#endif + #define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif" +#define MLXSW_SP_DPIPE_TABLE_NAME_HOST4 "mlxsw_host4" +#define MLXSW_SP_DPIPE_TABLE_NAME_HOST6 "mlxsw_host6" #endif /* _MLXSW_PIPELINE_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c index 6afbe9ec64e2..bbd238e50f05 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_fid.c @@ -109,7 +109,6 @@ static const int mlxsw_sp_sfgc_uc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { [MLXSW_REG_SFGC_TYPE_BROADCAST] = 1, - [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6] = 1, [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_NON_IP] = 1, [MLXSW_REG_SFGC_TYPE_IPV4_LINK_LOCAL] = 1, [MLXSW_REG_SFGC_TYPE_IPV6_ALL_HOST] = 1, @@ -117,6 +116,7 @@ static const int mlxsw_sp_sfgc_bc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { static const int mlxsw_sp_sfgc_mc_packet_types[MLXSW_REG_SFGC_TYPE_MAX] = { [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV4] = 1, + [MLXSW_REG_SFGC_TYPE_UNREGISTERED_MULTICAST_IPV6] = 1, }; static const int *mlxsw_sp_packet_type_sfgc_types[] = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index 21bb2bf62d3e..8aace9a06a5d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -45,7 +45,7 @@ #include "core_acl_flex_keys.h" static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, - struct net_device *dev, + struct net_device *dev, bool ingress, struct mlxsw_sp_acl_rule_info *rulei, struct tcf_exts *exts) { @@ -53,7 +53,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, LIST_HEAD(actions); int err; - if (tc_no_actions(exts)) + if (!tcf_exts_has_actions(exts)) return 0; /* Count action is inserted first */ @@ -71,6 +71,20 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, err = mlxsw_sp_acl_rulei_act_trap(rulei); if (err) return err; + } else if (is_tcf_gact_goto_chain(a)) { + u32 chain_index = tcf_gact_goto_chain_index(a); + struct mlxsw_sp_acl_ruleset *ruleset; + u16 group_id; + + ruleset = mlxsw_sp_acl_ruleset_lookup(mlxsw_sp, dev, + ingress, + chain_index, + MLXSW_SP_ACL_PROFILE_FLOWER); + if (IS_ERR(ruleset)) + return PTR_ERR(ruleset); + + group_id = mlxsw_sp_acl_ruleset_group_id(ruleset); + mlxsw_sp_acl_rulei_act_jump(rulei, group_id); } else if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); struct net_device *out_dev; @@ -212,11 +226,46 @@ static int mlxsw_sp_flower_parse_tcp(struct mlxsw_sp *mlxsw_sp, return 0; } +static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + struct tc_cls_flower_offload *f, + u16 n_proto) +{ + struct flow_dissector_key_ip *key, *mask; + + if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) + return 0; + + if (n_proto != ETH_P_IP && n_proto != ETH_P_IPV6) { + dev_err(mlxsw_sp->bus_info->dev, "IP keys supported only for IPv4/6\n"); + return -EINVAL; + } + + key = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IP, + f->key); + mask = skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_IP, + f->mask); + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_TTL_, + key->ttl, mask->ttl); + + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_ECN, + key->tos & 0x3, mask->tos & 0x3); + + mlxsw_sp_acl_rulei_keymask_u32(rulei, MLXSW_AFK_ELEMENT_IP_DSCP, + key->tos >> 6, mask->tos >> 6); + + return 0; +} + static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, - struct net_device *dev, + struct net_device *dev, bool ingress, struct mlxsw_sp_acl_rule_info *rulei, struct tc_cls_flower_offload *f) { + u16 n_proto_mask = 0; + u16 n_proto_key = 0; u16 addr_type = 0; u8 ip_proto = 0; int err; @@ -229,12 +278,13 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | BIT(FLOW_DISSECTOR_KEY_PORTS) | BIT(FLOW_DISSECTOR_KEY_TCP) | + BIT(FLOW_DISSECTOR_KEY_IP) | BIT(FLOW_DISSECTOR_KEY_VLAN))) { dev_err(mlxsw_sp->bus_info->dev, "Unsupported key\n"); return -EOPNOTSUPP; } - mlxsw_sp_acl_rulei_priority(rulei, f->prio); + mlxsw_sp_acl_rulei_priority(rulei, f->common.prio); if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { struct flow_dissector_key_control *key = @@ -253,8 +303,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, skb_flow_dissector_target(f->dissector, FLOW_DISSECTOR_KEY_BASIC, f->mask); - u16 n_proto_key = ntohs(key->n_proto); - u16 n_proto_mask = ntohs(mask->n_proto); + n_proto_key = ntohs(key->n_proto); + n_proto_mask = ntohs(mask->n_proto); if (n_proto_key == ETH_P_ALL) { n_proto_key = 0; @@ -324,11 +374,16 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, if (err) return err; - return mlxsw_sp_flower_parse_actions(mlxsw_sp, dev, rulei, f->exts); + err = mlxsw_sp_flower_parse_ip(mlxsw_sp, rulei, f, n_proto_key & n_proto_mask); + if (err) + return err; + + return mlxsw_sp_flower_parse_actions(mlxsw_sp, dev, ingress, + rulei, f->exts); } int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, - __be16 protocol, struct tc_cls_flower_offload *f) + struct tc_cls_flower_offload *f) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct net_device *dev = mlxsw_sp_port->dev; @@ -338,6 +393,7 @@ int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, int err; ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, dev, ingress, + f->common.chain_index, MLXSW_SP_ACL_PROFILE_FLOWER); if (IS_ERR(ruleset)) return PTR_ERR(ruleset); @@ -349,7 +405,7 @@ int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, } rulei = mlxsw_sp_acl_rule_rulei(rule); - err = mlxsw_sp_flower_parse(mlxsw_sp, dev, rulei, f); + err = mlxsw_sp_flower_parse(mlxsw_sp, dev, ingress, rulei, f); if (err) goto err_flower_parse; @@ -381,7 +437,7 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, struct mlxsw_sp_acl_rule *rule; ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev, - ingress, + ingress, f->common.chain_index, MLXSW_SP_ACL_PROFILE_FLOWER); if (IS_ERR(ruleset)) return; @@ -407,7 +463,7 @@ int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, int err; ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev, - ingress, + ingress, f->common.chain_index, MLXSW_SP_ACL_PROFILE_FLOWER); if (WARN_ON(IS_ERR(ruleset))) return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c new file mode 100644 index 000000000000..702fe945227c --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -0,0 +1,214 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Petr Machata <petrm@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <net/ip_tunnels.h> + +#include "spectrum_ipip.h" + +static bool +mlxsw_sp_ipip_netdev_has_ikey(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return !!(tun->parms.i_flags & TUNNEL_KEY); +} + +static bool +mlxsw_sp_ipip_netdev_has_okey(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return !!(tun->parms.o_flags & TUNNEL_KEY); +} + +static u32 mlxsw_sp_ipip_netdev_ikey(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return mlxsw_sp_ipip_netdev_has_ikey(ol_dev) ? + be32_to_cpu(tun->parms.i_key) : 0; +} + +static u32 mlxsw_sp_ipip_netdev_okey(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return mlxsw_sp_ipip_netdev_has_okey(ol_dev) ? + be32_to_cpu(tun->parms.o_key) : 0; +} + +static int +mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); + __be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev); + char ratr_pl[MLXSW_REG_RATR_LEN]; + + mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, + true, MLXSW_REG_RATR_TYPE_IPIP, + adj_index, rif_index); + mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4)); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); +} + +static int +mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(struct mlxsw_sp *mlxsw_sp, + u32 tunnel_index, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + bool has_ikey = mlxsw_sp_ipip_netdev_has_ikey(ipip_entry->ol_dev); + u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); + u32 ikey = mlxsw_sp_ipip_netdev_ikey(ipip_entry->ol_dev); + char rtdp_pl[MLXSW_REG_RTDP_LEN]; + unsigned int type_check; + u32 daddr4; + + mlxsw_reg_rtdp_pack(rtdp_pl, MLXSW_REG_RTDP_TYPE_IPIP, tunnel_index); + + type_check = has_ikey ? + MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE_KEY : + MLXSW_REG_RTDP_IPIP_TYPE_CHECK_ALLOW_GRE; + + /* Linux demuxes tunnels based on packet SIP (which must match tunnel + * remote IP). Thus configure decap so that it filters out packets that + * are not IPv4 or have the wrong SIP. IPIP_DECAP_ERROR trap is + * generated for packets that fail this criterion. Linux then handles + * such packets in slow path and generates ICMP destination unreachable. + */ + daddr4 = be32_to_cpu(mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev)); + mlxsw_reg_rtdp_ipip4_pack(rtdp_pl, rif_index, + MLXSW_REG_RTDP_IPIP_SIP_CHECK_FILTER_IPV4, + type_check, has_ikey, daddr4, ikey); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl); +} + +static int +mlxsw_sp_ipip_fib_entry_op_gre4_ralue(struct mlxsw_sp *mlxsw_sp, + u32 dip, u8 prefix_len, u16 ul_vr_id, + enum mlxsw_reg_ralue_op op, + u32 tunnel_index) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; + + mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_REG_RALXX_PROTOCOL_IPV4, op, + ul_vr_id, prefix_len, dip); + mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl, tunnel_index); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static int mlxsw_sp_ipip_fib_entry_op_gre4(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + enum mlxsw_reg_ralue_op op, + u32 tunnel_index) +{ + u16 ul_vr_id = mlxsw_sp_ipip_lb_ul_vr_id(ipip_entry->ol_lb); + __be32 dip; + int err; + + err = mlxsw_sp_ipip_fib_entry_op_gre4_rtdp(mlxsw_sp, tunnel_index, + ipip_entry); + if (err) + return err; + + dip = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4, + ipip_entry->ol_dev).addr4; + return mlxsw_sp_ipip_fib_entry_op_gre4_ralue(mlxsw_sp, be32_to_cpu(dip), + 32, ul_vr_id, op, + tunnel_index); +} + +static bool mlxsw_sp_ipip_tunnel_complete(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev) +{ + union mlxsw_sp_l3addr saddr = mlxsw_sp_ipip_netdev_saddr(proto, ol_dev); + union mlxsw_sp_l3addr daddr = mlxsw_sp_ipip_netdev_daddr(proto, ol_dev); + union mlxsw_sp_l3addr naddr = {0}; + + /* Tunnels with unset local or remote address are valid in Linux and + * used for lightweight tunnels (LWT) and Non-Broadcast Multi-Access + * (NBMA) tunnels. In principle these can be offloaded, but the driver + * currently doesn't support this. So punt. + */ + return memcmp(&saddr, &naddr, sizeof(naddr)) && + memcmp(&daddr, &naddr, sizeof(naddr)); +} + +static bool mlxsw_sp_ipip_can_offload_gre4(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev, + enum mlxsw_sp_l3proto ol_proto) +{ + struct ip_tunnel *tunnel = netdev_priv(ol_dev); + __be16 okflags = TUNNEL_KEY; /* We can't offload any other features. */ + bool inherit_ttl = tunnel->parms.iph.ttl == 0; + bool inherit_tos = tunnel->parms.iph.tos & 0x1; + + return (tunnel->parms.i_flags & ~okflags) == 0 && + (tunnel->parms.o_flags & ~okflags) == 0 && + inherit_ttl && inherit_tos && + mlxsw_sp_ipip_tunnel_complete(MLXSW_SP_L3_PROTO_IPV4, ol_dev); +} + +static struct mlxsw_sp_rif_ipip_lb_config +mlxsw_sp_ipip_ol_loopback_config_gre4(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev) +{ + enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; + + lb_ipipt = mlxsw_sp_ipip_netdev_has_okey(ol_dev) ? + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_KEY_IN_IP : + MLXSW_REG_RITR_LOOPBACK_IPIP_TYPE_IP_IN_GRE_IN_IP; + return (struct mlxsw_sp_rif_ipip_lb_config){ + .lb_ipipt = lb_ipipt, + .okey = mlxsw_sp_ipip_netdev_okey(ol_dev), + .ul_protocol = MLXSW_SP_L3_PROTO_IPV4, + .saddr = mlxsw_sp_ipip_netdev_saddr(MLXSW_SP_L3_PROTO_IPV4, + ol_dev), + }; +} + +static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = { + .dev_type = ARPHRD_IPGRE, + .ul_proto = MLXSW_SP_L3_PROTO_IPV4, + .nexthop_update = mlxsw_sp_ipip_nexthop_update_gre4, + .fib_entry_op = mlxsw_sp_ipip_fib_entry_op_gre4, + .can_offload = mlxsw_sp_ipip_can_offload_gre4, + .ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre4, +}; + +const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[] = { + [MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops, +}; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h new file mode 100644 index 000000000000..1c2db831d83b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -0,0 +1,79 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Petr Machata <petrm@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_IPIP_H_ +#define _MLXSW_IPIP_H_ + +#include "spectrum_router.h" +#include <net/ip_fib.h> + +enum mlxsw_sp_ipip_type { + MLXSW_SP_IPIP_TYPE_GRE4, + MLXSW_SP_IPIP_TYPE_MAX, +}; + +struct mlxsw_sp_ipip_entry { + enum mlxsw_sp_ipip_type ipipt; + struct net_device *ol_dev; /* Overlay. */ + struct mlxsw_sp_rif_ipip_lb *ol_lb; + unsigned int ref_count; /* Number of next hops using the tunnel. */ + struct mlxsw_sp_fib_entry *decap_fib_entry; + struct list_head ipip_list_node; +}; + +struct mlxsw_sp_ipip_ops { + int dev_type; + enum mlxsw_sp_l3proto ul_proto; /* Underlay. */ + + int (*nexthop_update)(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_ipip_entry *ipip_entry); + + bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev, + enum mlxsw_sp_l3proto ol_proto); + + /* Return a configuration for creating an overlay loopback RIF. */ + struct mlxsw_sp_rif_ipip_lb_config + (*ol_loopback_config)(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev); + + int (*fib_entry_op)(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + enum mlxsw_reg_ralue_op op, + u32 tunnel_index); +}; + +extern const struct mlxsw_sp_ipip_ops *mlxsw_sp_ipip_ops_arr[]; + +#endif /* _MLXSW_IPIP_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4b2e0fd7d51e..f0fb898533fb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1,9 +1,10 @@ /* * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c - * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016-2017 Mellanox Technologies. All rights reserved. * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com> * Copyright (c) 2016 Yotam Gigi <yotamg@mellanox.com> + * Copyright (c) 2017 Petr Machata <petrm@mellanox.com> * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -43,18 +44,27 @@ #include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/if_bridge.h> +#include <linux/socket.h> +#include <linux/route.h> #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> #include <net/ip_fib.h> +#include <net/ip6_fib.h> #include <net/fib_rules.h> +#include <net/ip_tunnels.h> #include <net/l3mdev.h> +#include <net/addrconf.h> +#include <net/ndisc.h> +#include <net/ipv6.h> +#include <net/fib_notifier.h> #include "spectrum.h" #include "core.h" #include "reg.h" #include "spectrum_cnt.h" #include "spectrum_dpipe.h" +#include "spectrum_ipip.h" #include "spectrum_router.h" struct mlxsw_sp_vr; @@ -79,9 +89,11 @@ struct mlxsw_sp_router { struct delayed_work nexthop_probe_dw; #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ struct list_head nexthop_neighs_list; + struct list_head ipip_list; bool aborted; struct notifier_block fib_nb; const struct mlxsw_sp_rif_ops **rif_ops_arr; + const struct mlxsw_sp_ipip_ops **ipip_ops_arr; }; struct mlxsw_sp_rif { @@ -122,6 +134,17 @@ struct mlxsw_sp_rif_subport { bool lag; }; +struct mlxsw_sp_rif_ipip_lb { + struct mlxsw_sp_rif common; + struct mlxsw_sp_rif_ipip_lb_config lb_config; + u16 ul_vr_id; /* Reserved for Spectrum-2. */ +}; + +struct mlxsw_sp_rif_params_ipip_lb { + struct mlxsw_sp_rif_params common; + struct mlxsw_sp_rif_ipip_lb_config lb_config; +}; + struct mlxsw_sp_rif_ops { enum mlxsw_sp_rif_type type; size_t rif_size; @@ -304,7 +327,7 @@ static struct mlxsw_sp_rif * mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); -#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE) +#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1) struct mlxsw_sp_prefix_usage { DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT); @@ -314,19 +337,6 @@ struct mlxsw_sp_prefix_usage { for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT) static bool -mlxsw_sp_prefix_usage_subset(struct mlxsw_sp_prefix_usage *prefix_usage1, - struct mlxsw_sp_prefix_usage *prefix_usage2) -{ - unsigned char prefix; - - mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage1) { - if (!test_bit(prefix, prefix_usage2->b)) - return false; - } - return true; -} - -static bool mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1, struct mlxsw_sp_prefix_usage *prefix_usage2) { @@ -371,6 +381,14 @@ enum mlxsw_sp_fib_entry_type { MLXSW_SP_FIB_ENTRY_TYPE_REMOTE, MLXSW_SP_FIB_ENTRY_TYPE_LOCAL, MLXSW_SP_FIB_ENTRY_TYPE_TRAP, + + /* This is a special case of local delivery, where a packet should be + * decapsulated on reception. Note that there is no corresponding ENCAP, + * because that's a type of next hop, not of FIB entry. (There can be + * several next hops in a REMOTE entry, and some of them may be + * encapsulating entries.) + */ + MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP, }; struct mlxsw_sp_nexthop_group; @@ -384,11 +402,9 @@ struct mlxsw_sp_fib_node { struct mlxsw_sp_fib_key key; }; -struct mlxsw_sp_fib_entry_params { - u32 tb_id; - u32 prio; - u8 tos; - u8 type; +struct mlxsw_sp_fib_entry_decap { + struct mlxsw_sp_ipip_entry *ipip_entry; + u32 tunnel_index; }; struct mlxsw_sp_fib_entry { @@ -397,13 +413,26 @@ struct mlxsw_sp_fib_entry { enum mlxsw_sp_fib_entry_type type; struct list_head nexthop_group_node; struct mlxsw_sp_nexthop_group *nh_group; - struct mlxsw_sp_fib_entry_params params; - bool offloaded; + struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */ +}; + +struct mlxsw_sp_fib4_entry { + struct mlxsw_sp_fib_entry common; + u32 tb_id; + u32 prio; + u8 tos; + u8 type; +}; + +struct mlxsw_sp_fib6_entry { + struct mlxsw_sp_fib_entry common; + struct list_head rt6_list; + unsigned int nrt6; }; -enum mlxsw_sp_l3proto { - MLXSW_SP_L3_PROTO_IPV4, - MLXSW_SP_L3_PROTO_IPV6, +struct mlxsw_sp_rt6 { + struct list_head list; + struct rt6_info *rt; }; struct mlxsw_sp_lpm_tree { @@ -428,6 +457,7 @@ struct mlxsw_sp_vr { u32 tb_id; /* kernel fib table id */ unsigned int rif_count; struct mlxsw_sp_fib *fib4; + struct mlxsw_sp_fib *fib6; }; static const struct rhashtable_params mlxsw_sp_fib_ht_params; @@ -487,15 +517,15 @@ static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); } -static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_lpm_tree *lpm_tree) +static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) { char ralta_pl[MLXSW_REG_RALTA_LEN]; mlxsw_reg_ralta_pack(ralta_pl, false, (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, lpm_tree->id); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); } static int @@ -551,10 +581,10 @@ err_left_struct_set: return ERR_PTR(err); } -static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_lpm_tree *lpm_tree) +static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) { - return mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree); + mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree); } static struct mlxsw_sp_lpm_tree * @@ -571,24 +601,21 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, lpm_tree->proto == proto && mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage, prefix_usage)) - goto inc_ref_count; + return lpm_tree; } - lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, - proto); - if (IS_ERR(lpm_tree)) - return lpm_tree; + return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto); +} -inc_ref_count: +static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree) +{ lpm_tree->ref_count++; - return lpm_tree; } -static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_lpm_tree *lpm_tree) +static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_lpm_tree *lpm_tree) { if (--lpm_tree->ref_count == 0) - return mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree); - return 0; + mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree); } #define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */ @@ -625,7 +652,7 @@ static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp) static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr) { - return !!vr->fib4; + return !!vr->fib4 || !!vr->fib6; } static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) @@ -642,13 +669,13 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) } static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_fib *fib) + const struct mlxsw_sp_fib *fib, u8 tree_id) { char raltb_pl[MLXSW_REG_RALTB_LEN]; mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id, (enum mlxsw_reg_ralxx_protocol) fib->proto, - fib->lpm_tree->id); + tree_id); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); } @@ -694,7 +721,7 @@ static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr, case MLXSW_SP_L3_PROTO_IPV4: return vr->fib4; case MLXSW_SP_L3_PROTO_IPV6: - BUG_ON(1); + return vr->fib6; } return NULL; } @@ -703,6 +730,7 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, u32 tb_id) { struct mlxsw_sp_vr *vr; + int err; vr = mlxsw_sp_vr_find_unused(mlxsw_sp); if (!vr) @@ -710,54 +738,26 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4); if (IS_ERR(vr->fib4)) return ERR_CAST(vr->fib4); + vr->fib6 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(vr->fib6)) { + err = PTR_ERR(vr->fib6); + goto err_fib6_create; + } vr->tb_id = tb_id; return vr; -} -static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr) -{ +err_fib6_create: mlxsw_sp_fib_destroy(vr->fib4); vr->fib4 = NULL; + return ERR_PTR(err); } -static int -mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib *fib, - struct mlxsw_sp_prefix_usage *req_prefix_usage) +static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr) { - struct mlxsw_sp_lpm_tree *lpm_tree = fib->lpm_tree; - struct mlxsw_sp_lpm_tree *new_tree; - int err; - - if (mlxsw_sp_prefix_usage_eq(req_prefix_usage, &lpm_tree->prefix_usage)) - return 0; - - new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage, - fib->proto); - if (IS_ERR(new_tree)) { - /* We failed to get a tree according to the required - * prefix usage. However, the current tree might be still good - * for us if our requirement is subset of the prefixes used - * in the tree. - */ - if (mlxsw_sp_prefix_usage_subset(req_prefix_usage, - &lpm_tree->prefix_usage)) - return 0; - return PTR_ERR(new_tree); - } - - /* Prevent packet loss by overwriting existing binding */ - fib->lpm_tree = new_tree; - err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib); - if (err) - goto err_tree_bind; - mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); - - return 0; - -err_tree_bind: - fib->lpm_tree = lpm_tree; - mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree); - return err; + mlxsw_sp_fib_destroy(vr->fib6); + vr->fib6 = NULL; + mlxsw_sp_fib_destroy(vr->fib4); + vr->fib4 = NULL; } static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id) @@ -773,10 +773,105 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id) static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr) { - if (!vr->rif_count && list_empty(&vr->fib4->node_list)) + if (!vr->rif_count && list_empty(&vr->fib4->node_list) && + list_empty(&vr->fib6->node_list)) mlxsw_sp_vr_destroy(vr); } +static bool +mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr, + enum mlxsw_sp_l3proto proto, u8 tree_id) +{ + struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto); + + if (!mlxsw_sp_vr_is_used(vr)) + return false; + if (fib->lpm_tree && fib->lpm_tree->id == tree_id) + return true; + return false; +} + +static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib *fib, + struct mlxsw_sp_lpm_tree *new_tree) +{ + struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree; + int err; + + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id); + if (err) + return err; + fib->lpm_tree = new_tree; + mlxsw_sp_lpm_tree_hold(new_tree); + mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree); + return 0; +} + +static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib *fib, + struct mlxsw_sp_lpm_tree *new_tree) +{ + struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree; + enum mlxsw_sp_l3proto proto = fib->proto; + u8 old_id, new_id = new_tree->id; + struct mlxsw_sp_vr *vr; + int i, err; + + if (!old_tree) + goto no_replace; + old_id = old_tree->id; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + vr = &mlxsw_sp->router->vrs[i]; + if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id)) + continue; + err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp, + mlxsw_sp_vr_fib(vr, proto), + new_tree); + if (err) + goto err_tree_replace; + } + + return 0; + +err_tree_replace: + for (i--; i >= 0; i--) { + if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id)) + continue; + mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp, + mlxsw_sp_vr_fib(vr, proto), + old_tree); + } + return err; + +no_replace: + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id); + if (err) + return err; + fib->lpm_tree = new_tree; + mlxsw_sp_lpm_tree_hold(new_tree); + return 0; +} + +static void +mlxsw_sp_vrs_prefixes(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_l3proto proto, + struct mlxsw_sp_prefix_usage *req_prefix_usage) +{ + int i; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i]; + struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto); + unsigned char prefix; + + if (!mlxsw_sp_vr_is_used(vr)) + continue; + mlxsw_sp_prefix_usage_for_each(prefix, &fib->prefix_usage) + mlxsw_sp_prefix_usage_set(req_prefix_usage, prefix); + } +} + static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_vr *vr; @@ -816,6 +911,374 @@ static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp) kfree(mlxsw_sp->router->vrs); } +static struct net_device * +__mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + struct net *net = dev_net(ol_dev); + + return __dev_get_by_index(net, tun->parms.link); +} + +static u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev) +{ + struct net_device *d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev); + + if (d) + return l3mdev_fib_table(d) ? : RT_TABLE_MAIN; + else + return l3mdev_fib_table(ol_dev) ? : RT_TABLE_MAIN; +} + +static struct mlxsw_sp_rif * +mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_rif_params *params); + +static struct mlxsw_sp_rif_ipip_lb * +mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_ipip_type ipipt, + struct net_device *ol_dev) +{ + struct mlxsw_sp_rif_params_ipip_lb lb_params; + const struct mlxsw_sp_ipip_ops *ipip_ops; + struct mlxsw_sp_rif *rif; + + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt]; + lb_params = (struct mlxsw_sp_rif_params_ipip_lb) { + .common.dev = ol_dev, + .common.lag = false, + .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev), + }; + + rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common); + if (IS_ERR(rif)) + return ERR_CAST(rif); + return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common); +} + +static struct mlxsw_sp_ipip_entry * +mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_ipip_type ipipt, + struct net_device *ol_dev) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + struct mlxsw_sp_ipip_entry *ret = NULL; + + ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL); + if (!ipip_entry) + return ERR_PTR(-ENOMEM); + + ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt, + ol_dev); + if (IS_ERR(ipip_entry->ol_lb)) { + ret = ERR_CAST(ipip_entry->ol_lb); + goto err_ol_ipip_lb_create; + } + + ipip_entry->ipipt = ipipt; + ipip_entry->ol_dev = ol_dev; + + return ipip_entry; + +err_ol_ipip_lb_create: + kfree(ipip_entry); + return ret; +} + +static void +mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp_ipip_entry *ipip_entry) +{ + WARN_ON(ipip_entry->ref_count > 0); + mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common); + kfree(ipip_entry); +} + +static __be32 +mlxsw_sp_ipip_netdev_saddr4(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return tun->parms.iph.saddr; +} + +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return (union mlxsw_sp_l3addr) { + .addr4 = mlxsw_sp_ipip_netdev_saddr4(ol_dev), + }; + case MLXSW_SP_L3_PROTO_IPV6: + break; + }; + + WARN_ON(1); + return (union mlxsw_sp_l3addr) { + .addr4 = 0, + }; +} + +__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev) +{ + struct ip_tunnel *tun = netdev_priv(ol_dev); + + return tun->parms.iph.daddr; +} + +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return (union mlxsw_sp_l3addr) { + .addr4 = mlxsw_sp_ipip_netdev_daddr4(ol_dev), + }; + case MLXSW_SP_L3_PROTO_IPV6: + break; + }; + + WARN_ON(1); + return (union mlxsw_sp_l3addr) { + .addr4 = 0, + }; +} + +static bool mlxsw_sp_l3addr_eq(const union mlxsw_sp_l3addr *addr1, + const union mlxsw_sp_l3addr *addr2) +{ + return !memcmp(addr1, addr2, sizeof(*addr1)); +} + +static bool +mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp, + const enum mlxsw_sp_l3proto ul_proto, + union mlxsw_sp_l3addr saddr, + u32 ul_tb_id, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev); + enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt; + union mlxsw_sp_l3addr tun_saddr; + + if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto) + return false; + + tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev); + return tun_ul_tb_id == ul_tb_id && + mlxsw_sp_l3addr_eq(&tun_saddr, &saddr); +} + +static int +mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + u32 tunnel_index; + int err; + + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &tunnel_index); + if (err) + return err; + + ipip_entry->decap_fib_entry = fib_entry; + fib_entry->decap.ipip_entry = ipip_entry; + fib_entry->decap.tunnel_index = tunnel_index; + return 0; +} + +static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + /* Unlink this node from the IPIP entry that it's the decap entry of. */ + fib_entry->decap.ipip_entry->decap_fib_entry = NULL; + fib_entry->decap.ipip_entry = NULL; + mlxsw_sp_kvdl_free(mlxsw_sp, fib_entry->decap.tunnel_index); +} + +static struct mlxsw_sp_fib_node * +mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr, + size_t addr_len, unsigned char prefix_len); +static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry); + +static void +mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry; + + mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry); + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + + mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); +} + +static void +mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + struct mlxsw_sp_fib_entry *decap_fib_entry) +{ + if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry, + ipip_entry)) + return; + decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP; + + if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry)) + mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); +} + +/* Given an IPIP entry, find the corresponding decap route. */ +static struct mlxsw_sp_fib_entry * +mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + static struct mlxsw_sp_fib_node *fib_node; + const struct mlxsw_sp_ipip_ops *ipip_ops; + struct mlxsw_sp_fib_entry *fib_entry; + unsigned char saddr_prefix_len; + union mlxsw_sp_l3addr saddr; + struct mlxsw_sp_fib *ul_fib; + struct mlxsw_sp_vr *ul_vr; + const void *saddrp; + size_t saddr_len; + u32 ul_tb_id; + u32 saddr4; + + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; + + ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev); + ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id); + if (!ul_vr) + return NULL; + + ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto); + saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto, + ipip_entry->ol_dev); + + switch (ipip_ops->ul_proto) { + case MLXSW_SP_L3_PROTO_IPV4: + saddr4 = be32_to_cpu(saddr.addr4); + saddrp = &saddr4; + saddr_len = 4; + saddr_prefix_len = 32; + break; + case MLXSW_SP_L3_PROTO_IPV6: + WARN_ON(1); + return NULL; + } + + fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len, + saddr_prefix_len); + if (!fib_node || list_empty(&fib_node->entry_list)) + return NULL; + + fib_entry = list_first_entry(&fib_node->entry_list, + struct mlxsw_sp_fib_entry, list); + if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP) + return NULL; + + return fib_entry; +} + +static struct mlxsw_sp_ipip_entry * +mlxsw_sp_ipip_entry_get(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_ipip_type ipipt, + struct net_device *ol_dev) +{ + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev); + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct mlxsw_sp_fib_entry *decap_fib_entry; + struct mlxsw_sp_ipip_entry *ipip_entry; + enum mlxsw_sp_l3proto ul_proto; + union mlxsw_sp_l3addr saddr; + + list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, + ipip_list_node) { + if (ipip_entry->ol_dev == ol_dev) + goto inc_ref_count; + + /* The configuration where several tunnels have the same local + * address in the same underlay table needs special treatment in + * the HW. That is currently not implemented in the driver. + */ + ul_proto = router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto; + saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev); + if (mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr, + ul_tb_id, ipip_entry)) + return ERR_PTR(-EEXIST); + } + + ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev); + if (IS_ERR(ipip_entry)) + return ipip_entry; + + decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry); + if (decap_fib_entry) + mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry, + decap_fib_entry); + + list_add_tail(&ipip_entry->ipip_list_node, + &mlxsw_sp->router->ipip_list); + +inc_ref_count: + ++ipip_entry->ref_count; + return ipip_entry; +} + +static void +mlxsw_sp_ipip_entry_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + if (--ipip_entry->ref_count == 0) { + list_del(&ipip_entry->ipip_list_node); + if (ipip_entry->decap_fib_entry) + mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry); + mlxsw_sp_ipip_entry_destroy(ipip_entry); + } +} + +static bool +mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ul_dev, + enum mlxsw_sp_l3proto ul_proto, + union mlxsw_sp_l3addr ul_dip, + struct mlxsw_sp_ipip_entry *ipip_entry) +{ + u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN; + enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt; + struct net_device *ipip_ul_dev; + + if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto) + return false; + + ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ipip_entry->ol_dev); + return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip, + ul_tb_id, ipip_entry) && + (!ipip_ul_dev || ipip_ul_dev == ul_dev); +} + +/* Given decap parameters, find the corresponding IPIP entry. */ +static struct mlxsw_sp_ipip_entry * +mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ul_dev, + enum mlxsw_sp_l3proto ul_proto, + union mlxsw_sp_l3addr ul_dip) +{ + struct mlxsw_sp_ipip_entry *ipip_entry; + + list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list, + ipip_list_node) + if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev, + ul_proto, ul_dip, + ipip_entry)) + return ipip_entry; + + return NULL; +} + struct mlxsw_sp_neigh_key { struct neighbour *n; }; @@ -831,6 +1294,8 @@ struct mlxsw_sp_neigh_entry { * this neigh entry */ struct list_head nexthop_neighs_list_node; + unsigned int counter_index; + bool counter_valid; }; static const struct rhashtable_params mlxsw_sp_neigh_ht_params = { @@ -839,6 +1304,62 @@ static const struct rhashtable_params mlxsw_sp_neigh_ht_params = { .key_len = sizeof(struct mlxsw_sp_neigh_key), }; +struct mlxsw_sp_neigh_entry * +mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + if (!neigh_entry) { + if (list_empty(&rif->neigh_list)) + return NULL; + else + return list_first_entry(&rif->neigh_list, + typeof(*neigh_entry), + rif_list_node); + } + if (neigh_entry->rif_list_node.next == &rif->neigh_list) + return NULL; + return list_next_entry(neigh_entry, rif_list_node); +} + +int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + return neigh_entry->key.n->tbl->family; +} + +unsigned char * +mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + return neigh_entry->ha; +} + +u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n; + + n = neigh_entry->key.n; + return ntohl(*((__be32 *) n->primary_key)); +} + +struct in6_addr * +mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n; + + n = neigh_entry->key.n; + return (struct in6_addr *) &n->primary_key; +} + +int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + u64 *p_counter) +{ + if (!neigh_entry->counter_valid) + return -EINVAL; + + return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index, + p_counter, NULL); +} + static struct mlxsw_sp_neigh_entry * mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n, u16 rif) @@ -879,6 +1400,53 @@ mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_neigh_ht_params); } +static bool +mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct devlink *devlink; + const char *table_name; + + switch (mlxsw_sp_neigh_entry_type(neigh_entry)) { + case AF_INET: + table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4; + break; + case AF_INET6: + table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6; + break; + default: + WARN_ON(1); + return false; + } + + devlink = priv_to_devlink(mlxsw_sp->core); + return devlink_dpipe_table_counter_enabled(devlink, table_name); +} + +static void +mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry)) + return; + + if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index)) + return; + + neigh_entry->counter_valid = true; +} + +static void +mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry) +{ + if (!neigh_entry->counter_valid) + return; + mlxsw_sp_flow_counter_free(mlxsw_sp, + neigh_entry->counter_index); + neigh_entry->counter_valid = false; +} + static struct mlxsw_sp_neigh_entry * mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) { @@ -898,6 +1466,7 @@ mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) if (err) goto err_neigh_entry_insert; + mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry); list_add(&neigh_entry->rif_list_node, &rif->neigh_list); return neigh_entry; @@ -912,6 +1481,7 @@ mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry) { list_del(&neigh_entry->rif_list_node); + mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry); mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry); mlxsw_sp_neigh_entry_free(neigh_entry); } @@ -929,8 +1499,15 @@ mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) static void mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp) { - unsigned long interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); + unsigned long interval; +#if IS_ENABLED(CONFIG_IPV6) + interval = min_t(unsigned long, + NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME), + NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME)); +#else + interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME); +#endif mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval); } @@ -965,6 +1542,44 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, neigh_release(n); } +#if IS_ENABLED(CONFIG_IPV6) +static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ + struct net_device *dev; + struct neighbour *n; + struct in6_addr dip; + u16 rif; + + mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif, + (char *) &dip); + + if (!mlxsw_sp->router->rifs[rif]) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n"); + return; + } + + dev = mlxsw_sp->router->rifs[rif]->dev; + n = neigh_lookup(&nd_tbl, &dip, dev); + if (!n) { + netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n", + &dip); + return; + } + + netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip); + neigh_event_send(n, NULL); + neigh_release(n); +} +#else +static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ +} +#endif + static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp, char *rauhtd_pl, int rec_index) @@ -988,6 +1603,15 @@ static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp, } +static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + int rec_index) +{ + /* One record contains one entry. */ + mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl, + rec_index); +} + static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, char *rauhtd_pl, int rec_index) { @@ -997,7 +1621,8 @@ static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp, rec_index); break; case MLXSW_REG_RAUHTD_TYPE_IPV6: - WARN_ON_ONCE(1); + mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl, + rec_index); break; } } @@ -1022,22 +1647,20 @@ static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl) return false; } -static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) +static int +__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp, + char *rauhtd_pl, + enum mlxsw_reg_rauhtd_type type) { - char *rauhtd_pl; - u8 num_rec; - int i, err; - - rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL); - if (!rauhtd_pl) - return -ENOMEM; + int i, num_rec; + int err; /* Make sure the neighbour's netdev isn't removed in the * process. */ rtnl_lock(); do { - mlxsw_reg_rauhtd_pack(rauhtd_pl, MLXSW_REG_RAUHTD_TYPE_IPV4); + mlxsw_reg_rauhtd_pack(rauhtd_pl, type); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd), rauhtd_pl); if (err) { @@ -1051,6 +1674,27 @@ static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) } while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl)); rtnl_unlock(); + return err; +} + +static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp) +{ + enum mlxsw_reg_rauhtd_type type; + char *rauhtd_pl; + int err; + + rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL); + if (!rauhtd_pl) + return -ENOMEM; + + type = MLXSW_REG_RAUHTD_TYPE_IPV4; + err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type); + if (err) + goto out; + + type = MLXSW_REG_RAUHTD_TYPE_IPV6; + err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type); +out: kfree(rauhtd_pl); return err; } @@ -1143,9 +1787,43 @@ mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha, dip); + if (neigh_entry->counter_valid) + mlxsw_reg_rauht_pack_counter(rauht_pl, + neigh_entry->counter_index); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); +} + +static void +mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + enum mlxsw_reg_rauht_op op) +{ + struct neighbour *n = neigh_entry->key.n; + char rauht_pl[MLXSW_REG_RAUHT_LEN]; + const char *dip = n->primary_key; + + mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha, + dip); + if (neigh_entry->counter_valid) + mlxsw_reg_rauht_pack_counter(rauht_pl, + neigh_entry->counter_index); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); } +bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry) +{ + struct neighbour *n = neigh_entry->key.n; + + /* Packets with a link-local destination address are trapped + * after LPM lookup and never reach the neighbour table, so + * there is no need to program such neighbours to the device. + */ + if (ipv6_addr_type((struct in6_addr *) &n->primary_key) & + IPV6_ADDR_LINKLOCAL) + return true; + return false; +} + static void mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_neigh_entry *neigh_entry, @@ -1154,11 +1832,29 @@ mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp, if (!adding && !neigh_entry->connected) return; neigh_entry->connected = adding; - if (neigh_entry->key.n->tbl == &arp_tbl) + if (neigh_entry->key.n->tbl->family == AF_INET) { mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry, mlxsw_sp_rauht_op(adding)); - else + } else if (neigh_entry->key.n->tbl->family == AF_INET6) { + if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry)) + return; + mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry, + mlxsw_sp_rauht_op(adding)); + } else { WARN_ON_ONCE(1); + } +} + +void +mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool adding) +{ + if (adding) + mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry); + else + mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry); + mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true); } struct mlxsw_sp_neigh_event_work { @@ -1227,7 +1923,8 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, p = ptr; /* We don't care about changes in the default table. */ - if (!p->dev || p->tbl != &arp_tbl) + if (!p->dev || (p->tbl->family != AF_INET && + p->tbl->family != AF_INET6)) return NOTIFY_DONE; /* We are in atomic context and can't take RTNL mutex, @@ -1246,7 +1943,7 @@ int mlxsw_sp_router_netevent_event(struct notifier_block *unused, case NETEVENT_NEIGH_UPDATE: n = ptr; - if (n->tbl != &arp_tbl) + if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6) return NOTIFY_DONE; mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev); @@ -1307,27 +2004,23 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp) rhashtable_destroy(&mlxsw_sp->router->neigh_ht); } -static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_rif *rif) -{ - char rauht_pl[MLXSW_REG_RAUHT_LEN]; - - mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL, - rif->rif_index, rif->addr); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); -} - static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif) { struct mlxsw_sp_neigh_entry *neigh_entry, *tmp; - mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif); list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list, - rif_list_node) + rif_list_node) { + mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false); mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); + } } +enum mlxsw_sp_nexthop_type { + MLXSW_SP_NEXTHOP_TYPE_ETH, + MLXSW_SP_NEXTHOP_TYPE_IPIP, +}; + struct mlxsw_sp_nexthop_key { struct fib_nh *fib_nh; }; @@ -1340,6 +2033,8 @@ struct mlxsw_sp_nexthop { */ struct rhash_head ht_node; struct mlxsw_sp_nexthop_key key; + unsigned char gw_addr[sizeof(struct in6_addr)]; + int ifindex; struct mlxsw_sp_rif *rif; u8 should_offload:1, /* set indicates this neigh is connected and * should be put to KVD linear area of this group. @@ -1350,17 +2045,18 @@ struct mlxsw_sp_nexthop { update:1; /* set indicates that MAC of this neigh should be * updated in HW */ - struct mlxsw_sp_neigh_entry *neigh_entry; -}; - -struct mlxsw_sp_nexthop_group_key { - struct fib_info *fi; + enum mlxsw_sp_nexthop_type type; + union { + struct mlxsw_sp_neigh_entry *neigh_entry; + struct mlxsw_sp_ipip_entry *ipip_entry; + }; }; struct mlxsw_sp_nexthop_group { + void *priv; struct rhash_head ht_node; struct list_head fib_list; /* list of fib entries that use this group */ - struct mlxsw_sp_nexthop_group_key key; + struct neigh_table *neigh_tbl; u8 adj_index_valid:1, gateway:1; /* routes using the group use a gateway */ u32 adj_index; @@ -1370,15 +2066,154 @@ struct mlxsw_sp_nexthop_group { #define nh_rif nexthops[0].rif }; +static struct fib_info * +mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp) +{ + return nh_grp->priv; +} + +struct mlxsw_sp_nexthop_group_cmp_arg { + enum mlxsw_sp_l3proto proto; + union { + struct fib_info *fi; + struct mlxsw_sp_fib6_entry *fib6_entry; + }; +}; + +static bool +mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp, + const struct in6_addr *gw, int ifindex) +{ + int i; + + for (i = 0; i < nh_grp->count; i++) { + const struct mlxsw_sp_nexthop *nh; + + nh = &nh_grp->nexthops[i]; + if (nh->ifindex == ifindex && + ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr)) + return true; + } + + return false; +} + +static bool +mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + if (nh_grp->count != fib6_entry->nrt6) + return false; + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct in6_addr *gw; + int ifindex; + + ifindex = mlxsw_sp_rt6->rt->dst.dev->ifindex; + gw = &mlxsw_sp_rt6->rt->rt6i_gateway; + if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex)) + return false; + } + + return true; +} + +static int +mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr) +{ + const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key; + const struct mlxsw_sp_nexthop_group *nh_grp = ptr; + + switch (cmp_arg->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp); + case MLXSW_SP_L3_PROTO_IPV6: + return !mlxsw_sp_nexthop6_group_cmp(nh_grp, + cmp_arg->fib6_entry); + default: + WARN_ON(1); + return 1; + } +} + +static int +mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp) +{ + return nh_grp->neigh_tbl->family; +} + +static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed) +{ + const struct mlxsw_sp_nexthop_group *nh_grp = data; + const struct mlxsw_sp_nexthop *nh; + struct fib_info *fi; + unsigned int val; + int i; + + switch (mlxsw_sp_nexthop_group_type(nh_grp)) { + case AF_INET: + fi = mlxsw_sp_nexthop4_group_fi(nh_grp); + return jhash(&fi, sizeof(fi), seed); + case AF_INET6: + val = nh_grp->count; + for (i = 0; i < nh_grp->count; i++) { + nh = &nh_grp->nexthops[i]; + val ^= nh->ifindex; + } + return jhash(&val, sizeof(val), seed); + default: + WARN_ON(1); + return 0; + } +} + +static u32 +mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed) +{ + unsigned int val = fib6_entry->nrt6; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + struct net_device *dev; + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + dev = mlxsw_sp_rt6->rt->dst.dev; + val ^= dev->ifindex; + } + + return jhash(&val, sizeof(val), seed); +} + +static u32 +mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed) +{ + const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data; + + switch (cmp_arg->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed); + case MLXSW_SP_L3_PROTO_IPV6: + return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed); + default: + WARN_ON(1); + return 0; + } +} + static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = { - .key_offset = offsetof(struct mlxsw_sp_nexthop_group, key), .head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node), - .key_len = sizeof(struct mlxsw_sp_nexthop_group_key), + .hashfn = mlxsw_sp_nexthop_group_hash, + .obj_hashfn = mlxsw_sp_nexthop_group_hash_obj, + .obj_cmpfn = mlxsw_sp_nexthop_group_cmp, }; static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { + if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 && + !nh_grp->gateway) + return 0; + return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht, &nh_grp->ht_node, mlxsw_sp_nexthop_group_ht_params); @@ -1387,16 +2222,38 @@ static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { + if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 && + !nh_grp->gateway) + return; + rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht, &nh_grp->ht_node, mlxsw_sp_nexthop_group_ht_params); } static struct mlxsw_sp_nexthop_group * -mlxsw_sp_nexthop_group_lookup(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group_key key) +mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp, + struct fib_info *fi) +{ + struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg; + + cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4; + cmp_arg.fi = fi; + return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht, + &cmp_arg, + mlxsw_sp_nexthop_group_ht_params); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) { - return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht, &key, + struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg; + + cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6; + cmp_arg.fib6_entry = fib6_entry; + return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht, + &cmp_arg, mlxsw_sp_nexthop_group_ht_params); } @@ -1473,15 +2330,26 @@ static int mlxsw_sp_nexthop_mac_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, char ratr_pl[MLXSW_REG_RATR_LEN]; mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, - true, adj_index, neigh_entry->rif); + true, MLXSW_REG_RATR_TYPE_ETHERNET, + adj_index, neigh_entry->rif); mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); } +static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, + u32 adj_index, + struct mlxsw_sp_nexthop *nh) +{ + const struct mlxsw_sp_ipip_ops *ipip_ops; + + ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt]; + return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry); +} + static int -mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp, - bool reallocate) +mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + bool reallocate) { u32 adj_index = nh_grp->adj_index; /* base */ struct mlxsw_sp_nexthop *nh; @@ -1497,8 +2365,16 @@ mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp, } if (nh->update || reallocate) { - err = mlxsw_sp_nexthop_mac_update(mlxsw_sp, - adj_index, nh); + switch (nh->type) { + case MLXSW_SP_NEXTHOP_TYPE_ETH: + err = mlxsw_sp_nexthop_mac_update + (mlxsw_sp, adj_index, nh); + break; + case MLXSW_SP_NEXTHOP_TYPE_IPIP: + err = mlxsw_sp_nexthop_ipip_update + (mlxsw_sp, adj_index, nh); + break; + } if (err) return err; nh->update = 0; @@ -1509,9 +2385,6 @@ mlxsw_sp_nexthop_group_mac_update(struct mlxsw_sp *mlxsw_sp, return 0; } -static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry); - static bool mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node, const struct mlxsw_sp_fib_entry *fib_entry); @@ -1535,6 +2408,24 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp, } static void +mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op, int err); + +static void +mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp) +{ + enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE; + struct mlxsw_sp_fib_entry *fib_entry; + + list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { + if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node, + fib_entry)) + continue; + mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0); + } +} + +static void mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { @@ -1556,7 +2447,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; - if (nh->should_offload ^ nh->offloaded) { + if (nh->should_offload != nh->offloaded) { offload_change = true; if (nh->should_offload) nh->update = 1; @@ -1568,8 +2459,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, /* Nothing was added or removed, so no need to reallocate. Just * update MAC on existing adjacency indexes. */ - err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, - false); + err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); goto set_trap; @@ -1596,7 +2486,7 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, nh_grp->adj_index_valid = 1; nh_grp->adj_index = adj_index; nh_grp->ecmp_size = ecmp_size; - err = mlxsw_sp_nexthop_group_mac_update(mlxsw_sp, nh_grp, true); + err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); goto set_trap; @@ -1621,6 +2511,10 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n"); goto set_trap; } + + /* Offload state within the group changed, so update the flags. */ + mlxsw_sp_nexthop_fib_entries_refresh(nh_grp); + return; set_trap: @@ -1640,9 +2534,9 @@ set_trap: static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, bool removing) { - if (!removing && !nh->should_offload) + if (!removing) nh->should_offload = 1; - else if (removing && nh->offloaded) + else if (nh->offloaded) nh->should_offload = 0; nh->update = 1; } @@ -1684,7 +2578,6 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh) { struct mlxsw_sp_neigh_entry *neigh_entry; - struct fib_nh *fib_nh = nh->key.fib_nh; struct neighbour *n; u8 nud_state, dead; int err; @@ -1693,13 +2586,14 @@ static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, return 0; /* Take a reference of neigh here ensuring that neigh would - * not be detructed before the nexthop entry is finished. + * not be destructed before the nexthop entry is finished. * The reference is taken either in neigh_lookup() or * in neigh_create() in case n is not found. */ - n = neigh_lookup(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev); + n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev); if (!n) { - n = neigh_create(&arp_tbl, &fib_nh->nh_gw, fib_nh->nh_dev); + n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr, + nh->rif->dev); if (IS_ERR(n)) return PTR_ERR(n); neigh_event_send(n, NULL); @@ -1761,18 +2655,131 @@ static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp, neigh_release(n); } -static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp, - struct mlxsw_sp_nexthop *nh, - struct fib_nh *fib_nh) +static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev, + enum mlxsw_sp_ipip_type *p_type) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + const struct mlxsw_sp_ipip_ops *ipip_ops; + enum mlxsw_sp_ipip_type ipipt; + + for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) { + ipip_ops = router->ipip_ops_arr[ipipt]; + if (dev->type == ipip_ops->dev_type) { + if (p_type) + *p_type = ipipt; + return true; + } + } + return false; +} + +static int mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_ipip_type ipipt, + struct mlxsw_sp_nexthop *nh, + struct net_device *ol_dev) +{ + if (!nh->nh_grp->gateway || nh->ipip_entry) + return 0; + + nh->ipip_entry = mlxsw_sp_ipip_entry_get(mlxsw_sp, ipipt, ol_dev); + if (IS_ERR(nh->ipip_entry)) + return PTR_ERR(nh->ipip_entry); + + __mlxsw_sp_nexthop_neigh_update(nh, false); + return 0; +} + +static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry; + + if (!ipip_entry) + return; + + __mlxsw_sp_nexthop_neigh_update(nh, true); + mlxsw_sp_ipip_entry_put(mlxsw_sp, ipip_entry); + nh->ipip_entry = NULL; +} + +static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp, + const struct fib_nh *fib_nh, + enum mlxsw_sp_ipip_type *p_ipipt) { struct net_device *dev = fib_nh->nh_dev; - struct in_device *in_dev; + + return dev && + fib_nh->nh_parent->fib_type == RTN_UNICAST && + mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt); +} + +static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + switch (nh->type) { + case MLXSW_SP_NEXTHOP_TYPE_ETH: + mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop_rif_fini(nh); + break; + case MLXSW_SP_NEXTHOP_TYPE_IPIP: + mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh); + break; + } +} + +static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + struct fib_nh *fib_nh) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct net_device *dev = fib_nh->nh_dev; + enum mlxsw_sp_ipip_type ipipt; struct mlxsw_sp_rif *rif; int err; + if (mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fib_nh, &ipipt) && + router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, + MLXSW_SP_L3_PROTO_IPV4)) { + nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; + return mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev); + } + + nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + return 0; + + mlxsw_sp_nexthop_rif_init(nh, rif); + err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); + if (err) + goto err_neigh_init; + + return 0; + +err_neigh_init: + mlxsw_sp_nexthop_rif_fini(nh); + return err; +} + +static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); +} + +static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + struct fib_nh *fib_nh) +{ + struct net_device *dev = fib_nh->nh_dev; + struct in_device *in_dev; + int err; + nh->nh_grp = nh_grp; nh->key.fib_nh = fib_nh; + memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw)); err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh); if (err) return err; @@ -1785,37 +2792,29 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, fib_nh->nh_flags & RTNH_F_LINKDOWN) return 0; - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!rif) - return 0; - mlxsw_sp_nexthop_rif_init(nh, rif); - - err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); + err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh); if (err) goto err_nexthop_neigh_init; return 0; err_nexthop_neigh_init: - mlxsw_sp_nexthop_rif_fini(nh); mlxsw_sp_nexthop_remove(mlxsw_sp, nh); return err; } -static void mlxsw_sp_nexthop_fini(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop *nh) +static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) { - mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); - mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh); mlxsw_sp_nexthop_remove(mlxsw_sp, nh); } -static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp, - unsigned long event, struct fib_nh *fib_nh) +static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp, + unsigned long event, struct fib_nh *fib_nh) { struct mlxsw_sp_nexthop_key key; struct mlxsw_sp_nexthop *nh; - struct mlxsw_sp_rif *rif; if (mlxsw_sp->router->aborted) return; @@ -1825,18 +2824,12 @@ static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp, if (WARN_ON_ONCE(!nh)) return; - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev); - if (!rif) - return; - switch (event) { case FIB_EVENT_NH_ADD: - mlxsw_sp_nexthop_rif_init(nh, rif); - mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); + mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh); break; case FIB_EVENT_NH_DEL: - mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); - mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh); break; } @@ -1849,14 +2842,20 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, *tmp; list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) { - mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); - mlxsw_sp_nexthop_rif_fini(nh); + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); } } +static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp, + const struct fib_info *fi) +{ + return fi->fib_nh->nh_scope == RT_SCOPE_LINK || + mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL); +} + static struct mlxsw_sp_nexthop_group * -mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) +mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) { struct mlxsw_sp_nexthop_group *nh_grp; struct mlxsw_sp_nexthop *nh; @@ -1870,17 +2869,19 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) nh_grp = kzalloc(alloc_size, GFP_KERNEL); if (!nh_grp) return ERR_PTR(-ENOMEM); + nh_grp->priv = fi; INIT_LIST_HEAD(&nh_grp->fib_list); - nh_grp->gateway = fi->fib_nh->nh_scope == RT_SCOPE_LINK; + nh_grp->neigh_tbl = &arp_tbl; + + nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi); nh_grp->count = fi->fib_nhs; - nh_grp->key.fi = fi; fib_info_hold(fi); for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; fib_nh = &fi->fib_nh[i]; - err = mlxsw_sp_nexthop_init(mlxsw_sp, nh_grp, nh, fib_nh); + err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh); if (err) - goto err_nexthop_init; + goto err_nexthop4_init; } err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); if (err) @@ -1889,19 +2890,19 @@ mlxsw_sp_nexthop_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi) return nh_grp; err_nexthop_group_insert: -err_nexthop_init: +err_nexthop4_init: for (i--; i >= 0; i--) { nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); } - fib_info_put(nh_grp->key.fi); + fib_info_put(fi); kfree(nh_grp); return ERR_PTR(err); } static void -mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_nexthop_group *nh_grp) +mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) { struct mlxsw_sp_nexthop *nh; int i; @@ -1909,25 +2910,23 @@ mlxsw_sp_nexthop_group_destroy(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); for (i = 0; i < nh_grp->count; i++) { nh = &nh_grp->nexthops[i]; - mlxsw_sp_nexthop_fini(mlxsw_sp, nh); + mlxsw_sp_nexthop4_fini(mlxsw_sp, nh); } mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); WARN_ON_ONCE(nh_grp->adj_index_valid); - fib_info_put(nh_grp->key.fi); + fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp)); kfree(nh_grp); } -static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - struct fib_info *fi) +static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + struct fib_info *fi) { - struct mlxsw_sp_nexthop_group_key key; struct mlxsw_sp_nexthop_group *nh_grp; - key.fi = fi; - nh_grp = mlxsw_sp_nexthop_group_lookup(mlxsw_sp, key); + nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi); if (!nh_grp) { - nh_grp = mlxsw_sp_nexthop_group_create(mlxsw_sp, fi); + nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi); if (IS_ERR(nh_grp)) return PTR_ERR(nh_grp); } @@ -1936,15 +2935,25 @@ static int mlxsw_sp_nexthop_group_get(struct mlxsw_sp *mlxsw_sp, return 0; } -static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) +static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; list_del(&fib_entry->nexthop_group_node); if (!list_empty(&nh_grp->fib_list)) return; - mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp); + mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp); +} + +static bool +mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib4_entry *fib4_entry; + + fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry, + common); + return !fib4_entry->tos; } static bool @@ -1952,29 +2961,133 @@ mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group; - if (fib_entry->params.tos) - return false; + switch (fib_entry->fib_node->fib->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + if (!mlxsw_sp_fib4_entry_should_offload(fib_entry)) + return false; + break; + case MLXSW_SP_L3_PROTO_IPV6: + break; + } switch (fib_entry->type) { case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: return !!nh_group->adj_index_valid; case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: return !!nh_group->nh_rif; + case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: + return true; default: return false; } } -static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +static struct mlxsw_sp_nexthop * +mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, + const struct mlxsw_sp_rt6 *mlxsw_sp_rt6) +{ + int i; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + struct rt6_info *rt = mlxsw_sp_rt6->rt; + + if (nh->rif && nh->rif->dev == rt->dst.dev && + ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr, + &rt->rt6i_gateway)) + return nh; + continue; + } + + return NULL; +} + +static void +mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + int i; + + if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || + fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) { + nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD; + return; + } + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + + if (nh->offloaded) + nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD; + else + nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) { - fib_entry->offloaded = true; + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + int i; + + for (i = 0; i < nh_grp->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; + + nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, + common); + + if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL) { + list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, + list)->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD; + return; + } + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + struct mlxsw_sp_nexthop *nh; + + nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); + if (nh && nh->offloaded) + mlxsw_sp_rt6->rt->rt6i_nh_flags |= RTNH_F_OFFLOAD; + else + mlxsw_sp_rt6->rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void +mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry, + common); + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct rt6_info *rt = mlxsw_sp_rt6->rt; + + rt->rt6i_nh_flags &= ~RTNH_F_OFFLOAD; + } +} + +static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) +{ switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: - fib_info_offload_inc(fib_entry->nh_group->key.fi); + mlxsw_sp_fib4_entry_offload_set(fib_entry); break; case MLXSW_SP_L3_PROTO_IPV6: - WARN_ON_ONCE(1); + mlxsw_sp_fib6_entry_offload_set(fib_entry); + break; } } @@ -1983,13 +3096,12 @@ mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) { switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: - fib_info_offload_dec(fib_entry->nh_group->key.fi); + mlxsw_sp_fib4_entry_offload_unset(fib_entry); break; case MLXSW_SP_L3_PROTO_IPV6: - WARN_ON_ONCE(1); + mlxsw_sp_fib6_entry_offload_unset(fib_entry); + break; } - - fib_entry->offloaded = false; } static void @@ -1998,17 +3110,13 @@ mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, { switch (op) { case MLXSW_REG_RALUE_OP_WRITE_DELETE: - if (!fib_entry->offloaded) - return; return mlxsw_sp_fib_entry_offload_unset(fib_entry); case MLXSW_REG_RALUE_OP_WRITE_WRITE: if (err) return; - if (mlxsw_sp_fib_entry_should_offload(fib_entry) && - !fib_entry->offloaded) + if (mlxsw_sp_fib_entry_should_offload(fib_entry)) mlxsw_sp_fib_entry_offload_set(fib_entry); - else if (!mlxsw_sp_fib_entry_should_offload(fib_entry) && - fib_entry->offloaded) + else if (!mlxsw_sp_fib_entry_should_offload(fib_entry)) mlxsw_sp_fib_entry_offload_unset(fib_entry); return; default: @@ -2016,13 +3124,37 @@ mlxsw_sp_fib_entry_offload_refresh(struct mlxsw_sp_fib_entry *fib_entry, } } -static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static void +mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl, + const struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { - char ralue_pl[MLXSW_REG_RALUE_LEN]; struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; - u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; + enum mlxsw_reg_ralxx_protocol proto; + u32 *p_dip; + + proto = (enum mlxsw_reg_ralxx_protocol) fib->proto; + + switch (fib->proto) { + case MLXSW_SP_L3_PROTO_IPV4: + p_dip = (u32 *) fib_entry->fib_node->key.addr; + mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id, + fib_entry->fib_node->key.prefix_len, + *p_dip); + break; + case MLXSW_SP_L3_PROTO_IPV6: + mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id, + fib_entry->fib_node->key.prefix_len, + fib_entry->fib_node->key.addr); + break; + } +} + +static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + char ralue_pl[MLXSW_REG_RALUE_LEN]; enum mlxsw_reg_ralue_trap_action trap_action; u16 trap_id = 0; u32 adjacency_index = 0; @@ -2041,24 +3173,19 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) fib->proto, op, - fib->vr->id, fib_entry->fib_node->key.prefix_len, - *p_dip); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, adjacency_index, ecmp_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } -static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif; - struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; enum mlxsw_reg_ralue_trap_action trap_action; char ralue_pl[MLXSW_REG_RALUE_LEN]; - u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; u16 trap_id = 0; u16 rif_index = 0; @@ -2070,42 +3197,53 @@ static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) fib->proto, op, - fib->vr->id, fib_entry->fib_node->key.prefix_len, - *p_dip); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, rif_index); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } -static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { - struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; char ralue_pl[MLXSW_REG_RALUE_LEN]; - u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; - mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) fib->proto, op, - fib->vr->id, fib_entry->fib_node->key.prefix_len, - *p_dip); + mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op); mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } -static int mlxsw_sp_fib_entry_op4(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, - enum mlxsw_reg_ralue_op op) +static int +mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) +{ + struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry; + const struct mlxsw_sp_ipip_ops *ipip_ops; + + if (WARN_ON(!ipip_entry)) + return -EINVAL; + + ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]; + return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op, + fib_entry->decap.tunnel_index); +} + +static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + enum mlxsw_reg_ralue_op op) { switch (fib_entry->type) { case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE: - return mlxsw_sp_fib_entry_op4_remote(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL: - return mlxsw_sp_fib_entry_op4_local(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op); case MLXSW_SP_FIB_ENTRY_TYPE_TRAP: - return mlxsw_sp_fib_entry_op4_trap(mlxsw_sp, fib_entry, op); + return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op); + case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP: + return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp, + fib_entry, op); } return -EINVAL; } @@ -2114,16 +3252,10 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) { - int err = -EINVAL; + int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op); - switch (fib_entry->fib_node->fib->proto) { - case MLXSW_SP_L3_PROTO_IPV4: - err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op); - break; - case MLXSW_SP_L3_PROTO_IPV6: - return err; - } mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, err); + return err; } @@ -2146,11 +3278,23 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info, struct mlxsw_sp_fib_entry *fib_entry) { + union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) }; + struct net_device *dev = fen_info->fi->fib_dev; + struct mlxsw_sp_ipip_entry *ipip_entry; struct fib_info *fi = fen_info->fi; switch (fen_info->type) { - case RTN_BROADCAST: /* fall through */ case RTN_LOCAL: + ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev, + MLXSW_SP_L3_PROTO_IPV4, dip); + if (ipip_entry) { + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP; + return mlxsw_sp_fib_entry_decap_init(mlxsw_sp, + fib_entry, + ipip_entry); + } + /* fall through */ + case RTN_BROADCAST: fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; return 0; case RTN_UNREACHABLE: /* fall through */ @@ -2163,82 +3307,87 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; return 0; case RTN_UNICAST: - if (fi->fib_nh->nh_scope != RT_SCOPE_LINK) - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; - else + if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi)) fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + else + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; return 0; default: return -EINVAL; } } -static struct mlxsw_sp_fib_entry * +static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, const struct fib_entry_notifier_info *fen_info) { + struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_entry *fib_entry; int err; - fib_entry = kzalloc(sizeof(*fib_entry), GFP_KERNEL); - if (!fib_entry) { - err = -ENOMEM; - goto err_fib_entry_alloc; - } + fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL); + if (!fib4_entry) + return ERR_PTR(-ENOMEM); + fib_entry = &fib4_entry->common; err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry); if (err) goto err_fib4_entry_type_set; - err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fen_info->fi); + err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi); if (err) - goto err_nexthop_group_get; + goto err_nexthop4_group_get; - fib_entry->params.prio = fen_info->fi->fib_priority; - fib_entry->params.tb_id = fen_info->tb_id; - fib_entry->params.type = fen_info->type; - fib_entry->params.tos = fen_info->tos; + fib4_entry->prio = fen_info->fi->fib_priority; + fib4_entry->tb_id = fen_info->tb_id; + fib4_entry->type = fen_info->type; + fib4_entry->tos = fen_info->tos; fib_entry->fib_node = fib_node; - return fib_entry; + return fib4_entry; -err_nexthop_group_get: +err_nexthop4_group_get: err_fib4_entry_type_set: - kfree(fib_entry); -err_fib_entry_alloc: + kfree(fib4_entry); return ERR_PTR(err); } static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) + struct mlxsw_sp_fib4_entry *fib4_entry) { - mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); - kfree(fib_entry); + mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common); + kfree(fib4_entry); } -static struct mlxsw_sp_fib_node * -mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info); - -static struct mlxsw_sp_fib_entry * +static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_node *fib_node; + struct mlxsw_sp_fib *fib; + struct mlxsw_sp_vr *vr; - fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info); - if (IS_ERR(fib_node)) + vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id); + if (!vr) return NULL; + fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4); - list_for_each_entry(fib_entry, &fib_node->entry_list, list) { - if (fib_entry->params.tb_id == fen_info->tb_id && - fib_entry->params.tos == fen_info->tos && - fib_entry->params.type == fen_info->type && - fib_entry->nh_group->key.fi == fen_info->fi) { - return fib_entry; + fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst, + sizeof(fen_info->dst), + fen_info->dst_len); + if (!fib_node) + return NULL; + + list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { + if (fib4_entry->tb_id == fen_info->tb_id && + fib4_entry->tos == fen_info->tos && + fib4_entry->type == fen_info->type && + mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) == + fen_info->fi) { + return fib4_entry; } } @@ -2311,6 +3460,67 @@ mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node, struct mlxsw_sp_fib_entry, list) == fib_entry; } +static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib *fib, + struct mlxsw_sp_fib_node *fib_node) +{ + struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } }; + struct mlxsw_sp_lpm_tree *lpm_tree; + int err; + + /* Since the tree is shared between all virtual routers we must + * make sure it contains all the required prefix lengths. This + * can be computed by either adding the new prefix length to the + * existing prefix usage of a bound tree, or by aggregating the + * prefix lengths across all virtual routers and adding the new + * one as well. + */ + if (fib->lpm_tree) + mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, + &fib->lpm_tree->prefix_usage); + else + mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage); + mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len); + + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, + fib->proto); + if (IS_ERR(lpm_tree)) + return PTR_ERR(lpm_tree); + + if (fib->lpm_tree && fib->lpm_tree->id == lpm_tree->id) + return 0; + + err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree); + if (err) + return err; + + return 0; +} + +static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib *fib) +{ + struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } }; + struct mlxsw_sp_lpm_tree *lpm_tree; + + /* Aggregate prefix lengths across all virtual routers to make + * sure we only have used prefix lengths in the LPM tree. + */ + mlxsw_sp_vrs_prefixes(mlxsw_sp, fib->proto, &req_prefix_usage); + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, + fib->proto); + if (IS_ERR(lpm_tree)) + goto err_tree_get; + mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree); + +err_tree_get: + if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) + return; + mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib); + mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree); + fib->lpm_tree = NULL; +} + static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node) { unsigned char prefix_len = fib_node->key.prefix_len; @@ -2333,8 +3543,6 @@ static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node, struct mlxsw_sp_fib *fib) { - struct mlxsw_sp_prefix_usage req_prefix_usage; - struct mlxsw_sp_lpm_tree *lpm_tree; int err; err = mlxsw_sp_fib_node_insert(fib, fib_node); @@ -2342,33 +3550,15 @@ static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp, return err; fib_node->fib = fib; - mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &fib->prefix_usage); - mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len); - - if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) { - err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib, - &req_prefix_usage); - if (err) - goto err_tree_check; - } else { - lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, - fib->proto); - if (IS_ERR(lpm_tree)) - return PTR_ERR(lpm_tree); - fib->lpm_tree = lpm_tree; - err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib); - if (err) - goto err_tree_bind; - } + err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib, fib_node); + if (err) + goto err_fib_lpm_tree_link; mlxsw_sp_fib_node_prefix_inc(fib_node); return 0; -err_tree_bind: - fib->lpm_tree = NULL; - mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); -err_tree_check: +err_fib_lpm_tree_link: fib_node->fib = NULL; mlxsw_sp_fib_node_remove(fib, fib_node); return err; @@ -2377,46 +3567,34 @@ err_tree_check: static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree; struct mlxsw_sp_fib *fib = fib_node->fib; mlxsw_sp_fib_node_prefix_dec(fib_node); - - if (mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) { - mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib); - fib->lpm_tree = NULL; - mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); - } else { - mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib, &fib->prefix_usage); - } - + mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib); fib_node->fib = NULL; mlxsw_sp_fib_node_remove(fib, fib_node); } static struct mlxsw_sp_fib_node * -mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp, - const struct fib_entry_notifier_info *fen_info) +mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr, + size_t addr_len, unsigned char prefix_len, + enum mlxsw_sp_l3proto proto) { struct mlxsw_sp_fib_node *fib_node; struct mlxsw_sp_fib *fib; struct mlxsw_sp_vr *vr; int err; - vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->tb_id); + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id); if (IS_ERR(vr)) return ERR_CAST(vr); - fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4); + fib = mlxsw_sp_vr_fib(vr, proto); - fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst, - sizeof(fen_info->dst), - fen_info->dst_len); + fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len); if (fib_node) return fib_node; - fib_node = mlxsw_sp_fib_node_create(fib, &fen_info->dst, - sizeof(fen_info->dst), - fen_info->dst_len); + fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len); if (!fib_node) { err = -ENOMEM; goto err_fib_node_create; @@ -2435,8 +3613,8 @@ err_fib_node_create: return ERR_PTR(err); } -static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_node *fib_node) +static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) { struct mlxsw_sp_vr *vr = fib_node->fib->vr; @@ -2447,95 +3625,100 @@ static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_vr_put(vr); } -static struct mlxsw_sp_fib_entry * +static struct mlxsw_sp_fib4_entry * mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, - const struct mlxsw_sp_fib_entry_params *params) + const struct mlxsw_sp_fib4_entry *new4_entry) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib4_entry *fib4_entry; - list_for_each_entry(fib_entry, &fib_node->entry_list, list) { - if (fib_entry->params.tb_id > params->tb_id) + list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) { + if (fib4_entry->tb_id > new4_entry->tb_id) continue; - if (fib_entry->params.tb_id != params->tb_id) + if (fib4_entry->tb_id != new4_entry->tb_id) break; - if (fib_entry->params.tos > params->tos) + if (fib4_entry->tos > new4_entry->tos) continue; - if (fib_entry->params.prio >= params->prio || - fib_entry->params.tos < params->tos) - return fib_entry; + if (fib4_entry->prio >= new4_entry->prio || + fib4_entry->tos < new4_entry->tos) + return fib4_entry; } return NULL; } -static int mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib_entry *fib_entry, - struct mlxsw_sp_fib_entry *new_entry) +static int +mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry, + struct mlxsw_sp_fib4_entry *new4_entry) { struct mlxsw_sp_fib_node *fib_node; - if (WARN_ON(!fib_entry)) + if (WARN_ON(!fib4_entry)) return -EINVAL; - fib_node = fib_entry->fib_node; - list_for_each_entry_from(fib_entry, &fib_node->entry_list, list) { - if (fib_entry->params.tb_id != new_entry->params.tb_id || - fib_entry->params.tos != new_entry->params.tos || - fib_entry->params.prio != new_entry->params.prio) + fib_node = fib4_entry->common.fib_node; + list_for_each_entry_from(fib4_entry, &fib_node->entry_list, + common.list) { + if (fib4_entry->tb_id != new4_entry->tb_id || + fib4_entry->tos != new4_entry->tos || + fib4_entry->prio != new4_entry->prio) break; } - list_add_tail(&new_entry->list, &fib_entry->list); + list_add_tail(&new4_entry->common.list, &fib4_entry->common.list); return 0; } static int -mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib_node *fib_node, - struct mlxsw_sp_fib_entry *new_entry, +mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry, bool replace, bool append) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node; + struct mlxsw_sp_fib4_entry *fib4_entry; - fib_entry = mlxsw_sp_fib4_node_entry_find(fib_node, &new_entry->params); + fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry); if (append) - return mlxsw_sp_fib4_node_list_append(fib_entry, new_entry); - if (replace && WARN_ON(!fib_entry)) + return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry); + if (replace && WARN_ON(!fib4_entry)) return -EINVAL; /* Insert new entry before replaced one, so that we can later * remove the second. */ - if (fib_entry) { - list_add_tail(&new_entry->list, &fib_entry->list); + if (fib4_entry) { + list_add_tail(&new4_entry->common.list, + &fib4_entry->common.list); } else { - struct mlxsw_sp_fib_entry *last; + struct mlxsw_sp_fib4_entry *last; - list_for_each_entry(last, &fib_node->entry_list, list) { - if (new_entry->params.tb_id > last->params.tb_id) + list_for_each_entry(last, &fib_node->entry_list, common.list) { + if (new4_entry->tb_id > last->tb_id) break; - fib_entry = last; + fib4_entry = last; } - if (fib_entry) - list_add(&new_entry->list, &fib_entry->list); + if (fib4_entry) + list_add(&new4_entry->common.list, + &fib4_entry->common.list); else - list_add(&new_entry->list, &fib_node->entry_list); + list_add(&new4_entry->common.list, + &fib_node->entry_list); } return 0; } static void -mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib_entry *fib_entry) +mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry) { - list_del(&fib_entry->list); + list_del(&fib4_entry->common.list); } -static int -mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_fib_node *fib_node, - struct mlxsw_sp_fib_entry *fib_entry) +static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { + struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) return 0; @@ -2552,11 +3735,11 @@ mlxsw_sp_fib4_node_entry_add(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); } -static void -mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_fib_node *fib_node, - struct mlxsw_sp_fib_entry *fib_entry) +static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { + struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry)) return; @@ -2574,54 +3757,53 @@ mlxsw_sp_fib4_node_entry_del(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, + struct mlxsw_sp_fib4_entry *fib4_entry, bool replace, bool append) { - struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; int err; - err = mlxsw_sp_fib4_node_list_insert(fib_node, fib_entry, replace, - append); + err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append); if (err) return err; - err = mlxsw_sp_fib4_node_entry_add(mlxsw_sp, fib_node, fib_entry); + err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common); if (err) - goto err_fib4_node_entry_add; + goto err_fib_node_entry_add; return 0; -err_fib4_node_entry_add: - mlxsw_sp_fib4_node_list_remove(fib_entry); +err_fib_node_entry_add: + mlxsw_sp_fib4_node_list_remove(fib4_entry); return err; } static void mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) + struct mlxsw_sp_fib4_entry *fib4_entry) { - struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; + mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common); + mlxsw_sp_fib4_node_list_remove(fib4_entry); - mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry); - mlxsw_sp_fib4_node_list_remove(fib_entry); + if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP) + mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common); } static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry, + struct mlxsw_sp_fib4_entry *fib4_entry, bool replace) { - struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - struct mlxsw_sp_fib_entry *replaced; + struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node; + struct mlxsw_sp_fib4_entry *replaced; if (!replace) return; /* We inserted the new entry before replaced one */ - replaced = list_next_entry(fib_entry, list); + replaced = list_next_entry(fib4_entry, common.list); mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced); mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced); - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } static int @@ -2629,76 +3811,774 @@ mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info, bool replace, bool append) { - struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib4_entry *fib4_entry; struct mlxsw_sp_fib_node *fib_node; int err; if (mlxsw_sp->router->aborted) return 0; - fib_node = mlxsw_sp_fib4_node_get(mlxsw_sp, fen_info); + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id, + &fen_info->dst, sizeof(fen_info->dst), + fen_info->dst_len, + MLXSW_SP_L3_PROTO_IPV4); if (IS_ERR(fib_node)) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n"); return PTR_ERR(fib_node); } - fib_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info); - if (IS_ERR(fib_entry)) { + fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info); + if (IS_ERR(fib4_entry)) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n"); - err = PTR_ERR(fib_entry); + err = PTR_ERR(fib4_entry); goto err_fib4_entry_create; } - err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib_entry, replace, + err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace, append); if (err) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n"); goto err_fib4_node_entry_link; } - mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib_entry, replace); + mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace); return 0; err_fib4_node_entry_link: - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); err_fib4_entry_create: - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); return err; } static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, struct fib_entry_notifier_info *fen_info) { + struct mlxsw_sp_fib4_entry *fib4_entry; + struct mlxsw_sp_fib_node *fib_node; + + if (mlxsw_sp->router->aborted) + return; + + fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info); + if (WARN_ON(!fib4_entry)) + return; + fib_node = fib4_entry->common.fib_node; + + mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); +} + +static bool mlxsw_sp_fib6_rt_should_ignore(const struct rt6_info *rt) +{ + /* Packets with link-local destination IP arriving to the router + * are trapped to the CPU, so no need to program specific routes + * for them. + */ + if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LINKLOCAL) + return true; + + /* Multicast routes aren't supported, so ignore them. Neighbour + * Discovery packets are specifically trapped. + */ + if (ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_MULTICAST) + return true; + + /* Cloned routes are irrelevant in the forwarding path. */ + if (rt->rt6i_flags & RTF_CACHE) + return true; + + return false; +} + +static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct rt6_info *rt) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL); + if (!mlxsw_sp_rt6) + return ERR_PTR(-ENOMEM); + + /* In case of route replace, replaced route is deleted with + * no notification. Take reference to prevent accessing freed + * memory. + */ + mlxsw_sp_rt6->rt = rt; + rt6_hold(rt); + + return mlxsw_sp_rt6; +} + +#if IS_ENABLED(CONFIG_IPV6) +static void mlxsw_sp_rt6_release(struct rt6_info *rt) +{ + rt6_release(rt); +} +#else +static void mlxsw_sp_rt6_release(struct rt6_info *rt) +{ +} +#endif + +static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) +{ + mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt); + kfree(mlxsw_sp_rt6); +} + +static bool mlxsw_sp_fib6_rt_can_mp(const struct rt6_info *rt) +{ + /* RTF_CACHE routes are ignored */ + return (rt->rt6i_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY; +} + +static struct rt6_info * +mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry) +{ + return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, + list)->rt; +} + +static struct mlxsw_sp_fib6_entry * +mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node, + const struct rt6_info *nrt, bool replace) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + + if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace) + return NULL; + + list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { + struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + + /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same + * virtual router. + */ + if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id) + continue; + if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id) + break; + if (rt->rt6i_metric < nrt->rt6i_metric) + continue; + if (rt->rt6i_metric == nrt->rt6i_metric && + mlxsw_sp_fib6_rt_can_mp(rt)) + return fib6_entry; + if (rt->rt6i_metric > nrt->rt6i_metric) + break; + } + + return NULL; +} + +static struct mlxsw_sp_rt6 * +mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry, + const struct rt6_info *rt) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + if (mlxsw_sp_rt6->rt == rt) + return mlxsw_sp_rt6; + } + + return NULL; +} + +static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp, + const struct rt6_info *rt, + enum mlxsw_sp_ipip_type *ret) +{ + return rt->dst.dev && + mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->dst.dev, ret); +} + +static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + const struct rt6_info *rt) +{ + struct mlxsw_sp_router *router = mlxsw_sp->router; + struct net_device *dev = rt->dst.dev; + enum mlxsw_sp_ipip_type ipipt; + struct mlxsw_sp_rif *rif; + int err; + + if (mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, &ipipt) && + router->ipip_ops_arr[ipipt]->can_offload(mlxsw_sp, dev, + MLXSW_SP_L3_PROTO_IPV6)) { + nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP; + return mlxsw_sp_nexthop_ipip_init(mlxsw_sp, ipipt, nh, dev); + } + + nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH; + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + return 0; + mlxsw_sp_nexthop_rif_init(nh, rif); + + err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); + if (err) + goto err_nexthop_neigh_init; + + return 0; + +err_nexthop_neigh_init: + mlxsw_sp_nexthop_rif_fini(nh); + return err; +} + +static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); +} + +static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp, + struct mlxsw_sp_nexthop *nh, + const struct rt6_info *rt) +{ + struct net_device *dev = rt->dst.dev; + + nh->nh_grp = nh_grp; + memcpy(&nh->gw_addr, &rt->rt6i_gateway, sizeof(nh->gw_addr)); + + if (!dev) + return 0; + nh->ifindex = dev->ifindex; + + return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt); +} + +static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh) +{ + mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh); +} + +static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, + const struct rt6_info *rt) +{ + return rt->rt6i_flags & RTF_GATEWAY || + mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL); +} + +static struct mlxsw_sp_nexthop_group * +mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + struct mlxsw_sp_nexthop *nh; + size_t alloc_size; + int i = 0; + int err; + + alloc_size = sizeof(*nh_grp) + + fib6_entry->nrt6 * sizeof(struct mlxsw_sp_nexthop); + nh_grp = kzalloc(alloc_size, GFP_KERNEL); + if (!nh_grp) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&nh_grp->fib_list); +#if IS_ENABLED(CONFIG_IPV6) + nh_grp->neigh_tbl = &nd_tbl; +#endif + mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list, + struct mlxsw_sp_rt6, list); + nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt); + nh_grp->count = fib6_entry->nrt6; + for (i = 0; i < nh_grp->count; i++) { + struct rt6_info *rt = mlxsw_sp_rt6->rt; + + nh = &nh_grp->nexthops[i]; + err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt); + if (err) + goto err_nexthop6_init; + mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list); + } + + err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp); + if (err) + goto err_nexthop_group_insert; + + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + return nh_grp; + +err_nexthop_group_insert: +err_nexthop6_init: + for (i--; i >= 0; i--) { + nh = &nh_grp->nexthops[i]; + mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); + } + kfree(nh_grp); + return ERR_PTR(err); +} + +static void +mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop_group *nh_grp) +{ + struct mlxsw_sp_nexthop *nh; + int i = nh_grp->count; + + mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp); + for (i--; i >= 0; i--) { + nh = &nh_grp->nexthops[i]; + mlxsw_sp_nexthop6_fini(mlxsw_sp, nh); + } + mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp); + WARN_ON(nh_grp->adj_index_valid); + kfree(nh_grp); +} + +static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp; + + nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry); + if (!nh_grp) { + nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry); + if (IS_ERR(nh_grp)) + return PTR_ERR(nh_grp); + } + + list_add_tail(&fib6_entry->common.nexthop_group_node, + &nh_grp->fib_list); + fib6_entry->common.nh_group = nh_grp; + + return 0; +} + +static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) +{ + struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + + list_del(&fib_entry->nexthop_group_node); + if (!list_empty(&nh_grp->fib_list)) + return; + mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp); +} + +static int +mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group; + int err; + + fib6_entry->common.nh_group = NULL; + list_del(&fib6_entry->common.nexthop_group_node); + + err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); + if (err) + goto err_nexthop6_group_get; + + /* In case this entry is offloaded, then the adjacency index + * currently associated with it in the device's table is that + * of the old group. Start using the new one instead. + */ + err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common); + if (err) + goto err_fib_node_entry_add; + + if (list_empty(&old_nh_grp->fib_list)) + mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp); + + return 0; + +err_fib_node_entry_add: + mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common); +err_nexthop6_group_get: + list_add_tail(&fib6_entry->common.nexthop_group_node, + &old_nh_grp->fib_list); + fib6_entry->common.nh_group = old_nh_grp; + return err; +} + +static int +mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry, + struct rt6_info *rt) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + int err; + + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); + if (IS_ERR(mlxsw_sp_rt6)) + return PTR_ERR(mlxsw_sp_rt6); + + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6++; + + err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); + if (err) + goto err_nexthop6_group_update; + + return 0; + +err_nexthop6_group_update: + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + return err; +} + +static void +mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry, + struct rt6_info *rt) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + + mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt); + if (WARN_ON(!mlxsw_sp_rt6)) + return; + + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); +} + +static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry, + const struct rt6_info *rt) +{ + /* Packets hitting RTF_REJECT routes need to be discarded by the + * stack. We can rely on their destination device not having a + * RIF (it's the loopback device) and can thus use action type + * local, which will cause them to be trapped with a lower + * priority than packets that need to be locally received. + */ + if (rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; + else if (rt->rt6i_flags & RTF_REJECT) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt)) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + else + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; +} + +static void +mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry) +{ + struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp; + + list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list, + list) { + fib6_entry->nrt6--; + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); + } +} + +static struct mlxsw_sp_fib6_entry * +mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node, + struct rt6_info *rt) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_rt6 *mlxsw_sp_rt6; + int err; + + fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL); + if (!fib6_entry) + return ERR_PTR(-ENOMEM); + fib_entry = &fib6_entry->common; + + mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt); + if (IS_ERR(mlxsw_sp_rt6)) { + err = PTR_ERR(mlxsw_sp_rt6); + goto err_rt6_create; + } + + mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, mlxsw_sp_rt6->rt); + + INIT_LIST_HEAD(&fib6_entry->rt6_list); + list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list); + fib6_entry->nrt6 = 1; + err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry); + if (err) + goto err_nexthop6_group_get; + + fib_entry->fib_node = fib_node; + + return fib6_entry; + +err_nexthop6_group_get: + list_del(&mlxsw_sp_rt6->list); + mlxsw_sp_rt6_destroy(mlxsw_sp_rt6); +err_rt6_create: + kfree(fib6_entry); + return ERR_PTR(err); +} + +static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common); + mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry); + WARN_ON(fib6_entry->nrt6); + kfree(fib6_entry); +} + +static struct mlxsw_sp_fib6_entry * +mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node, + const struct rt6_info *nrt, bool replace) +{ + struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL; + + list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { + struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + + if (rt->rt6i_table->tb6_id > nrt->rt6i_table->tb6_id) + continue; + if (rt->rt6i_table->tb6_id != nrt->rt6i_table->tb6_id) + break; + if (replace && rt->rt6i_metric == nrt->rt6i_metric) { + if (mlxsw_sp_fib6_rt_can_mp(rt) == + mlxsw_sp_fib6_rt_can_mp(nrt)) + return fib6_entry; + if (mlxsw_sp_fib6_rt_can_mp(nrt)) + fallback = fallback ?: fib6_entry; + } + if (rt->rt6i_metric > nrt->rt6i_metric) + return fallback ?: fib6_entry; + } + + return fallback; +} + +static int +mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry, + bool replace) +{ + struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node; + struct rt6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry); + struct mlxsw_sp_fib6_entry *fib6_entry; + + fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, replace); + + if (replace && WARN_ON(!fib6_entry)) + return -EINVAL; + + if (fib6_entry) { + list_add_tail(&new6_entry->common.list, + &fib6_entry->common.list); + } else { + struct mlxsw_sp_fib6_entry *last; + + list_for_each_entry(last, &fib_node->entry_list, common.list) { + struct rt6_info *rt = mlxsw_sp_fib6_entry_rt(last); + + if (nrt->rt6i_table->tb6_id > rt->rt6i_table->tb6_id) + break; + fib6_entry = last; + } + + if (fib6_entry) + list_add(&new6_entry->common.list, + &fib6_entry->common.list); + else + list_add(&new6_entry->common.list, + &fib_node->entry_list); + } + + return 0; +} + +static void +mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry) +{ + list_del(&fib6_entry->common.list); +} + +static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry, + bool replace) +{ + int err; + + err = mlxsw_sp_fib6_node_list_insert(fib6_entry, replace); + if (err) + return err; + + err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common); + if (err) + goto err_fib_node_entry_add; + + return 0; + +err_fib_node_entry_add: + mlxsw_sp_fib6_node_list_remove(fib6_entry); + return err; +} + +static void +mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry) +{ + mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common); + mlxsw_sp_fib6_node_list_remove(fib6_entry); +} + +static struct mlxsw_sp_fib6_entry * +mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp, + const struct rt6_info *rt) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib_node *fib_node; + struct mlxsw_sp_fib *fib; + struct mlxsw_sp_vr *vr; + + vr = mlxsw_sp_vr_find(mlxsw_sp, rt->rt6i_table->tb6_id); + if (!vr) + return NULL; + fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6); + + fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->rt6i_dst.addr, + sizeof(rt->rt6i_dst.addr), + rt->rt6i_dst.plen); + if (!fib_node) + return NULL; + + list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) { + struct rt6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry); + + if (rt->rt6i_table->tb6_id == iter_rt->rt6i_table->tb6_id && + rt->rt6i_metric == iter_rt->rt6i_metric && + mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt)) + return fib6_entry; + } + + return NULL; +} + +static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib6_entry *fib6_entry, + bool replace) +{ + struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node; + struct mlxsw_sp_fib6_entry *replaced; + + if (!replace) + return; + + replaced = list_next_entry(fib6_entry, common.list); + + mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); +} + +static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp, + struct rt6_info *rt, bool replace) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; + struct mlxsw_sp_fib_node *fib_node; + int err; + + if (mlxsw_sp->router->aborted) + return 0; + + if (rt->rt6i_src.plen) + return -EINVAL; + + if (mlxsw_sp_fib6_rt_should_ignore(rt)) + return 0; + + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->rt6i_table->tb6_id, + &rt->rt6i_dst.addr, + sizeof(rt->rt6i_dst.addr), + rt->rt6i_dst.plen, + MLXSW_SP_L3_PROTO_IPV6); + if (IS_ERR(fib_node)) + return PTR_ERR(fib_node); + + /* Before creating a new entry, try to append route to an existing + * multipath entry. + */ + fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace); + if (fib6_entry) { + err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt); + if (err) + goto err_fib6_entry_nexthop_add; + return 0; + } + + fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt); + if (IS_ERR(fib6_entry)) { + err = PTR_ERR(fib6_entry); + goto err_fib6_entry_create; + } + + err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, replace); + if (err) + goto err_fib6_node_entry_link; + + mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace); + + return 0; + +err_fib6_node_entry_link: + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); +err_fib6_entry_create: +err_fib6_entry_nexthop_add: + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + return err; +} + +static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp, + struct rt6_info *rt) +{ + struct mlxsw_sp_fib6_entry *fib6_entry; struct mlxsw_sp_fib_node *fib_node; if (mlxsw_sp->router->aborted) return; - fib_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info); - if (WARN_ON(!fib_entry)) + if (mlxsw_sp_fib6_rt_should_ignore(rt)) return; - fib_node = fib_entry->fib_node; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt); + if (WARN_ON(!fib6_entry)) + return; + + /* If route is part of a multipath entry, but not the last one + * removed, then only reduce its nexthop group. + */ + if (!list_is_singular(&fib6_entry->rt6_list)) { + mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt); + return; + } + + fib_node = fib6_entry->common.fib_node; + + mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); } -static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) +static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_reg_ralxx_protocol proto, + u8 tree_id) { char ralta_pl[MLXSW_REG_RALTA_LEN]; char ralst_pl[MLXSW_REG_RALST_LEN]; int i, err; - mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4, - MLXSW_SP_LPM_TREE_MIN); + mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); if (err) return err; - mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN); + mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl); if (err) return err; @@ -2708,20 +4588,14 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) char raltb_pl[MLXSW_REG_RALTB_LEN]; char ralue_pl[MLXSW_REG_RALUE_LEN]; - if (!mlxsw_sp_vr_is_used(vr)) - continue; - - mlxsw_reg_raltb_pack(raltb_pl, vr->id, - MLXSW_REG_RALXX_PROTOCOL_IPV4, - MLXSW_SP_LPM_TREE_MIN); + mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); if (err) return err; - mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4, - MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0, - 0); + mlxsw_reg_ralue_pack(ralue_pl, proto, + MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0); mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); @@ -2732,17 +4606,33 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) return 0; } +static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) +{ + enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4; + int err; + + err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, + MLXSW_SP_LPM_TREE_MIN); + if (err) + return err; + + proto = MLXSW_REG_RALXX_PROTOCOL_IPV6; + return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto, + MLXSW_SP_LPM_TREE_MIN + 1); +} + static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_fib_entry *fib_entry, *tmp; + struct mlxsw_sp_fib4_entry *fib4_entry, *tmp; - list_for_each_entry_safe(fib_entry, tmp, &fib_node->entry_list, list) { - bool do_break = &tmp->list == &fib_node->entry_list; + list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list, + common.list) { + bool do_break = &tmp->common.list == &fib_node->entry_list; - mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib_entry); - mlxsw_sp_fib4_node_put(mlxsw_sp, fib_node); + mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry); + mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); /* Break when entry list is empty and node was freed. * Otherwise, we'll access freed memory in the next * iteration. @@ -2752,6 +4642,23 @@ static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, } } +static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) +{ + struct mlxsw_sp_fib6_entry *fib6_entry, *tmp; + + list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list, + common.list) { + bool do_break = &tmp->common.list == &fib_node->entry_list; + + mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry); + mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry); + mlxsw_sp_fib_node_put(mlxsw_sp, fib_node); + if (do_break) + break; + } +} + static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { @@ -2760,7 +4667,7 @@ static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node); break; case MLXSW_SP_L3_PROTO_IPV6: - WARN_ON_ONCE(1); + mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node); break; } } @@ -2791,10 +4698,17 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp_vr_is_used(vr)) continue; mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); + + /* If virtual router was only used for IPv4, then it's no + * longer used. + */ + if (!mlxsw_sp_vr_is_used(vr)) + continue; + mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6); } } -static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp) { int err; @@ -2811,6 +4725,7 @@ static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) struct mlxsw_sp_fib_event_work { struct work_struct work; union { + struct fib6_entry_notifier_info fen6_info; struct fib_entry_notifier_info fen_info; struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; @@ -2819,7 +4734,7 @@ struct mlxsw_sp_fib_event_work { unsigned long event; }; -static void mlxsw_sp_router_fib_event_work(struct work_struct *work) +static void mlxsw_sp_router_fib4_event_work(struct work_struct *work) { struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); @@ -2839,7 +4754,7 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info, replace, append); if (err) - mlxsw_sp_router_fib4_abort(mlxsw_sp); + mlxsw_sp_router_fib_abort(mlxsw_sp); fib_info_put(fib_work->fen_info.fi); break; case FIB_EVENT_ENTRY_DEL: @@ -2850,13 +4765,13 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) case FIB_EVENT_RULE_DEL: rule = fib_work->fr_info.rule; if (!fib4_rule_default(rule) && !rule->l3mdev) - mlxsw_sp_router_fib4_abort(mlxsw_sp); + mlxsw_sp_router_fib_abort(mlxsw_sp); fib_rule_put(rule); break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: - mlxsw_sp_nexthop_event(mlxsw_sp, fib_work->event, - fib_work->fnh_info.fib_nh); + mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event, + fib_work->fnh_info.fib_nh); fib_info_put(fib_work->fnh_info.fib_nh->nh_parent); break; } @@ -2864,6 +4779,87 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) kfree(fib_work); } +static void mlxsw_sp_router_fib6_event_work(struct work_struct *work) +{ + struct mlxsw_sp_fib_event_work *fib_work = + container_of(work, struct mlxsw_sp_fib_event_work, work); + struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; + struct fib_rule *rule; + bool replace; + int err; + + rtnl_lock(); + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_ADD: + replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE; + err = mlxsw_sp_router_fib6_add(mlxsw_sp, + fib_work->fen6_info.rt, replace); + if (err) + mlxsw_sp_router_fib_abort(mlxsw_sp); + mlxsw_sp_rt6_release(fib_work->fen6_info.rt); + break; + case FIB_EVENT_ENTRY_DEL: + mlxsw_sp_router_fib6_del(mlxsw_sp, fib_work->fen6_info.rt); + mlxsw_sp_rt6_release(fib_work->fen6_info.rt); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + rule = fib_work->fr_info.rule; + if (!fib6_rule_default(rule) && !rule->l3mdev) + mlxsw_sp_router_fib_abort(mlxsw_sp); + fib_rule_put(rule); + break; + } + rtnl_unlock(); + kfree(fib_work); +} + +static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) +{ + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_APPEND: /* fall through */ + case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_DEL: + memcpy(&fib_work->fen_info, info, sizeof(fib_work->fen_info)); + /* Take referece on fib_info to prevent it from being + * freed while work is queued. Release it afterwards. + */ + fib_info_hold(fib_work->fen_info.fi); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info)); + fib_rule_get(fib_work->fr_info.rule); + break; + case FIB_EVENT_NH_ADD: /* fall through */ + case FIB_EVENT_NH_DEL: + memcpy(&fib_work->fnh_info, info, sizeof(fib_work->fnh_info)); + fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); + break; + } +} + +static void mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work, + struct fib_notifier_info *info) +{ + switch (fib_work->event) { + case FIB_EVENT_ENTRY_REPLACE: /* fall through */ + case FIB_EVENT_ENTRY_ADD: /* fall through */ + case FIB_EVENT_ENTRY_DEL: + memcpy(&fib_work->fen6_info, info, sizeof(fib_work->fen6_info)); + rt6_hold(fib_work->fen6_info.rt); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + memcpy(&fib_work->fr_info, info, sizeof(fib_work->fr_info)); + fib_rule_get(fib_work->fr_info.rule); + break; + } +} + /* Called with rcu_read_lock() */ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, unsigned long event, void *ptr) @@ -2879,31 +4875,18 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, if (WARN_ON(!fib_work)) return NOTIFY_BAD; - INIT_WORK(&fib_work->work, mlxsw_sp_router_fib_event_work); router = container_of(nb, struct mlxsw_sp_router, fib_nb); fib_work->mlxsw_sp = router->mlxsw_sp; fib_work->event = event; - switch (event) { - case FIB_EVENT_ENTRY_REPLACE: /* fall through */ - case FIB_EVENT_ENTRY_APPEND: /* fall through */ - case FIB_EVENT_ENTRY_ADD: /* fall through */ - case FIB_EVENT_ENTRY_DEL: - memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info)); - /* Take referece on fib_info to prevent it from being - * freed while work is queued. Release it afterwards. - */ - fib_info_hold(fib_work->fen_info.fi); + switch (info->family) { + case AF_INET: + INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work); + mlxsw_sp_router_fib4_event(fib_work, info); break; - case FIB_EVENT_RULE_ADD: /* fall through */ - case FIB_EVENT_RULE_DEL: - memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info)); - fib_rule_get(fib_work->fr_info.rule); - break; - case FIB_EVENT_NH_ADD: /* fall through */ - case FIB_EVENT_NH_DEL: - memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info)); - fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent); + case AF_INET6: + INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work); + mlxsw_sp_router_fib6_event(fib_work, info); break; } @@ -2948,17 +4931,28 @@ static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); } -static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, - const struct in_device *in_dev, - unsigned long event) +static bool +mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev, + unsigned long event) { + struct inet6_dev *inet6_dev; + bool addr_list_empty = true; + struct in_device *idev; + switch (event) { case NETDEV_UP: - if (!rif) - return true; - return false; + return rif == NULL; case NETDEV_DOWN: - if (rif && !in_dev->ifa_list && + idev = __in_dev_get_rtnl(dev); + if (idev && idev->ifa_list) + addr_list_empty = false; + + inet6_dev = __in6_dev_get(dev); + if (addr_list_empty && inet6_dev && + !list_empty(&inet6_dev->addr_list)) + addr_list_empty = false; + + if (rif && addr_list_empty && !netif_is_l3_slave(rif->dev)) return true; /* It is possible we already removed the RIF ourselves @@ -2977,7 +4971,10 @@ mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp, { enum mlxsw_sp_fid_type type; - /* RIF type is derived from the type of the underlying FID */ + if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL)) + return MLXSW_SP_RIF_TYPE_IPIP_LB; + + /* Otherwise RIF type is derived from the type of the underlying FID. */ if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev))) type = MLXSW_SP_FID_TYPE_8021Q; else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev)) @@ -3036,6 +5033,16 @@ u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif) return rif->rif_index; } +u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif) +{ + return lb_rif->common.rif_index; +} + +u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) +{ + return lb_rif->ul_vr_id; +} + int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif) { return rif->dev->ifindex; @@ -3047,9 +5054,9 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, { u32 tb_id = l3mdev_fib_table(params->dev); const struct mlxsw_sp_rif_ops *ops; + struct mlxsw_sp_fid *fid = NULL; enum mlxsw_sp_rif_type type; struct mlxsw_sp_rif *rif; - struct mlxsw_sp_fid *fid; struct mlxsw_sp_vr *vr; u16 rif_index; int err; @@ -3073,12 +5080,14 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, rif->mlxsw_sp = mlxsw_sp; rif->ops = ops; - fid = ops->fid_get(rif); - if (IS_ERR(fid)) { - err = PTR_ERR(fid); - goto err_fid_get; + if (ops->fid_get) { + fid = ops->fid_get(rif); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + goto err_fid_get; + } + rif->fid = fid; } - rif->fid = fid; if (ops->setup) ops->setup(rif, params); @@ -3087,22 +5096,15 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, if (err) goto err_configure; - err = mlxsw_sp_rif_fdb_op(mlxsw_sp, params->dev->dev_addr, - mlxsw_sp_fid_index(fid), true); - if (err) - goto err_rif_fdb_op; - mlxsw_sp_rif_counters_alloc(rif); - mlxsw_sp_fid_rif_set(fid, rif); mlxsw_sp->router->rifs[rif_index] = rif; vr->rif_count++; return rif; -err_rif_fdb_op: - ops->deconfigure(rif); err_configure: - mlxsw_sp_fid_put(fid); + if (fid) + mlxsw_sp_fid_put(fid); err_fid_get: kfree(rif); err_rif_alloc: @@ -3123,12 +5125,11 @@ void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) vr->rif_count--; mlxsw_sp->router->rifs[rif->rif_index] = NULL; - mlxsw_sp_fid_rif_set(fid, NULL); mlxsw_sp_rif_counters_free(rif); - mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->dev->dev_addr, - mlxsw_sp_fid_index(fid), false); ops->deconfigure(rif); - mlxsw_sp_fid_put(fid); + if (fid) + /* Loopback RIFs are not associated with a FID. */ + mlxsw_sp_fid_put(fid); kfree(rif); mlxsw_sp_vr_put(vr); } @@ -3356,7 +5357,7 @@ int mlxsw_sp_inetaddr_event(struct notifier_block *unused, goto out; rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!mlxsw_sp_rif_should_config(rif, ifa->ifa_dev, event)) + if (!mlxsw_sp_rif_should_config(rif, dev, event)) goto out; err = __mlxsw_sp_inetaddr_event(dev, event); @@ -3364,6 +5365,61 @@ out: return notifier_from_errno(err); } +struct mlxsw_sp_inet6addr_event_work { + struct work_struct work; + struct net_device *dev; + unsigned long event; +}; + +static void mlxsw_sp_inet6addr_event_work(struct work_struct *work) +{ + struct mlxsw_sp_inet6addr_event_work *inet6addr_work = + container_of(work, struct mlxsw_sp_inet6addr_event_work, work); + struct net_device *dev = inet6addr_work->dev; + unsigned long event = inet6addr_work->event; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + + rtnl_lock(); + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + goto out; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!mlxsw_sp_rif_should_config(rif, dev, event)) + goto out; + + __mlxsw_sp_inetaddr_event(dev, event); +out: + rtnl_unlock(); + dev_put(dev); + kfree(inet6addr_work); +} + +/* Called with rcu_read_lock() */ +int mlxsw_sp_inet6addr_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr; + struct mlxsw_sp_inet6addr_event_work *inet6addr_work; + struct net_device *dev = if6->idev->dev; + + if (!mlxsw_sp_port_dev_lower_find_rcu(dev)) + return NOTIFY_DONE; + + inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC); + if (!inet6addr_work) + return NOTIFY_BAD; + + INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work); + inet6addr_work->dev = dev; + inet6addr_work->event = event; + dev_hold(dev); + mlxsw_core_schedule_work(&inet6addr_work->work); + + return NOTIFY_DONE; +} + static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index, const char *mac, int mtu) { @@ -3501,8 +5557,8 @@ static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable) rif_subport = mlxsw_sp_rif_subport_rif(rif); mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF, - rif->rif_index, rif->vr_id, rif->dev->mtu, - rif->dev->dev_addr); + rif->rif_index, rif->vr_id, rif->dev->mtu); + mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr); mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag, rif_subport->lag ? rif_subport->lag_id : rif_subport->system_port, @@ -3513,11 +5569,32 @@ static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable) static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif) { - return mlxsw_sp_rif_subport_op(rif, true); + int err; + + err = mlxsw_sp_rif_subport_op(rif, true); + if (err) + return err; + + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), true); + if (err) + goto err_rif_fdb_op; + + mlxsw_sp_fid_rif_set(rif->fid, rif); + return 0; + +err_rif_fdb_op: + mlxsw_sp_rif_subport_op(rif, false); + return err; } static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif) { + struct mlxsw_sp_fid *fid = rif->fid; + + mlxsw_sp_fid_rif_set(fid, NULL); + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(fid), false); mlxsw_sp_rif_subport_op(rif, false); } @@ -3544,7 +5621,8 @@ static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif, char ritr_pl[MLXSW_REG_RITR_LEN]; mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id, - rif->dev->mtu, rif->dev->dev_addr); + rif->dev->mtu); + mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr); mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); @@ -3565,25 +5643,48 @@ static int mlxsw_sp_rif_vlan_configure(struct mlxsw_sp_rif *rif) if (err) return err; + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), true); + if (err) + goto err_fid_mc_flood_set; + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), true); if (err) goto err_fid_bc_flood_set; + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), true); + if (err) + goto err_rif_fdb_op; + + mlxsw_sp_fid_rif_set(rif->fid, rif); return 0; +err_rif_fdb_op: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, + mlxsw_sp_router_port(mlxsw_sp), false); err_fid_bc_flood_set: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); +err_fid_mc_flood_set: mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false); return err; } static void mlxsw_sp_rif_vlan_deconfigure(struct mlxsw_sp_rif *rif) { - struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_fid *fid = rif->fid; + mlxsw_sp_fid_rif_set(fid, NULL); + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(fid), false); mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), false); + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_VLAN_IF, vid, false); } @@ -3614,25 +5715,48 @@ static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif) if (err) return err; + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), true); + if (err) + goto err_fid_mc_flood_set; + err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), true); if (err) goto err_fid_bc_flood_set; + err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(rif->fid), true); + if (err) + goto err_rif_fdb_op; + + mlxsw_sp_fid_rif_set(rif->fid, rif); return 0; +err_rif_fdb_op: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, + mlxsw_sp_router_port(mlxsw_sp), false); err_fid_bc_flood_set: + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); +err_fid_mc_flood_set: mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); return err; } static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif) { - struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; u16 fid_index = mlxsw_sp_fid_index(rif->fid); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_fid *fid = rif->fid; + mlxsw_sp_fid_rif_set(fid, NULL); + mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr, + mlxsw_sp_fid_index(fid), false); mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC, mlxsw_sp_router_port(mlxsw_sp), false); + mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC, + mlxsw_sp_router_port(mlxsw_sp), false); mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false); } @@ -3650,10 +5774,104 @@ static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = { .fid_get = mlxsw_sp_rif_fid_fid_get, }; +static struct mlxsw_sp_rif_ipip_lb * +mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif) +{ + return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common); +} + +static void +mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif, + const struct mlxsw_sp_rif_params *params) +{ + struct mlxsw_sp_rif_params_ipip_lb *params_lb; + struct mlxsw_sp_rif_ipip_lb *rif_lb; + + params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb, + common); + rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif); + rif_lb->lb_config = params_lb->lb_config; +} + +static int +mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, + struct mlxsw_sp_vr *ul_vr, bool enable) +{ + struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config; + struct mlxsw_sp_rif *rif = &lb_rif->common; + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + char ritr_pl[MLXSW_REG_RITR_LEN]; + u32 saddr4; + + switch (lb_cf.ul_protocol) { + case MLXSW_SP_L3_PROTO_IPV4: + saddr4 = be32_to_cpu(lb_cf.saddr.addr4); + mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF, + rif->rif_index, rif->vr_id, rif->dev->mtu); + mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt, + MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET, + ul_vr->id, saddr4, lb_cf.okey); + break; + + case MLXSW_SP_L3_PROTO_IPV6: + return -EAFNOSUPPORT; + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static int +mlxsw_sp_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); + u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_vr *ul_vr; + int err; + + ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id); + if (IS_ERR(ul_vr)) + return PTR_ERR(ul_vr); + + err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, true); + if (err) + goto err_loopback_op; + + lb_rif->ul_vr_id = ul_vr->id; + ++ul_vr->rif_count; + return 0; + +err_loopback_op: + mlxsw_sp_vr_put(ul_vr); + return err; +} + +static void mlxsw_sp_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif); + struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; + struct mlxsw_sp_vr *ul_vr; + + ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id]; + mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr, false); + + --ul_vr->rif_count; + mlxsw_sp_vr_put(ul_vr); +} + +static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_ipip_lb_ops = { + .type = MLXSW_SP_RIF_TYPE_IPIP_LB, + .rif_size = sizeof(struct mlxsw_sp_rif_ipip_lb), + .setup = mlxsw_sp_rif_ipip_lb_setup, + .configure = mlxsw_sp_rif_ipip_lb_configure, + .deconfigure = mlxsw_sp_rif_ipip_lb_deconfigure, +}; + static const struct mlxsw_sp_rif_ops *mlxsw_sp_rif_ops_arr[] = { [MLXSW_SP_RIF_TYPE_SUBPORT] = &mlxsw_sp_rif_subport_ops, [MLXSW_SP_RIF_TYPE_VLAN] = &mlxsw_sp_rif_vlan_ops, [MLXSW_SP_RIF_TYPE_FID] = &mlxsw_sp_rif_fid_ops, + [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp_rif_ipip_lb_ops, }; static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) @@ -3681,6 +5899,18 @@ static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp) kfree(mlxsw_sp->router->rifs); } +static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr; + INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list); + return 0; +} + +static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp) +{ + WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list)); +} + static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb) { struct mlxsw_sp_router *router; @@ -3704,7 +5934,7 @@ static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) return -EIO; max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); - mlxsw_reg_rgcr_pack(rgcr_pl, true); + mlxsw_reg_rgcr_pack(rgcr_pl, true, true); mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); if (err) @@ -3716,7 +5946,7 @@ static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) { char rgcr_pl[MLXSW_REG_RGCR_LEN]; - mlxsw_reg_rgcr_pack(rgcr_pl, false); + mlxsw_reg_rgcr_pack(rgcr_pl, false, false); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); } @@ -3740,6 +5970,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_rifs_init; + err = mlxsw_sp_ipips_init(mlxsw_sp); + if (err) + goto err_ipips_init; + err = rhashtable_init(&mlxsw_sp->router->nexthop_ht, &mlxsw_sp_nexthop_ht_params); if (err) @@ -3781,6 +6015,8 @@ err_lpm_init: err_nexthop_group_ht_init: rhashtable_destroy(&mlxsw_sp->router->nexthop_ht); err_nexthop_ht_init: + mlxsw_sp_ipips_fini(mlxsw_sp); +err_ipips_init: mlxsw_sp_rifs_fini(mlxsw_sp); err_rifs_init: __mlxsw_sp_router_fini(mlxsw_sp); @@ -3797,6 +6033,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_lpm_fini(mlxsw_sp); rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht); rhashtable_destroy(&mlxsw_sp->router->nexthop_ht); + mlxsw_sp_ipips_fini(mlxsw_sp); mlxsw_sp_rifs_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); kfree(mlxsw_sp->router); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index a3e8d2b25148..345fcc4f38e9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -36,15 +36,38 @@ #define _MLXSW_ROUTER_H_ #include "spectrum.h" +#include "reg.h" + +enum mlxsw_sp_l3proto { + MLXSW_SP_L3_PROTO_IPV4, + MLXSW_SP_L3_PROTO_IPV6, +}; + +union mlxsw_sp_l3addr { + __be32 addr4; + struct in6_addr addr6; +}; + +struct mlxsw_sp_rif_ipip_lb; +struct mlxsw_sp_rif_ipip_lb_config { + enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; + u32 okey; + enum mlxsw_sp_l3proto ul_protocol; /* Underlay. */ + union mlxsw_sp_l3addr saddr; +}; enum mlxsw_sp_rif_counter_dir { MLXSW_SP_RIF_COUNTER_INGRESS, MLXSW_SP_RIF_COUNTER_EGRESS, }; +struct mlxsw_sp_neigh_entry; + struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 rif_index); u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); +u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); +u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, @@ -56,5 +79,33 @@ void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp, int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_rif *rif, enum mlxsw_sp_rif_counter_dir dir); +struct mlxsw_sp_neigh_entry * +mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif, + struct mlxsw_sp_neigh_entry *neigh_entry); +int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry); +unsigned char * +mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry); +u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry); +struct in6_addr * +mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry); + +#define mlxsw_sp_rif_neigh_for_each(neigh_entry, rif) \ + for (neigh_entry = mlxsw_sp_rif_neigh_next(rif, NULL); neigh_entry; \ + neigh_entry = mlxsw_sp_rif_neigh_next(rif, neigh_entry)) +int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + u64 *p_counter); +void +mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_neigh_entry *neigh_entry, + bool adding); +bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry); +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_saddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev); +union mlxsw_sp_l3addr +mlxsw_sp_ipip_netdev_daddr(enum mlxsw_sp_l3proto proto, + const struct net_device *ol_dev); +__be32 mlxsw_sp_ipip_netdev_daddr4(const struct net_device *ol_dev); #endif /* _MLXSW_ROUTER_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchib.c b/drivers/net/ethernet/mellanox/mlxsw/switchib.c index 74341fe0eb25..ab7a29846bfa 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchib.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchib.c @@ -497,7 +497,7 @@ static void mlxsw_sib_fini(struct mlxsw_core *mlxsw_core) mlxsw_sib_ports_remove(mlxsw_sib); } -static struct mlxsw_config_profile mlxsw_sib_config_profile = { +static const struct mlxsw_config_profile mlxsw_sib_config_profile = { .used_max_system_port = 1, .max_system_port = 48000, .used_max_ib_mc = 1, diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 3b0f72455681..f3c29bbf07e2 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -1674,7 +1674,7 @@ static void mlxsw_sx_fini(struct mlxsw_core *mlxsw_core) mlxsw_sx_ports_remove(mlxsw_sx); } -static struct mlxsw_config_profile mlxsw_sx_config_profile = { +static const struct mlxsw_config_profile mlxsw_sx_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, .used_max_mid = 1, diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 12b5ed58f3eb..f396a1fef633 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -61,11 +61,33 @@ enum { MLXSW_TRAP_ID_MTUERROR = 0x52, MLXSW_TRAP_ID_TTLERROR = 0x53, MLXSW_TRAP_ID_LBERROR = 0x54, - MLXSW_TRAP_ID_OSPF = 0x55, + MLXSW_TRAP_ID_IPV4_OSPF = 0x55, MLXSW_TRAP_ID_IP2ME = 0x5F, + MLXSW_TRAP_ID_IPV6_UNSPECIFIED_ADDRESS = 0x60, + MLXSW_TRAP_ID_IPV6_LINK_LOCAL_DEST = 0x61, + MLXSW_TRAP_ID_IPV6_LINK_LOCAL_SRC = 0x62, + MLXSW_TRAP_ID_IPV6_ALL_NODES_LINK = 0x63, + MLXSW_TRAP_ID_IPV6_OSPF = 0x64, + MLXSW_TRAP_ID_IPV6_MLDV12_LISTENER_QUERY = 0x65, + MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_REPORT = 0x66, + MLXSW_TRAP_ID_IPV6_MLDV1_LISTENER_DONE = 0x67, + MLXSW_TRAP_ID_IPV6_MLDV2_LISTENER_REPORT = 0x68, + MLXSW_TRAP_ID_IPV6_DHCP = 0x69, + MLXSW_TRAP_ID_IPV6_ALL_ROUTERS_LINK = 0x6F, MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70, - MLXSW_TRAP_ID_BGP_IPV4 = 0x88, + MLXSW_TRAP_ID_IPV4_BGP = 0x88, + MLXSW_TRAP_ID_IPV6_BGP = 0x89, + MLXSW_TRAP_ID_L3_IPV6_ROUTER_SOLICITATION = 0x8A, + MLXSW_TRAP_ID_L3_IPV6_ROUTER_ADVERTISMENT = 0x8B, + MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_SOLICITATION = 0x8C, + MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_ADVERTISMENT = 0x8D, + MLXSW_TRAP_ID_L3_IPV6_REDIRECTION = 0x8E, MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90, + MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91, + MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92, + MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1, + MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6, + MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7, MLXSW_TRAP_ID_ACL0 = 0x1C0, MLXSW_TRAP_ID_MAX = 0x1FF |