Diffstat (limited to 'drivers/net/ethernet/mellanox')
69 files changed, 8028 insertions, 3854 deletions
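The bulk of this changeset is the mlx4_en receive-path rewrite (en_rx.c and mlx4_en.h below): the per-ring high-order page allocator is dropped in favour of one order-0 page per RX fragment, DMA-mapped once and reused in place. As an aside (not part of the patch), here is a simplified sketch of the page-reuse decision the new mlx4_en_complete_rx_desc() applies per fragment; field and helper names follow the diff, but the standalone wrapper itself is purely illustrative:

/* Returns true when the fragment's page can be handed out again
 * (the inverse of the "release" test in mlx4_en_complete_rx_desc()).
 * Half-page strides flip between the two page halves and keep the page
 * only while we hold the sole reference on the local NUMA node; other
 * strides walk forward until the next fragment would no longer fit.
 */
static bool mlx4_en_frag_can_recycle(const struct mlx4_en_frag_info *frag_info,
				     struct mlx4_en_rx_alloc *frag,
				     int frag_size)
{
	struct page *page = frag->page;

	if (frag_info->frag_stride == PAGE_SIZE / 2) {
		frag->page_offset ^= PAGE_SIZE / 2;
		return page_count(page) == 1 &&
		       !page_is_pfmemalloc(page) &&
		       page_to_nid(page) == numa_mem_id();
	}
	frag->page_offset += ALIGN(frag_size, SMP_CACHE_BYTES);
	return frag->page_offset + frag_info->frag_size <= PAGE_SIZE;
}

In the driver itself the caller then either calls page_ref_inc() to keep the page or unmaps and drops it, as shown in the en_rx.c hunk below.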
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index c4d714fcc7da..ffbcb27c05e5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -117,7 +117,7 @@ static const char main_strings[][ETH_GSTRING_LEN] = { /* port statistics */ "tso_packets", "xmit_more", - "queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_failed", + "queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_pages", "rx_csum_good", "rx_csum_none", "rx_csum_complete", "tx_chksum_offload", /* pf statistics */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 61420473fe5f..94fab20ef146 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -92,7 +92,9 @@ static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, __be16 proto, if (tc->type != TC_SETUP_MQPRIO) return -EINVAL; - return mlx4_en_setup_tc(dev, tc->tc); + tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + + return mlx4_en_setup_tc(dev, tc->mqprio->num_tc); } #ifdef CONFIG_RFS_ACCEL diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index 9166d90e7328..e0eb695318e6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -213,6 +213,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) priv->port_stats.rx_chksum_good = 0; priv->port_stats.rx_chksum_none = 0; priv->port_stats.rx_chksum_complete = 0; + priv->port_stats.rx_alloc_pages = 0; priv->xdp_stats.rx_xdp_drop = 0; priv->xdp_stats.rx_xdp_tx = 0; priv->xdp_stats.rx_xdp_tx_full = 0; @@ -223,6 +224,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) priv->port_stats.rx_chksum_good += READ_ONCE(ring->csum_ok); priv->port_stats.rx_chksum_none += READ_ONCE(ring->csum_none); priv->port_stats.rx_chksum_complete += READ_ONCE(ring->csum_complete); + priv->port_stats.rx_alloc_pages += READ_ONCE(ring->rx_alloc_pages); priv->xdp_stats.rx_xdp_drop += READ_ONCE(ring->xdp_drop); priv->xdp_stats.rx_xdp_tx += READ_ONCE(ring->xdp_tx); priv->xdp_stats.rx_xdp_tx_full += READ_ONCE(ring->xdp_tx_full); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 867292880c07..aa074e57ce06 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -50,173 +50,62 @@ #include "mlx4_en.h" -static int mlx4_alloc_pages(struct mlx4_en_priv *priv, - struct mlx4_en_rx_alloc *page_alloc, - const struct mlx4_en_frag_info *frag_info, - gfp_t _gfp) +static int mlx4_alloc_page(struct mlx4_en_priv *priv, + struct mlx4_en_rx_alloc *frag, + gfp_t gfp) { - int order; struct page *page; dma_addr_t dma; - for (order = frag_info->order; ;) { - gfp_t gfp = _gfp; - - if (order) - gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NOMEMALLOC; - page = alloc_pages(gfp, order); - if (likely(page)) - break; - if (--order < 0 || - ((PAGE_SIZE << order) < frag_info->frag_size)) - return -ENOMEM; - } - dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order, - frag_info->dma_dir); + page = alloc_page(gfp); + if (unlikely(!page)) + return -ENOMEM; + dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir); if (unlikely(dma_mapping_error(priv->ddev, dma))) { - put_page(page); + __free_page(page); return -ENOMEM; } - page_alloc->page_size = PAGE_SIZE << order; - 
page_alloc->page = page; - page_alloc->dma = dma; - page_alloc->page_offset = 0; - /* Not doing get_page() for each frag is a big win - * on asymetric workloads. Note we can not use atomic_set(). - */ - page_ref_add(page, page_alloc->page_size / frag_info->frag_stride - 1); + frag->page = page; + frag->dma = dma; + frag->page_offset = priv->rx_headroom; return 0; } static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_alloc *frags, - struct mlx4_en_rx_alloc *ring_alloc, gfp_t gfp) { - struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS]; - const struct mlx4_en_frag_info *frag_info; - struct page *page; int i; - for (i = 0; i < priv->num_frags; i++) { - frag_info = &priv->frag_info[i]; - page_alloc[i] = ring_alloc[i]; - page_alloc[i].page_offset += frag_info->frag_stride; - - if (page_alloc[i].page_offset + frag_info->frag_stride <= - ring_alloc[i].page_size) - continue; - - if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i], - frag_info, gfp))) - goto out; - } - - for (i = 0; i < priv->num_frags; i++) { - frags[i] = ring_alloc[i]; - frags[i].page_offset += priv->frag_info[i].rx_headroom; - rx_desc->data[i].addr = cpu_to_be64(frags[i].dma + - frags[i].page_offset); - ring_alloc[i] = page_alloc[i]; - } - - return 0; - -out: - while (i--) { - if (page_alloc[i].page != ring_alloc[i].page) { - dma_unmap_page(priv->ddev, page_alloc[i].dma, - page_alloc[i].page_size, - priv->frag_info[i].dma_dir); - page = page_alloc[i].page; - /* Revert changes done by mlx4_alloc_pages */ - page_ref_sub(page, page_alloc[i].page_size / - priv->frag_info[i].frag_stride - 1); - put_page(page); + for (i = 0; i < priv->num_frags; i++, frags++) { + if (!frags->page) { + if (mlx4_alloc_page(priv, frags, gfp)) + return -ENOMEM; + ring->rx_alloc_pages++; } - } - return -ENOMEM; -} - -static void mlx4_en_free_frag(struct mlx4_en_priv *priv, - struct mlx4_en_rx_alloc *frags, - int i) -{ - const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; - u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride; - - - if (next_frag_end > frags[i].page_size) - dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size, - frag_info->dma_dir); - - if (frags[i].page) - put_page(frags[i].page); -} - -static int mlx4_en_init_allocator(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring) -{ - int i; - struct mlx4_en_rx_alloc *page_alloc; - - for (i = 0; i < priv->num_frags; i++) { - const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; - - if (mlx4_alloc_pages(priv, &ring->page_alloc[i], - frag_info, GFP_KERNEL | __GFP_COLD)) - goto out; - - en_dbg(DRV, priv, " frag %d allocator: - size:%d frags:%d\n", - i, ring->page_alloc[i].page_size, - page_ref_count(ring->page_alloc[i].page)); + rx_desc->data[i].addr = cpu_to_be64(frags->dma + + frags->page_offset); } return 0; - -out: - while (i--) { - struct page *page; - - page_alloc = &ring->page_alloc[i]; - dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->page_size, - priv->frag_info[i].dma_dir); - page = page_alloc->page; - /* Revert changes done by mlx4_alloc_pages */ - page_ref_sub(page, page_alloc->page_size / - priv->frag_info[i].frag_stride - 1); - put_page(page); - page_alloc->page = NULL; - } - return -ENOMEM; } -static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring) +static void mlx4_en_free_frag(const struct mlx4_en_priv *priv, + struct mlx4_en_rx_alloc *frag) { - struct mlx4_en_rx_alloc *page_alloc; - int 
i; - - for (i = 0; i < priv->num_frags; i++) { - const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; - - page_alloc = &ring->page_alloc[i]; - en_dbg(DRV, priv, "Freeing allocator:%d count:%d\n", - i, page_count(page_alloc->page)); - - dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->page_size, frag_info->dma_dir); - while (page_alloc->page_offset + frag_info->frag_stride < - page_alloc->page_size) { - put_page(page_alloc->page); - page_alloc->page_offset += frag_info->frag_stride; - } - page_alloc->page = NULL; + if (frag->page) { + dma_unmap_page(priv->ddev, frag->dma, + PAGE_SIZE, priv->dma_dir); + __free_page(frag->page); } + /* We need to clear all fields, otherwise a change of priv->log_rx_info + * could lead to see garbage later in frag->page. + */ + memset(frag, 0, sizeof(*frag)); } -static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv, +static void mlx4_en_init_rx_desc(const struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index; @@ -248,18 +137,23 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride); struct mlx4_en_rx_alloc *frags = ring->rx_info + (index << priv->log_rx_info); - if (ring->page_cache.index > 0) { - frags[0] = ring->page_cache.buf[--ring->page_cache.index]; - rx_desc->data[0].addr = cpu_to_be64(frags[0].dma + - frags[0].page_offset); + /* XDP uses a single page per frame */ + if (!frags->page) { + ring->page_cache.index--; + frags->page = ring->page_cache.buf[ring->page_cache.index].page; + frags->dma = ring->page_cache.buf[ring->page_cache.index].dma; + } + frags->page_offset = XDP_PACKET_HEADROOM; + rx_desc->data[0].addr = cpu_to_be64(frags->dma + + XDP_PACKET_HEADROOM); return 0; } - return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc, gfp); + return mlx4_en_alloc_frags(priv, ring, rx_desc, frags, gfp); } -static inline bool mlx4_en_is_ring_empty(struct mlx4_en_rx_ring *ring) +static bool mlx4_en_is_ring_empty(const struct mlx4_en_rx_ring *ring) { return ring->prod == ring->cons; } @@ -269,7 +163,8 @@ static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring) *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff); } -static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, +/* slow path */ +static void mlx4_en_free_rx_desc(const struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { @@ -279,7 +174,7 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, frags = ring->rx_info + (index << priv->log_rx_info); for (nr = 0; nr < priv->num_frags; nr++) { en_dbg(DRV, priv, "Freeing fragment:%d\n", nr); - mlx4_en_free_frag(priv, frags, nr); + mlx4_en_free_frag(priv, frags + nr); } } @@ -335,12 +230,12 @@ static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv, ring->cons, ring->prod); /* Unmap and free Rx buffers */ - while (!mlx4_en_is_ring_empty(ring)) { - index = ring->cons & ring->size_mask; + for (index = 0; index < ring->size; index++) { en_dbg(DRV, priv, "Processing descriptor:%d\n", index); mlx4_en_free_rx_desc(priv, ring, index); - ++ring->cons; } + ring->cons = 0; + ring->prod = 0; } void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev) @@ -392,9 +287,9 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS * sizeof(struct mlx4_en_rx_alloc)); - ring->rx_info = vmalloc_node(tmp, node); + ring->rx_info = vzalloc_node(tmp, node); if (!ring->rx_info) { - 
ring->rx_info = vmalloc(tmp); + ring->rx_info = vzalloc(tmp); if (!ring->rx_info) { err = -ENOMEM; goto err_ring; @@ -464,16 +359,6 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) /* Initialize all descriptors */ for (i = 0; i < ring->size; i++) mlx4_en_init_rx_desc(priv, ring, i); - - /* Initialize page allocators */ - err = mlx4_en_init_allocator(priv, ring); - if (err) { - en_err(priv, "Failed initializing ring allocator\n"); - if (ring->stride <= TXBB_SIZE) - ring->buf -= TXBB_SIZE; - ring_ind--; - goto err_allocator; - } } err = mlx4_en_fill_rx_buffers(priv); if (err) @@ -493,11 +378,9 @@ err_buffers: mlx4_en_free_rx_buf(priv, priv->rx_ring[ring_ind]); ring_ind = priv->rx_ring_num - 1; -err_allocator: while (ring_ind >= 0) { if (priv->rx_ring[ring_ind]->stride <= TXBB_SIZE) priv->rx_ring[ring_ind]->buf -= TXBB_SIZE; - mlx4_en_destroy_allocator(priv, priv->rx_ring[ring_ind]); ring_ind--; } return err; @@ -537,7 +420,9 @@ bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, if (cache->index >= MLX4_EN_CACHE_SIZE) return false; - cache->buf[cache->index++] = *frame; + cache->buf[cache->index].page = frame->page; + cache->buf[cache->index].dma = frame->dma; + cache->index++; return true; } @@ -567,136 +452,91 @@ void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, int i; for (i = 0; i < ring->page_cache.index; i++) { - struct mlx4_en_rx_alloc *frame = &ring->page_cache.buf[i]; - - dma_unmap_page(priv->ddev, frame->dma, frame->page_size, - priv->frag_info[0].dma_dir); - put_page(frame->page); + dma_unmap_page(priv->ddev, ring->page_cache.buf[i].dma, + PAGE_SIZE, priv->dma_dir); + put_page(ring->page_cache.buf[i].page); } ring->page_cache.index = 0; mlx4_en_free_rx_buf(priv, ring); if (ring->stride <= TXBB_SIZE) ring->buf -= TXBB_SIZE; - mlx4_en_destroy_allocator(priv, ring); } static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, - struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_alloc *frags, struct sk_buff *skb, int length) { - struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags; - struct mlx4_en_frag_info *frag_info; - int nr; + const struct mlx4_en_frag_info *frag_info = priv->frag_info; + unsigned int truesize = 0; + int nr, frag_size; + struct page *page; dma_addr_t dma; + bool release; /* Collect used fragments while replacing them in the HW descriptors */ - for (nr = 0; nr < priv->num_frags; nr++) { - frag_info = &priv->frag_info[nr]; - if (length <= frag_info->frag_prefix_size) - break; - if (unlikely(!frags[nr].page)) + for (nr = 0;; frags++) { + frag_size = min_t(int, length, frag_info->frag_size); + + page = frags->page; + if (unlikely(!page)) goto fail; - dma = be64_to_cpu(rx_desc->data[nr].addr); - dma_sync_single_for_cpu(priv->ddev, dma, frag_info->frag_size, - DMA_FROM_DEVICE); + dma = frags->dma; + dma_sync_single_range_for_cpu(priv->ddev, dma, frags->page_offset, + frag_size, priv->dma_dir); + + __skb_fill_page_desc(skb, nr, page, frags->page_offset, + frag_size); - __skb_fill_page_desc(skb, nr, frags[nr].page, - frags[nr].page_offset, - frag_info->frag_size); + truesize += frag_info->frag_stride; + if (frag_info->frag_stride == PAGE_SIZE / 2) { + frags->page_offset ^= PAGE_SIZE / 2; + release = page_count(page) != 1 || + page_is_pfmemalloc(page) || + page_to_nid(page) != numa_mem_id(); + } else { + u32 sz_align = ALIGN(frag_size, SMP_CACHE_BYTES); - skb->truesize += frag_info->frag_stride; - frags[nr].page = NULL; + frags->page_offset += sz_align; + release = frags->page_offset + frag_info->frag_size > PAGE_SIZE; + } + if (release) 
{ + dma_unmap_page(priv->ddev, dma, PAGE_SIZE, priv->dma_dir); + frags->page = NULL; + } else { + page_ref_inc(page); + } + + nr++; + length -= frag_size; + if (!length) + break; + frag_info++; } - /* Adjust size of last fragment to match actual length */ - if (nr > 0) - skb_frag_size_set(&skb_frags_rx[nr - 1], - length - priv->frag_info[nr - 1].frag_prefix_size); + skb->truesize += truesize; return nr; fail: while (nr > 0) { nr--; - __skb_frag_unref(&skb_frags_rx[nr]); + __skb_frag_unref(skb_shinfo(skb)->frags + nr); } return 0; } - -static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, - struct mlx4_en_rx_desc *rx_desc, - struct mlx4_en_rx_alloc *frags, - unsigned int length) -{ - struct sk_buff *skb; - void *va; - int used_frags; - dma_addr_t dma; - - skb = netdev_alloc_skb(priv->dev, SMALL_PACKET_SIZE + NET_IP_ALIGN); - if (unlikely(!skb)) { - en_dbg(RX_ERR, priv, "Failed allocating skb\n"); - return NULL; - } - skb_reserve(skb, NET_IP_ALIGN); - skb->len = length; - - /* Get pointer to first fragment so we could copy the headers into the - * (linear part of the) skb */ - va = page_address(frags[0].page) + frags[0].page_offset; - - if (length <= SMALL_PACKET_SIZE) { - /* We are copying all relevant data to the skb - temporarily - * sync buffers for the copy */ - dma = be64_to_cpu(rx_desc->data[0].addr); - dma_sync_single_for_cpu(priv->ddev, dma, length, - DMA_FROM_DEVICE); - skb_copy_to_linear_data(skb, va, length); - skb->tail += length; - } else { - unsigned int pull_len; - - /* Move relevant fragments to skb */ - used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, frags, - skb, length); - if (unlikely(!used_frags)) { - kfree_skb(skb); - return NULL; - } - skb_shinfo(skb)->nr_frags = used_frags; - - pull_len = eth_get_headlen(va, SMALL_PACKET_SIZE); - /* Copy headers into the skb linear buffer */ - memcpy(skb->data, va, pull_len); - skb->tail += pull_len; - - /* Skip headers in first fragment */ - skb_shinfo(skb)->frags[0].page_offset += pull_len; - - /* Adjust size of first fragment */ - skb_frag_size_sub(&skb_shinfo(skb)->frags[0], pull_len); - skb->data_len = length - pull_len; - } - return skb; -} - -static void validate_loopback(struct mlx4_en_priv *priv, struct sk_buff *skb) +static void validate_loopback(struct mlx4_en_priv *priv, void *va) { + const unsigned char *data = va + ETH_HLEN; int i; - int offset = ETH_HLEN; - for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) { - if (*(skb->data + offset) != (unsigned char) (i & 0xff)) - goto out_loopback; + for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++) { + if (data[i] != (unsigned char)i) + return; } /* Loopback found */ priv->loopback_ok = 1; - -out_loopback: - dev_kfree_skb_any(skb); } static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, @@ -801,7 +641,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud struct mlx4_cqe *cqe; struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring]; struct mlx4_en_rx_alloc *frags; - struct mlx4_en_rx_desc *rx_desc; struct bpf_prog *xdp_prog; int doorbell_pending; struct sk_buff *skb; @@ -834,10 +673,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cq->mcq.cons_index & cq->size)) { + void *va; frags = ring->rx_info + (index << priv->log_rx_info); - rx_desc = ring->buf + (index << ring->log_stride); - + va = page_address(frags[0].page) + frags[0].page_offset; /* * make sure we read the CQE after we read the 
ownership bit */ @@ -860,16 +699,14 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud * and not performing the selftest or flb disabled */ if (priv->flags & MLX4_EN_FLAG_RX_FILTER_NEEDED) { - struct ethhdr *ethh; + const struct ethhdr *ethh = va; dma_addr_t dma; /* Get pointer to first fragment since we haven't * skb yet and cast it to ethhdr struct */ - dma = be64_to_cpu(rx_desc->data[0].addr); + dma = frags[0].dma + frags[0].page_offset; dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh), DMA_FROM_DEVICE); - ethh = (struct ethhdr *)(page_address(frags[0].page) + - frags[0].page_offset); if (is_multicast_ether_addr(ethh->h_dest)) { struct mlx4_mac_entry *entry; @@ -887,13 +724,16 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud } } + if (unlikely(priv->validate_loopback)) { + validate_loopback(priv, va); + goto next; + } + /* * Packet is OK - process it. */ length = be32_to_cpu(cqe->byte_cnt); length -= ring->fcs_del; - l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) && - (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); /* A bpf program gets first chance to drop the packet. It may * read bytes but not past the end of the frag. @@ -904,13 +744,13 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud void *orig_data; u32 act; - dma = be64_to_cpu(rx_desc->data[0].addr); + dma = frags[0].dma + frags[0].page_offset; dma_sync_single_for_cpu(priv->ddev, dma, priv->frag_info[0].frag_size, DMA_FROM_DEVICE); - xdp.data_hard_start = page_address(frags[0].page); - xdp.data = xdp.data_hard_start + frags[0].page_offset; + xdp.data_hard_start = va - frags[0].page_offset; + xdp.data = va; xdp.data_end = xdp.data + length; orig_data = xdp.data; @@ -920,6 +760,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud length = xdp.data_end - xdp.data; frags[0].page_offset = xdp.data - xdp.data_hard_start; + va = xdp.data; } switch (act) { @@ -928,8 +769,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_TX: if (likely(!mlx4_en_xmit_frame(ring, frags, dev, length, cq->ring, - &doorbell_pending))) - goto consumed; + &doorbell_pending))) { + frags[0].page = NULL; + goto next; + } trace_xdp_exception(dev, xdp_prog, act); goto xdp_drop_no_cnt; /* Drop on xmit failure */ default: @@ -939,8 +782,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_DROP: ring->xdp_drop++; xdp_drop_no_cnt: - if (likely(mlx4_en_rx_recycle(ring, frags))) - goto consumed; goto next; } } @@ -948,129 +789,51 @@ xdp_drop_no_cnt: ring->bytes += length; ring->packets++; + skb = napi_get_frags(&cq->napi); + if (!skb) + goto next; + + if (unlikely(ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL)) { + timestamp = mlx4_en_get_cqe_ts(cqe); + mlx4_en_fill_hwtstamps(mdev, skb_hwtstamps(skb), + timestamp); + } + skb_record_rx_queue(skb, cq->ring); + if (likely(dev->features & NETIF_F_RXCSUM)) { if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP)) { if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && cqe->checksum == cpu_to_be16(0xffff)) { ip_summed = CHECKSUM_UNNECESSARY; + l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) && + (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); + if (l2_tunnel) + skb->csum_level = 1; ring->csum_ok++; } else { - ip_summed = CHECKSUM_NONE; - ring->csum_none++; + goto csum_none; } } else { if (priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP && (cqe->status & 
cpu_to_be16(MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV6))) { - ip_summed = CHECKSUM_COMPLETE; - ring->csum_complete++; + if (check_csum(cqe, skb, va, dev->features)) { + goto csum_none; + } else { + ip_summed = CHECKSUM_COMPLETE; + ring->csum_complete++; + } } else { - ip_summed = CHECKSUM_NONE; - ring->csum_none++; + goto csum_none; } } } else { +csum_none: ip_summed = CHECKSUM_NONE; ring->csum_none++; } - - /* This packet is eligible for GRO if it is: - * - DIX Ethernet (type interpretation) - * - TCP/IP (v4) - * - without IP options - * - not an IP fragment - */ - if (dev->features & NETIF_F_GRO) { - struct sk_buff *gro_skb = napi_get_frags(&cq->napi); - if (!gro_skb) - goto next; - - nr = mlx4_en_complete_rx_desc(priv, - rx_desc, frags, gro_skb, - length); - if (!nr) - goto next; - - if (ip_summed == CHECKSUM_COMPLETE) { - void *va = skb_frag_address(skb_shinfo(gro_skb)->frags); - if (check_csum(cqe, gro_skb, va, - dev->features)) { - ip_summed = CHECKSUM_NONE; - ring->csum_none++; - ring->csum_complete--; - } - } - - skb_shinfo(gro_skb)->nr_frags = nr; - gro_skb->len = length; - gro_skb->data_len = length; - gro_skb->ip_summed = ip_summed; - - if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY) - gro_skb->csum_level = 1; - - if ((cqe->vlan_my_qpn & - cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK)) && - (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { - u16 vid = be16_to_cpu(cqe->sl_vid); - - __vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid); - } else if ((be32_to_cpu(cqe->vlan_my_qpn) & - MLX4_CQE_SVLAN_PRESENT_MASK) && - (dev->features & NETIF_F_HW_VLAN_STAG_RX)) { - __vlan_hwaccel_put_tag(gro_skb, - htons(ETH_P_8021AD), - be16_to_cpu(cqe->sl_vid)); - } - - if (dev->features & NETIF_F_RXHASH) - skb_set_hash(gro_skb, - be32_to_cpu(cqe->immed_rss_invalid), - (ip_summed == CHECKSUM_UNNECESSARY) ? 
- PKT_HASH_TYPE_L4 : - PKT_HASH_TYPE_L3); - - skb_record_rx_queue(gro_skb, cq->ring); - - if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { - timestamp = mlx4_en_get_cqe_ts(cqe); - mlx4_en_fill_hwtstamps(mdev, - skb_hwtstamps(gro_skb), - timestamp); - } - - napi_gro_frags(&cq->napi); - goto next; - } - - /* GRO not possible, complete processing here */ - skb = mlx4_en_rx_skb(priv, rx_desc, frags, length); - if (unlikely(!skb)) { - ring->dropped++; - goto next; - } - - if (unlikely(priv->validate_loopback)) { - validate_loopback(priv, skb); - goto next; - } - - if (ip_summed == CHECKSUM_COMPLETE) { - if (check_csum(cqe, skb, skb->data, dev->features)) { - ip_summed = CHECKSUM_NONE; - ring->csum_complete--; - ring->csum_none++; - } - } - skb->ip_summed = ip_summed; - skb->protocol = eth_type_trans(skb, dev); - skb_record_rx_queue(skb, cq->ring); - - if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY) - skb->csum_level = 1; - if (dev->features & NETIF_F_RXHASH) skb_set_hash(skb, be32_to_cpu(cqe->immed_rss_invalid), @@ -1078,36 +841,36 @@ xdp_drop_no_cnt: PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3); - if ((be32_to_cpu(cqe->vlan_my_qpn) & - MLX4_CQE_CVLAN_PRESENT_MASK) && + + if ((cqe->vlan_my_qpn & + cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK)) && (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(cqe->sl_vid)); - else if ((be32_to_cpu(cqe->vlan_my_qpn) & - MLX4_CQE_SVLAN_PRESENT_MASK) && + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + be16_to_cpu(cqe->sl_vid)); + else if ((cqe->vlan_my_qpn & + cpu_to_be32(MLX4_CQE_SVLAN_PRESENT_MASK)) && (dev->features & NETIF_F_HW_VLAN_STAG_RX)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD), be16_to_cpu(cqe->sl_vid)); - if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { - timestamp = mlx4_en_get_cqe_ts(cqe); - mlx4_en_fill_hwtstamps(mdev, skb_hwtstamps(skb), - timestamp); + nr = mlx4_en_complete_rx_desc(priv, frags, skb, length); + if (likely(nr)) { + skb_shinfo(skb)->nr_frags = nr; + skb->len = length; + skb->data_len = length; + napi_gro_frags(&cq->napi); + } else { + skb->vlan_tci = 0; + skb_clear_hash(skb); } - - napi_gro_receive(&cq->napi, skb); next: - for (nr = 0; nr < priv->num_frags; nr++) - mlx4_en_free_frag(priv, frags, nr); - -consumed: ++cq->mcq.cons_index; index = (cq->mcq.cons_index) & ring->size_mask; cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor; if (++polled == budget) - goto out; + break; } -out: rcu_read_unlock(); if (polled) { @@ -1178,13 +941,6 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) return done; } -static const int frag_sizes[] = { - FRAG_SZ0, - FRAG_SZ1, - FRAG_SZ2, - FRAG_SZ3 -}; - void mlx4_en_calc_rx_buf(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -1195,33 +951,43 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) * This only works when num_frags == 1. */ if (priv->tx_ring_num[TX_XDP]) { - priv->frag_info[0].order = 0; priv->frag_info[0].frag_size = eff_mtu; - priv->frag_info[0].frag_prefix_size = 0; /* This will gain efficient xdp frame recycling at the * expense of more costly truesize accounting */ priv->frag_info[0].frag_stride = PAGE_SIZE; - priv->frag_info[0].dma_dir = PCI_DMA_BIDIRECTIONAL; - priv->frag_info[0].rx_headroom = XDP_PACKET_HEADROOM; + priv->dma_dir = PCI_DMA_BIDIRECTIONAL; + priv->rx_headroom = XDP_PACKET_HEADROOM; i = 1; } else { - int buf_size = 0; + int frag_size_max = 2048, buf_size = 0; + + /* should not happen, right ? 
*/ + if (eff_mtu > PAGE_SIZE + (MLX4_EN_MAX_RX_FRAGS - 1) * 2048) + frag_size_max = PAGE_SIZE; while (buf_size < eff_mtu) { - priv->frag_info[i].order = MLX4_EN_ALLOC_PREFER_ORDER; - priv->frag_info[i].frag_size = - (eff_mtu > buf_size + frag_sizes[i]) ? - frag_sizes[i] : eff_mtu - buf_size; - priv->frag_info[i].frag_prefix_size = buf_size; - priv->frag_info[i].frag_stride = - ALIGN(priv->frag_info[i].frag_size, - SMP_CACHE_BYTES); - priv->frag_info[i].dma_dir = PCI_DMA_FROMDEVICE; - priv->frag_info[i].rx_headroom = 0; - buf_size += priv->frag_info[i].frag_size; + int frag_stride, frag_size = eff_mtu - buf_size; + int pad, nb; + + if (i < MLX4_EN_MAX_RX_FRAGS - 1) + frag_size = min(frag_size, frag_size_max); + + priv->frag_info[i].frag_size = frag_size; + frag_stride = ALIGN(frag_size, SMP_CACHE_BYTES); + /* We can only pack 2 1536-bytes frames in on 4K page + * Therefore, each frame would consume more bytes (truesize) + */ + nb = PAGE_SIZE / frag_stride; + pad = (PAGE_SIZE - nb * frag_stride) / nb; + pad &= ~(SMP_CACHE_BYTES - 1); + priv->frag_info[i].frag_stride = frag_stride + pad; + + buf_size += frag_size; i++; } + priv->dma_dir = PCI_DMA_FROMDEVICE; + priv->rx_headroom = 0; } priv->num_frags = i; @@ -1232,10 +998,9 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) eff_mtu, priv->num_frags); for (i = 0; i < priv->num_frags; i++) { en_err(priv, - " frag:%d - size:%d prefix:%d stride:%d\n", + " frag:%d - size:%d stride:%d\n", i, priv->frag_info[i].frag_size, - priv->frag_info[i].frag_prefix_size, priv->frag_info[i].frag_stride); } } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c index 95290e1fc9fe..17112faafbcc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c @@ -81,14 +81,11 @@ static int mlx4_en_test_loopback(struct mlx4_en_priv *priv) { u32 loopback_ok = 0; int i; - bool gro_enabled; priv->loopback_ok = 0; priv->validate_loopback = 1; - gro_enabled = priv->dev->features & NETIF_F_GRO; mlx4_en_update_loopback_state(priv->dev, priv->dev->features); - priv->dev->features &= ~NETIF_F_GRO; /* xmit */ if (mlx4_en_test_loopback_xmit(priv)) { @@ -111,9 +108,6 @@ mlx4_en_test_loopback_exit: priv->validate_loopback = 0; - if (gro_enabled) - priv->dev->features |= NETIF_F_GRO; - mlx4_en_update_loopback_state(priv->dev, priv->dev->features); return !loopback_ok; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 3ed42199d3f1..3ba89bc43d74 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -354,13 +354,11 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_alloc frame = { .page = tx_info->page, .dma = tx_info->map0_dma, - .page_offset = XDP_PACKET_HEADROOM, - .page_size = PAGE_SIZE, }; if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) { dma_unmap_page(priv->ddev, tx_info->map0_dma, - PAGE_SIZE, priv->frag_info[0].dma_dir); + PAGE_SIZE, priv->dma_dir); put_page(tx_info->page); } @@ -980,8 +978,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) ring->tso_packets++; - i = ((skb->len - lso_header_size) / shinfo->gso_size) + - !!((skb->len - lso_header_size) % shinfo->gso_size); + i = shinfo->gso_segs; tx_info->nr_bytes = skb->len + (i - 1) * lso_header_size; ring->packets += i; } else { diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 
3629ce11a68b..39f401aa3047 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -102,17 +102,6 @@ /* Use the maximum between 16384 and a single page */ #define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(16384) -#define MLX4_EN_ALLOC_PREFER_ORDER min_t(int, get_order(32768), \ - PAGE_ALLOC_COSTLY_ORDER) - -/* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU - * and 4K allocations) */ -enum { - FRAG_SZ0 = 1536 - NET_IP_ALIGN, - FRAG_SZ1 = 4096, - FRAG_SZ2 = 4096, - FRAG_SZ3 = MLX4_EN_ALLOC_SIZE -}; #define MLX4_EN_MAX_RX_FRAGS 4 /* Maximum ring sizes */ @@ -264,13 +253,16 @@ struct mlx4_en_rx_alloc { struct page *page; dma_addr_t dma; u32 page_offset; - u32 page_size; }; #define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT) + struct mlx4_en_page_cache { u32 index; - struct mlx4_en_rx_alloc buf[MLX4_EN_CACHE_SIZE]; + struct { + struct page *page; + dma_addr_t dma; + } buf[MLX4_EN_CACHE_SIZE]; }; struct mlx4_en_priv; @@ -335,7 +327,6 @@ struct mlx4_en_rx_desc { struct mlx4_en_rx_ring { struct mlx4_hwq_resources wqres; - struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS]; u32 size ; /* number of Rx descs*/ u32 actual_size; u32 size_mask; @@ -355,6 +346,7 @@ struct mlx4_en_rx_ring { unsigned long csum_ok; unsigned long csum_none; unsigned long csum_complete; + unsigned long rx_alloc_pages; unsigned long xdp_drop; unsigned long xdp_tx; unsigned long xdp_tx_full; @@ -472,11 +464,7 @@ struct mlx4_en_mc_list { struct mlx4_en_frag_info { u16 frag_size; - u16 frag_prefix_size; u32 frag_stride; - enum dma_data_direction dma_dir; - u16 order; - u16 rx_headroom; }; #ifdef CONFIG_MLX4_EN_DCB @@ -584,8 +572,10 @@ struct mlx4_en_priv { u32 rx_ring_num; u32 rx_skb_size; struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS]; - u16 num_frags; - u16 log_rx_info; + u8 num_frags; + u8 log_rx_info; + u8 dma_dir; + u16 rx_headroom; struct mlx4_en_tx_ring **tx_ring[MLX4_EN_NUM_TX_TYPES]; struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS]; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h index 48641cb0367f..926f3c3f3665 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h @@ -37,7 +37,7 @@ struct mlx4_en_port_stats { unsigned long queue_stopped; unsigned long wake_queue; unsigned long tx_timeout; - unsigned long rx_alloc_failed; + unsigned long rx_alloc_pages; unsigned long rx_chksum_good; unsigned long rx_chksum_none; unsigned long rx_chksum_complete; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index d8d5d161b8c7..4aa29ee93013 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2749,7 +2749,7 @@ int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave, int err; int index = vhcr->in_modifier; struct res_mtt *mtt; - struct res_mpt *mpt; + struct res_mpt *mpt = NULL; int mtt_base = mr_get_mtt_addr(inbox->buf) / dev->caps.mtt_entry_sz; int phys; int id; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 117170014e88..a84b652f9b54 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -31,3 +31,10 @@ config MLX5_CORE_EN_DCB This flag is depended on the kernel's DCB support. 
If unsure, set to Y + +config MLX5_CORE_IPOIB + bool "Mellanox Technologies ConnectX-4 IPoIB offloads support" + depends on MLX5_CORE_EN + default y + ---help--- + MLX5 IPoIB offloads & acceleration support. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9f43beb86250..9e644615f07a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -11,3 +11,5 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o en_selftest.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o + +mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a380353a78c2..5bdaf3d545b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -279,6 +279,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_XRC_SRQ: case MLX5_CMD_OP_DESTROY_DCT: case MLX5_CMD_OP_DEALLOC_Q_COUNTER: + case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT: case MLX5_CMD_OP_DEALLOC_PD: case MLX5_CMD_OP_DEALLOC_UAR: case MLX5_CMD_OP_DETACH_FROM_MCG: @@ -305,8 +307,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER: - case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT: - case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT: + case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -363,6 +364,10 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_Q_COUNTER: case MLX5_CMD_OP_SET_RATE_LIMIT: case MLX5_CMD_OP_QUERY_RATE_LIMIT: + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT: case MLX5_CMD_OP_ALLOC_PD: case MLX5_CMD_OP_ALLOC_UAR: case MLX5_CMD_OP_CONFIG_INT_MODERATION: @@ -414,10 +419,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: case MLX5_CMD_OP_QUERY_FLOW_COUNTER: case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: - case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: - case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: - case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: - case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT: + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -501,6 +503,12 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT); MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); + MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT); + MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT); MLX5_COMMAND_STR_CASE(ALLOC_PD); MLX5_COMMAND_STR_CASE(DEALLOC_PD); MLX5_COMMAND_STR_CASE(ALLOC_UAR); @@ -576,12 +584,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE); MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER); MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER); - 
MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); - MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT); - MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT); - MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT); - MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT); - MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT); + MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT); + MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 3d9490cd2db1..0099a3e397bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -37,6 +37,7 @@ #include <linux/timecounter.h> #include <linux/net_tstamp.h> #include <linux/ptp_clock_kernel.h> +#include <linux/crash_dump.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/qp.h> #include <linux/mlx5/cq.h> @@ -111,18 +112,13 @@ #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ -#define MLX5E_SQ_BF_BUDGET 16 #define MLX5E_ICOSQ_MAX_WQEBBS \ (DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB)) #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) -#define MLX5E_XDP_IHS_DS_COUNT \ - DIV_ROUND_UP(MLX5E_XDP_MIN_INLINE - 2, MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT \ ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */) -#define MLX5E_XDP_TX_WQEBBS \ - DIV_ROUND_UP(MLX5E_XDP_TX_DS_COUNT, MLX5_SEND_WQEBB_NUM_DS) #define MLX5E_NUM_MAIN_GROUPS 9 @@ -158,6 +154,14 @@ static inline int mlx5_max_log_rq_size(int wq_type) } } +static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) +{ + return is_kdump_kernel() ? 
+ MLX5E_MIN_NUM_CHANNELS : + min_t(int, mdev->priv.eq_table.num_comp_vectors, + MLX5E_MAX_NUM_CHANNELS); +} + struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_eth_seg eth; @@ -187,15 +191,15 @@ enum mlx5e_priv_flag { MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 1), }; -#define MLX5E_SET_PFLAG(priv, pflag, enable) \ +#define MLX5E_SET_PFLAG(params, pflag, enable) \ do { \ if (enable) \ - (priv)->params.pflags |= (pflag); \ + (params)->pflags |= (pflag); \ else \ - (priv)->params.pflags &= ~(pflag); \ + (params)->pflags &= ~(pflag); \ } while (0) -#define MLX5E_GET_PFLAG(priv, pflag) (!!((priv)->params.pflags & (pflag))) +#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (pflag))) #ifdef CONFIG_MLX5_CORE_EN_DCB #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ @@ -218,7 +222,6 @@ struct mlx5e_params { bool rx_cqe_compress_def; struct mlx5e_cq_moder rx_cq_moderation; struct mlx5e_cq_moder tx_cq_moderation; - u16 min_rx_wqes; bool lro_en; u32 lro_wqe_sz; u16 tx_max_inline; @@ -227,9 +230,11 @@ struct mlx5e_params { u8 toeplitz_hash_key[40]; u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; bool vlan_strip_disable; + bool scatter_fcs_en; bool rx_am_enabled; u32 lro_timeout; u32 pflags; + struct bpf_prog *xdp_prog; }; #ifdef CONFIG_MLX5_CORE_EN_DCB @@ -285,7 +290,6 @@ struct mlx5e_cq { struct napi_struct *napi; struct mlx5_core_cq mcq; struct mlx5e_channel *channel; - struct mlx5e_priv *priv; /* cqe decompression */ struct mlx5_cqe64 title; @@ -295,22 +299,163 @@ struct mlx5e_cq { u16 decmprs_wqe_counter; /* control */ + struct mlx5_core_dev *mdev; struct mlx5_frag_wq_ctrl wq_ctrl; } ____cacheline_aligned_in_smp; -struct mlx5e_rq; -typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq, - struct mlx5_cqe64 *cqe); -typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, - u16 ix); +struct mlx5e_tx_wqe_info { + struct sk_buff *skb; + u32 num_bytes; + u8 num_wqebbs; + u8 num_dma; +}; + +enum mlx5e_dma_map_type { + MLX5E_DMA_MAP_SINGLE, + MLX5E_DMA_MAP_PAGE +}; + +struct mlx5e_sq_dma { + dma_addr_t addr; + u32 size; + enum mlx5e_dma_map_type type; +}; + +enum { + MLX5E_SQ_STATE_ENABLED, +}; + +struct mlx5e_sq_wqe_info { + u8 opcode; + u8 num_wqebbs; +}; + +struct mlx5e_txqsq { + /* data path */ + + /* dirtied @completion */ + u16 cc; + u32 dma_fifo_cc; + + /* dirtied @xmit */ + u16 pc ____cacheline_aligned_in_smp; + u32 dma_fifo_pc; + struct mlx5e_sq_stats stats; + + struct mlx5e_cq cq; + + /* write@xmit, read@completion */ + struct { + struct mlx5e_sq_dma *dma_fifo; + struct mlx5e_tx_wqe_info *wqe_info; + } db; + + /* read only */ + struct mlx5_wq_cyc wq; + u32 dma_fifo_mask; + void __iomem *uar_map; + struct netdev_queue *txq; + u32 sqn; + u16 max_inline; + u8 min_inline_mode; + u16 edge; + struct device *pdev; + struct mlx5e_tstamp *tstamp; + __be32 mkey_be; + unsigned long state; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5e_channel *channel; + int txq_ix; + u32 rate_limit; +} ____cacheline_aligned_in_smp; + +struct mlx5e_xdpsq { + /* data path */ + + /* dirtied @rx completion */ + u16 cc; + u16 pc; + + struct mlx5e_cq cq; + + /* write@xmit, read@completion */ + struct { + struct mlx5e_dma_info *di; + bool doorbell; + } db; + + /* read only */ + struct mlx5_wq_cyc wq; + void __iomem *uar_map; + u32 sqn; + struct device *pdev; + __be32 mkey_be; + u8 min_inline_mode; + unsigned long state; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5e_channel *channel; +} ____cacheline_aligned_in_smp; + 
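Aside (illustrative, not part of the patch): the per-type SQs introduced here share the occupancy test mlx5e_wqc_has_room_for(), added a few lines further down in this hunk, which operates on free-running u16 producer/consumer counters masked by sz_m1. A minimal usage sketch with a worked example; the helper name xdpsq_can_post is hypothetical:

static bool xdpsq_can_post(struct mlx5e_xdpsq *sq, u16 nwqebbs)
{
	/* e.g. a 64-entry ring (sz_m1 = 63) with pc = 70, cc = 10:
	 * used = pc - cc = 60, free = (cc - pc) & 63 = (u16)-60 & 63 = 4,
	 * so up to 4 WQEBBs still fit; cc == pc is special-cased as a
	 * completely free ring.
	 */
	return mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, nwqebbs);
}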
+struct mlx5e_icosq { + /* data path */ + + /* dirtied @completion */ + u16 cc; + + /* dirtied @xmit */ + u16 pc ____cacheline_aligned_in_smp; + u32 dma_fifo_pc; + u16 prev_cc; -typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq *rq, u16 ix); + struct mlx5e_cq cq; + + /* write@xmit, read@completion */ + struct { + struct mlx5e_sq_wqe_info *ico_wqe; + } db; + + /* read only */ + struct mlx5_wq_cyc wq; + void __iomem *uar_map; + u32 sqn; + u16 edge; + struct device *pdev; + __be32 mkey_be; + unsigned long state; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5e_channel *channel; +} ____cacheline_aligned_in_smp; + +static inline bool +mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n) +{ + return (((wq->sz_m1 & (cc - pc)) >= n) || (cc == pc)); +} struct mlx5e_dma_info { struct page *page; dma_addr_t addr; }; +struct mlx5e_umr_dma_info { + __be64 *mtt; + dma_addr_t mtt_addr; + struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE]; + struct mlx5e_umr_wqe wqe; +}; + +struct mlx5e_mpw_info { + struct mlx5e_umr_dma_info umr; + u16 consumed_strides; + u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE]; +}; + struct mlx5e_rx_am_stats { int ppms; /* packets per msec */ int epms; /* events per msec */ @@ -347,6 +492,11 @@ struct mlx5e_page_cache { struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE]; }; +struct mlx5e_rq; +typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*); +typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq*, struct mlx5e_rx_wqe*, u16); +typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16); + struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; @@ -381,7 +531,10 @@ struct mlx5e_rq { u16 rx_headroom; struct mlx5e_rx_am am; /* Adaptive Moderation */ + + /* XDP */ struct bpf_prog *xdp_prog; + struct mlx5e_xdpsq xdpsq; /* control */ struct mlx5_wq_ctrl wq_ctrl; @@ -390,118 +543,10 @@ struct mlx5e_rq { u32 mpwqe_num_strides; u32 rqn; struct mlx5e_channel *channel; - struct mlx5e_priv *priv; + struct mlx5_core_dev *mdev; struct mlx5_core_mkey umr_mkey; } ____cacheline_aligned_in_smp; -struct mlx5e_umr_dma_info { - __be64 *mtt; - dma_addr_t mtt_addr; - struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE]; - struct mlx5e_umr_wqe wqe; -}; - -struct mlx5e_mpw_info { - struct mlx5e_umr_dma_info umr; - u16 consumed_strides; - u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE]; -}; - -struct mlx5e_tx_wqe_info { - u32 num_bytes; - u8 num_wqebbs; - u8 num_dma; -}; - -enum mlx5e_dma_map_type { - MLX5E_DMA_MAP_SINGLE, - MLX5E_DMA_MAP_PAGE -}; - -struct mlx5e_sq_dma { - dma_addr_t addr; - u32 size; - enum mlx5e_dma_map_type type; -}; - -enum { - MLX5E_SQ_STATE_ENABLED, - MLX5E_SQ_STATE_BF_ENABLE, -}; - -struct mlx5e_sq_wqe_info { - u8 opcode; - u8 num_wqebbs; -}; - -enum mlx5e_sq_type { - MLX5E_SQ_TXQ, - MLX5E_SQ_ICO, - MLX5E_SQ_XDP -}; - -struct mlx5e_sq { - /* data path */ - - /* dirtied @completion */ - u16 cc; - u32 dma_fifo_cc; - - /* dirtied @xmit */ - u16 pc ____cacheline_aligned_in_smp; - u32 dma_fifo_pc; - u16 bf_offset; - u16 prev_cc; - u8 bf_budget; - struct mlx5e_sq_stats stats; - - struct mlx5e_cq cq; - - /* pointers to per tx element info: write@xmit, read@completion */ - union { - struct { - struct sk_buff **skb; - struct mlx5e_sq_dma *dma_fifo; - struct mlx5e_tx_wqe_info *wqe_info; - } txq; - struct mlx5e_sq_wqe_info *ico_wqe; - struct { - struct mlx5e_sq_wqe_info *wqe_info; - struct mlx5e_dma_info *di; - bool doorbell; - } xdp; - } db; - - /* read only */ - struct mlx5_wq_cyc wq; - u32 dma_fifo_mask; - void __iomem *uar_map; - 
struct netdev_queue *txq; - u32 sqn; - u16 bf_buf_size; - u16 max_inline; - u8 min_inline_mode; - u16 edge; - struct device *pdev; - struct mlx5e_tstamp *tstamp; - __be32 mkey_be; - unsigned long state; - - /* control path */ - struct mlx5_wq_ctrl wq_ctrl; - struct mlx5_sq_bfreg bfreg; - struct mlx5e_channel *channel; - int tc; - u32 rate_limit; - u8 type; -} ____cacheline_aligned_in_smp; - -static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) -{ - return (((sq->wq.sz_m1 & (sq->cc - sq->pc)) >= n) || - (sq->cc == sq->pc)); -} - enum channel_flags { MLX5E_CHANNEL_NAPI_SCHED = 1, }; @@ -509,9 +554,8 @@ enum channel_flags { struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; - struct mlx5e_sq xdp_sq; - struct mlx5e_sq sq[MLX5E_MAX_NUM_TC]; - struct mlx5e_sq icosq; /* internal control operations */ + struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC]; + struct mlx5e_icosq icosq; /* internal control operations */ bool xdp; struct napi_struct napi; struct device *pdev; @@ -522,10 +566,18 @@ struct mlx5e_channel { /* control */ struct mlx5e_priv *priv; + struct mlx5_core_dev *mdev; + struct mlx5e_tstamp *tstamp; int ix; int cpu; }; +struct mlx5e_channels { + struct mlx5e_channel **c; + unsigned int num; + struct mlx5e_params params; +}; + enum mlx5e_traffic_types { MLX5E_TT_IPV4_TCP, MLX5E_TT_IPV6_TCP, @@ -675,34 +727,17 @@ enum { MLX5E_NIC_PRIO }; -struct mlx5e_profile { - void (*init)(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, void *ppriv); - void (*cleanup)(struct mlx5e_priv *priv); - int (*init_rx)(struct mlx5e_priv *priv); - void (*cleanup_rx)(struct mlx5e_priv *priv); - int (*init_tx)(struct mlx5e_priv *priv); - void (*cleanup_tx)(struct mlx5e_priv *priv); - void (*enable)(struct mlx5e_priv *priv); - void (*disable)(struct mlx5e_priv *priv); - void (*update_stats)(struct mlx5e_priv *priv); - int (*max_nch)(struct mlx5_core_dev *mdev); - int max_tc; -}; - struct mlx5e_priv { /* priv data path fields - start */ - struct mlx5e_sq **txq_to_sq_map; - int channeltc_to_txq_map[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; - struct bpf_prog *xdp_prog; + struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; + int channel_tc2txq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; /* priv data path fields - end */ unsigned long state; struct mutex state_lock; /* Protects Interface state */ struct mlx5e_rq drop_rq; - struct mlx5e_channel **channel; + struct mlx5e_channels channels; u32 tisn[MLX5E_MAX_NUM_TC]; struct mlx5e_rqt indir_rqt; struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; @@ -712,7 +747,6 @@ struct mlx5e_priv { struct mlx5e_flow_steering fs; struct mlx5e_vxlan_db vxlan; - struct mlx5e_params params; struct workqueue_struct *wq; struct work_struct update_carrier_work; struct work_struct set_rx_mode_work; @@ -732,9 +766,28 @@ struct mlx5e_priv { void *ppriv; }; +struct mlx5e_profile { + void (*init)(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, void *ppriv); + void (*cleanup)(struct mlx5e_priv *priv); + int (*init_rx)(struct mlx5e_priv *priv); + void (*cleanup_rx)(struct mlx5e_priv *priv); + int (*init_tx)(struct mlx5e_priv *priv); + void (*cleanup_tx)(struct mlx5e_priv *priv); + void (*enable)(struct mlx5e_priv *priv); + void (*disable)(struct mlx5e_priv *priv); + void (*update_stats)(struct mlx5e_priv *priv); + int (*max_nch)(struct mlx5_core_dev *mdev); + struct { + mlx5e_fp_handle_rx_cqe handle_rx_cqe; + mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe; + } rx_handlers; + 
int max_tc; +}; + void mlx5e_build_ptys2ethtool_map(void); -void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw); u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback); netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); @@ -744,7 +797,9 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); -void mlx5e_free_sq_descs(struct mlx5e_sq *sq); +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); +void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, bool recycle); @@ -792,7 +847,7 @@ void mlx5e_pps_event_handler(struct mlx5e_priv *priv, struct ptp_clock_event *event); int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr); int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr); -void mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val); +int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val); int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); @@ -801,14 +856,40 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); -int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd); +struct mlx5e_redirect_rqt_param { + bool is_rss; + union { + u32 rqn; /* Direct RQN (Non-RSS) */ + struct { + u8 hfunc; + struct mlx5e_channels *channels; + } rss; /* RSS data */ + }; +}; -int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix); -void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc, - enum mlx5e_traffic_types tt); +int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, + struct mlx5e_redirect_rqt_param rrp); +void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, + enum mlx5e_traffic_types tt, + void *tirc); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); + +int mlx5e_open_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *chs); +void mlx5e_close_channels(struct mlx5e_channels *chs); + +/* Function pointer to be used to modify WH settings while + * switching channels + */ +typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv); +void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_hw_modify hw_modify); +void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); +void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); + void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, u32 *indirection_rqt, int len, int num_channels); @@ -816,30 +897,43 @@ int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); -void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type); +void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, u8 rq_type); -static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, - struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz) +static inline +struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) { - u16 ofst = 
sq->bf_offset; + u16 pi = *pc & wq->sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); + (*pc)++; + + return wqe; +} + +static inline +void mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, + void __iomem *uar_map, + struct mlx5_wqe_ctrl_seg *ctrl) +{ + ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; /* ensure wqe is visible to device before updating doorbell record */ dma_wmb(); - *sq->wq.db = cpu_to_be32(sq->pc); + *wq->db = cpu_to_be32(pc); /* ensure doorbell record is visible to device before ringing the * doorbell */ wmb(); - if (bf_sz) - __iowrite64_copy(sq->uar_map + ofst, ctrl, bf_sz); - else - mlx5_write64((__be32 *)ctrl, sq->uar_map + ofst, NULL); - /* flush the write-combining mapped buffer */ - wmb(); - sq->bf_offset ^= sq->bf_buf_size; + mlx5_write64((__be32 *)ctrl, uar_map, NULL); } static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) @@ -895,44 +989,43 @@ void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir); int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); -int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev, - bool enable_uc_lb); - -struct mlx5_eswitch_rep; -int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep); -void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep); -int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep); -void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep); -int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); -void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); -int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); -void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); -void mlx5e_update_hw_rep_counters(struct mlx5e_priv *priv); +int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb); + +/* common netdev helpers */ +int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); + +int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv); +void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv); int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); -void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv); int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); +void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); + +int mlx5e_create_ttc_table(struct mlx5e_priv *priv, u32 underlay_qpn); +void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv); + +int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, + u32 underlay_qpn, u32 *tisn); +void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); + int mlx5e_create_tises(struct mlx5e_priv *priv); void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); void mlx5e_update_stats_work(struct work_struct *work); -struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, - const struct mlx5e_profile *profile, - void *ppriv); -void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv); -int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); -void 
mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); -int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, - void *sp); -bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id); +/* mlx5e generic netdev management API */ +struct net_device* +mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, + void *ppriv); +int mlx5e_attach_netdev(struct mlx5e_priv *priv); +void mlx5e_detach_netdev(struct mlx5e_priv *priv); +void mlx5e_destroy_netdev(struct mlx5e_priv *priv); +void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u16 max_channels); -bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); -bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 68419a01db36..c8a005326e30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -174,13 +174,9 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, enum arfs_type type) { struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type]; - struct mlx5_flow_act flow_act = { - .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, - .encap_id = 0, - }; - struct mlx5_flow_destination dest; struct mlx5e_tir *tir = priv->indir_tir; + struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_spec *spec; int err = 0; @@ -325,10 +321,16 @@ static int arfs_create_table(struct mlx5e_priv *priv, { struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft; + struct mlx5_flow_table_attr ft_attr = {}; int err; - ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL, 0); + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_ARFS_TABLE_SIZE; + ft_attr.level = MLX5E_ARFS_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -469,15 +471,11 @@ static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs, static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, struct arfs_rule *arfs_rule) { - struct mlx5_flow_act flow_act = { - .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, - .encap_id = 0, - }; struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; struct arfs_tuple *tuple = &arfs_rule->tuple; struct mlx5_flow_handle *rule = NULL; struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); struct arfs_table *arfs_table; struct mlx5_flow_spec *spec; struct mlx5_flow_table *ft; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index 37e66eef6fb5..e706a87fc8b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -90,6 +90,7 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) { struct mlx5e_priv *priv = netdev_priv(dev); struct hwtstamp_config config; + int err; if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) return -EOPNOTSUPP; @@ -111,7 +112,7 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) switch (config.rx_filter) { case HWTSTAMP_FILTER_NONE: /* Reset CQE 
compression to Admin default */ - mlx5e_modify_rx_cqe_compression_locked(priv, priv->params.rx_cqe_compress_def); + mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def); break; case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: @@ -129,7 +130,12 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: /* Disable CQE compression */ netdev_warn(dev, "Disabling cqe compression"); - mlx5e_modify_rx_cqe_compression_locked(priv, false); + err = mlx5e_modify_rx_cqe_compression_locked(priv, false); + if (err) { + netdev_err(dev, "Failed disabling cqe compression err=%d\n", err); + mutex_unlock(&priv->state_lock); + return err; + } config.rx_filter = HWTSTAMP_FILTER_ALL; break; default: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index bd898d8deda0..f1f17f7a3cd0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -107,10 +107,18 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) goto err_dealloc_transport_domain; } + err = mlx5_alloc_bfreg(mdev, &res->bfreg, false, false); + if (err) { + mlx5_core_err(mdev, "alloc bfreg failed, %d\n", err); + goto err_destroy_mkey; + } + INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list); return 0; +err_destroy_mkey: + mlx5_core_destroy_mkey(mdev, &res->mkey); err_dealloc_transport_domain: mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); err_dealloc_pd: @@ -122,23 +130,26 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) { struct mlx5e_resources *res = &mdev->mlx5e_res; + mlx5_free_bfreg(mdev, &res->bfreg); mlx5_core_destroy_mkey(mdev, &res->mkey); mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); mlx5_core_dealloc_pd(mdev, res->pdn); } -int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev, - bool enable_uc_lb) +int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) { + struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_tir *tir; - void *in; + int err = -ENOMEM; + u32 tirn = 0; int inlen; - int err = 0; + void *in; + inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = mlx5_vzalloc(inlen); if (!in) - return -ENOMEM; + goto out; if (enable_uc_lb) MLX5_SET(modify_tir_in, in, ctx.self_lb_block, @@ -147,13 +158,16 @@ int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev, MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) { - err = mlx5_core_modify_tir(mdev, tir->tirn, in, inlen); + tirn = tir->tirn; + err = mlx5_core_modify_tir(mdev, tirn, in, inlen); if (err) goto out; } out: kvfree(in); + if (err) + netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index a004a5a1a4c2..ce7b09d72ff6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -42,8 +42,9 @@ static void mlx5e_get_drvinfo(struct net_device *dev, strlcpy(drvinfo->version, DRIVER_VERSION " (" DRIVER_RELDATE ")", sizeof(drvinfo->version)); snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), - "%d.%d.%d", - fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev)); + "%d.%d.%04d (%.16s)", + fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), + mdev->board_id); strlcpy(drvinfo->bus_info, pci_name(mdev->pdev), 
sizeof(drvinfo->bus_info)); } @@ -152,12 +153,9 @@ static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) } #define MLX5E_NUM_Q_CNTRS(priv) (NUM_Q_COUNTERS * (!!priv->q_counter)) -#define MLX5E_NUM_RQ_STATS(priv) \ - (NUM_RQ_STATS * priv->params.num_channels * \ - test_bit(MLX5E_STATE_OPENED, &priv->state)) +#define MLX5E_NUM_RQ_STATS(priv) (NUM_RQ_STATS * (priv)->channels.num) #define MLX5E_NUM_SQ_STATS(priv) \ - (NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \ - test_bit(MLX5E_STATE_OPENED, &priv->state)) + (NUM_SQ_STATS * (priv)->channels.num * (priv)->channels.params.num_tc) #define MLX5E_NUM_PFC_COUNTERS(priv) \ ((mlx5e_query_global_pause_combined(priv) + hweight8(mlx5e_query_pfc_combined(priv))) * \ NUM_PPORT_PER_PRIO_PFC_COUNTERS) @@ -262,17 +260,17 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) return; /* per channel counters */ - for (i = 0; i < priv->params.num_channels; i++) + for (i = 0; i < priv->channels.num; i++) for (j = 0; j < NUM_RQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, rq_stats_desc[j].format, i); - for (tc = 0; tc < priv->params.num_tc; tc++) - for (i = 0; i < priv->params.num_channels; i++) + for (tc = 0; tc < priv->channels.params.num_tc; tc++) + for (i = 0; i < priv->channels.num; i++) for (j = 0; j < NUM_SQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, sq_stats_desc[j].format, - priv->channeltc_to_txq_map[i][tc]); + priv->channel_tc2txq[i][tc]); } static void mlx5e_get_strings(struct net_device *dev, @@ -303,6 +301,7 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_channels *channels; struct mlx5_priv *mlx5_priv; int i, j, tc, prio, idx = 0; unsigned long pfc_combined; @@ -313,6 +312,7 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, mutex_lock(&priv->state_lock); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_update_stats(priv); + channels = &priv->channels; mutex_unlock(&priv->state_lock); for (i = 0; i < NUM_SW_COUNTERS; i++) @@ -382,16 +382,16 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, return; /* per channel counters */ - for (i = 0; i < priv->params.num_channels; i++) + for (i = 0; i < channels->num; i++) for (j = 0; j < NUM_RQ_STATS; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel[i]->rq.stats, + MLX5E_READ_CTR64_CPU(&channels->c[i]->rq.stats, rq_stats_desc, j); - for (tc = 0; tc < priv->params.num_tc; tc++) - for (i = 0; i < priv->params.num_channels; i++) + for (tc = 0; tc < priv->channels.params.num_tc; tc++) + for (i = 0; i < channels->num; i++) for (j = 0; j < NUM_SQ_STATS; j++) - data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel[i]->sq[tc].stats, + data[idx++] = MLX5E_READ_CTR64_CPU(&channels->c[i]->sq[tc].stats, sq_stats_desc, j); } @@ -406,8 +406,8 @@ static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type, if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) return num_wqe; - stride_size = 1 << priv->params.mpwqe_log_stride_sz; - num_strides = 1 << priv->params.mpwqe_log_num_strides; + stride_size = 1 << priv->channels.params.mpwqe_log_stride_sz; + num_strides = 1 << priv->channels.params.mpwqe_log_num_strides; wqe_size = stride_size * num_strides; packets_per_wqe = wqe_size / @@ -427,8 +427,8 @@ static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type, if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) return num_packets; - stride_size = 1 << 
priv->params.mpwqe_log_stride_sz; - num_strides = 1 << priv->params.mpwqe_log_num_strides; + stride_size = 1 << priv->channels.params.mpwqe_log_stride_sz; + num_strides = 1 << priv->channels.params.mpwqe_log_num_strides; wqe_size = stride_size * num_strides; num_packets = (1 << order_base_2(num_packets)); @@ -443,26 +443,25 @@ static void mlx5e_get_ringparam(struct net_device *dev, struct ethtool_ringparam *param) { struct mlx5e_priv *priv = netdev_priv(dev); - int rq_wq_type = priv->params.rq_wq_type; + int rq_wq_type = priv->channels.params.rq_wq_type; param->rx_max_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, 1 << mlx5_max_log_rq_size(rq_wq_type)); param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; param->rx_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, - 1 << priv->params.log_rq_size); - param->tx_pending = 1 << priv->params.log_sq_size; + 1 << priv->channels.params.log_rq_size); + param->tx_pending = 1 << priv->channels.params.log_sq_size; } static int mlx5e_set_ringparam(struct net_device *dev, struct ethtool_ringparam *param) { struct mlx5e_priv *priv = netdev_priv(dev); - bool was_opened; - int rq_wq_type = priv->params.rq_wq_type; + int rq_wq_type = priv->channels.params.rq_wq_type; + struct mlx5e_channels new_channels = {}; u32 rx_pending_wqes; u32 min_rq_size; u32 max_rq_size; - u16 min_rx_wqes; u8 log_rq_size; u8 log_sq_size; u32 num_mtts; @@ -500,7 +499,7 @@ static int mlx5e_set_ringparam(struct net_device *dev, } num_mtts = MLX5E_REQUIRED_MTTS(rx_pending_wqes); - if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && !MLX5E_VALID_NUM_MTTS(num_mtts)) { netdev_info(dev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n", __func__, param->rx_pending); @@ -522,26 +521,29 @@ static int mlx5e_set_ringparam(struct net_device *dev, log_rq_size = order_base_2(rx_pending_wqes); log_sq_size = order_base_2(param->tx_pending); - min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, rx_pending_wqes); - if (log_rq_size == priv->params.log_rq_size && - log_sq_size == priv->params.log_sq_size && - min_rx_wqes == priv->params.min_rx_wqes) + if (log_rq_size == priv->channels.params.log_rq_size && + log_sq_size == priv->channels.params.log_sq_size) return 0; mutex_lock(&priv->state_lock); - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) - mlx5e_close_locked(dev); + new_channels.params = priv->channels.params; + new_channels.params.log_rq_size = log_rq_size; + new_channels.params.log_sq_size = log_sq_size; - priv->params.log_rq_size = log_rq_size; - priv->params.log_sq_size = log_sq_size; - priv->params.min_rx_wqes = min_rx_wqes; + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto unlock; + } + + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto unlock; - if (was_opened) - err = mlx5e_open_locked(dev); + mlx5e_switch_priv_channels(priv, &new_channels, NULL); +unlock: mutex_unlock(&priv->state_lock); return err; @@ -553,7 +555,7 @@ static void mlx5e_get_channels(struct net_device *dev, struct mlx5e_priv *priv = netdev_priv(dev); ch->max_combined = priv->profile->max_nch(priv->mdev); - ch->combined_count = priv->params.num_channels; + ch->combined_count = priv->channels.params.num_channels; } static int mlx5e_set_channels(struct net_device *dev, @@ -561,8 +563,8 @@ static int mlx5e_set_channels(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); unsigned 
int count = ch->combined_count; + struct mlx5e_channels new_channels = {}; bool arfs_enabled; - bool was_opened; int err = 0; if (!count) { @@ -571,27 +573,32 @@ static int mlx5e_set_channels(struct net_device *dev, return -EINVAL; } - if (priv->params.num_channels == count) + if (priv->channels.params.num_channels == count) return 0; mutex_lock(&priv->state_lock); - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) - mlx5e_close_locked(dev); + new_channels.params = priv->channels.params; + new_channels.params.num_channels = count; + mlx5e_build_default_indir_rqt(priv->mdev, new_channels.params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, count); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } + + /* Create fresh channels with new parameters */ + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto out; arfs_enabled = dev->features & NETIF_F_NTUPLE; if (arfs_enabled) mlx5e_arfs_disable(priv); - priv->params.num_channels = count; - mlx5e_build_default_indir_rqt(priv->mdev, priv->params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, count); - - if (was_opened) - err = mlx5e_open_locked(dev); - if (err) - goto out; + /* Switch to new channels, set new parameters and close old ones */ + mlx5e_switch_priv_channels(priv, &new_channels, NULL); if (arfs_enabled) { err = mlx5e_arfs_enable(priv); @@ -614,49 +621,24 @@ static int mlx5e_get_coalesce(struct net_device *netdev, if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) return -EOPNOTSUPP; - coal->rx_coalesce_usecs = priv->params.rx_cq_moderation.usec; - coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation.pkts; - coal->tx_coalesce_usecs = priv->params.tx_cq_moderation.usec; - coal->tx_max_coalesced_frames = priv->params.tx_cq_moderation.pkts; - coal->use_adaptive_rx_coalesce = priv->params.rx_am_enabled; + coal->rx_coalesce_usecs = priv->channels.params.rx_cq_moderation.usec; + coal->rx_max_coalesced_frames = priv->channels.params.rx_cq_moderation.pkts; + coal->tx_coalesce_usecs = priv->channels.params.tx_cq_moderation.usec; + coal->tx_max_coalesced_frames = priv->channels.params.tx_cq_moderation.pkts; + coal->use_adaptive_rx_coalesce = priv->channels.params.rx_am_enabled; return 0; } -static int mlx5e_set_coalesce(struct net_device *netdev, - struct ethtool_coalesce *coal) +static void +mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_channel *c; - bool restart = - !!coal->use_adaptive_rx_coalesce != priv->params.rx_am_enabled; - bool was_opened; - int err = 0; int tc; int i; - if (!MLX5_CAP_GEN(mdev, cq_moderation)) - return -EOPNOTSUPP; - - mutex_lock(&priv->state_lock); - - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened && restart) { - mlx5e_close_locked(netdev); - priv->params.rx_am_enabled = !!coal->use_adaptive_rx_coalesce; - } - - priv->params.tx_cq_moderation.usec = coal->tx_coalesce_usecs; - priv->params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames; - priv->params.rx_cq_moderation.usec = coal->rx_coalesce_usecs; - priv->params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames; - - if (!was_opened || restart) - goto out; - - for (i = 0; i < priv->params.num_channels; ++i) { - c = priv->channel[i]; + for (i = 0; i < priv->channels.num; ++i) { + struct mlx5e_channel *c = priv->channels.c[i]; for (tc = 0; tc < c->num_tc; tc++) { 
mlx5_core_modify_cq_moderation(mdev, @@ -669,11 +651,50 @@ static int mlx5e_set_coalesce(struct net_device *netdev, coal->rx_coalesce_usecs, coal->rx_max_coalesced_frames); } +} -out: - if (was_opened && restart) - err = mlx5e_open_locked(netdev); +static int mlx5e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_channels new_channels = {}; + int err = 0; + bool reset; + + if (!MLX5_CAP_GEN(mdev, cq_moderation)) + return -EOPNOTSUPP; + + mutex_lock(&priv->state_lock); + new_channels.params = priv->channels.params; + + new_channels.params.tx_cq_moderation.usec = coal->tx_coalesce_usecs; + new_channels.params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames; + new_channels.params.rx_cq_moderation.usec = coal->rx_coalesce_usecs; + new_channels.params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames; + new_channels.params.rx_am_enabled = !!coal->use_adaptive_rx_coalesce; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } + /* we are opened */ + + reset = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_am_enabled; + if (!reset) { + mlx5e_set_priv_channels_coalesce(priv, coal); + priv->channels.params = new_channels.params; + goto out; + } + + /* open fresh channels with new coal parameters */ + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto out; + + mlx5e_switch_priv_channels(priv, &new_channels, NULL); +out: mutex_unlock(&priv->state_lock); return err; } @@ -968,7 +989,7 @@ static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - return sizeof(priv->params.toeplitz_hash_key); + return sizeof(priv->channels.params.toeplitz_hash_key); } static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev) @@ -982,15 +1003,15 @@ static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, struct mlx5e_priv *priv = netdev_priv(netdev); if (indir) - memcpy(indir, priv->params.indirection_rqt, - sizeof(priv->params.indirection_rqt)); + memcpy(indir, priv->channels.params.indirection_rqt, + sizeof(priv->channels.params.indirection_rqt)); if (key) - memcpy(key, priv->params.toeplitz_hash_key, - sizeof(priv->params.toeplitz_hash_key)); + memcpy(key, priv->channels.params.toeplitz_hash_key, + sizeof(priv->channels.params.toeplitz_hash_key)); if (hfunc) - *hfunc = priv->params.rss_hfunc; + *hfunc = priv->channels.params.rss_hfunc; return 0; } @@ -1006,7 +1027,7 @@ static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { memset(tirc, 0, ctxlen); - mlx5e_build_indir_tir_ctx_hash(priv, tirc, tt); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc); mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); } } @@ -1030,25 +1051,37 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, mutex_lock(&priv->state_lock); - if (indir) { - u32 rqtn = priv->indir_rqt.rqtn; - - memcpy(priv->params.indirection_rqt, indir, - sizeof(priv->params.indirection_rqt)); - mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); - } - if (hfunc != ETH_RSS_HASH_NO_CHANGE && - hfunc != priv->params.rss_hfunc) { - priv->params.rss_hfunc = hfunc; + hfunc != priv->channels.params.rss_hfunc) { + priv->channels.params.rss_hfunc = hfunc; hash_changed = true; } + if (indir) { + memcpy(priv->channels.params.indirection_rqt, 
indir, + sizeof(priv->channels.params.indirection_rqt)); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + u32 rqtn = priv->indir_rqt.rqtn; + struct mlx5e_redirect_rqt_param rrp = { + .is_rss = true, + { + .rss = { + .hfunc = priv->channels.params.rss_hfunc, + .channels = &priv->channels, + }, + }, + }; + + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp); + } + } + if (key) { - memcpy(priv->params.toeplitz_hash_key, key, - sizeof(priv->params.toeplitz_hash_key)); + memcpy(priv->channels.params.toeplitz_hash_key, key, + sizeof(priv->channels.params.toeplitz_hash_key)); hash_changed = hash_changed || - priv->params.rss_hfunc == ETH_RSS_HASH_TOP; + priv->channels.params.rss_hfunc == ETH_RSS_HASH_TOP; } if (hash_changed) @@ -1069,7 +1102,7 @@ static int mlx5e_get_rxnfc(struct net_device *netdev, switch (info->cmd) { case ETHTOOL_GRXRINGS: - info->data = priv->params.num_channels; + info->data = priv->channels.params.num_channels; break; case ETHTOOL_GRXCLSRLCNT: info->rule_cnt = priv->fs.ethtool.tot_num_rules; @@ -1097,7 +1130,7 @@ static int mlx5e_get_tunable(struct net_device *dev, switch (tuna->id) { case ETHTOOL_TX_COPYBREAK: - *(u32 *)data = priv->params.tx_max_inline; + *(u32 *)data = priv->channels.params.tx_max_inline; break; default: err = -EINVAL; @@ -1113,9 +1146,11 @@ static int mlx5e_set_tunable(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; - bool was_opened; - u32 val; + struct mlx5e_channels new_channels = {}; int err = 0; + u32 val; + + mutex_lock(&priv->state_lock); switch (tuna->id) { case ETHTOOL_TX_COPYBREAK: @@ -1125,24 +1160,26 @@ static int mlx5e_set_tunable(struct net_device *dev, break; } - mutex_lock(&priv->state_lock); + new_channels.params = priv->channels.params; + new_channels.params.tx_max_inline = val; - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) - mlx5e_close_locked(dev); - - priv->params.tx_max_inline = val; + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + break; + } - if (was_opened) - err = mlx5e_open_locked(dev); + err = mlx5e_open_channels(priv, &new_channels); + if (err) + break; + mlx5e_switch_priv_channels(priv, &new_channels, NULL); - mutex_unlock(&priv->state_lock); break; default: err = -EINVAL; break; } + mutex_unlock(&priv->state_lock); return err; } @@ -1442,15 +1479,15 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_channels new_channels = {}; bool rx_mode_changed; u8 rx_cq_period_mode; int err = 0; - bool reset; rx_cq_period_mode = enable ? 
MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - rx_mode_changed = rx_cq_period_mode != priv->params.rx_cq_period_mode; + rx_mode_changed = rx_cq_period_mode != priv->channels.params.rx_cq_period_mode; if (rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE && !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)) @@ -1459,16 +1496,51 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) if (!rx_mode_changed) return 0; - reset = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (reset) - mlx5e_close_locked(netdev); + new_channels.params = priv->channels.params; + mlx5e_set_rx_cq_mode_params(&new_channels.params, rx_cq_period_mode); - mlx5e_set_rx_cq_mode_params(&priv->params, rx_cq_period_mode); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + return 0; + } - if (reset) - err = mlx5e_open_locked(netdev); + err = mlx5e_open_channels(priv, &new_channels); + if (err) + return err; - return err; + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + return 0; +} + +int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val) +{ + bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS); + struct mlx5e_channels new_channels = {}; + int err = 0; + + if (!MLX5_CAP_GEN(priv->mdev, cqe_compression)) + return new_val ? -EOPNOTSUPP : 0; + + if (curr_val == new_val) + return 0; + + new_channels.params = priv->channels.params; + MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); + + mlx5e_set_rq_type_params(priv->mdev, &new_channels.params, + new_channels.params.rq_wq_type); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + return 0; + } + + err = mlx5e_open_channels(priv, &new_channels); + if (err) + return err; + + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + return 0; } static int set_pflag_rx_cqe_compress(struct net_device *netdev, @@ -1486,8 +1558,7 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, } mlx5e_modify_rx_cqe_compression_locked(priv, enable); - priv->params.rx_cqe_compress_def = enable; - mlx5e_set_rq_type_params(priv, priv->params.rq_wq_type); + priv->channels.params.rx_cqe_compress_def = enable; return 0; } @@ -1499,7 +1570,7 @@ static int mlx5e_handle_pflag(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); bool enable = !!(wanted_flags & flag); - u32 changes = wanted_flags ^ priv->params.pflags; + u32 changes = wanted_flags ^ priv->channels.params.pflags; int err; if (!(changes & flag)) @@ -1512,7 +1583,7 @@ static int mlx5e_handle_pflag(struct net_device *netdev, return err; } - MLX5E_SET_PFLAG(priv, flag, enable); + MLX5E_SET_PFLAG(&priv->channels.params, flag, enable); return 0; } @@ -1541,7 +1612,7 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - return priv->params.pflags; + return priv->channels.params.pflags; } static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index f2762e45c8ae..576d6787b484 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -159,14 +159,10 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, u16 vid, struct mlx5_flow_spec *spec) { - struct mlx5_flow_act flow_act = 
{ - .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, - .encap_id = 0, - }; struct mlx5_flow_table *ft = priv->fs.vlan.ft.t; struct mlx5_flow_destination dest; struct mlx5_flow_handle **rule_p; + MLX5_DECLARE_FLOW_ACT(flow_act); int err = 0; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; @@ -659,11 +655,7 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, u16 etype, u8 proto) { - struct mlx5_flow_act flow_act = { - .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, - .encap_id = 0, - }; + MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; int err = 0; @@ -800,7 +792,7 @@ err: return err; } -static void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) +void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) { struct mlx5e_ttc_table *ttc = &priv->fs.ttc; @@ -808,14 +800,19 @@ static void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) mlx5e_destroy_flow_table(&ttc->ft); } -static int mlx5e_create_ttc_table(struct mlx5e_priv *priv) +int mlx5e_create_ttc_table(struct mlx5e_priv *priv, u32 underlay_qpn) { struct mlx5e_ttc_table *ttc = &priv->fs.ttc; + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5e_flow_table *ft = &ttc->ft; int err; - ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_TTC_TABLE_SIZE, MLX5E_TTC_FT_LEVEL, 0); + ft_attr.max_fte = MLX5E_TTC_TABLE_SIZE; + ft_attr.level = MLX5E_TTC_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + ft_attr.underlay_qpn = underlay_qpn; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -848,13 +845,9 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, struct mlx5e_l2_rule *ai, int type) { - struct mlx5_flow_act flow_act = { - .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, - .encap_id = 0, - }; struct mlx5_flow_table *ft = priv->fs.l2.ft.t; struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_spec *spec; int err = 0; u8 *mc_dmac; @@ -985,12 +978,16 @@ static int mlx5e_create_l2_table(struct mlx5e_priv *priv) { struct mlx5e_l2_table *l2_table = &priv->fs.l2; struct mlx5e_flow_table *ft = &l2_table->ft; + struct mlx5_flow_table_attr ft_attr = {}; int err; ft->num_groups = 0; - ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_L2_TABLE_SIZE, MLX5E_L2_FT_LEVEL, 0); + ft_attr.max_fte = MLX5E_L2_TABLE_SIZE; + ft_attr.level = MLX5E_L2_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -1088,11 +1085,16 @@ static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft) static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) { struct mlx5e_flow_table *ft = &priv->fs.vlan.ft; + struct mlx5_flow_table_attr ft_attr = {}; int err; ft->num_groups = 0; - ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_VLAN_TABLE_SIZE, MLX5E_VLAN_FT_LEVEL, 0); + + ft_attr.max_fte = MLX5E_VLAN_TABLE_SIZE; + ft_attr.level = MLX5E_VLAN_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); @@ -1145,7 +1147,7 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - err = mlx5e_create_ttc_table(priv); + err = mlx5e_create_ttc_table(priv, 0); if (err) { 
netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 26fc77e80f7b..85bf4a389295 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -390,7 +390,7 @@ static int validate_flow(struct mlx5e_priv *priv, if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES) return -EINVAL; - if (fs->ring_cookie >= priv->params.num_channels && + if (fs->ring_cookie >= priv->channels.params.num_channels && fs->ring_cookie != RX_CLS_FLOW_DISC) return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 15cc7b469d2e..a61b71b6fff3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -31,28 +31,24 @@ */ #include <net/tc_act/tc_gact.h> -#include <linux/crash_dump.h> #include <net/pkt_cls.h> #include <linux/mlx5/fs.h> #include <net/vxlan.h> #include <linux/bpf.h> +#include "eswitch.h" #include "en.h" #include "en_tc.h" -#include "eswitch.h" +#include "en_rep.h" #include "vxlan.h" struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; struct mlx5_wq_param wq; - bool am_enabled; }; struct mlx5e_sq_param { u32 sqc[MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; - u16 max_inline; - u8 min_inline_mode; - enum mlx5e_sq_type type; }; struct mlx5e_cq_param { @@ -79,49 +75,47 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) MLX5_CAP_ETH(mdev, reg_umr_sq); } -void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) +void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, u8 rq_type) { - priv->params.rq_wq_type = rq_type; - priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; - switch (priv->params.rq_wq_type) { + params->rq_wq_type = rq_type; + params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - priv->params.log_rq_size = is_kdump_kernel() ? + params->log_rq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW : MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; - priv->params.mpwqe_log_stride_sz = - MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) ? - MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(priv->mdev) : - MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(priv->mdev); - priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - - priv->params.mpwqe_log_stride_sz; + params->mpwqe_log_stride_sz = + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ? + MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : + MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); + params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - + params->mpwqe_log_stride_sz; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - priv->params.log_rq_size = is_kdump_kernel() ? + params->log_rq_size = is_kdump_kernel() ? 
MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; /* Extra room needed for build_skb */ - priv->params.lro_wqe_sz -= MLX5_RX_HEADROOM + + params->lro_wqe_sz -= MLX5_RX_HEADROOM + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); } - priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, - BIT(priv->params.log_rq_size)); - mlx5_core_info(priv->mdev, - "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", - priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, - BIT(priv->params.log_rq_size), - BIT(priv->params.mpwqe_log_stride_sz), - MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS)); + mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", + params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, + BIT(params->log_rq_size), + BIT(params->mpwqe_log_stride_sz), + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); } -static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv) +static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { - u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) && - !priv->xdp_prog ? + u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) && + !params->xdp_prog ? MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : MLX5_WQ_TYPE_LINKED_LIST; - mlx5e_set_rq_type_params(priv, rq_type); + mlx5e_set_rq_type_params(mdev, params, rq_type); } static void mlx5e_update_carrier(struct mlx5e_priv *priv) @@ -181,8 +175,10 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) int i, j; memset(s, 0, sizeof(*s)); - for (i = 0; i < priv->params.num_channels; i++) { - rq_stats = &priv->channel[i]->rq.stats; + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + rq_stats = &c->rq.stats; s->rx_packets += rq_stats->packets; s->rx_bytes += rq_stats->bytes; @@ -204,8 +200,8 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_cache_empty += rq_stats->cache_empty; s->rx_cache_busy += rq_stats->cache_busy; - for (j = 0; j < priv->params.num_tc; j++) { - sq_stats = &priv->channel[i]->sq[j].stats; + for (j = 0; j < priv->channels.params.num_tc; j++) { + sq_stats = &c->sq[j].stats; s->tx_packets += sq_stats->packets; s->tx_bytes += sq_stats->bytes; @@ -402,8 +398,10 @@ static inline int mlx5e_get_wqe_mtt_sz(void) MLX5_UMR_MTT_ALIGNMENT); } -static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_sq *sq, - struct mlx5e_umr_wqe *wqe, u16 ix) +static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, + struct mlx5e_icosq *sq, + struct mlx5e_umr_wqe *wqe, + u16 ix) { struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; @@ -493,11 +491,10 @@ static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq) kfree(rq->mpwqe.info); } -static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv, +static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, u64 npages, u8 page_shift, struct mlx5_core_mkey *umr_mkey) { - struct mlx5_core_dev *mdev = priv->mdev; int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); void *mkc; u32 *in; @@ -531,21 +528,20 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv, return err; } -static int mlx5e_create_rq_umr_mkey(struct mlx5e_rq *rq) +static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq) { - struct mlx5e_priv *priv = rq->priv; - u64 num_mtts = MLX5E_REQUIRED_MTTS(BIT(priv->params.log_rq_size)); + u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->wq)); - return 
mlx5e_create_umr_mkey(priv, num_mtts, PAGE_SHIFT, &rq->umr_mkey); + return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey); } -static int mlx5e_create_rq(struct mlx5e_channel *c, - struct mlx5e_rq_param *param, - struct mlx5e_rq *rq) +static int mlx5e_alloc_rq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_rq_param *rqp, + struct mlx5e_rq *rq) { - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; - void *rqc = param->rqc; + struct mlx5_core_dev *mdev = c->mdev; + void *rqc = rqp->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); u32 byte_count; u32 frag_sz; @@ -554,9 +550,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, int err; int i; - param->wq.db_numa_node = cpu_to_node(c->cpu); + rqp->wq.db_numa_node = cpu_to_node(c->cpu); - err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq, + err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq, &rq->wq_ctrl); if (err) return err; @@ -565,15 +561,15 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, wq_sz = mlx5_wq_ll_get_size(&rq->wq); - rq->wq_type = priv->params.rq_wq_type; + rq->wq_type = params->rq_wq_type; rq->pdev = c->pdev; rq->netdev = c->netdev; - rq->tstamp = &priv->tstamp; + rq->tstamp = c->tstamp; rq->channel = c; rq->ix = c->ix; - rq->priv = c->priv; + rq->mdev = mdev; - rq->xdp_prog = priv->xdp_prog ? bpf_prog_inc(priv->xdp_prog) : NULL; + rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL; if (IS_ERR(rq->xdp_prog)) { err = PTR_ERR(rq->xdp_prog); rq->xdp_prog = NULL; @@ -588,24 +584,26 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->rx_headroom = MLX5_RX_HEADROOM; } - switch (priv->params.rq_wq_type) { + switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - if (mlx5e_is_vf_vport_rep(priv)) { - err = -EINVAL; - goto err_rq_wq_destroy; - } - rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; - rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); - rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); + rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe; + if (!rq->handle_rx_cqe) { + err = -EINVAL; + netdev_err(c->netdev, "RX handler of MPWQE RQ is not set, err %d\n", err); + goto err_rq_wq_destroy; + } + + rq->mpwqe_stride_sz = BIT(params->mpwqe_log_stride_sz); + rq->mpwqe_num_strides = BIT(params->mpwqe_log_num_strides); rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; byte_count = rq->buff.wqe_sz; - err = mlx5e_create_rq_umr_mkey(rq); + err = mlx5e_create_rq_umr_mkey(mdev, rq); if (err) goto err_rq_wq_destroy; rq->mkey_be = cpu_to_be32(rq->umr_mkey.key); @@ -621,18 +619,20 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, err = -ENOMEM; goto err_rq_wq_destroy; } - - if (mlx5e_is_vf_vport_rep(priv)) - rq->handle_rx_cqe = mlx5e_handle_rx_cqe_rep; - else - rq->handle_rx_cqe = mlx5e_handle_rx_cqe; - rq->alloc_wqe = mlx5e_alloc_rx_wqe; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; - rq->buff.wqe_sz = (priv->params.lro_en) ? - priv->params.lro_wqe_sz : - MLX5E_SW2HW_MTU(priv->netdev->mtu); + rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe; + if (!rq->handle_rx_cqe) { + kfree(rq->dma_info); + err = -EINVAL; + netdev_err(c->netdev, "RX handler of RQ is not set, err %d\n", err); + goto err_rq_wq_destroy; + } + + rq->buff.wqe_sz = params->lro_en ?
+ params->lro_wqe_sz : + MLX5E_SW2HW_MTU(c->netdev->mtu); byte_count = rq->buff.wqe_sz; /* calc the required page order */ @@ -656,8 +656,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, } INIT_WORK(&rq->am.work, mlx5e_rx_am_work); - rq->am.mode = priv->params.rx_cq_period_mode; - + rq->am.mode = params->rx_cq_period_mode; rq->page_cache.head = 0; rq->page_cache.tail = 0; @@ -674,7 +673,7 @@ err_rq_wq_destroy: return err; } -static void mlx5e_destroy_rq(struct mlx5e_rq *rq) +static void mlx5e_free_rq(struct mlx5e_rq *rq) { int i; @@ -684,7 +683,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: mlx5e_rq_free_mpwqe_info(rq); - mlx5_core_destroy_mkey(rq->priv->mdev, &rq->umr_mkey); + mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ kfree(rq->dma_info); @@ -699,10 +698,10 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) mlx5_wq_destroy(&rq->wq_ctrl); } -static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) +static int mlx5e_create_rq(struct mlx5e_rq *rq, + struct mlx5e_rq_param *param) { - struct mlx5e_priv *priv = rq->priv; - struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_dev *mdev = rq->mdev; void *in; void *rqc; @@ -723,7 +722,6 @@ static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); - MLX5_SET(rqc, rqc, vsd, priv->params.vlan_strip_disable); MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); @@ -742,8 +740,7 @@ static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) { struct mlx5e_channel *c = rq->channel; - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_dev *mdev = c->mdev; void *in; void *rqc; @@ -767,7 +764,7 @@ static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, return err; } -static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) +static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; @@ -787,6 +784,35 @@ static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); MLX5_SET64(modify_rq_in, in, modify_bitmask, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_SCATTER_FCS); + MLX5_SET(rqc, rqc, scatter_fcs, enable); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); + + err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen); + + kvfree(in); + + return err; +} + +static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5_core_dev *mdev = c->mdev; + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); + MLX5_SET64(modify_rq_in, in, modify_bitmask, MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD); MLX5_SET(rqc, rqc, vsd, vsd); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); @@ -798,25 +824,28 @@ static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) return err; } -static void mlx5e_disable_rq(struct mlx5e_rq *rq) +static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { - mlx5_core_destroy_rq(rq->priv->mdev, rq->rqn); + 
mlx5_core_destroy_rq(rq->mdev, rq->rqn); } static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq) { unsigned long exp_time = jiffies + msecs_to_jiffies(20000); struct mlx5e_channel *c = rq->channel; - struct mlx5e_priv *priv = c->priv; + struct mlx5_wq_ll *wq = &rq->wq; + u16 min_wqes = mlx5_min_rx_wqes(rq->wq_type, mlx5_wq_ll_get_size(wq)); while (time_before(jiffies, exp_time)) { - if (wq->cur_sz >= priv->params.min_rx_wqes) + if (wq->cur_sz >= min_wqes) return 0; msleep(20); } + netdev_warn(c->netdev, "Failed to get min RX wqes on RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n", + rq->rqn, wq->cur_sz, min_wqes); return -ETIMEDOUT; } @@ -842,83 +871,128 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) } static int mlx5e_open_rq(struct mlx5e_channel *c, + struct mlx5e_params *params, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) { - struct mlx5e_sq *sq = &c->icosq; - u16 pi = sq->pc & sq->wq.sz_m1; int err; - err = mlx5e_create_rq(c, param, rq); + err = mlx5e_alloc_rq(c, params, param, rq); if (err) return err; - err = mlx5e_enable_rq(rq, param); + err = mlx5e_create_rq(rq, param); if (err) - goto err_destroy_rq; + goto err_free_rq; - set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err) - goto err_disable_rq; + goto err_destroy_rq; - if (param->am_enabled) + if (params->rx_am_enabled) set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); - sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; - sq->db.ico_wqe[pi].num_wqebbs = 1; - mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */ - return 0; -err_disable_rq: - clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); - mlx5e_disable_rq(rq); err_destroy_rq: mlx5e_destroy_rq(rq); +err_free_rq: + mlx5e_free_rq(rq); return err; } -static void mlx5e_close_rq(struct mlx5e_rq *rq) +static void mlx5e_activate_rq(struct mlx5e_rq *rq) +{ + struct mlx5e_icosq *sq = &rq->channel->icosq; + u16 pi = sq->pc & sq->wq.sz_m1; + struct mlx5e_tx_wqe *nopwqe; + + set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; + sq->db.ico_wqe[pi].num_wqebbs = 1; + nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &nopwqe->ctrl); +} + +static void mlx5e_deactivate_rq(struct mlx5e_rq *rq) { clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ - cancel_work_sync(&rq->am.work); +} - mlx5e_disable_rq(rq); - mlx5e_free_rx_descs(rq); +static void mlx5e_close_rq(struct mlx5e_rq *rq) +{ + cancel_work_sync(&rq->am.work); mlx5e_destroy_rq(rq); + mlx5e_free_rx_descs(rq); + mlx5e_free_rq(rq); } -static void mlx5e_free_sq_xdp_db(struct mlx5e_sq *sq) +static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq) { - kfree(sq->db.xdp.di); - kfree(sq->db.xdp.wqe_info); + kfree(sq->db.di); } -static int mlx5e_alloc_sq_xdp_db(struct mlx5e_sq *sq, int numa) +static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - sq->db.xdp.di = kzalloc_node(sizeof(*sq->db.xdp.di) * wq_sz, + sq->db.di = kzalloc_node(sizeof(*sq->db.di) * wq_sz, GFP_KERNEL, numa); - sq->db.xdp.wqe_info = kzalloc_node(sizeof(*sq->db.xdp.wqe_info) * wq_sz, - GFP_KERNEL, numa); - if (!sq->db.xdp.di || !sq->db.xdp.wqe_info) { - mlx5e_free_sq_xdp_db(sq); + if (!sq->db.di) { + mlx5e_free_xdpsq_db(sq); return -ENOMEM; } return 0; } -static void mlx5e_free_sq_ico_db(struct mlx5e_sq *sq) +static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, + struct mlx5e_params 
*params, + struct mlx5e_sq_param *param, + struct mlx5e_xdpsq *sq) +{ + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; + int err; + + sq->pdev = c->pdev; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->min_inline_mode = params->tx_min_inline_mode; + + param->wq.db_numa_node = cpu_to_node(c->cpu); + err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); + if (err) + return err; + sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; + + err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu)); + if (err) + goto err_sq_wq_destroy; + + return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + + return err; +} + +static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq) +{ + mlx5e_free_xdpsq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); +} + +static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq) { kfree(sq->db.ico_wqe); } -static int mlx5e_alloc_sq_ico_db(struct mlx5e_sq *sq, int numa) +static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa) { u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); @@ -930,155 +1004,128 @@ static int mlx5e_alloc_sq_ico_db(struct mlx5e_sq *sq, int numa) return 0; } -static void mlx5e_free_sq_txq_db(struct mlx5e_sq *sq) +static int mlx5e_alloc_icosq(struct mlx5e_channel *c, + struct mlx5e_sq_param *param, + struct mlx5e_icosq *sq) { - kfree(sq->db.txq.wqe_info); - kfree(sq->db.txq.dma_fifo); - kfree(sq->db.txq.skb); -} + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; + int err; -static int mlx5e_alloc_sq_txq_db(struct mlx5e_sq *sq, int numa) -{ - int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + sq->pdev = c->pdev; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->uar_map = mdev->mlx5e_res.bfreg.map; - sq->db.txq.skb = kzalloc_node(wq_sz * sizeof(*sq->db.txq.skb), - GFP_KERNEL, numa); - sq->db.txq.dma_fifo = kzalloc_node(df_sz * sizeof(*sq->db.txq.dma_fifo), - GFP_KERNEL, numa); - sq->db.txq.wqe_info = kzalloc_node(wq_sz * sizeof(*sq->db.txq.wqe_info), - GFP_KERNEL, numa); - if (!sq->db.txq.skb || !sq->db.txq.dma_fifo || !sq->db.txq.wqe_info) { - mlx5e_free_sq_txq_db(sq); - return -ENOMEM; - } + param->wq.db_numa_node = cpu_to_node(c->cpu); + err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); + if (err) + return err; + sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - sq->dma_fifo_mask = df_sz - 1; + err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu)); + if (err) + goto err_sq_wq_destroy; + + sq->edge = (sq->wq.sz_m1 + 1) - MLX5E_ICOSQ_MAX_WQEBBS; return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + + return err; } -static void mlx5e_free_sq_db(struct mlx5e_sq *sq) +static void mlx5e_free_icosq(struct mlx5e_icosq *sq) { - switch (sq->type) { - case MLX5E_SQ_TXQ: - mlx5e_free_sq_txq_db(sq); - break; - case MLX5E_SQ_ICO: - mlx5e_free_sq_ico_db(sq); - break; - case MLX5E_SQ_XDP: - mlx5e_free_sq_xdp_db(sq); - break; - } + mlx5e_free_icosq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); } -static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) +static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq) { - switch (sq->type) { - case MLX5E_SQ_TXQ: - return mlx5e_alloc_sq_txq_db(sq, numa); - case MLX5E_SQ_ICO: - return mlx5e_alloc_sq_ico_db(sq, numa); - case MLX5E_SQ_XDP: - return mlx5e_alloc_sq_xdp_db(sq, numa); - } - - return 0; + kfree(sq->db.wqe_info); + kfree(sq->db.dma_fifo); } -static int mlx5e_sq_get_max_wqebbs(u8 sq_type) +static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq
*sq, int numa) { - switch (sq_type) { - case MLX5E_SQ_ICO: - return MLX5E_ICOSQ_MAX_WQEBBS; - case MLX5E_SQ_XDP: - return MLX5E_XDP_TX_WQEBBS; + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + + sq->db.dma_fifo = kzalloc_node(df_sz * sizeof(*sq->db.dma_fifo), + GFP_KERNEL, numa); + sq->db.wqe_info = kzalloc_node(wq_sz * sizeof(*sq->db.wqe_info), + GFP_KERNEL, numa); + if (!sq->db.dma_fifo || !sq->db.wqe_info) { + mlx5e_free_txqsq_db(sq); + return -ENOMEM; } - return MLX5_SEND_WQE_MAX_WQEBBS; + + sq->dma_fifo_mask = df_sz - 1; + + return 0; } -static int mlx5e_create_sq(struct mlx5e_channel *c, - int tc, - struct mlx5e_sq_param *param, - struct mlx5e_sq *sq) +static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, + int txq_ix, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq) { - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; - - void *sqc = param->sqc; - void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; int err; - sq->type = param->type; sq->pdev = c->pdev; - sq->tstamp = &priv->tstamp; + sq->tstamp = c->tstamp; sq->mkey_be = c->mkey_be; sq->channel = c; - sq->tc = tc; - - err = mlx5_alloc_bfreg(mdev, &sq->bfreg, MLX5_CAP_GEN(mdev, bf), false); - if (err) - return err; + sq->txq_ix = txq_ix; + sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->max_inline = params->tx_max_inline; + sq->min_inline_mode = params->tx_min_inline_mode; - sq->uar_map = sq->bfreg.map; param->wq.db_numa_node = cpu_to_node(c->cpu); - - err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, - &sq->wq_ctrl); + err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) - goto err_unmap_free_uar; - - sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - if (sq->bfreg.wc) - set_bit(MLX5E_SQ_STATE_BF_ENABLE, &sq->state); - - sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; - sq->max_inline = param->max_inline; - sq->min_inline_mode = param->min_inline_mode; + return err; + sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - err = mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu)); + err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu)); if (err) goto err_sq_wq_destroy; - if (sq->type == MLX5E_SQ_TXQ) { - int txq_ix; - - txq_ix = c->ix + tc * priv->params.num_channels; - sq->txq = netdev_get_tx_queue(priv->netdev, txq_ix); - priv->txq_to_sq_map[txq_ix] = sq; - } - - sq->edge = (sq->wq.sz_m1 + 1) - mlx5e_sq_get_max_wqebbs(sq->type); - sq->bf_budget = MLX5E_SQ_BF_BUDGET; + sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS; return 0; err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); -err_unmap_free_uar: - mlx5_free_bfreg(mdev, &sq->bfreg); - return err; } -static void mlx5e_destroy_sq(struct mlx5e_sq *sq) +static void mlx5e_free_txqsq(struct mlx5e_txqsq *sq) { - struct mlx5e_channel *c = sq->channel; - struct mlx5e_priv *priv = c->priv; - - mlx5e_free_sq_db(sq); + mlx5e_free_txqsq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); - mlx5_free_bfreg(priv->mdev, &sq->bfreg); } -static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) -{ - struct mlx5e_channel *c = sq->channel; - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; +struct mlx5e_create_sq_param { + struct mlx5_wq_ctrl *wq_ctrl; + u32 cqn; + u32 tisn; + u8 tis_lst_sz; + u8 min_inline_mode; +}; +static int mlx5e_create_sq(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param, + struct mlx5e_create_sq_param *csp, + u32 *sqn) +{
void *in; void *sqc; void *wq; @@ -1086,7 +1133,7 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) int err; inlen = MLX5_ST_SZ_BYTES(create_sq_in) + - sizeof(u64) * sq->wq_ctrl.buf.npages; + sizeof(u64) * csp->wq_ctrl->buf.npages; in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; @@ -1095,40 +1142,40 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) wq = MLX5_ADDR_OF(sqc, sqc, wq); memcpy(sqc, param->sqc, sizeof(param->sqc)); - - MLX5_SET(sqc, sqc, tis_num_0, param->type == MLX5E_SQ_ICO ? - 0 : priv->tisn[sq->tc]); - MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); + MLX5_SET(sqc, sqc, tis_lst_sz, csp->tis_lst_sz); + MLX5_SET(sqc, sqc, tis_num_0, csp->tisn); + MLX5_SET(sqc, sqc, cqn, csp->cqn); if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) - MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode); + MLX5_SET(sqc, sqc, min_wqe_inline_mode, csp->min_inline_mode); - MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); - MLX5_SET(sqc, sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 0 : 1); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, uar_page, sq->bfreg.index); - MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - + MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.bfreg.index); + MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); + MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma); - mlx5_fill_page_array(&sq->wq_ctrl.buf, - (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + mlx5_fill_page_array(&csp->wq_ctrl->buf, (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); - err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn); + err = mlx5_core_create_sq(mdev, in, inlen, sqn); kvfree(in); return err; } -static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, - int next_state, bool update_rl, int rl_index) -{ - struct mlx5e_channel *c = sq->channel; - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; +struct mlx5e_modify_sq_param { + int curr_state; + int next_state; + bool rl_update; + int rl_index; +}; +static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p) +{ void *in; void *sqc; int inlen; @@ -1141,68 +1188,94 @@ static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); - MLX5_SET(modify_sq_in, in, sq_state, curr_state); - MLX5_SET(sqc, sqc, state, next_state); - if (update_rl && next_state == MLX5_SQC_STATE_RDY) { + MLX5_SET(modify_sq_in, in, sq_state, p->curr_state); + MLX5_SET(sqc, sqc, state, p->next_state); + if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) { MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); - MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); + MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index); } - err = mlx5_core_modify_sq(mdev, sq->sqn, in, inlen); + err = mlx5_core_modify_sq(mdev, sqn, in, inlen); kvfree(in); return err; } -static void mlx5e_disable_sq(struct mlx5e_sq *sq) +static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn) { - struct mlx5e_channel *c = sq->channel; - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; - - mlx5_core_destroy_sq(mdev, sq->sqn); - if (sq->rate_limit) - mlx5_rl_remove_rate(mdev, sq->rate_limit); + mlx5_core_destroy_sq(mdev, sqn); } -static int mlx5e_open_sq(struct mlx5e_channel *c, - int tc, - struct mlx5e_sq_param *param, 
- struct mlx5e_sq *sq) +static int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param, + struct mlx5e_create_sq_param *csp, + u32 *sqn) { + struct mlx5e_modify_sq_param msp = {0}; int err; - err = mlx5e_create_sq(c, tc, param, sq); + err = mlx5e_create_sq(mdev, param, csp, sqn); if (err) return err; - err = mlx5e_enable_sq(sq, param); + msp.curr_state = MLX5_SQC_STATE_RST; + msp.next_state = MLX5_SQC_STATE_RDY; + err = mlx5e_modify_sq(mdev, *sqn, &msp); if (err) - goto err_destroy_sq; + mlx5e_destroy_sq(mdev, *sqn); - set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY, - false, 0); + return err; +} + +static int mlx5e_set_sq_maxrate(struct net_device *dev, + struct mlx5e_txqsq *sq, u32 rate); + +static int mlx5e_open_txqsq(struct mlx5e_channel *c, + u32 tisn, + int txq_ix, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq) +{ + struct mlx5e_create_sq_param csp = {}; + u32 tx_rate; + int err; + + err = mlx5e_alloc_txqsq(c, txq_ix, params, param, sq); if (err) - goto err_disable_sq; + return err; - if (sq->txq) { - netdev_tx_reset_queue(sq->txq); - netif_tx_start_queue(sq->txq); - } + csp.tisn = tisn; + csp.tis_lst_sz = 1; + csp.cqn = sq->cq.mcq.cqn; + csp.wq_ctrl = &sq->wq_ctrl; + csp.min_inline_mode = sq->min_inline_mode; + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + if (err) + goto err_free_txqsq; + + tx_rate = c->priv->tx_rates[sq->txq_ix]; + if (tx_rate) + mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate); return 0; -err_disable_sq: +err_free_txqsq: clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - mlx5e_disable_sq(sq); -err_destroy_sq: - mlx5e_destroy_sq(sq); + mlx5e_free_txqsq(sq); return err; } +static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) +{ + sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix); + set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + netdev_tx_reset_queue(sq->txq); + netif_tx_start_queue(sq->txq); +} + static inline void netif_tx_disable_queue(struct netdev_queue *txq) { __netif_tx_lock_bh(txq); @@ -1210,43 +1283,153 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq) __netif_tx_unlock_bh(txq); } -static void mlx5e_close_sq(struct mlx5e_sq *sq) +static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) { + struct mlx5e_channel *c = sq->channel; + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); /* prevent netif_tx_wake_queue */ - napi_synchronize(&sq->channel->napi); + napi_synchronize(&c->napi); - if (sq->txq) { - netif_tx_disable_queue(sq->txq); + netif_tx_disable_queue(sq->txq); - /* last doorbell out, godspeed .. */ - if (mlx5e_sq_has_room_for(sq, 1)) { - sq->db.txq.skb[(sq->pc & sq->wq.sz_m1)] = NULL; - mlx5e_send_nop(sq, true); - } + /* last doorbell out, godspeed .. 
*/ + if (mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1)) { + struct mlx5e_tx_wqe *nop; + + sq->db.wqe_info[(sq->pc & sq->wq.sz_m1)].skb = NULL; + nop = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &nop->ctrl); } +} + +static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5_core_dev *mdev = c->mdev; - mlx5e_disable_sq(sq); - mlx5e_free_sq_descs(sq); - mlx5e_destroy_sq(sq); + mlx5e_destroy_sq(mdev, sq->sqn); + if (sq->rate_limit) + mlx5_rl_remove_rate(mdev, sq->rate_limit); + mlx5e_free_txqsq_descs(sq); + mlx5e_free_txqsq(sq); } -static int mlx5e_create_cq(struct mlx5e_channel *c, - struct mlx5e_cq_param *param, - struct mlx5e_cq *cq) +static int mlx5e_open_icosq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_icosq *sq) +{ + struct mlx5e_create_sq_param csp = {}; + int err; + + err = mlx5e_alloc_icosq(c, param, sq); + if (err) + return err; + + csp.cqn = sq->cq.mcq.cqn; + csp.wq_ctrl = &sq->wq_ctrl; + csp.min_inline_mode = params->tx_min_inline_mode; + set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + if (err) + goto err_free_icosq; + + return 0; + +err_free_icosq: + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + mlx5e_free_icosq(sq); + + return err; +} + +static void mlx5e_close_icosq(struct mlx5e_icosq *sq) +{ + struct mlx5e_channel *c = sq->channel; + + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + napi_synchronize(&c->napi); + + mlx5e_destroy_sq(c->mdev, sq->sqn); + mlx5e_free_icosq(sq); +} + +static int mlx5e_open_xdpsq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_xdpsq *sq) +{ + unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT; + struct mlx5e_create_sq_param csp = {}; + unsigned int inline_hdr_sz = 0; + int err; + int i; + + err = mlx5e_alloc_xdpsq(c, params, param, sq); + if (err) + return err; + + csp.tis_lst_sz = 1; + csp.tisn = c->priv->tisn[0]; /* tc = 0 */ + csp.cqn = sq->cq.mcq.cqn; + csp.wq_ctrl = &sq->wq_ctrl; + csp.min_inline_mode = sq->min_inline_mode; + set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + if (err) + goto err_free_xdpsq; + + if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { + inline_hdr_sz = MLX5E_XDP_MIN_INLINE; + ds_cnt++; + } + + /* Pre initialize fixed WQE fields */ + for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) { + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + struct mlx5_wqe_data_seg *dseg; + + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); + + dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1); + dseg->lkey = sq->mkey_be; + } + + return 0; + +err_free_xdpsq: + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + mlx5e_free_xdpsq(sq); + + return err; +} + +static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) +{ + struct mlx5e_channel *c = sq->channel; + + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + napi_synchronize(&c->napi); + + mlx5e_destroy_sq(c->mdev, sq->sqn); + mlx5e_free_xdpsq_descs(sq); + mlx5e_free_xdpsq(sq); +} + +static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev, + struct mlx5e_cq_param *param, + struct mlx5e_cq *cq) { - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_core_cq *mcq = 
&cq->mcq; int eqn_not_used; unsigned int irqn; int err; u32 i; - param->wq.buf_numa_node = cpu_to_node(c->cpu); - param->wq.db_numa_node = cpu_to_node(c->cpu); - param->eq_ix = c->ix; - err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq, &cq->wq_ctrl); if (err) @@ -1254,8 +1437,6 @@ static int mlx5e_create_cq(struct mlx5e_channel *c, mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn); - cq->napi = &c->napi; - mcq->cqe_sz = 64; mcq->set_ci_db = cq->wq_ctrl.db.db; mcq->arm_db = cq->wq_ctrl.db.db + 1; @@ -1272,21 +1453,38 @@ static int mlx5e_create_cq(struct mlx5e_channel *c, cqe->op_own = 0xf1; } - cq->channel = c; - cq->priv = priv; + cq->mdev = mdev; return 0; } -static void mlx5e_destroy_cq(struct mlx5e_cq *cq) +static int mlx5e_alloc_cq(struct mlx5e_channel *c, + struct mlx5e_cq_param *param, + struct mlx5e_cq *cq) +{ + struct mlx5_core_dev *mdev = c->priv->mdev; + int err; + + param->wq.buf_numa_node = cpu_to_node(c->cpu); + param->wq.db_numa_node = cpu_to_node(c->cpu); + param->eq_ix = c->ix; + + err = mlx5e_alloc_cq_common(mdev, param, cq); + + cq->napi = &c->napi; + cq->channel = c; + + return err; +} + +static void mlx5e_free_cq(struct mlx5e_cq *cq) { mlx5_cqwq_destroy(&cq->wq_ctrl); } -static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) +static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) { - struct mlx5e_priv *priv = cq->priv; - struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_dev *mdev = cq->mdev; struct mlx5_core_cq *mcq = &cq->mcq; void *in; @@ -1330,47 +1528,41 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) return 0; } -static void mlx5e_disable_cq(struct mlx5e_cq *cq) +static void mlx5e_destroy_cq(struct mlx5e_cq *cq) { - struct mlx5e_priv *priv = cq->priv; - struct mlx5_core_dev *mdev = priv->mdev; - - mlx5_core_destroy_cq(mdev, &cq->mcq); + mlx5_core_destroy_cq(cq->mdev, &cq->mcq); } static int mlx5e_open_cq(struct mlx5e_channel *c, + struct mlx5e_cq_moder moder, struct mlx5e_cq_param *param, - struct mlx5e_cq *cq, - struct mlx5e_cq_moder moderation) + struct mlx5e_cq *cq) { + struct mlx5_core_dev *mdev = c->mdev; int err; - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; - err = mlx5e_create_cq(c, param, cq); + err = mlx5e_alloc_cq(c, param, cq); if (err) return err; - err = mlx5e_enable_cq(cq, param); + err = mlx5e_create_cq(cq, param); if (err) - goto err_destroy_cq; + goto err_free_cq; if (MLX5_CAP_GEN(mdev, cq_moderation)) - mlx5_core_modify_cq_moderation(mdev, &cq->mcq, - moderation.usec, - moderation.pkts); + mlx5_core_modify_cq_moderation(mdev, &cq->mcq, moder.usec, moder.pkts); return 0; -err_destroy_cq: - mlx5e_destroy_cq(cq); +err_free_cq: + mlx5e_free_cq(cq); return err; } static void mlx5e_close_cq(struct mlx5e_cq *cq) { - mlx5e_disable_cq(cq); mlx5e_destroy_cq(cq); + mlx5e_free_cq(cq); } static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) @@ -1379,15 +1571,15 @@ static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) } static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, + struct mlx5e_params *params, struct mlx5e_channel_param *cparam) { - struct mlx5e_priv *priv = c->priv; int err; int tc; for (tc = 0; tc < c->num_tc; tc++) { - err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq, - priv->params.tx_cq_moderation); + err = mlx5e_open_cq(c, params->tx_cq_moderation, + &cparam->tx_cq, &c->sq[tc].cq); if (err) goto err_close_tx_cqs; } @@ -1410,13 +1602,17 @@ 
static int mlx5e_open_sqs(struct mlx5e_channel *c, + struct mlx5e_params *params, struct mlx5e_channel_param *cparam) { int err; int tc; - for (tc = 0; tc < c->num_tc; tc++) { - err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]); + for (tc = 0; tc < params->num_tc; tc++) { + int txq_ix = c->ix + tc * params->num_channels; + + err = mlx5e_open_txqsq(c, c->priv->tisn[tc], txq_ix, + params, &cparam->sq, &c->sq[tc]); if (err) goto err_close_sqs; } @@ -1425,7 +1621,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c, err_close_sqs: for (tc--; tc >= 0; tc--) - mlx5e_close_sq(&c->sq[tc]); + mlx5e_close_txqsq(&c->sq[tc]); return err; } @@ -1435,23 +1631,15 @@ static void mlx5e_close_sqs(struct mlx5e_channel *c) int tc; for (tc = 0; tc < c->num_tc; tc++) - mlx5e_close_sq(&c->sq[tc]); -} - -static void mlx5e_build_channeltc_to_txq_map(struct mlx5e_priv *priv, int ix) -{ - int i; - - for (i = 0; i < priv->profile->max_tc; i++) - priv->channeltc_to_txq_map[ix][i] = - ix + i * priv->params.num_channels; + mlx5e_close_txqsq(&c->sq[tc]); } static int mlx5e_set_sq_maxrate(struct net_device *dev, - struct mlx5e_sq *sq, u32 rate) + struct mlx5e_txqsq *sq, u32 rate) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_modify_sq_param msp = {0}; u16 rl_index = 0; int err; @@ -1474,8 +1662,11 @@ static int mlx5e_set_sq_maxrate(struct net_device *dev, } } - err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, - MLX5_SQC_STATE_RDY, true, rl_index); + msp.curr_state = MLX5_SQC_STATE_RDY; + msp.next_state = MLX5_SQC_STATE_RDY; + msp.rl_index = rl_index; + msp.rl_update = true; + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); if (err) { netdev_err(dev, "Failed configuring rate %u: %d\n", rate, err); @@ -1493,7 +1684,7 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_sq *sq = priv->txq_to_sq_map[index]; + struct mlx5e_txqsq *sq = priv->txq2sq[index]; int err = 0; if (!mlx5_rl_is_supported(mdev)) { @@ -1520,114 +1711,87 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } -static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) -{ - return is_kdump_kernel() ? 
- MLX5E_MIN_NUM_CHANNELS : - min_t(int, mdev->priv.eq_table.num_comp_vectors, - MLX5E_MAX_NUM_CHANNELS); -} - static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, + struct mlx5e_params *params, struct mlx5e_channel_param *cparam, struct mlx5e_channel **cp) { - struct mlx5e_cq_moder icosq_cq_moder = {0, 0}; + struct mlx5e_cq_moder icocq_moder = {0, 0}; struct net_device *netdev = priv->netdev; - struct mlx5e_cq_moder rx_cq_profile; int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; - struct mlx5e_sq *sq; int err; - int i; c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); if (!c) return -ENOMEM; c->priv = priv; + c->mdev = priv->mdev; + c->tstamp = &priv->tstamp; c->ix = ix; c->cpu = cpu; c->pdev = &priv->mdev->pdev->dev; c->netdev = priv->netdev; c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); - c->num_tc = priv->params.num_tc; - c->xdp = !!priv->xdp_prog; - - if (priv->params.rx_am_enabled) - rx_cq_profile = mlx5e_am_get_def_profile(priv->params.rx_cq_period_mode); - else - rx_cq_profile = priv->params.rx_cq_moderation; - - mlx5e_build_channeltc_to_txq_map(priv, ix); + c->num_tc = params->num_tc; + c->xdp = !!params->xdp_prog; netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); - err = mlx5e_open_cq(c, &cparam->icosq_cq, &c->icosq.cq, icosq_cq_moder); + err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq); if (err) goto err_napi_del; - err = mlx5e_open_tx_cqs(c, cparam); + err = mlx5e_open_tx_cqs(c, params, cparam); if (err) goto err_close_icosq_cq; - err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq, - rx_cq_profile); + err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->rq.cq); if (err) goto err_close_tx_cqs; /* XDP SQ CQ params are same as normal TXQ sq CQ params */ - err = c->xdp ? mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq, - priv->params.tx_cq_moderation) : 0; + err = c->xdp ? mlx5e_open_cq(c, params->tx_cq_moderation, + &cparam->tx_cq, &c->rq.xdpsq.cq) : 0; if (err) goto err_close_rx_cq; napi_enable(&c->napi); - err = mlx5e_open_sq(c, 0, &cparam->icosq, &c->icosq); + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq); if (err) goto err_disable_napi; - err = mlx5e_open_sqs(c, cparam); + err = mlx5e_open_sqs(c, params, cparam); if (err) goto err_close_icosq; - for (i = 0; i < priv->params.num_tc; i++) { - u32 txq_ix = priv->channeltc_to_txq_map[ix][i]; - - if (priv->tx_rates[txq_ix]) { - sq = priv->txq_to_sq_map[txq_ix]; - mlx5e_set_sq_maxrate(priv->netdev, sq, - priv->tx_rates[txq_ix]); - } - } - - err = c->xdp ? mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq) : 0; + err = c->xdp ? 
mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->rq.xdpsq) : 0; if (err) goto err_close_sqs; - err = mlx5e_open_rq(c, &cparam->rq, &c->rq); + err = mlx5e_open_rq(c, params, &cparam->rq, &c->rq); if (err) goto err_close_xdp_sq; - netif_set_xps_queue(netdev, get_cpu_mask(c->cpu), ix); *cp = c; return 0; err_close_xdp_sq: if (c->xdp) - mlx5e_close_sq(&c->xdp_sq); + mlx5e_close_xdpsq(&c->rq.xdpsq); err_close_sqs: mlx5e_close_sqs(c); err_close_icosq: - mlx5e_close_sq(&c->icosq); + mlx5e_close_icosq(&c->icosq); err_disable_napi: napi_disable(&c->napi); if (c->xdp) - mlx5e_close_cq(&c->xdp_sq.cq); + mlx5e_close_cq(&c->rq.xdpsq.cq); err_close_rx_cq: mlx5e_close_cq(&c->rq.cq); @@ -1645,16 +1809,35 @@ err_napi_del: return err; } +static void mlx5e_activate_channel(struct mlx5e_channel *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_activate_txqsq(&c->sq[tc]); + mlx5e_activate_rq(&c->rq); + netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix); +} + +static void mlx5e_deactivate_channel(struct mlx5e_channel *c) +{ + int tc; + + mlx5e_deactivate_rq(&c->rq); + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_deactivate_txqsq(&c->sq[tc]); +} + static void mlx5e_close_channel(struct mlx5e_channel *c) { mlx5e_close_rq(&c->rq); if (c->xdp) - mlx5e_close_sq(&c->xdp_sq); + mlx5e_close_xdpsq(&c->rq.xdpsq); mlx5e_close_sqs(c); - mlx5e_close_sq(&c->icosq); + mlx5e_close_icosq(&c->icosq); napi_disable(&c->napi); if (c->xdp) - mlx5e_close_cq(&c->xdp_sq.cq); + mlx5e_close_cq(&c->rq.xdpsq.cq); mlx5e_close_cq(&c->rq.cq); mlx5e_close_tx_cqs(c); mlx5e_close_cq(&c->icosq.cq); @@ -1664,17 +1847,16 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) } static void mlx5e_build_rq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, struct mlx5e_rq_param *param) { void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - switch (priv->params.rq_wq_type) { + switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - MLX5_SET(wq, wq, log_wqe_num_of_strides, - priv->params.mpwqe_log_num_strides - 9); - MLX5_SET(wq, wq, log_wqe_stride_size, - priv->params.mpwqe_log_stride_sz - 6); + MLX5_SET(wq, wq, log_wqe_num_of_strides, params->mpwqe_log_num_strides - 9); + MLX5_SET(wq, wq, log_wqe_stride_size, params->mpwqe_log_stride_sz - 6); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ @@ -1683,14 +1865,14 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); - MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); + MLX5_SET(wq, wq, log_wq_sz, params->log_rq_size); MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); + MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); + MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); param->wq.linear = 1; - - param->am_enabled = priv->params.rx_am_enabled; } static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param) @@ -1715,17 +1897,14 @@ static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, } static void mlx5e_build_sq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); mlx5e_build_sq_param_common(priv, param); - MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); - - param->max_inline = 
priv->params.tx_max_inline; - param->min_inline_mode = priv->params.tx_min_inline_mode; - param->type = MLX5E_SQ_TXQ; + MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, @@ -1737,37 +1916,36 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, } static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, struct mlx5e_cq_param *param) { void *cqc = param->cqc; u8 log_cq_size; - switch (priv->params.rq_wq_type) { + switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - log_cq_size = priv->params.log_rq_size + - priv->params.mpwqe_log_num_strides; + log_cq_size = params->log_rq_size + params->mpwqe_log_num_strides; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - log_cq_size = priv->params.log_rq_size; + log_cq_size = params->log_rq_size; } MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); - if (MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS)) { + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM); MLX5_SET(cqc, cqc, cqe_comp_en, 1); } mlx5e_build_common_cq_param(priv, param); - - param->cq_period_mode = priv->params.rx_cq_period_mode; } static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, struct mlx5e_cq_param *param) { void *cqc = param->cqc; - MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); + MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size); mlx5e_build_common_cq_param(priv, param); @@ -1775,8 +1953,8 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, } static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, - struct mlx5e_cq_param *param, - u8 log_wq_size) + u8 log_wq_size, + struct mlx5e_cq_param *param) { void *cqc = param->cqc; @@ -1788,8 +1966,8 @@ static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, } static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, - struct mlx5e_sq_param *param, - u8 log_wq_size) + u8 log_wq_size, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -1798,162 +1976,119 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_sz, log_wq_size); MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); - - param->type = MLX5E_SQ_ICO; } static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); mlx5e_build_sq_param_common(priv, param); - MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); - - param->max_inline = priv->params.tx_max_inline; - param->min_inline_mode = priv->params.tx_min_inline_mode; - param->type = MLX5E_SQ_XDP; + MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); } -static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) +static void mlx5e_build_channel_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam) { u8 icosq_log_wq_sz = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; - mlx5e_build_rq_param(priv, &cparam->rq); - mlx5e_build_sq_param(priv, &cparam->sq); - mlx5e_build_xdpsq_param(priv, &cparam->xdp_sq); - mlx5e_build_icosq_param(priv, &cparam->icosq, icosq_log_wq_sz); - mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); - mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); - mlx5e_build_ico_cq_param(priv, &cparam->icosq_cq, icosq_log_wq_sz); + mlx5e_build_rq_param(priv, 
params, &cparam->rq); + mlx5e_build_sq_param(priv, params, &cparam->sq); + mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); + mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq); + mlx5e_build_rx_cq_param(priv, params, &cparam->rx_cq); + mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq); + mlx5e_build_ico_cq_param(priv, icosq_log_wq_sz, &cparam->icosq_cq); } -static int mlx5e_open_channels(struct mlx5e_priv *priv) +int mlx5e_open_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *chs) { struct mlx5e_channel_param *cparam; - int nch = priv->params.num_channels; int err = -ENOMEM; int i; - int j; - - priv->channel = kcalloc(nch, sizeof(struct mlx5e_channel *), - GFP_KERNEL); - priv->txq_to_sq_map = kcalloc(nch * priv->params.num_tc, - sizeof(struct mlx5e_sq *), GFP_KERNEL); + chs->num = chs->params.num_channels; + chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL); cparam = kzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL); + if (!chs->c || !cparam) + goto err_free; - if (!priv->channel || !priv->txq_to_sq_map || !cparam) - goto err_free_txq_to_sq_map; - - mlx5e_build_channel_param(priv, cparam); - - for (i = 0; i < nch; i++) { - err = mlx5e_open_channel(priv, i, cparam, &priv->channel[i]); - if (err) - goto err_close_channels; - } - - for (j = 0; j < nch; j++) { - err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq); + mlx5e_build_channel_param(priv, &chs->params, cparam); + for (i = 0; i < chs->num; i++) { + err = mlx5e_open_channel(priv, i, &chs->params, cparam, &chs->c[i]); if (err) goto err_close_channels; } - /* FIXME: This is a W/A for tx timeout watch dog false alarm when - * polling for inactive tx queues. - */ - netif_tx_start_all_queues(priv->netdev); - kfree(cparam); return 0; err_close_channels: for (i--; i >= 0; i--) - mlx5e_close_channel(priv->channel[i]); + mlx5e_close_channel(chs->c[i]); -err_free_txq_to_sq_map: - kfree(priv->txq_to_sq_map); - kfree(priv->channel); +err_free: + kfree(chs->c); kfree(cparam); - + chs->num = 0; return err; } -static void mlx5e_close_channels(struct mlx5e_priv *priv) +static void mlx5e_activate_channels(struct mlx5e_channels *chs) { int i; - /* FIXME: This is a W/A only for tx timeout watch dog false alarm when - * polling for inactive tx queues. - */ - netif_tx_stop_all_queues(priv->netdev); - netif_tx_disable(priv->netdev); - - for (i = 0; i < priv->params.num_channels; i++) - mlx5e_close_channel(priv->channel[i]); - - kfree(priv->txq_to_sq_map); - kfree(priv->channel); + for (i = 0; i < chs->num; i++) + mlx5e_activate_channel(chs->c[i]); } -static int mlx5e_rx_hash_fn(int hfunc) +static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs) { - return (hfunc == ETH_RSS_HASH_TOP) ? - MLX5_RX_HASH_FN_TOEPLITZ : - MLX5_RX_HASH_FN_INVERTED_XOR8; -} - -static int mlx5e_bits_invert(unsigned long a, int size) -{ - int inv = 0; + int err = 0; int i; - for (i = 0; i < size; i++) - inv |= (test_bit(size - i - 1, &a) ? 
1 : 0) << i; + for (i = 0; i < chs->num; i++) { + err = mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq); + if (err) + break; + } - return inv; + return err; } -static void mlx5e_fill_indir_rqt_rqns(struct mlx5e_priv *priv, void *rqtc) +static void mlx5e_deactivate_channels(struct mlx5e_channels *chs) { int i; - for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) { - int ix = i; - u32 rqn; - - if (priv->params.rss_hfunc == ETH_RSS_HASH_XOR) - ix = mlx5e_bits_invert(i, MLX5E_LOG_INDIR_RQT_SIZE); - - ix = priv->params.indirection_rqt[ix]; - rqn = test_bit(MLX5E_STATE_OPENED, &priv->state) ? - priv->channel[ix]->rq.rqn : - priv->drop_rq.rqn; - MLX5_SET(rqtc, rqtc, rq_num[i], rqn); - } + for (i = 0; i < chs->num; i++) + mlx5e_deactivate_channel(chs->c[i]); } -static void mlx5e_fill_direct_rqt_rqn(struct mlx5e_priv *priv, void *rqtc, - int ix) +void mlx5e_close_channels(struct mlx5e_channels *chs) { - u32 rqn = test_bit(MLX5E_STATE_OPENED, &priv->state) ? - priv->channel[ix]->rq.rqn : - priv->drop_rq.rqn; + int i; - MLX5_SET(rqtc, rqtc, rq_num[0], rqn); + for (i = 0; i < chs->num; i++) + mlx5e_close_channel(chs->c[i]); + + kfree(chs->c); + chs->num = 0; } -static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, - int ix, struct mlx5e_rqt *rqt) +static int +mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, struct mlx5e_rqt *rqt) { struct mlx5_core_dev *mdev = priv->mdev; void *rqtc; int inlen; int err; u32 *in; + int i; inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; in = mlx5_vzalloc(inlen); @@ -1965,10 +2100,8 @@ static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); MLX5_SET(rqtc, rqtc, rqt_max_size, sz); - if (sz > 1) /* RSS */ - mlx5e_fill_indir_rqt_rqns(priv, rqtc); - else - mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix); + for (i = 0; i < sz; i++) + MLX5_SET(rqtc, rqtc, rq_num[i], priv->drop_rq.rqn); err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn); if (!err) @@ -1984,11 +2117,15 @@ void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt) mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn); } -static int mlx5e_create_indirect_rqts(struct mlx5e_priv *priv) +int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv) { struct mlx5e_rqt *rqt = &priv->indir_rqt; + int err; - return mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt); + err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, rqt); + if (err) + mlx5_core_warn(priv->mdev, "create indirect rqts failed, %d\n", err); + return err; } int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) @@ -1999,7 +2136,7 @@ int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { rqt = &priv->direct_tir[ix].rqt; - err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqt); + err = mlx5e_create_rqt(priv, 1 /*size */, rqt); if (err) goto err_destroy_rqts; } @@ -2007,13 +2144,64 @@ int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) return 0; err_destroy_rqts: + mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err); for (ix--; ix >= 0; ix--) mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt); return err; } -int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix) +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv) +{ + int i; + + for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); +} + +static int mlx5e_rx_hash_fn(int hfunc) +{ + return (hfunc == ETH_RSS_HASH_TOP) ? 
+ MLX5_RX_HASH_FN_TOEPLITZ : + MLX5_RX_HASH_FN_INVERTED_XOR8; +} + +static int mlx5e_bits_invert(unsigned long a, int size) +{ + int inv = 0; + int i; + + for (i = 0; i < size; i++) + inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i; + + return inv; +} + +static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz, + struct mlx5e_redirect_rqt_param rrp, void *rqtc) +{ + int i; + + for (i = 0; i < sz; i++) { + u32 rqn; + + if (rrp.is_rss) { + int ix = i; + + if (rrp.rss.hfunc == ETH_RSS_HASH_XOR) + ix = mlx5e_bits_invert(i, ilog2(sz)); + + ix = priv->channels.params.indirection_rqt[ix]; + rqn = rrp.rss.channels->c[ix]->rq.rqn; + } else { + rqn = rrp.rqn; + } + MLX5_SET(rqtc, rqtc, rq_num[i], rqn); + } +} + +int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, + struct mlx5e_redirect_rqt_param rrp) { struct mlx5_core_dev *mdev = priv->mdev; void *rqtc; @@ -2029,41 +2217,86 @@ int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix) rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); - if (sz > 1) /* RSS */ - mlx5e_fill_indir_rqt_rqns(priv, rqtc); - else - mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix); - MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1); - + mlx5e_fill_rqt_rqns(priv, sz, rrp, rqtc); err = mlx5_core_modify_rqt(mdev, rqtn, in, inlen); kvfree(in); - return err; } -static void mlx5e_redirect_rqts(struct mlx5e_priv *priv) +static u32 mlx5e_get_direct_rqn(struct mlx5e_priv *priv, int ix, + struct mlx5e_redirect_rqt_param rrp) +{ + if (!rrp.is_rss) + return rrp.rqn; + + if (ix >= rrp.rss.channels->num) + return priv->drop_rq.rqn; + + return rrp.rss.channels->c[ix]->rq.rqn; +} + +static void mlx5e_redirect_rqts(struct mlx5e_priv *priv, + struct mlx5e_redirect_rqt_param rrp) { u32 rqtn; int ix; if (priv->indir_rqt.enabled) { + /* RSS RQ table */ rqtn = priv->indir_rqt.rqtn; - mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp); } - for (ix = 0; ix < priv->params.num_channels; ix++) { + for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { + struct mlx5e_redirect_rqt_param direct_rrp = { + .is_rss = false, + { + .rqn = mlx5e_get_direct_rqn(priv, ix, rrp) + }, + }; + + /* Direct RQ Tables */ if (!priv->direct_tir[ix].rqt.enabled) continue; + rqtn = priv->direct_tir[ix].rqt.rqtn; - mlx5e_redirect_rqt(priv, rqtn, 1, ix); + mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp); } } -static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) +static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *chs) +{ + struct mlx5e_redirect_rqt_param rrp = { + .is_rss = true, + { + .rss = { + .channels = chs, + .hfunc = chs->params.rss_hfunc, + } + }, + }; + + mlx5e_redirect_rqts(priv, rrp); +} + +static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv) +{ + struct mlx5e_redirect_rqt_param drop_rrp = { + .is_rss = false, + { + .rqn = priv->drop_rq.rqn, + }, + }; + + mlx5e_redirect_rqts(priv, drop_rrp); +} + +static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc) { - if (!priv->params.lro_en) + if (!params->lro_en) return; #define ROUGH_MAX_L2_L3_HDR_SZ 256 @@ -2072,13 +2305,13 @@ static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); MLX5_SET(tirc, tirc, lro_max_ip_payload_size, - (priv->params.lro_wqe_sz - - ROUGH_MAX_L2_L3_HDR_SZ) >> 8); - MLX5_SET(tirc, tirc, lro_timeout_period_usecs, 
priv->params.lro_timeout); + (params->lro_wqe_sz - ROUGH_MAX_L2_L3_HDR_SZ) >> 8); + MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout); } -void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc, - enum mlx5e_traffic_types tt) +void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, + enum mlx5e_traffic_types tt, + void *tirc) { void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); @@ -2094,16 +2327,15 @@ void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc, MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_IPSEC_SPI) - MLX5_SET(tirc, tirc, rx_hash_fn, - mlx5e_rx_hash_fn(priv->params.rss_hfunc)); - if (priv->params.rss_hfunc == ETH_RSS_HASH_TOP) { + MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(params->rss_hfunc)); + if (params->rss_hfunc == ETH_RSS_HASH_TOP) { void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key); MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); - memcpy(rss_key, priv->params.toeplitz_hash_key, len); + memcpy(rss_key, params->toeplitz_hash_key, len); } switch (tt) { @@ -2208,7 +2440,7 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) MLX5_SET(modify_tir_in, in, bitmask.lro, 1); tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); - mlx5e_build_tir_ctx_lro(tirc, priv); + mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, @@ -2258,9 +2490,9 @@ static void mlx5e_query_mtu(struct mlx5e_priv *priv, u16 *mtu) *mtu = MLX5E_HW2SW_MTU(hw_mtu); } -static int mlx5e_set_dev_port_mtu(struct net_device *netdev) +static int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) { - struct mlx5e_priv *priv = netdev_priv(netdev); + struct net_device *netdev = priv->netdev; u16 mtu; int err; @@ -2280,8 +2512,8 @@ static int mlx5e_set_dev_port_mtu(struct net_device *netdev) static void mlx5e_netdev_set_tcs(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - int nch = priv->params.num_channels; - int ntc = priv->params.num_tc; + int nch = priv->channels.params.num_channels; + int ntc = priv->channels.params.num_tc; int tc; netdev_reset_tc(netdev); @@ -2298,53 +2530,116 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev) netdev_set_tc_queue(netdev, tc, nch, 0); } +static void mlx5e_build_channels_tx_maps(struct mlx5e_priv *priv) +{ + struct mlx5e_channel *c; + struct mlx5e_txqsq *sq; + int i, tc; + + for (i = 0; i < priv->channels.num; i++) + for (tc = 0; tc < priv->profile->max_tc; tc++) + priv->channel_tc2txq[i][tc] = i + tc * priv->channels.num; + + for (i = 0; i < priv->channels.num; i++) { + c = priv->channels.c[i]; + for (tc = 0; tc < c->num_tc; tc++) { + sq = &c->sq[tc]; + priv->txq2sq[sq->txq_ix] = sq; + } + } +} + +static bool mlx5e_is_eswitch_vport_mngr(struct mlx5_core_dev *mdev) +{ + return (MLX5_CAP_GEN(mdev, vport_group_manager) && + MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH); +} + +void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) +{ + int num_txqs = priv->channels.num * priv->channels.params.num_tc; + struct net_device *netdev = priv->netdev; + + mlx5e_netdev_set_tcs(netdev); + netif_set_real_num_tx_queues(netdev, num_txqs); + netif_set_real_num_rx_queues(netdev, priv->channels.num); + + mlx5e_build_channels_tx_maps(priv); + mlx5e_activate_channels(&priv->channels); + netif_tx_start_all_queues(priv->netdev); + + if (mlx5e_is_eswitch_vport_mngr(priv->mdev)) + 
mlx5e_add_sqs_fwd_rules(priv); + + mlx5e_wait_channels_min_rx_wqes(&priv->channels); + mlx5e_redirect_rqts_to_channels(priv, &priv->channels); +} + +void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) +{ + mlx5e_redirect_rqts_to_drop(priv); + + if (mlx5e_is_eswitch_vport_mngr(priv->mdev)) + mlx5e_remove_sqs_fwd_rules(priv); + + /* FIXME: This is a W/A only for tx timeout watch dog false alarm when + * polling for inactive tx queues. + */ + netif_tx_stop_all_queues(priv->netdev); + netif_tx_disable(priv->netdev); + mlx5e_deactivate_channels(&priv->channels); +} + +void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_hw_modify hw_modify) +{ + struct net_device *netdev = priv->netdev; + int new_num_txqs; + + new_num_txqs = new_chs->num * new_chs->params.num_tc; + + netif_carrier_off(netdev); + + if (new_num_txqs < netdev->real_num_tx_queues) + netif_set_real_num_tx_queues(netdev, new_num_txqs); + + mlx5e_deactivate_priv_channels(priv); + mlx5e_close_channels(&priv->channels); + + priv->channels = *new_chs; + + /* New channels are ready to roll, modify HW settings if needed */ + if (hw_modify) + hw_modify(priv); + + mlx5e_refresh_tirs(priv, false); + mlx5e_activate_priv_channels(priv); + + mlx5e_update_carrier(priv); +} + int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_core_dev *mdev = priv->mdev; - int num_txqs; int err; set_bit(MLX5E_STATE_OPENED, &priv->state); - mlx5e_netdev_set_tcs(netdev); - - num_txqs = priv->params.num_channels * priv->params.num_tc; - netif_set_real_num_tx_queues(netdev, num_txqs); - netif_set_real_num_rx_queues(netdev, priv->params.num_channels); - - err = mlx5e_open_channels(priv); - if (err) { - netdev_err(netdev, "%s: mlx5e_open_channels failed, %d\n", - __func__, err); + err = mlx5e_open_channels(priv, &priv->channels); + if (err) goto err_clear_state_opened_flag; - } - - err = mlx5e_refresh_tirs_self_loopback(priv->mdev, false); - if (err) { - netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n", - __func__, err); - goto err_close_channels; - } - mlx5e_redirect_rqts(priv); + mlx5e_refresh_tirs(priv, false); + mlx5e_activate_priv_channels(priv); mlx5e_update_carrier(priv); mlx5e_timestamp_init(priv); -#ifdef CONFIG_RFS_ACCEL - priv->netdev->rx_cpu_rmap = priv->mdev->rmap; -#endif + if (priv->profile->update_stats) queue_delayed_work(priv->wq, &priv->update_stats_work, 0); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) { - err = mlx5e_add_sqs_fwd_rules(priv); - if (err) - goto err_close_channels; - } return 0; -err_close_channels: - mlx5e_close_channels(priv); err_clear_state_opened_flag: clear_bit(MLX5E_STATE_OPENED, &priv->state); return err; @@ -2365,7 +2660,6 @@ int mlx5e_open(struct net_device *netdev) int mlx5e_close_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_core_dev *mdev = priv->mdev; /* May already be CLOSED in case a previous configuration operation * (e.g RX/TX queue size change) that involves close&open failed. 
@@ -2375,13 +2669,10 @@ int mlx5e_close_locked(struct net_device *netdev) clear_bit(MLX5E_STATE_OPENED, &priv->state); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) - mlx5e_remove_sqs_fwd_rules(priv); - mlx5e_timestamp_cleanup(priv); netif_carrier_off(priv->netdev); - mlx5e_redirect_rqts(priv); - mlx5e_close_channels(priv); + mlx5e_deactivate_priv_channels(priv); + mlx5e_close_channels(&priv->channels); return 0; } @@ -2401,11 +2692,10 @@ int mlx5e_close(struct net_device *netdev) return err; } -static int mlx5e_create_drop_rq(struct mlx5e_priv *priv, - struct mlx5e_rq *rq, - struct mlx5e_rq_param *param) +static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev, + struct mlx5e_rq *rq, + struct mlx5e_rq_param *param) { - struct mlx5_core_dev *mdev = priv->mdev; void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); int err; @@ -2417,111 +2707,85 @@ static int mlx5e_create_drop_rq(struct mlx5e_priv *priv, if (err) return err; - rq->priv = priv; + rq->mdev = mdev; return 0; } -static int mlx5e_create_drop_cq(struct mlx5e_priv *priv, - struct mlx5e_cq *cq, - struct mlx5e_cq_param *param) +static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev, + struct mlx5e_cq *cq, + struct mlx5e_cq_param *param) { - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_core_cq *mcq = &cq->mcq; - int eqn_not_used; - unsigned int irqn; - int err; - - err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq, - &cq->wq_ctrl); - if (err) - return err; - - mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn); - - mcq->cqe_sz = 64; - mcq->set_ci_db = cq->wq_ctrl.db.db; - mcq->arm_db = cq->wq_ctrl.db.db + 1; - *mcq->set_ci_db = 0; - *mcq->arm_db = 0; - mcq->vector = param->eq_ix; - mcq->comp = mlx5e_completion_event; - mcq->event = mlx5e_cq_error_event; - mcq->irqn = irqn; - - cq->priv = priv; - - return 0; + return mlx5e_alloc_cq_common(mdev, param, cq); } -static int mlx5e_open_drop_rq(struct mlx5e_priv *priv) +static int mlx5e_open_drop_rq(struct mlx5_core_dev *mdev, + struct mlx5e_rq *drop_rq) { - struct mlx5e_cq_param cq_param; - struct mlx5e_rq_param rq_param; - struct mlx5e_rq *rq = &priv->drop_rq; - struct mlx5e_cq *cq = &priv->drop_rq.cq; + struct mlx5e_cq_param cq_param = {}; + struct mlx5e_rq_param rq_param = {}; + struct mlx5e_cq *cq = &drop_rq->cq; int err; - memset(&cq_param, 0, sizeof(cq_param)); - memset(&rq_param, 0, sizeof(rq_param)); mlx5e_build_drop_rq_param(&rq_param); - err = mlx5e_create_drop_cq(priv, cq, &cq_param); + err = mlx5e_alloc_drop_cq(mdev, cq, &cq_param); if (err) return err; - err = mlx5e_enable_cq(cq, &cq_param); + err = mlx5e_create_cq(cq, &cq_param); if (err) - goto err_destroy_cq; + goto err_free_cq; - err = mlx5e_create_drop_rq(priv, rq, &rq_param); + err = mlx5e_alloc_drop_rq(mdev, drop_rq, &rq_param); if (err) - goto err_disable_cq; + goto err_destroy_cq; - err = mlx5e_enable_rq(rq, &rq_param); + err = mlx5e_create_rq(drop_rq, &rq_param); if (err) - goto err_destroy_rq; + goto err_free_rq; return 0; -err_destroy_rq: - mlx5e_destroy_rq(&priv->drop_rq); - -err_disable_cq: - mlx5e_disable_cq(&priv->drop_rq.cq); +err_free_rq: + mlx5e_free_rq(drop_rq); err_destroy_cq: - mlx5e_destroy_cq(&priv->drop_rq.cq); + mlx5e_destroy_cq(cq); + +err_free_cq: + mlx5e_free_cq(cq); return err; } -static void mlx5e_close_drop_rq(struct mlx5e_priv *priv) +static void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq) { - mlx5e_disable_rq(&priv->drop_rq); - mlx5e_destroy_rq(&priv->drop_rq); - mlx5e_disable_cq(&priv->drop_rq.cq); - mlx5e_destroy_cq(&priv->drop_rq.cq); + 
mlx5e_destroy_rq(drop_rq); + mlx5e_free_rq(drop_rq); + mlx5e_destroy_cq(&drop_rq->cq); + mlx5e_free_cq(&drop_rq->cq); } -static int mlx5e_create_tis(struct mlx5e_priv *priv, int tc) +int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, + u32 underlay_qpn, u32 *tisn) { - struct mlx5_core_dev *mdev = priv->mdev; u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); MLX5_SET(tisc, tisc, prio, tc << 1); + MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn); MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); if (mlx5_lag_is_lacp_owner(mdev)) MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); - return mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]); + return mlx5_core_create_tis(mdev, in, sizeof(in), tisn); } -static void mlx5e_destroy_tis(struct mlx5e_priv *priv, int tc) +void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn) { - mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]); + mlx5_core_destroy_tis(mdev, tisn); } int mlx5e_create_tises(struct mlx5e_priv *priv) @@ -2530,7 +2794,7 @@ int mlx5e_create_tises(struct mlx5e_priv *priv) int tc; for (tc = 0; tc < priv->profile->max_tc; tc++) { - err = mlx5e_create_tis(priv, tc); + err = mlx5e_create_tis(priv->mdev, tc, 0, &priv->tisn[tc]); if (err) goto err_close_tises; } @@ -2539,7 +2803,7 @@ int mlx5e_create_tises(struct mlx5e_priv *priv) err_close_tises: for (tc--; tc >= 0; tc--) - mlx5e_destroy_tis(priv, tc); + mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); return err; } @@ -2549,34 +2813,34 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) int tc; for (tc = 0; tc < priv->profile->max_tc; tc++) - mlx5e_destroy_tis(priv, tc); + mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); } -static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, - enum mlx5e_traffic_types tt) +static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, + enum mlx5e_traffic_types tt, + u32 *tirc) { MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); - mlx5e_build_tir_ctx_lro(tirc, priv); + mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); - mlx5e_build_indir_tir_ctx_hash(priv, tirc, tt); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc); } -static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, - u32 rqtn) +static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) { MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); - mlx5e_build_tir_ctx_lro(tirc, priv); + mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, rqtn); MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); } -static int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) +int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) { struct mlx5e_tir *tir; void *tirc; @@ -2594,7 +2858,7 @@ static int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) memset(in, 0, inlen); tir = &priv->indir_tir[tt]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - mlx5e_build_indir_tir_ctx(priv, tirc, tt); + mlx5e_build_indir_tir_ctx(priv, tt, tirc); err = mlx5e_create_tir(priv->mdev, tir, in, inlen); if (err) goto err_destroy_tirs; @@ -2605,6 +2869,7 @@ static int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) return 0; err_destroy_tirs: + mlx5_core_warn(priv->mdev, "create indirect tirs failed, 
%d\n", err); for (tt--; tt >= 0; tt--) mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]); @@ -2632,8 +2897,7 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) memset(in, 0, inlen); tir = &priv->direct_tir[ix]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - mlx5e_build_direct_tir_ctx(priv, tirc, - priv->direct_tir[ix].rqt.rqtn); + mlx5e_build_direct_tir_ctx(priv, priv->direct_tir[ix].rqt.rqtn, tirc); err = mlx5e_create_tir(priv->mdev, tir, in, inlen); if (err) goto err_destroy_ch_tirs; @@ -2644,6 +2908,7 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) return 0; err_destroy_ch_tirs: + mlx5_core_warn(priv->mdev, "create direct tirs failed, %d\n", err); for (ix--; ix >= 0; ix--) mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]); @@ -2652,7 +2917,7 @@ err_destroy_ch_tirs: return err; } -static void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) +void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) { int i; @@ -2669,16 +2934,27 @@ void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]); } -int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd) +static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable) { int err = 0; int i; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - return 0; + for (i = 0; i < chs->num; i++) { + err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable); + if (err) + return err; + } - for (i = 0; i < priv->params.num_channels; i++) { - err = mlx5e_modify_rq_vsd(&priv->channel[i]->rq, vsd); + return 0; +} + +static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) +{ + int err = 0; + int i; + + for (i = 0; i < chs->num; i++) { + err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd); if (err) return err; } @@ -2689,7 +2965,7 @@ int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd) static int mlx5e_setup_tc(struct net_device *netdev, u8 tc) { struct mlx5e_priv *priv = netdev_priv(netdev); - bool was_opened; + struct mlx5e_channels new_channels = {}; int err = 0; if (tc && tc != MLX5E_MAX_NUM_TC) @@ -2697,17 +2973,21 @@ static int mlx5e_setup_tc(struct net_device *netdev, u8 tc) mutex_lock(&priv->state_lock); - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) - mlx5e_close_locked(priv->netdev); + new_channels.params = priv->channels.params; + new_channels.params.num_tc = tc ? tc : 1; - priv->params.num_tc = tc ? 
tc : 1; + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } - if (was_opened) - err = mlx5e_open_locked(priv->netdev); + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto out; + mlx5e_switch_priv_channels(priv, &new_channels, NULL); +out: mutex_unlock(&priv->state_lock); - return err; } @@ -2737,7 +3017,9 @@ mqprio: if (tc->type != TC_SETUP_MQPRIO) return -EINVAL; - return mlx5e_setup_tc(dev, tc->tc); + tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + + return mlx5e_setup_tc(dev, tc->mqprio->num_tc); } static void @@ -2822,26 +3104,31 @@ typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable); static int set_feature_lro(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); - bool was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - int err; + struct mlx5e_channels new_channels = {}; + int err = 0; + bool reset; mutex_lock(&priv->state_lock); - if (was_opened && (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST)) - mlx5e_close_locked(priv->netdev); + reset = (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST); + reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state); - priv->params.lro_en = enable; - err = mlx5e_modify_tirs_lro(priv); - if (err) { - netdev_err(netdev, "lro modify failed, %d\n", err); - priv->params.lro_en = !enable; + new_channels.params = priv->channels.params; + new_channels.params.lro_en = enable; + + if (!reset) { + priv->channels.params = new_channels.params; + err = mlx5e_modify_tirs_lro(priv); + goto out; } - if (was_opened && (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST)) - mlx5e_open_locked(priv->netdev); + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto out; + mlx5e_switch_priv_channels(priv, &new_channels, mlx5e_modify_tirs_lro); +out: mutex_unlock(&priv->state_lock); - return err; } @@ -2878,23 +3165,44 @@ static int set_feature_rx_all(struct net_device *netdev, bool enable) return mlx5_set_port_fcs(mdev, !enable); } -static int set_feature_rx_vlan(struct net_device *netdev, bool enable) +static int set_feature_rx_fcs(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); int err; mutex_lock(&priv->state_lock); - priv->params.vlan_strip_disable = !enable; - err = mlx5e_modify_rqs_vsd(priv, !enable); + priv->channels.params.scatter_fcs_en = enable; + err = mlx5e_modify_channels_scatter_fcs(&priv->channels, enable); if (err) - priv->params.vlan_strip_disable = enable; + priv->channels.params.scatter_fcs_en = !enable; mutex_unlock(&priv->state_lock); return err; } +static int set_feature_rx_vlan(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err = 0; + + mutex_lock(&priv->state_lock); + + priv->channels.params.vlan_strip_disable = !enable; + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + err = mlx5e_modify_channels_vsd(&priv->channels, !enable); + if (err) + priv->channels.params.vlan_strip_disable = enable; + +unlock: + mutex_unlock(&priv->state_lock); + + return err; +} + #ifdef CONFIG_RFS_ACCEL static int set_feature_arfs(struct net_device *netdev, bool enable) { @@ -2947,6 +3255,8 @@ static int mlx5e_set_features(struct net_device *netdev, set_feature_tc_num_filters); err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXALL, set_feature_rx_all); + err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXFCS, + set_feature_rx_fcs); err |= mlx5e_handle_feature(netdev, features, 
NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan); #ifdef CONFIG_RFS_ACCEL @@ -2960,28 +3270,38 @@ static int mlx5e_set_features(struct net_device *netdev, static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu) { struct mlx5e_priv *priv = netdev_priv(netdev); - bool was_opened; + struct mlx5e_channels new_channels = {}; + int curr_mtu; int err = 0; bool reset; mutex_lock(&priv->state_lock); - reset = !priv->params.lro_en && - (priv->params.rq_wq_type != + reset = !priv->channels.params.lro_en && + (priv->channels.params.rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ); - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened && reset) - mlx5e_close_locked(netdev); + reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state); + curr_mtu = netdev->mtu; netdev->mtu = new_mtu; - mlx5e_set_dev_port_mtu(netdev); - if (was_opened && reset) - err = mlx5e_open_locked(netdev); + if (!reset) { + mlx5e_set_dev_port_mtu(priv); + goto out; + } - mutex_unlock(&priv->state_lock); + new_channels.params = priv->channels.params; + err = mlx5e_open_channels(priv, &new_channels); + if (err) { + netdev->mtu = curr_mtu; + goto out; + } + + mlx5e_switch_priv_channels(priv, &new_channels, mlx5e_set_dev_port_mtu); +out: + mutex_unlock(&priv->state_lock); return err; } @@ -3186,8 +3506,8 @@ static void mlx5e_tx_timeout(struct net_device *dev) netdev_err(dev, "TX timeout detected\n"); - for (i = 0; i < priv->params.num_channels * priv->params.num_tc; i++) { - struct mlx5e_sq *sq = priv->txq_to_sq_map[i]; + for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) { + struct mlx5e_txqsq *sq = priv->txq2sq[i]; if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i))) continue; @@ -3219,7 +3539,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); /* no need for full reset when exchanging programs */ - reset = (!priv->xdp_prog || !prog); + reset = (!priv->channels.params.xdp_prog || !prog); if (was_opened && reset) mlx5e_close_locked(netdev); @@ -3227,7 +3547,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) /* num_channels is invariant here, so we can take the * batched reference right upfront. */ - prog = bpf_prog_add(prog, priv->params.num_channels); + prog = bpf_prog_add(prog, priv->channels.num); if (IS_ERR(prog)) { err = PTR_ERR(prog); goto unlock; @@ -3237,12 +3557,12 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) /* exchange programs, extra prog reference we got from caller * as long as we don't fail from this point onwards. */ - old_prog = xchg(&priv->xdp_prog, prog); + old_prog = xchg(&priv->channels.params.xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); if (reset) /* change RQ type according to priv->xdp_prog */ - mlx5e_set_rq_priv_params(priv); + mlx5e_set_rq_params(priv->mdev, &priv->channels.params); if (was_opened && reset) mlx5e_open_locked(netdev); @@ -3253,8 +3573,8 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) /* exchanging programs w/o reset, we update ref counts on behalf * of the channels RQs here. 
*/ - for (i = 0; i < priv->params.num_channels; i++) { - struct mlx5e_channel *c = priv->channel[i]; + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); napi_synchronize(&c->napi); @@ -3280,7 +3600,7 @@ static bool mlx5e_xdp_attached(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - return !!priv->xdp_prog; + return !!priv->channels.params.xdp_prog; } static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) @@ -3303,10 +3623,12 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) static void mlx5e_netpoll(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_channels *chs = &priv->channels; + int i; - for (i = 0; i < priv->params.num_channels; i++) - napi_schedule(&priv->channel[i]->napi); + for (i = 0; i < chs->num; i++) + napi_schedule(&chs->c[i]->napi); } #endif @@ -3463,6 +3785,12 @@ static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw) (pci_bw < 40000) && (pci_bw < link_speed)); } +static bool hw_lro_heuristic(u32 link_speed, u32 pci_bw) +{ + return !(link_speed && pci_bw && + (pci_bw <= 16000) && (pci_bw < link_speed)); +} + void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) { params->rx_cq_period_mode = cq_period_mode; @@ -3475,6 +3803,13 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) params->rx_cq_moderation.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; + + if (params->rx_am_enabled) + params->rx_cq_moderation = + mlx5e_am_get_def_profile(params->rx_cq_period_mode); + + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER, + params->rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); } u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) @@ -3489,75 +3824,81 @@ u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); } -static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv) +void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u16 max_channels) { - struct mlx5e_priv *priv = netdev_priv(netdev); + u8 cq_period_mode = 0; u32 link_speed = 0; u32 pci_bw = 0; - u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? - MLX5_CQ_PERIOD_MODE_START_FROM_CQE : - MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - priv->mdev = mdev; - priv->netdev = netdev; - priv->params.num_channels = profile->max_nch(mdev); - priv->profile = profile; - priv->ppriv = ppriv; + params->num_channels = max_channels; + params->num_tc = 1; - priv->params.lro_timeout = - mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); + mlx5e_get_max_linkspeed(mdev, &link_speed); + mlx5e_get_pci_bw(mdev, &pci_bw); + mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n", + link_speed, pci_bw); - priv->params.log_sq_size = is_kdump_kernel() ? + /* SQ */ + params->log_sq_size = is_kdump_kernel() ? 
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; /* set CQE compression */ - priv->params.rx_cqe_compress_def = false; + params->rx_cqe_compress_def = false; if (MLX5_CAP_GEN(mdev, cqe_compression) && - MLX5_CAP_GEN(mdev, vport_group_manager)) { - mlx5e_get_max_linkspeed(mdev, &link_speed); - mlx5e_get_pci_bw(mdev, &pci_bw); - mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n", - link_speed, pci_bw); - priv->params.rx_cqe_compress_def = - cqe_compress_heuristic(link_speed, pci_bw); - } - - MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, - priv->params.rx_cqe_compress_def); - - mlx5e_set_rq_priv_params(priv); - if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) - priv->params.lro_en = true; - - priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); - mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); - - priv->params.tx_cq_moderation.usec = - MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; - priv->params.tx_cq_moderation.pkts = - MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; - priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); - mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode); - if (priv->params.tx_min_inline_mode == MLX5_INLINE_MODE_NONE && + MLX5_CAP_GEN(mdev, vport_group_manager)) + params->rx_cqe_compress_def = cqe_compress_heuristic(link_speed, pci_bw); + + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); + + /* RQ */ + mlx5e_set_rq_params(mdev, params); + + /* HW LRO */ + /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */ + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + params->lro_en = hw_lro_heuristic(link_speed, pci_bw); + params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); + + /* CQ moderation params */ + cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + params->rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(params, cq_period_mode); + + params->tx_cq_moderation.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; + params->tx_cq_moderation.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; + + /* TX inline */ + params->tx_max_inline = mlx5e_get_max_inline_cap(mdev); + mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); + if (params->tx_min_inline_mode == MLX5_INLINE_MODE_NONE && !MLX5_CAP_ETH(mdev, wqe_vlan_insert)) - priv->params.tx_min_inline_mode = MLX5_INLINE_MODE_L2; + params->tx_min_inline_mode = MLX5_INLINE_MODE_L2; - priv->params.num_tc = 1; - priv->params.rss_hfunc = ETH_RSS_HASH_XOR; + /* RSS */ + params->rss_hfunc = ETH_RSS_HASH_XOR; + netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key)); + mlx5e_build_default_indir_rqt(mdev, params->indirection_rqt, + MLX5E_INDIR_RQT_SIZE, max_channels); +} - netdev_rss_key_fill(priv->params.toeplitz_hash_key, - sizeof(priv->params.toeplitz_hash_key)); +static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev)); + priv->mdev = mdev; + priv->netdev = netdev; + priv->profile = profile; + priv->ppriv = ppriv; - /* Initialize pflags */ - MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, - priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); + mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev)); mutex_init(&priv->state_lock); @@ -3642,13 +3983,19 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) if (fcs_supported) netdev->hw_features |= NETIF_F_RXALL; + if (MLX5_CAP_ETH(mdev, scatter_fcs)) + netdev->hw_features |= NETIF_F_RXFCS; + netdev->features = netdev->hw_features; - if (!priv->params.lro_en) + if (!priv->channels.params.lro_en) netdev->features &= ~NETIF_F_LRO; if (fcs_enabled) netdev->features &= ~NETIF_F_RXALL; + if (!priv->channels.params.scatter_fcs_en) + netdev->features &= ~NETIF_F_RXFCS; + #define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f) if (FT_CAP(flow_modify_en) && FT_CAP(modify_root) && @@ -3708,39 +4055,30 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { mlx5e_vxlan_cleanup(priv); - if (priv->xdp_prog) - bpf_prog_put(priv->xdp_prog); + if (priv->channels.params.xdp_prog) + bpf_prog_put(priv->channels.params.xdp_prog); } static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; int err; - int i; - err = mlx5e_create_indirect_rqts(priv); - if (err) { - mlx5_core_warn(mdev, "create indirect rqts failed, %d\n", err); + err = mlx5e_create_indirect_rqt(priv); + if (err) return err; - } err = mlx5e_create_direct_rqts(priv); - if (err) { - mlx5_core_warn(mdev, "create direct rqts failed, %d\n", err); + if (err) goto err_destroy_indirect_rqts; - } err = mlx5e_create_indirect_tirs(priv); - if (err) { - mlx5_core_warn(mdev, "create indirect tirs failed, %d\n", err); + if (err) goto err_destroy_direct_rqts; - } err = mlx5e_create_direct_tirs(priv); - if (err) { - mlx5_core_warn(mdev, "create direct tirs failed, %d\n", err); + if (err) goto err_destroy_indirect_tirs; - } err = mlx5e_create_flow_steering(priv); if (err) { @@ -3761,8 +4099,7 @@ 
err_destroy_direct_tirs: err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: - for (i = 0; i < priv->profile->max_nch(mdev); i++) - mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + mlx5e_destroy_direct_rqts(priv); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); return err; @@ -3770,14 +4107,11 @@ err_destroy_indirect_rqts: static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) { - int i; - mlx5e_tc_cleanup(priv); mlx5e_destroy_flow_steering(priv); mlx5e_destroy_direct_tirs(priv); mlx5e_destroy_indirect_tirs(priv); - for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) - mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + mlx5e_destroy_direct_rqts(priv); mlx5e_destroy_rqt(priv, &priv->indir_rqt); } @@ -3801,21 +4135,22 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_eswitch_rep rep; + u16 max_mtu; + + mlx5e_init_l2_addr(priv); + + /* MTU range: 68 - hw-specific max */ + netdev->min_mtu = ETH_MIN_MTU; + mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); + netdev->max_mtu = MLX5E_HW2SW_MTU(max_mtu); + mlx5e_set_dev_port_mtu(priv); mlx5_lag_add(mdev, netdev); mlx5e_enable_async_events(priv); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) { - mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); - rep.load = mlx5e_nic_rep_load; - rep.unload = mlx5e_nic_rep_unload; - rep.vport = FDB_UPLINK_VPORT; - rep.netdev = netdev; - mlx5_eswitch_register_vport_rep(esw, 0, &rep); - } + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + mlx5e_register_vport_reps(priv); if (netdev->reg_state != NETREG_REGISTERED) return; @@ -3828,16 +4163,29 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) } queue_work(priv->wq, &priv->set_rx_mode_work); + + rtnl_lock(); + if (netif_running(netdev)) + mlx5e_open(netdev); + netif_device_attach(netdev); + rtnl_unlock(); } static void mlx5e_nic_disable(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; + + rtnl_lock(); + if (netif_running(priv->netdev)) + mlx5e_close(priv->netdev); + netif_device_detach(priv->netdev); + rtnl_unlock(); queue_work(priv->wq, &priv->set_rx_mode_work); + if (MLX5_CAP_GEN(mdev, vport_group_manager)) - mlx5_eswitch_unregister_vport_rep(esw, 0); + mlx5e_unregister_vport_reps(priv); + mlx5e_disable_async_events(priv); mlx5_lag_remove(mdev); } @@ -3853,9 +4201,13 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .disable = mlx5e_nic_disable, .update_stats = mlx5e_update_stats, .max_nch = mlx5e_get_max_num_channels, + .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, .max_tc = MLX5E_MAX_NUM_TC, }; +/* mlx5e generic netdev management API (move to en_common.c) */ + struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, void *ppriv) @@ -3872,6 +4224,10 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, return NULL; } +#ifdef CONFIG_RFS_ACCEL + netdev->rx_cpu_rmap = mdev->rmap; +#endif + profile->init(mdev, netdev, profile, ppriv); netif_carrier_off(netdev); @@ -3891,14 +4247,12 @@ err_cleanup_nic: return NULL; } -int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) +int mlx5e_attach_netdev(struct mlx5e_priv *priv) { + struct mlx5_core_dev *mdev = priv->mdev; const struct mlx5e_profile *profile; - struct 
mlx5e_priv *priv; - u16 max_mtu; int err; - priv = netdev_priv(netdev); profile = priv->profile; clear_bit(MLX5E_STATE_DESTROYING, &priv->state); @@ -3906,7 +4260,7 @@ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) if (err) goto out; - err = mlx5e_open_drop_rq(priv); + err = mlx5e_open_drop_rq(mdev, &priv->drop_rq); if (err) { mlx5_core_err(mdev, "open drop rq failed, %d\n", err); goto err_cleanup_tx; @@ -3918,28 +4272,13 @@ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) mlx5e_create_q_counter(priv); - mlx5e_init_l2_addr(priv); - - /* MTU range: 68 - hw-specific max */ - netdev->min_mtu = ETH_MIN_MTU; - mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); - netdev->max_mtu = MLX5E_HW2SW_MTU(max_mtu); - - mlx5e_set_dev_port_mtu(netdev); - if (profile->enable) profile->enable(priv); - rtnl_lock(); - if (netif_running(netdev)) - mlx5e_open(netdev); - netif_device_attach(netdev); - rtnl_unlock(); - return 0; err_close_drop_rq: - mlx5e_close_drop_rq(priv); + mlx5e_close_drop_rq(&priv->drop_rq); err_cleanup_tx: profile->cleanup_tx(priv); @@ -3948,66 +4287,34 @@ out: return err; } -static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) -{ - struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; - u8 mac[ETH_ALEN]; - - if (!MLX5_CAP_GEN(mdev, vport_group_manager)) - return; - - mlx5_query_nic_vport_mac_address(mdev, 0, mac); - - for (vport = 1; vport < total_vfs; vport++) { - struct mlx5_eswitch_rep rep; - - rep.load = mlx5e_vport_rep_load; - rep.unload = mlx5e_vport_rep_unload; - rep.vport = vport; - ether_addr_copy(rep.hw_id, mac); - mlx5_eswitch_register_vport_rep(esw, vport, &rep); - } -} - -static void mlx5e_unregister_vport_rep(struct mlx5_core_dev *mdev) -{ - struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; - - if (!MLX5_CAP_GEN(mdev, vport_group_manager)) - return; - - for (vport = 1; vport < total_vfs; vport++) - mlx5_eswitch_unregister_vport_rep(esw, vport); -} - -void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) +void mlx5e_detach_netdev(struct mlx5e_priv *priv) { - struct mlx5e_priv *priv = netdev_priv(netdev); const struct mlx5e_profile *profile = priv->profile; set_bit(MLX5E_STATE_DESTROYING, &priv->state); - rtnl_lock(); - if (netif_running(netdev)) - mlx5e_close(netdev); - netif_device_detach(netdev); - rtnl_unlock(); - if (profile->disable) profile->disable(priv); flush_workqueue(priv->wq); mlx5e_destroy_q_counter(priv); profile->cleanup_rx(priv); - mlx5e_close_drop_rq(priv); + mlx5e_close_drop_rq(&priv->drop_rq); profile->cleanup_tx(priv); cancel_delayed_work_sync(&priv->update_stats_work); } +void mlx5e_destroy_netdev(struct mlx5e_priv *priv) +{ + const struct mlx5e_profile *profile = priv->profile; + struct net_device *netdev = priv->netdev; + + destroy_workqueue(priv->wq); + if (profile->cleanup) + profile->cleanup(priv); + free_netdev(netdev); +} + /* mlx5e_attach and mlx5e_detach scope should be only creating/destroying * hardware contexts and to connect it to the current netdev. 
*/ @@ -4024,13 +4331,12 @@ static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv) if (err) return err; - err = mlx5e_attach_netdev(mdev, netdev); + err = mlx5e_attach_netdev(priv); if (err) { mlx5e_destroy_mdev_resources(mdev); return err; } - mlx5e_register_vport_rep(mdev); return 0; } @@ -4042,8 +4348,7 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) if (!netif_device_present(netdev)) return; - mlx5e_unregister_vport_rep(mdev); - mlx5e_detach_netdev(mdev, netdev); + mlx5e_detach_netdev(priv); mlx5e_destroy_mdev_resources(mdev); } @@ -4051,7 +4356,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; int total_vfs = MLX5_TOTAL_VPORTS(mdev); - void *ppriv = NULL; + struct mlx5e_rep_priv *rpriv = NULL; void *priv; int vport; int err; @@ -4061,10 +4366,17 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) if (err) return NULL; - if (MLX5_CAP_GEN(mdev, vport_group_manager)) - ppriv = &esw->offloads.vport_reps[0]; + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); + if (!rpriv) { + mlx5_core_warn(mdev, + "Not creating net device, Failed to alloc rep priv data\n"); + return NULL; + } + rpriv->rep = &esw->offloads.vport_reps[0]; + } - netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv); + netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, rpriv); if (!netdev) { mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); goto err_unregister_reps; @@ -4090,33 +4402,25 @@ err_detach: mlx5e_detach(mdev, priv); err_destroy_netdev: - mlx5e_destroy_netdev(mdev, priv); + mlx5e_destroy_netdev(priv); err_unregister_reps: for (vport = 1; vport < total_vfs; vport++) mlx5_eswitch_unregister_vport_rep(esw, vport); + kfree(rpriv); return NULL; } -void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) -{ - const struct mlx5e_profile *profile = priv->profile; - struct net_device *netdev = priv->netdev; - - destroy_workqueue(priv->wq); - if (profile->cleanup) - profile->cleanup(priv); - free_netdev(netdev); -} - static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) { struct mlx5e_priv *priv = vpriv; + void *ppriv = priv->ppriv; unregister_netdev(priv->netdev); mlx5e_detach(mdev, vpriv); - mlx5e_destroy_netdev(mdev, priv); + mlx5e_destroy_netdev(priv); + kfree(ppriv); } static void *mlx5e_get_netdev(void *vpriv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index f621373bd7a5..79462c0368a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -34,10 +34,14 @@ #include <linux/mlx5/fs.h> #include <net/switchdev.h> #include <net/pkt_cls.h> +#include <net/netevent.h> +#include <net/arp.h> #include "eswitch.h" #include "en.h" +#include "en_rep.h" #include "en_tc.h" +#include "fs_core.h" static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; @@ -75,7 +79,8 @@ static void mlx5e_rep_get_strings(struct net_device *dev, static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct rtnl_link_stats64 *vport_stats; struct ifla_vf_stats vf_stats; int err; @@ -102,14 +107,16 @@ static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) int i, j; memset(s, 0, sizeof(*s)); - for (i = 0; i < priv->params.num_channels; i++) 
{ - rq_stats = &priv->channel[i]->rq.stats; + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + rq_stats = &c->rq.stats; s->rx_packets += rq_stats->packets; s->rx_bytes += rq_stats->bytes; - for (j = 0; j < priv->params.num_tc; j++) { - sq_stats = &priv->channel[i]->sq[j].stats; + for (j = 0; j < priv->channels.params.num_tc; j++) { + sq_stats = &c->sq[j].stats; s->tx_packets += sq_stats->packets; s->tx_bytes += sq_stats->bytes; @@ -163,7 +170,8 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = { int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; if (esw->mode == SRIOV_NONE) @@ -182,66 +190,426 @@ int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) } int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) - { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5e_channel *c; - int n, tc, err, num_sqs = 0; + int n, tc, num_sqs = 0; + int err = -ENOMEM; u16 *sqs; - sqs = kcalloc(priv->params.num_channels * priv->params.num_tc, sizeof(u16), GFP_KERNEL); + sqs = kcalloc(priv->channels.num * priv->channels.params.num_tc, sizeof(u16), GFP_KERNEL); if (!sqs) - return -ENOMEM; + goto out; - for (n = 0; n < priv->params.num_channels; n++) { - c = priv->channel[n]; + for (n = 0; n < priv->channels.num; n++) { + c = priv->channels.c[n]; for (tc = 0; tc < c->num_tc; tc++) sqs[num_sqs++] = c->sq[tc].sqn; } err = mlx5_eswitch_sqs2vport_start(esw, rep, sqs, num_sqs); - kfree(sqs); + +out: + if (err) + netdev_warn(priv->netdev, "Failed to add SQs FWD rules %d\n", err); return err; } -int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) { - struct net_device *netdev = rep->netdev; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + mlx5_eswitch_sqs2vport_stop(esw, rep); +} + +static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv) +{ +#if IS_ENABLED(CONFIG_IPV6) + unsigned long ipv6_interval = NEIGH_VAR(&ipv6_stub->nd_tbl->parms, + DELAY_PROBE_TIME); +#else + unsigned long ipv6_interval = ~0UL; +#endif + unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, + DELAY_PROBE_TIME); + struct net_device *netdev = rpriv->rep->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - return mlx5e_add_sqs_fwd_rules(priv); - return 0; + rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval); + mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval); } -void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; - mlx5_eswitch_sqs2vport_stop(esw, rep); + mlx5_fc_queue_stats_work(priv->mdev, + &neigh_update->neigh_stats_work, + neigh_update->min_interval); } -void 
mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep) +static void mlx5e_rep_neigh_stats_work(struct work_struct *work) { - struct net_device *netdev = rep->netdev; + struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv, + neigh_update.neigh_stats_work.work); + struct net_device *netdev = rpriv->rep->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_neigh_hash_entry *nhe; - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_remove_sqs_fwd_rules(priv); + rtnl_lock(); + if (!list_empty(&rpriv->neigh_update.neigh_list)) + mlx5e_rep_queue_neigh_stats_work(priv); - /* clean (and re-init) existing uplink offloaded TC rules */ - mlx5e_tc_cleanup(priv); - mlx5e_tc_init(priv); + list_for_each_entry(nhe, &rpriv->neigh_update.neigh_list, neigh_list) + mlx5e_tc_update_neigh_used_value(nhe); + + rtnl_unlock(); +} + +static void mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) +{ + refcount_inc(&nhe->refcnt); +} + +static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) +{ + if (refcount_dec_and_test(&nhe->refcnt)) + kfree(nhe); +} + +static void mlx5e_rep_update_flows(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + bool neigh_connected, + unsigned char ha[ETH_ALEN]) +{ + struct ethhdr *eth = (struct ethhdr *)e->encap_header; + + ASSERT_RTNL(); + + if ((!neigh_connected && (e->flags & MLX5_ENCAP_ENTRY_VALID)) || + !ether_addr_equal(e->h_dest, ha)) + mlx5e_tc_encap_flows_del(priv, e); + + if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { + ether_addr_copy(e->h_dest, ha); + ether_addr_copy(eth->h_dest, ha); + + mlx5e_tc_encap_flows_add(priv, e); + } +} + +static void mlx5e_rep_neigh_update(struct work_struct *work) +{ + struct mlx5e_neigh_hash_entry *nhe = + container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work); + struct neighbour *n = nhe->n; + struct mlx5e_encap_entry *e; + unsigned char ha[ETH_ALEN]; + struct mlx5e_priv *priv; + bool neigh_connected; + bool encap_connected; + u8 nud_state, dead; + + rtnl_lock(); + + /* If these parameters are changed after we release the lock, + * we'll receive another event letting us know about it. + * We use this lock to avoid inconsistency between the neigh validity + * and it's hw address. 
+ */ + read_lock_bh(&n->lock); + memcpy(ha, n->ha, ETH_ALEN); + nud_state = n->nud_state; + dead = n->dead; + read_unlock_bh(&n->lock); + + neigh_connected = (nud_state & NUD_VALID) && !dead; + + list_for_each_entry(e, &nhe->encap_list, encap_list) { + encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); + priv = netdev_priv(e->out_dev); + + if (encap_connected != neigh_connected || + !ether_addr_equal(e->h_dest, ha)) + mlx5e_rep_update_flows(priv, e, neigh_connected, ha); + } + mlx5e_rep_neigh_entry_release(nhe); + rtnl_unlock(); + neigh_release(n); +} + +static struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh); + +static int mlx5e_rep_netevent_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, + neigh_update.netevent_nb); + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct net_device *netdev = rpriv->rep->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_neigh_hash_entry *nhe = NULL; + struct mlx5e_neigh m_neigh = {}; + struct neigh_parms *p; + struct neighbour *n; + bool found = false; + + switch (event) { + case NETEVENT_NEIGH_UPDATE: + n = ptr; +#if IS_ENABLED(CONFIG_IPV6) + if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) +#else + if (n->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + m_neigh.dev = n->dev; + m_neigh.family = n->ops->family; + memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + + /* We are in atomic context and can't take RTNL mutex, so use + * spin_lock_bh to lookup the neigh table. bh is used since + * netevent can be called from a softirq context. + */ + spin_lock_bh(&neigh_update->encap_lock); + nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); + if (!nhe) { + spin_unlock_bh(&neigh_update->encap_lock); + return NOTIFY_DONE; + } + + /* This assignment is valid as long as the the neigh reference + * is taken + */ + nhe->n = n; + + /* Take a reference to ensure the neighbour and mlx5 encap + * entry won't be destructed until we drop the reference in + * delayed work. + */ + neigh_hold(n); + mlx5e_rep_neigh_entry_hold(nhe); + + if (!queue_work(priv->wq, &nhe->neigh_update_work)) { + mlx5e_rep_neigh_entry_release(nhe); + neigh_release(n); + } + spin_unlock_bh(&neigh_update->encap_lock); + break; + + case NETEVENT_DELAY_PROBE_TIME_UPDATE: + p = ptr; + + /* We check the device is present since we don't care about + * changes in the default table, we only care about changes + * done per device delay prob time parameter. + */ +#if IS_ENABLED(CONFIG_IPV6) + if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl)) +#else + if (!p->dev || p->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + /* We are in atomic context and can't take RTNL mutex, + * so use spin_lock_bh to walk the neigh list and look for + * the relevant device. bh is used since netevent can be + * called from a softirq context. 
+ */ + spin_lock_bh(&neigh_update->encap_lock); + list_for_each_entry(nhe, &neigh_update->neigh_list, neigh_list) { + if (p->dev == nhe->m_neigh.dev) { + found = true; + break; + } + } + spin_unlock_bh(&neigh_update->encap_lock); + if (!found) + return NOTIFY_DONE; + + neigh_update->min_interval = min_t(unsigned long, + NEIGH_VAR(p, DELAY_PROBE_TIME), + neigh_update->min_interval); + mlx5_fc_update_sampling_interval(priv->mdev, + neigh_update->min_interval); + break; + } + return NOTIFY_DONE; +} + +static const struct rhashtable_params mlx5e_neigh_ht_params = { + .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node), + .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh), + .key_len = sizeof(struct mlx5e_neigh), + .automatic_shrinking = true, +}; + +static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + int err; + + err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params); + if (err) + return err; + + INIT_LIST_HEAD(&neigh_update->neigh_list); + spin_lock_init(&neigh_update->encap_lock); + INIT_DELAYED_WORK(&neigh_update->neigh_stats_work, + mlx5e_rep_neigh_stats_work); + mlx5e_rep_neigh_update_init_interval(rpriv); + + rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event; + err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb); + if (err) + goto out_err; + return 0; + +out_err: + rhashtable_destroy(&neigh_update->neigh_ht); + return err; +} + +static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct mlx5e_priv *priv = netdev_priv(rpriv->rep->netdev); + + unregister_netevent_notifier(&neigh_update->netevent_nb); + + flush_workqueue(priv->wq); /* flush neigh update works */ + + cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work); + + rhashtable_destroy(&neigh_update->neigh_ht); +} + +static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + int err; + + err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht, + &nhe->rhash_node, + mlx5e_neigh_ht_params); + if (err) + return err; + + list_add(&nhe->neigh_list, &rpriv->neigh_update.neigh_list); + + return err; +} + +static void mlx5e_rep_neigh_entry_remove(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + spin_lock_bh(&rpriv->neigh_update.encap_lock); + + list_del(&nhe->neigh_list); + + rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht, + &nhe->rhash_node, + mlx5e_neigh_ht_params); + spin_unlock_bh(&rpriv->neigh_update.encap_lock); +} + +/* This function must only be called under RTNL lock or under the + * representor's encap_lock in case RTNL mutex can't be held. 
+ */ +static struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + + return rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh, + mlx5e_neigh_ht_params); +} + +static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct mlx5e_neigh_hash_entry **nhe) +{ + int err; + + *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL); + if (!*nhe) + return -ENOMEM; + + memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh)); + INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update); + INIT_LIST_HEAD(&(*nhe)->encap_list); + refcount_set(&(*nhe)->refcnt, 1); + + err = mlx5e_rep_neigh_entry_insert(priv, *nhe); + if (err) + goto out_free; + return 0; + +out_free: + kfree(*nhe); + return err; +} + +static void mlx5e_rep_neigh_entry_destroy(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + /* The neigh hash entry must be removed from the hash table regardless + * of the reference count value, so it won't be found by the next + * neigh notification call. The neigh hash entry reference count is + * incremented only during creation and neigh notification calls and + * protects from freeing the nhe struct. + */ + mlx5e_rep_neigh_entry_remove(priv, nhe); + mlx5e_rep_neigh_entry_release(nhe); +} + +int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_neigh_hash_entry *nhe; + int err; + + nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); + if (!nhe) { + err = mlx5e_rep_neigh_entry_create(priv, e, &nhe); + if (err) + return err; + } + list_add(&e->encap_list, &nhe->encap_list); + return 0; +} + +void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_neigh_hash_entry *nhe; + + list_del(&e->encap_list); + nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); + + if (list_empty(&nhe->encap_list)) + mlx5e_rep_neigh_entry_destroy(priv, nhe); } static int mlx5e_rep_open(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int err; @@ -259,7 +627,8 @@ static int mlx5e_rep_open(struct net_device *dev) static int mlx5e_rep_close(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; (void)mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN); @@ -271,7 +640,8 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, char *buf, size_t len) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; int ret; ret = snprintf(buf, len, "%d", rep->vport - 1); @@ -314,18 +684,25 @@ static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle, bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { - struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep; - if (rep && 
rep->vport == FDB_UPLINK_VPORT && esw->mode == SRIOV_OFFLOADS) + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) + return false; + + rep = rpriv->rep; + if (esw->mode == SRIOV_OFFLOADS && + rep && rep->vport == FDB_UPLINK_VPORT) return true; return false; } -bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) +static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) { - struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; if (rep && rep->vport != FDB_UPLINK_VPORT) return true; @@ -397,42 +774,23 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { .ndo_get_offload_stats = mlx5e_get_offload_stats, }; -static void mlx5e_build_rep_netdev_priv(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv) +static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) { - struct mlx5e_priv *priv = netdev_priv(netdev); u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - priv->params.log_sq_size = - MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; - priv->params.rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; - priv->params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; - - priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, - BIT(priv->params.log_rq_size)); - - priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); - mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); - - priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); - priv->params.num_tc = 1; - - priv->params.lro_wqe_sz = - MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; - - priv->mdev = mdev; - priv->netdev = netdev; - priv->params.num_channels = profile->max_nch(mdev); - priv->profile = profile; - priv->ppriv = ppriv; + params->log_sq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + params->rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; + params->log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; - mutex_init(&priv->state_lock); + params->rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(params, cq_period_mode); - INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); + params->tx_max_inline = mlx5e_get_max_inline_cap(mdev); + params->num_tc = 1; + params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; } static void mlx5e_build_rep_netdev(struct net_device *netdev) @@ -458,30 +816,39 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, void *ppriv) { - mlx5e_build_rep_netdev_priv(mdev, netdev, profile, ppriv); + struct mlx5e_priv *priv = netdev_priv(netdev); + + priv->mdev = mdev; + priv->netdev = netdev; + priv->profile = profile; + priv->ppriv = ppriv; + + mutex_init(&priv->state_lock); + + INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); + + priv->channels.params.num_channels = profile->max_nch(mdev); + mlx5e_build_rep_params(mdev, &priv->channels.params); mlx5e_build_rep_netdev(netdev); } static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; - struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_flow_handle *flow_rule; int err; - int i; + + mlx5e_init_l2_addr(priv); err = mlx5e_create_direct_rqts(priv); - if (err) { - mlx5_core_warn(mdev, "create direct rqts 
failed, %d\n", err); + if (err) return err; - } err = mlx5e_create_direct_tirs(priv); - if (err) { - mlx5_core_warn(mdev, "create direct tirs failed, %d\n", err); + if (err) goto err_destroy_direct_rqts; - } flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, rep->vport, @@ -503,21 +870,19 @@ err_del_flow_rule: err_destroy_direct_tirs: mlx5e_destroy_direct_tirs(priv); err_destroy_direct_rqts: - for (i = 0; i < priv->params.num_channels; i++) - mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + mlx5e_destroy_direct_rqts(priv); return err; } static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) { - struct mlx5_eswitch_rep *rep = priv->ppriv; - int i; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; mlx5e_tc_cleanup(priv); mlx5_del_flow_rules(rep->vport_rx_rule); mlx5e_destroy_direct_tirs(priv); - for (i = 0; i < priv->params.num_channels; i++) - mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + mlx5e_destroy_direct_rqts(priv); } static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) @@ -546,56 +911,181 @@ static struct mlx5e_profile mlx5e_rep_profile = { .cleanup_tx = mlx5e_cleanup_nic_tx, .update_stats = mlx5e_rep_update_stats, .max_nch = mlx5e_get_rep_max_num_channels, + .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, + .rx_handlers.handle_rx_cqe_mpwqe = NULL /* Not supported */, .max_tc = 1, }; -int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep) +/* e-Switch vport representors */ + +static int +mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = netdev_priv(rep->netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + int err; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_add_sqs_fwd_rules(priv); + if (err) + return err; + } + + err = mlx5e_rep_neigh_init(rpriv); + if (err) + goto err_remove_sqs; + + return 0; + +err_remove_sqs: + mlx5e_remove_sqs_fwd_rules(priv); + return err; +} + +static void +mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { + struct mlx5e_priv *priv = netdev_priv(rep->netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_remove_sqs_fwd_rules(priv); + + /* clean (and re-init) existing uplink offloaded TC rules */ + mlx5e_tc_cleanup(priv); + mlx5e_tc_init(priv); + + mlx5e_rep_neigh_cleanup(rpriv); +} + +static int +mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv; struct net_device *netdev; int err; - netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep); + rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); + if (!rpriv) + return -ENOMEM; + + netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rpriv); if (!netdev) { pr_warn("Failed to create representor netdev for vport %d\n", rep->vport); + kfree(rpriv); return -EINVAL; } rep->netdev = netdev; + rpriv->rep = rep; - err = mlx5e_attach_netdev(esw->dev, netdev); + err = mlx5e_attach_netdev(netdev_priv(netdev)); if (err) { pr_warn("Failed to attach representor netdev for vport %d\n", rep->vport); goto err_destroy_netdev; } + err = mlx5e_rep_neigh_init(rpriv); + if (err) { + pr_warn("Failed to initialized neighbours handling for vport %d\n", + rep->vport); + goto err_detach_netdev; + } + err = register_netdev(netdev); if (err) { pr_warn("Failed to register representor netdev for vport %d\n", rep->vport); - goto err_detach_netdev; + goto err_neigh_cleanup; } return 0; +err_neigh_cleanup: + 
mlx5e_rep_neigh_cleanup(rpriv); + err_detach_netdev: - mlx5e_detach_netdev(esw->dev, netdev); + mlx5e_detach_netdev(netdev_priv(netdev)); err_destroy_netdev: - mlx5e_destroy_netdev(esw->dev, netdev_priv(netdev)); - + mlx5e_destroy_netdev(netdev_priv(netdev)); + kfree(rpriv); return err; } -void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep) +static void +mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { struct net_device *netdev = rep->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + void *ppriv = priv->ppriv; + + unregister_netdev(rep->netdev); + + mlx5e_rep_neigh_cleanup(rpriv); + mlx5e_detach_netdev(priv); + mlx5e_destroy_netdev(priv); + kfree(ppriv); /* mlx5e_rep_priv */ +} + +static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + int vport; + u8 mac[ETH_ALEN]; + + mlx5_query_nic_vport_mac_address(mdev, 0, mac); + + for (vport = 1; vport < total_vfs; vport++) { + struct mlx5_eswitch_rep rep; + + rep.load = mlx5e_vport_rep_load; + rep.unload = mlx5e_vport_rep_unload; + rep.vport = vport; + ether_addr_copy(rep.hw_id, mac); + mlx5_eswitch_register_vport_rep(esw, vport, &rep); + } +} + +static void mlx5e_rep_unregister_vf_vports(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + int vport; + + for (vport = 1; vport < total_vfs; vport++) + mlx5_eswitch_unregister_vport_rep(esw, vport); +} + +void mlx5e_register_vport_reps(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5_eswitch_rep rep; + + mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); + rep.load = mlx5e_nic_rep_load; + rep.unload = mlx5e_nic_rep_unload; + rep.vport = FDB_UPLINK_VPORT; + rep.netdev = priv->netdev; + mlx5_eswitch_register_vport_rep(esw, 0, &rep); /* UPLINK PF vport*/ + + mlx5e_rep_register_vf_vports(priv); /* VFs vports */ +} + +void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; - unregister_netdev(netdev); - mlx5e_detach_netdev(esw->dev, netdev); - mlx5e_destroy_netdev(esw->dev, netdev_priv(netdev)); + mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */ + mlx5_eswitch_unregister_vport_rep(esw, 0); /* UPLINK PF*/ } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h new file mode 100644 index 000000000000..a0a1a7a1d6c0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5E_REP_H__ +#define __MLX5E_REP_H__ + +#include <net/ip_tunnels.h> +#include <linux/rhashtable.h> +#include "eswitch.h" +#include "en.h" + +struct mlx5e_neigh_update_table { + struct rhashtable neigh_ht; + /* Save the neigh hash entries in a list in addition to the hash table + * (neigh_ht). In order to iterate easily over the neigh entries. + * Used for stats query. + */ + struct list_head neigh_list; + /* protect lookup/remove operations */ + spinlock_t encap_lock; + struct notifier_block netevent_nb; + struct delayed_work neigh_stats_work; + unsigned long min_interval; /* jiffies */ +}; + +struct mlx5e_rep_priv { + struct mlx5_eswitch_rep *rep; + struct mlx5e_neigh_update_table neigh_update; +}; + +struct mlx5e_neigh { + struct net_device *dev; + union { + __be32 v4; + struct in6_addr v6; + } dst_ip; + int family; +}; + +struct mlx5e_neigh_hash_entry { + struct rhash_head rhash_node; + struct mlx5e_neigh m_neigh; + + /* Save the neigh hash entry in a list on the representor in + * addition to the hash table. In order to iterate easily over the + * neighbour entries. Used for stats query. + */ + struct list_head neigh_list; + + /* encap list sharing the same neigh */ + struct list_head encap_list; + + /* valid only when the neigh reference is taken during + * neigh_update_work workqueue callback. + */ + struct neighbour *n; + struct work_struct neigh_update_work; + + /* neigh hash entry can be deleted only when the refcount is zero. + * refcount is needed to avoid neigh hash entry removal by TC, while + * it's used by the neigh notification call. + */ + refcount_t refcnt; + + /* Save the last reported time offloaded trafic pass over one of the + * neigh hash entry flows. Use it to periodically update the neigh + * 'used' value and avoid neigh deleting by the kernel. 
+ */ + unsigned long reported_lastuse; +}; + +enum { + /* set when the encap entry is successfully offloaded into HW */ + MLX5_ENCAP_ENTRY_VALID = BIT(0), +}; + +struct mlx5e_encap_entry { + /* neigh hash entry list of encaps sharing the same neigh */ + struct list_head encap_list; + struct mlx5e_neigh m_neigh; + /* a node of the eswitch encap hash table which keeping all the encap + * entries + */ + struct hlist_node encap_hlist; + struct list_head flows; + u32 encap_id; + struct ip_tunnel_info tun_info; + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + + struct net_device *out_dev; + int tunnel_type; + u8 flags; + char *encap_header; + int encap_size; +}; + +void mlx5e_register_vport_reps(struct mlx5e_priv *priv); +void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv); +bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); +int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); + +int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, void *sp); +bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id); + +int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); +void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); + +int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); +void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); + +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); + +#endif /* __MLX5E_REP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index bafcb349a50c..7b1566f0ae58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -39,6 +39,8 @@ #include "en.h" #include "en_tc.h" #include "eswitch.h" +#include "en_rep.h" +#include "ipoib.h" static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) { @@ -156,28 +158,6 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1; } -void mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val) -{ - bool was_opened; - - if (!MLX5_CAP_GEN(priv->mdev, cqe_compression)) - return; - - if (MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) == val) - return; - - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) - mlx5e_close_locked(priv->netdev); - - MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, val); - mlx5e_set_rq_type_params(priv, priv->params.rq_wq_type); - - if (was_opened) - mlx5e_open_locked(priv->netdev); - -} - #define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT) static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, @@ -331,7 +311,7 @@ mlx5e_copy_skb_header_mpwqe(struct device *pdev, static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; - struct mlx5e_sq *sq = &rq->channel->icosq; + struct mlx5e_icosq *sq = &rq->channel->icosq; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *wqe; u8 num_wqebbs = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_BB); @@ -341,7 +321,7 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; sq->db.ico_wqe[pi].num_wqebbs = 1; - mlx5e_send_nop(sq, false); + mlx5e_post_nop(wq, sq->sqn, &sq->pc); } wqe = mlx5_wq_cyc_get_wqe(wq, pi); @@ -353,7 
+333,7 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs; sq->pc += num_wqebbs; - mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl); } static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, @@ -637,37 +617,36 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); } -static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq) +static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) { struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_tx_wqe *wqe; - u16 pi = (sq->pc - MLX5E_XDP_TX_WQEBBS) & wq->sz_m1; /* last pi */ + u16 pi = (sq->pc - 1) & wq->sz_m1; /* last pi */ wqe = mlx5_wq_cyc_get_wqe(wq, pi); - wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); + mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &wqe->ctrl); } static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, const struct xdp_buff *xdp) { - struct mlx5e_sq *sq = &rq->channel->xdp_sq; + struct mlx5e_xdpsq *sq = &rq->xdpsq; struct mlx5_wq_cyc *wq = &sq->wq; - u16 pi = sq->pc & wq->sz_m1; + u16 pi = sq->pc & wq->sz_m1; struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - struct mlx5e_sq_wqe_info *wi = &sq->db.xdp.wqe_info[pi]; struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_eth_seg *eseg = &wqe->eth; struct mlx5_wqe_data_seg *dseg; - u8 ds_cnt = MLX5E_XDP_TX_DS_COUNT; ptrdiff_t data_offset = xdp->data - xdp->data_hard_start; dma_addr_t dma_addr = di->addr + data_offset; unsigned int dma_len = xdp->data_end - xdp->data; + prefetchw(wqe); + if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || MLX5E_SW2HW_MTU(rq->netdev->mtu) < dma_len)) { rq->stats.xdp_drop++; @@ -675,48 +654,42 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, return false; } - if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) { - if (sq->db.xdp.doorbell) { + if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) { + if (sq->db.doorbell) { /* SQ is full, ring doorbell */ mlx5e_xmit_xdp_doorbell(sq); - sq->db.xdp.doorbell = false; + sq->db.doorbell = false; } rq->stats.xdp_tx_full++; mlx5e_page_release(rq, di, true); return false; } - dma_sync_single_for_device(sq->pdev, dma_addr, dma_len, - PCI_DMA_TODEVICE); + dma_sync_single_for_device(sq->pdev, dma_addr, dma_len, PCI_DMA_TODEVICE); - memset(wqe, 0, sizeof(*wqe)); + cseg->fm_ce_se = 0; dseg = (struct mlx5_wqe_data_seg *)eseg + 1; + /* copy the inline part if required */ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { memcpy(eseg->inline_hdr.start, xdp->data, MLX5E_XDP_MIN_INLINE); eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); dma_len -= MLX5E_XDP_MIN_INLINE; dma_addr += MLX5E_XDP_MIN_INLINE; - - ds_cnt += MLX5E_XDP_IHS_DS_COUNT; dseg++; } /* write the dma part */ dseg->addr = cpu_to_be64(dma_addr); dseg->byte_count = cpu_to_be32(dma_len); - dseg->lkey = sq->mkey_be; cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - sq->db.xdp.di[pi] = *di; - wi->opcode = MLX5_OPCODE_SEND; - wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS; - sq->pc += MLX5E_XDP_TX_WQEBBS; + sq->db.di[pi] = *di; + sq->pc++; - sq->db.xdp.doorbell = true; + sq->db.doorbell = true; rq->stats.xdp_tx++; return true; } @@ -837,7 +810,8 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { struct net_device *netdev = 
rq->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5e_rx_wqe *wqe; struct sk_buff *skb; __be16 wqe_counter_be; @@ -932,7 +906,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto mpwrq_cqe_out; } - prefetch(skb->data); + prefetchw(skb->data); cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb); @@ -950,7 +924,7 @@ mpwrq_cqe_out: int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); - struct mlx5e_sq *xdp_sq = &rq->channel->xdp_sq; + struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; int work_done = 0; if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) @@ -977,9 +951,9 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) rq->handle_rx_cqe(rq, cqe); } - if (xdp_sq->db.xdp.doorbell) { - mlx5e_xmit_xdp_doorbell(xdp_sq); - xdp_sq->db.xdp.doorbell = false; + if (xdpsq->db.doorbell) { + mlx5e_xmit_xdp_doorbell(xdpsq); + xdpsq->db.doorbell = false; } mlx5_cqwq_update_db_record(&cq->wq); @@ -989,3 +963,152 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) return work_done; } + +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) +{ + struct mlx5e_xdpsq *sq; + struct mlx5e_rq *rq; + u16 sqcc; + int i; + + sq = container_of(cq, struct mlx5e_xdpsq, cq); + + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + return false; + + rq = container_of(sq, struct mlx5e_rq, xdpsq); + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { + struct mlx5_cqe64 *cqe; + u16 wqe_counter; + bool last_wqe; + + cqe = mlx5e_get_cqe(cq); + if (!cqe) + break; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + do { + struct mlx5e_dma_info *di; + u16 ci; + + last_wqe = (sqcc == wqe_counter); + + ci = sqcc & sq->wq.sz_m1; + di = &sq->db.di[ci]; + + sqcc++; + /* Recycle RX page */ + mlx5e_page_release(rq, di, true); + } while (!last_wqe); + } + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc = sqcc; + return (i == MLX5E_TX_CQ_POLL_BUDGET); +} + +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) +{ + struct mlx5e_rq *rq = container_of(sq, struct mlx5e_rq, xdpsq); + struct mlx5e_dma_info *di; + u16 ci; + + while (sq->cc != sq->pc) { + ci = sq->cc & sq->wq.sz_m1; + di = &sq->db.di[ci]; + sq->cc++; + + mlx5e_page_release(rq, di, false); + } +} + +#ifdef CONFIG_MLX5_CORE_IPOIB + +#define MLX5_IB_GRH_DGID_OFFSET 24 +#define MLX5_IB_GRH_BYTES 40 +#define MLX5_IPOIB_ENCAP_LEN 4 +#define MLX5_GID_SIZE 16 + +static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + struct net_device *netdev = rq->netdev; + u8 *dgid; + u8 g; + + g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; + dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET; + if ((!g) || dgid[0] != 0xff) + skb->pkt_type = PACKET_HOST; + else if (memcmp(dgid, netdev->broadcast + 4, MLX5_GID_SIZE) == 0) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + + /* TODO: IB/ipoib: Allow mcast packets from other VFs + * 68996a6e760e5c74654723eeb57bf65628ae87f4 + */ + + skb_pull(skb, MLX5_IB_GRH_BYTES); + + skb->protocol = *((__be16 *)(skb->data)); + + skb->ip_summed = 
CHECKSUM_COMPLETE; + skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + + skb_record_rx_queue(skb, rq->ix); + + if (likely(netdev->features & NETIF_F_RXHASH)) + mlx5e_skb_set_hash(cqe, skb); + + skb_reset_mac_header(skb); + skb_pull(skb, MLX5_IPOIB_ENCAP_LEN); + + skb->dev = netdev; + + rq->stats.csum_complete++; + rq->stats.packets++; + rq->stats.bytes += cqe_bcnt; +} + +void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5e_rx_wqe *wqe; + __be16 wqe_counter_be; + struct sk_buff *skb; + u16 wqe_counter; + u32 cqe_bcnt; + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); + if (!skb) + goto wq_ll_pop; + + mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + napi_gro_receive(rq->cq.napi, skb); + +wq_ll_pop: + mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, + &wqe->next.next_wqe_index); +} + +#endif /* CONFIG_MLX5_CORE_IPOIB */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c index cbfac06b7ffd..02dd3a95ed8f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c @@ -293,7 +293,7 @@ void mlx5e_rx_am_work(struct work_struct *work) struct mlx5e_rq *rq = container_of(am, struct mlx5e_rq, am); struct mlx5e_cq_moder cur_profile = profile[am->mode][am->profile_ix]; - mlx5_core_modify_cq_moderation(rq->priv->mdev, &rq->cq.mcq, + mlx5_core_modify_cq_moderation(rq->mdev, &rq->cq.mcq, cur_profile.usec, cur_profile.pkts); am->state = MLX5E_AM_START_MEASURE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 5621dcfda4f1..5225f2226a67 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -236,12 +236,9 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv, { int err = 0; - err = mlx5e_refresh_tirs_self_loopback(priv->mdev, true); - if (err) { - netdev_err(priv->netdev, - "\tFailed to enable UC loopback err(%d)\n", err); + err = mlx5e_refresh_tirs(priv, true); + if (err) return err; - } lbtp->loopback_ok = false; init_completion(&lbtp->comp); @@ -258,7 +255,7 @@ static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv, struct mlx5e_lbt_priv *lbtp) { dev_remove_pack(&lbtp->pt); - mlx5e_refresh_tirs_self_loopback(priv->mdev, false); + mlx5e_refresh_tirs(priv, false); } #define MLX5E_LB_VERIFY_TIMEOUT (msecs_to_jiffies(200)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 5436866798f4..11c27e4fadf6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -42,14 +42,25 @@ #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_vlan.h> #include <net/tc_act/tc_tunnel_key.h> +#include <net/tc_act/tc_pedit.h> #include <net/vxlan.h> +#include <net/arp.h> #include "en.h" +#include "en_rep.h" #include "en_tc.h" #include "eswitch.h" #include "vxlan.h" +struct mlx5_nic_flow_attr { + u32 action; + u32 flow_tag; + u32 mod_hdr_id; +}; + enum { MLX5E_TC_FLOW_ESWITCH = BIT(0), + MLX5E_TC_FLOW_NIC = BIT(1), + MLX5E_TC_FLOW_OFFLOADED = BIT(2), }; struct mlx5e_tc_flow { @@ -58,7 +69,16 @@ struct mlx5e_tc_flow { u8 flags; struct mlx5_flow_handle *rule; struct list_head encap; /* flows sharing 
the same encap */ - struct mlx5_esw_flow_attr *attr; + union { + struct mlx5_esw_flow_attr esw_attr[0]; + struct mlx5_nic_flow_attr nic_attr[0]; + }; +}; + +struct mlx5e_tc_flow_parse_attr { + struct mlx5_flow_spec spec; + int num_mod_hdr_actions; + void *mod_hdr_actions; }; enum { @@ -71,24 +91,26 @@ enum { static struct mlx5_flow_handle * mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - u32 action, u32 flow_tag) + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) { + struct mlx5_nic_flow_attr *attr = flow->nic_attr; struct mlx5_core_dev *dev = priv->mdev; - struct mlx5_flow_destination dest = { 0 }; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = { - .action = action, - .flow_tag = flow_tag, + .action = attr->action, + .flow_tag = attr->flow_tag, .encap_id = 0, }; struct mlx5_fc *counter = NULL; struct mlx5_flow_handle *rule; bool table_created = false; + int err; - if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = priv->fs.vlan.ft.t; - } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(dev, true); if (IS_ERR(counter)) return ERR_CAST(counter); @@ -97,6 +119,19 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, dest.counter = counter; } + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5_modify_header_alloc(dev, MLX5_FLOW_NAMESPACE_KERNEL, + parse_attr->num_mod_hdr_actions, + parse_attr->mod_hdr_actions, + &attr->mod_hdr_id); + flow_act.modify_id = attr->mod_hdr_id; + kfree(parse_attr->mod_hdr_actions); + if (err) { + rule = ERR_PTR(err); + goto err_create_mod_hdr_id; + } + } + if (IS_ERR_OR_NULL(priv->fs.tc.t)) { priv->fs.tc.t = mlx5_create_auto_grouped_flow_table(priv->fs.ns, @@ -114,8 +149,9 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, table_created = true; } - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - rule = mlx5_add_flow_rules(priv->fs.tc.t, spec, &flow_act, &dest, 1); + parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, + &flow_act, &dest, 1); if (IS_ERR(rule)) goto err_add_rule; @@ -128,6 +164,10 @@ err_add_rule: priv->fs.tc.t = NULL; } err_create_ft: + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5_modify_header_dealloc(priv->mdev, + attr->mod_hdr_id); +err_create_mod_hdr_id: mlx5_fc_destroy(dev, counter); return rule; @@ -138,47 +178,195 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, { struct mlx5_fc *counter = NULL; - if (!IS_ERR(flow->rule)) { - counter = mlx5_flow_rule_counter(flow->rule); - mlx5_del_flow_rules(flow->rule); - mlx5_fc_destroy(priv->mdev, counter); - } + counter = mlx5_flow_rule_counter(flow->rule); + mlx5_del_flow_rules(flow->rule); + mlx5_fc_destroy(priv->mdev, counter); if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) { mlx5_destroy_flow_table(priv->fs.tc.t); priv->fs.tc.t = NULL; } + + if (flow->nic_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5_modify_header_dealloc(priv->mdev, + flow->nic_attr->mod_hdr_id); } +static void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); + static struct mlx5_flow_handle * mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr) + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) 
{ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5_flow_handle *rule; int err; err = mlx5_eswitch_add_vlan_action(esw, attr); - if (err) - return ERR_PTR(err); + if (err) { + rule = ERR_PTR(err); + goto err_add_vlan; + } - return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); -} + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5_modify_header_alloc(priv->mdev, MLX5_FLOW_NAMESPACE_FDB, + parse_attr->num_mod_hdr_actions, + parse_attr->mod_hdr_actions, + &attr->mod_hdr_id); + kfree(parse_attr->mod_hdr_actions); + if (err) { + rule = ERR_PTR(err); + goto err_mod_hdr; + } + } -static void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow); + rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr); + if (IS_ERR(rule)) + goto err_add_rule; + + return rule; + +err_add_rule: + if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5_modify_header_dealloc(priv->mdev, + attr->mod_hdr_id); +err_mod_hdr: + mlx5_eswitch_del_vlan_action(esw, attr); +err_add_vlan: + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + mlx5e_detach_encap(priv, flow); + return rule; +} static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; - mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->attr); + if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr); + } - mlx5_eswitch_del_vlan_action(esw, flow->attr); + mlx5_eswitch_del_vlan_action(esw, flow->esw_attr); - if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { mlx5e_detach_encap(priv, flow); + kvfree(flow->esw_attr->parse_attr); + } + + if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5_modify_header_dealloc(priv->mdev, + attr->mod_hdr_id); +} + +void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_tc_flow *flow; + int err; + + err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, + e->encap_size, e->encap_header, + &e->encap_id); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n", + err); + return; + } + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(priv); + + list_for_each_entry(flow, &e->flows, encap) { + flow->esw_attr->encap_id = e->encap_id; + flow->rule = mlx5e_tc_add_fdb_flow(priv, + flow->esw_attr->parse_attr, + flow); + if (IS_ERR(flow->rule)) { + err = PTR_ERR(flow->rule); + mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", + err); + continue; + } + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + } +} + +void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_tc_flow *flow; + struct mlx5_fc *counter; + + list_for_each_entry(flow, &e->flows, encap) { + if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + counter = mlx5_flow_rule_counter(flow->rule); + mlx5_del_flow_rules(flow->rule); + mlx5_fc_destroy(priv->mdev, counter); + } + } + + if (e->flags & MLX5_ENCAP_ENTRY_VALID) { + e->flags &= ~MLX5_ENCAP_ENTRY_VALID; + mlx5_encap_dealloc(priv->mdev, e->encap_id); + } +} + +void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_neigh *m_neigh = 
&nhe->m_neigh; + u64 bytes, packets, lastuse = 0; + struct mlx5e_tc_flow *flow; + struct mlx5e_encap_entry *e; + struct mlx5_fc *counter; + struct neigh_table *tbl; + bool neigh_used = false; + struct neighbour *n; + + if (m_neigh->family == AF_INET) + tbl = &arp_tbl; +#if IS_ENABLED(CONFIG_IPV6) + else if (m_neigh->family == AF_INET6) + tbl = ipv6_stub->nd_tbl; +#endif + else + return; + + list_for_each_entry(e, &nhe->encap_list, encap_list) { + if (!(e->flags & MLX5_ENCAP_ENTRY_VALID)) + continue; + list_for_each_entry(flow, &e->flows, encap) { + if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + counter = mlx5_flow_rule_counter(flow->rule); + mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { + neigh_used = true; + break; + } + } + } + } + + if (neigh_used) { + nhe->reported_lastuse = jiffies; + + /* find the relevant neigh according to the cached device and + * dst ip pair + */ + n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev); + if (!n) { + WARN(1, "The neighbour already freed\n"); + return; + } + + neigh_event_send(n, NULL); + neigh_release(n); + } } static void mlx5e_detach_encap(struct mlx5e_priv *priv, @@ -188,22 +376,20 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, list_del(&flow->encap); if (list_empty(next)) { - struct mlx5_encap_entry *e; + struct mlx5e_encap_entry *e; + + e = list_entry(next, struct mlx5e_encap_entry, flows); + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); - e = list_entry(next, struct mlx5_encap_entry, flows); - if (e->n) { + if (e->flags & MLX5_ENCAP_ENTRY_VALID) mlx5_encap_dealloc(priv->mdev, e->encap_id); - neigh_release(e->n); - } + hlist_del_rcu(&e->encap_hlist); + kfree(e->encap_header); kfree(e); } } -/* we get here also when setting rule to the FW failed, etc. It means that the - * flow rule itself might not exist, but some offloading related to the actions - * should be cleaned. 
- */ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { @@ -631,16 +817,18 @@ static int parse_cls_flower(struct mlx5e_priv *priv, { struct mlx5_core_dev *dev = priv->mdev; struct mlx5_eswitch *esw = dev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep; u8 min_inline; int err; err = __parse_cls_flower(priv, spec, f, &min_inline); - if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) && - rep->vport != FDB_UPLINK_VPORT) { - if (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && - esw->offloads.inline_mode < min_inline) { + if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { + rep = rpriv->rep; + if (rep->vport != FDB_UPLINK_VPORT && + (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && + esw->offloads.inline_mode < min_inline)) { netdev_warn(priv->netdev, "Flow is not offloaded due to min inline setting, required %d actual %d\n", min_inline, esw->offloads.inline_mode); @@ -651,29 +839,313 @@ static int parse_cls_flower(struct mlx5e_priv *priv, return err; } +struct pedit_headers { + struct ethhdr eth; + struct iphdr ip4; + struct ipv6hdr ip6; + struct tcphdr tcp; + struct udphdr udp; +}; + +static int pedit_header_offsets[] = { + [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), + [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), + [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), + [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), + [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), +}; + +#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype]) + +static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, + struct pedit_headers *masks, + struct pedit_headers *vals) +{ + u32 *curr_pmask, *curr_pval; + + if (hdr_type >= __PEDIT_HDR_TYPE_MAX) + goto out_err; + + curr_pmask = (u32 *)(pedit_header(masks, hdr_type) + offset); + curr_pval = (u32 *)(pedit_header(vals, hdr_type) + offset); + + if (*curr_pmask & mask) /* disallow acting twice on the same location */ + goto out_err; + + *curr_pmask |= mask; + *curr_pval |= (val & mask); + + return 0; + +out_err: + return -EOPNOTSUPP; +} + +struct mlx5_fields { + u8 field; + u8 size; + u32 offset; +}; + +static struct mlx5_fields fields[] = { + {MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16, 4, offsetof(struct pedit_headers, eth.h_dest[0])}, + {MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0, 2, offsetof(struct pedit_headers, eth.h_dest[4])}, + {MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16, 4, offsetof(struct pedit_headers, eth.h_source[0])}, + {MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0, 2, offsetof(struct pedit_headers, eth.h_source[4])}, + {MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE, 2, offsetof(struct pedit_headers, eth.h_proto)}, + + {MLX5_ACTION_IN_FIELD_OUT_IP_DSCP, 1, offsetof(struct pedit_headers, ip4.tos)}, + {MLX5_ACTION_IN_FIELD_OUT_IP_TTL, 1, offsetof(struct pedit_headers, ip4.ttl)}, + {MLX5_ACTION_IN_FIELD_OUT_SIPV4, 4, offsetof(struct pedit_headers, ip4.saddr)}, + {MLX5_ACTION_IN_FIELD_OUT_DIPV4, 4, offsetof(struct pedit_headers, ip4.daddr)}, + + {MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[0])}, + {MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[1])}, + {MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[2])}, + {MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0, 4, offsetof(struct pedit_headers, 
ip6.saddr.s6_addr32[3])}, + {MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[0])}, + {MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[1])}, + {MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[2])}, + {MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[3])}, + + {MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT, 2, offsetof(struct pedit_headers, tcp.source)}, + {MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT, 2, offsetof(struct pedit_headers, tcp.dest)}, + {MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS, 1, offsetof(struct pedit_headers, tcp.ack_seq) + 5}, + + {MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT, 2, offsetof(struct pedit_headers, udp.source)}, + {MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT, 2, offsetof(struct pedit_headers, udp.dest)}, +}; + +/* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at + * max from the SW pedit action. On success, it says how many HW actions were + * actually parsed. + */ +static int offload_pedit_fields(struct pedit_headers *masks, + struct pedit_headers *vals, + struct mlx5e_tc_flow_parse_attr *parse_attr) +{ + struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; + int i, action_size, nactions, max_actions, first, last; + void *s_masks_p, *a_masks_p, *vals_p; + u32 s_mask, a_mask, val; + struct mlx5_fields *f; + u8 cmd, field_bsize; + unsigned long mask; + void *action; + + set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET]; + add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD]; + set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET]; + add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD]; + + action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + action = parse_attr->mod_hdr_actions; + max_actions = parse_attr->num_mod_hdr_actions; + nactions = 0; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + f = &fields[i]; + /* avoid seeing bits set from previous iterations */ + s_mask = a_mask = mask = val = 0; + + s_masks_p = (void *)set_masks + f->offset; + a_masks_p = (void *)add_masks + f->offset; + + memcpy(&s_mask, s_masks_p, f->size); + memcpy(&a_mask, a_masks_p, f->size); + + if (!s_mask && !a_mask) /* nothing to offload here */ + continue; + + if (s_mask && a_mask) { + printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field); + return -EOPNOTSUPP; + } + + if (nactions == max_actions) { + printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions); + return -EOPNOTSUPP; + } + + if (s_mask) { + cmd = MLX5_ACTION_TYPE_SET; + mask = s_mask; + vals_p = (void *)set_vals + f->offset; + /* clear to denote we consumed this field */ + memset(s_masks_p, 0, f->size); + } else { + cmd = MLX5_ACTION_TYPE_ADD; + mask = a_mask; + vals_p = (void *)add_vals + f->offset; + /* clear to denote we consumed this field */ + memset(a_masks_p, 0, f->size); + } + + memcpy(&val, vals_p, f->size); + + field_bsize = f->size * BITS_PER_BYTE; + first = find_first_bit(&mask, field_bsize); + last = find_last_bit(&mask, field_bsize); + if (first > 0 || last != (field_bsize - 1)) { + printk(KERN_WARNING "mlx5: partial rewrite (mask %lx) is currently not offloaded\n", + mask); + return -EOPNOTSUPP; + } + + MLX5_SET(set_action_in, action, action_type, cmd); + MLX5_SET(set_action_in, action, field, f->field); + + if (cmd == MLX5_ACTION_TYPE_SET) { + MLX5_SET(set_action_in, action, offset, 0); + /* length is num of bits to be written, zero means length of 32 */ + MLX5_SET(set_action_in, action, length, 
field_bsize); + } + + if (field_bsize == 32) + MLX5_SET(set_action_in, action, data, ntohl(val)); + else if (field_bsize == 16) + MLX5_SET(set_action_in, action, data, ntohs(val)); + else if (field_bsize == 8) + MLX5_SET(set_action_in, action, data, val); + + action += action_size; + nactions++; + } + + parse_attr->num_mod_hdr_actions = nactions; + return 0; +} + +static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, + const struct tc_action *a, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr) +{ + int nkeys, action_size, max_actions; + + nkeys = tcf_pedit_nkeys(a); + action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + + if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */ + max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, max_modify_header_actions); + else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */ + max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, max_modify_header_actions); + + /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */ + max_actions = min(max_actions, nkeys * 16); + + parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL); + if (!parse_attr->mod_hdr_actions) + return -ENOMEM; + + parse_attr->num_mod_hdr_actions = max_actions; + return 0; +} + +static const struct pedit_headers zero_masks = {}; + +static int parse_tc_pedit_action(struct mlx5e_priv *priv, + const struct tc_action *a, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr) +{ + struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks; + int nkeys, i, err = -EOPNOTSUPP; + u32 mask, val, offset; + u8 cmd, htype; + + nkeys = tcf_pedit_nkeys(a); + + memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX); + memset(vals, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX); + + for (i = 0; i < nkeys; i++) { + htype = tcf_pedit_htype(a, i); + cmd = tcf_pedit_cmd(a, i); + err = -EOPNOTSUPP; /* can't be all optimistic */ + + if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) { + printk(KERN_WARNING "mlx5: legacy pedit isn't offloaded\n"); + goto out_err; + } + + if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) { + printk(KERN_WARNING "mlx5: pedit cmd %d isn't offloaded\n", cmd); + goto out_err; + } + + mask = tcf_pedit_mask(a, i); + val = tcf_pedit_val(a, i); + offset = tcf_pedit_offset(a, i); + + err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]); + if (err) + goto out_err; + } + + err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr); + if (err) + goto out_err; + + err = offload_pedit_fields(masks, vals, parse_attr); + if (err < 0) + goto out_dealloc_parsed_actions; + + for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { + cmd_masks = &masks[cmd]; + if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { + printk(KERN_WARNING "mlx5: attempt to offload an unsupported field (cmd %d)\n", + cmd); + print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS, + 16, 1, cmd_masks, sizeof(zero_masks), true); + err = -EOPNOTSUPP; + goto out_dealloc_parsed_actions; + } + } + + return 0; + +out_dealloc_parsed_actions: + kfree(parse_attr->mod_hdr_actions); +out_err: + return err; +} + static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, - u32 *action, u32 *flow_tag) + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) { + struct mlx5_nic_flow_attr *attr = flow->nic_attr; const struct tc_action *a; LIST_HEAD(actions); + int err; if (tc_no_actions(exts)) return -EINVAL; - *flow_tag = 
MLX5_FS_DEFAULT_FLOW_TAG; - *action = 0; + attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + attr->action = 0; tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { /* Only support a single action per rule */ - if (*action) + if (attr->action) return -EINVAL; if (is_tcf_gact_shot(a)) { - *action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; if (MLX5_CAP_FLOWTABLE(priv->mdev, flow_table_properties_nic_receive.flow_counter)) - *action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + continue; + } + + if (is_tcf_pedit(a)) { + err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL, + parse_attr); + if (err) + return err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; continue; } @@ -686,8 +1158,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } - *flow_tag = mark; - *action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->flow_tag = mark; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; continue; } @@ -853,16 +1325,17 @@ static void gen_vxlan_header_ipv6(struct net_device *out_dev, static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, struct net_device *mirred_dev, - struct mlx5_encap_entry *e, - struct net_device **out_dev) + struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN; struct ip_tunnel_key *tun_key = &e->tun_info.key; + struct net_device *out_dev; struct neighbour *n = NULL; struct flowi4 fl4 = {}; char *encap_header; int ttl, err; + u8 nud_state; if (max_encap_size < ipv4_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", @@ -887,25 +1360,36 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, fl4.daddr = tun_key->u.ipv4.dst; fl4.saddr = tun_key->u.ipv4.src; - err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev, + err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &fl4, &n, &ttl); if (err) goto out; - if (!(n->nud_state & NUD_VALID)) { - pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr); - err = -EOPNOTSUPP; + /* used by mlx5e_detach_encap to lookup a neigh hash table + * entry in the neigh hash table when a user deletes a rule + */ + e->m_neigh.dev = n->dev; + e->m_neigh.family = n->ops->family; + memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + e->out_dev = out_dev; + + /* It's importent to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes it's validity state, we would find the relevant neigh + * in the hash. 
+ */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + if (err) goto out; - } - e->n = n; - e->out_dev = *out_dev; - - neigh_ha_snapshot(e->h_dest, n, *out_dev); + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(e->h_dest, n->ha); + read_unlock_bh(&n->lock); switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: - gen_vxlan_header_ipv4(*out_dev, encap_header, + gen_vxlan_header_ipv4(out_dev, encap_header, ipv4_encap_size, e->h_dest, ttl, fl4.daddr, fl4.saddr, tun_key->tp_dst, @@ -913,31 +1397,49 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, break; default: err = -EOPNOTSUPP; - goto out; + goto destroy_neigh_entry; + } + e->encap_size = ipv4_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + neigh_release(n); + return -EAGAIN; } err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, ipv4_encap_size, encap_header, &e->encap_id); + if (err) + goto destroy_neigh_entry; + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); + neigh_release(n); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); out: - if (err && n) - neigh_release(n); kfree(encap_header); + if (n) + neigh_release(n); return err; } static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, - struct mlx5_encap_entry *e, - struct net_device **out_dev) - + struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN; struct ip_tunnel_key *tun_key = &e->tun_info.key; + struct net_device *out_dev; struct neighbour *n = NULL; struct flowi6 fl6 = {}; char *encap_header; int err, ttl = 0; + u8 nud_state; if (max_encap_size < ipv6_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", @@ -963,25 +1465,36 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, fl6.daddr = tun_key->u.ipv6.dst; fl6.saddr = tun_key->u.ipv6.src; - err = mlx5e_route_lookup_ipv6(priv, mirred_dev, out_dev, + err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &fl6, &n, &ttl); if (err) goto out; - if (!(n->nud_state & NUD_VALID)) { - pr_warn("%s: can't offload, neighbour to %pI6 invalid\n", __func__, &fl6.daddr); - err = -EOPNOTSUPP; + /* used by mlx5e_detach_encap to lookup a neigh hash table + * entry in the neigh hash table when a user deletes a rule + */ + e->m_neigh.dev = n->dev; + e->m_neigh.family = n->ops->family; + memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + e->out_dev = out_dev; + + /* It's importent to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes it's validity state, we would find the relevant neigh + * in the hash. 
+ */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + if (err) goto out; - } - - e->n = n; - e->out_dev = *out_dev; - neigh_ha_snapshot(e->h_dest, n, *out_dev); + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(e->h_dest, n->ha); + read_unlock_bh(&n->lock); switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: - gen_vxlan_header_ipv6(*out_dev, encap_header, + gen_vxlan_header_ipv6(out_dev, encap_header, ipv6_encap_size, e->h_dest, ttl, &fl6.daddr, &fl6.saddr, tun_key->tp_dst, @@ -989,31 +1502,51 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, break; default: err = -EOPNOTSUPP; - goto out; + goto destroy_neigh_entry; + } + + e->encap_size = ipv6_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + neigh_release(n); + return -EAGAIN; } err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, ipv6_encap_size, encap_header, &e->encap_id); + if (err) + goto destroy_neigh_entry; + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); + neigh_release(n); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); out: - if (err && n) - neigh_release(n); kfree(encap_header); + if (n) + neigh_release(n); return err; } static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct ip_tunnel_info *tun_info, struct net_device *mirred_dev, - struct mlx5_esw_flow_attr *attr) + struct net_device **encap_dev, + struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw); - struct mlx5e_priv *up_priv = netdev_priv(up_dev); unsigned short family = ip_tunnel_info_af(tun_info); + struct mlx5e_priv *up_priv = netdev_priv(up_dev); + struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct ip_tunnel_key *key = &tun_info->key; - struct mlx5_encap_entry *e; - struct net_device *out_dev; - int tunnel_type, err = -EOPNOTSUPP; + struct mlx5e_encap_entry *e; + int tunnel_type, err = 0; uintptr_t hash_key; bool found = false; @@ -1048,10 +1581,8 @@ vxlan_encap_offload_err: } } - if (found) { - attr->encap = e; - return 0; - } + if (found) + goto attach_flow; e = kzalloc(sizeof(*e), GFP_KERNEL); if (!e) @@ -1062,16 +1593,21 @@ vxlan_encap_offload_err: INIT_LIST_HEAD(&e->flows); if (family == AF_INET) - err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev); + err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e); else if (family == AF_INET6) - err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e, &out_dev); + err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e); - if (err) + if (err && err != -EAGAIN) goto out_err; - attr->encap = e; hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); +attach_flow: + list_add(&flow->encap, &e->flows); + *encap_dev = e->out_dev; + if (e->flags & MLX5_ENCAP_ENTRY_VALID) + attr->encap_id = e->encap_id; + return err; out_err: @@ -1080,20 +1616,22 @@ out_err: } static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, + struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow) { - struct mlx5_esw_flow_attr *attr = flow->attr; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_rep_priv *rpriv = priv->ppriv; struct ip_tunnel_info *info = NULL; const struct tc_action *a; LIST_HEAD(actions); bool encap = false; - int err; + int err = 0; if (tc_no_actions(exts)) return -EINVAL; memset(attr, 0, sizeof(*attr)); - attr->in_rep = 
priv->ppriv; + attr->in_rep = rpriv->rep; tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { @@ -1103,9 +1641,19 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, continue; } + if (is_tcf_pedit(a)) { + err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB, + parse_attr); + if (err) + return err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + continue; + } + if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); - struct net_device *out_dev; + struct net_device *out_dev, *encap_dev = NULL; struct mlx5e_priv *out_priv; out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); @@ -1115,18 +1663,20 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; out_priv = netdev_priv(out_dev); - attr->out_rep = out_priv->ppriv; + rpriv = out_priv->ppriv; + attr->out_rep = rpriv->rep; } else if (encap) { err = mlx5e_attach_encap(priv, info, - out_dev, attr); - if (err) + out_dev, &encap_dev, flow); + if (err && err != -EAGAIN) return err; - list_add(&flow->encap, &attr->encap->flows); attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - out_priv = netdev_priv(attr->encap->out_dev); - attr->out_rep = out_priv->ppriv; + out_priv = netdev_priv(encap_dev); + rpriv = out_priv->ppriv; + attr->out_rep = rpriv->rep; + attr->parse_attr = parse_attr; } else { pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", priv->netdev->name, out_dev->name); @@ -1166,28 +1716,30 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } - return 0; + return err; } int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, struct tc_cls_flower_offload *f) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_table *tc = &priv->fs.tc; - int err, attr_size = 0; - u32 flow_tag, action; struct mlx5e_tc_flow *flow; - struct mlx5_flow_spec *spec; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int attr_size, err = 0; u8 flow_flags = 0; if (esw && esw->mode == SRIOV_OFFLOADS) { flow_flags = MLX5E_TC_FLOW_ESWITCH; attr_size = sizeof(struct mlx5_esw_flow_attr); + } else { + flow_flags = MLX5E_TC_FLOW_NIC; + attr_size = sizeof(struct mlx5_nic_flow_attr); } flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); - spec = mlx5_vzalloc(sizeof(*spec)); - if (!spec || !flow) { + parse_attr = mlx5_vzalloc(sizeof(*parse_attr)); + if (!parse_attr || !flow) { err = -ENOMEM; goto err_free; } @@ -1195,42 +1747,54 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, flow->cookie = f->cookie; flow->flags = flow_flags; - err = parse_cls_flower(priv, flow, spec, f); + err = parse_cls_flower(priv, flow, &parse_attr->spec, f); if (err < 0) goto err_free; if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { - flow->attr = (struct mlx5_esw_flow_attr *)(flow + 1); - err = parse_tc_fdb_actions(priv, f->exts, flow); + err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow); if (err < 0) - goto err_free; - flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr); + goto err_handle_encap_flow; + flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow); } else { - err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag); + err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow); if (err < 0) goto err_free; - flow->rule 
= mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag); + flow->rule = mlx5e_tc_add_nic_flow(priv, parse_attr, flow); } if (IS_ERR(flow->rule)) { err = PTR_ERR(flow->rule); - goto err_del_rule; + goto err_free; } + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; err = rhashtable_insert_fast(&tc->ht, &flow->node, tc->ht_params); if (err) goto err_del_rule; - goto out; + if (flow->flags & MLX5E_TC_FLOW_ESWITCH && + !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)) + kvfree(parse_attr); + return err; err_del_rule: mlx5e_tc_del_flow(priv, flow); +err_handle_encap_flow: + if (err == -EAGAIN) { + err = rhashtable_insert_fast(&tc->ht, &flow->node, + tc->ht_params); + if (err) + mlx5e_tc_del_flow(priv, flow); + else + return 0; + } + err_free: + kvfree(parse_attr); kfree(flow); -out: - kvfree(spec); return err; } @@ -1249,7 +1813,6 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, mlx5e_tc_del_flow(priv, flow); - kfree(flow); return 0; @@ -1272,6 +1835,9 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, if (!flow) return -EINVAL; + if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED)) + return 0; + counter = mlx5_flow_rule_counter(flow->rule); if (!counter) return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 34bf903fc886..ecbe30d808ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -46,6 +46,15 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, int mlx5e_stats_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f); +struct mlx5e_encap_entry; +void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); +void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); + +struct mlx5e_neigh_hash_entry; +void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); + static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return atomic_read(&priv->fs.tc.ht.nelems); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 57f5e2d7ebd1..ab3bb026ff9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -33,34 +33,12 @@ #include <linux/tcp.h> #include <linux/if_vlan.h> #include "en.h" +#include "ipoib.h" #define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ MLX5E_SQ_NOPS_ROOM) -void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw) -{ - struct mlx5_wq_cyc *wq = &sq->wq; - - u16 pi = sq->pc & wq->sz_m1; - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - - memset(cseg, 0, sizeof(*cseg)); - - cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | 0x01); - - sq->pc++; - sq->stats.nop++; - - if (notify_hw) { - cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); - } -} - static inline void mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) { @@ -76,25 +54,25 @@ static inline void mlx5e_tx_dma_unmap(struct device *pdev, } } -static inline void mlx5e_dma_push(struct mlx5e_sq *sq, +static inline void mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size, enum mlx5e_dma_map_type map_type) { u32 i = sq->dma_fifo_pc & sq->dma_fifo_mask; - sq->db.txq.dma_fifo[i].addr = addr; - sq->db.txq.dma_fifo[i].size = size; - sq->db.txq.dma_fifo[i].type = 
map_type; + sq->db.dma_fifo[i].addr = addr; + sq->db.dma_fifo[i].size = size; + sq->db.dma_fifo[i].type = map_type; sq->dma_fifo_pc++; } -static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i) +static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i) { - return &sq->db.txq.dma_fifo[i & sq->dma_fifo_mask]; + return &sq->db.dma_fifo[i & sq->dma_fifo_mask]; } -static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, u8 num_dma) +static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) { int i; @@ -111,6 +89,7 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, { struct mlx5e_priv *priv = netdev_priv(dev); int channel_ix = fallback(dev, skb); + u16 num_channels; int up = 0; if (!netdev_get_num_tc(dev)) @@ -122,11 +101,11 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, /* channel_ix can be larger than num_channels since * dev->num_real_tx_queues = num_channels * num_tc */ - if (channel_ix >= priv->params.num_channels) - channel_ix = reciprocal_scale(channel_ix, - priv->params.num_channels); + num_channels = priv->channels.params.num_channels; + if (channel_ix >= num_channels) + channel_ix = reciprocal_scale(channel_ix, num_channels); - return priv->channeltc_to_txq_map[channel_ix][up]; + return priv->channel_tc2txq[channel_ix][up]; } static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) @@ -175,25 +154,6 @@ static inline unsigned int mlx5e_calc_min_inline(enum mlx5_inline_modes mode, } } -static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, - struct sk_buff *skb, bool bf) -{ - /* Some NIC TX decisions, e.g loopback, are based on the packet - * headers and occur before the data gather. - * Therefore these headers must be copied into the WQE - */ - if (bf) { - u16 ihs = skb_headlen(skb); - - if (skb_vlan_tag_present(skb)) - ihs += VLAN_HLEN; - - if (ihs <= sq->max_inline) - return skb_headlen(skb); - } - return mlx5e_calc_min_inline(sq->min_inline_mode, skb); -} - static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data, unsigned int *skb_len, unsigned int len) @@ -218,31 +178,9 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs, mlx5e_tx_skb_pull_inline(skb_data, skb_len, cpy2_sz); } -static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) +static inline void +mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) { - struct mlx5_wq_cyc *wq = &sq->wq; - - u16 pi = sq->pc & wq->sz_m1; - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - struct mlx5e_tx_wqe_info *wi = &sq->db.txq.wqe_info[pi]; - - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - struct mlx5_wqe_eth_seg *eseg = &wqe->eth; - struct mlx5_wqe_data_seg *dseg; - - unsigned char *skb_data = skb->data; - unsigned int skb_len = skb->len; - u8 opcode = MLX5_OPCODE_SEND; - dma_addr_t dma_addr = 0; - unsigned int num_bytes; - bool bf = false; - u16 headlen; - u16 ds_cnt; - u16 ihs; - int i; - - memset(wqe, 0, sizeof(*wqe)); - if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; if (skb->encapsulation) { @@ -254,74 +192,51 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) } } else sq->stats.csum_none++; +} - if (sq->cc != sq->prev_cc) { - sq->prev_cc = sq->cc; - sq->bf_budget = (sq->cc == sq->pc) ? 
MLX5E_SQ_BF_BUDGET : 0; - } - - if (skb_is_gso(skb)) { - eseg->mss = cpu_to_be16(skb_shinfo(skb)->gso_size); - opcode = MLX5_OPCODE_LSO; +static inline u16 +mlx5e_txwqe_build_eseg_gso(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg, unsigned int *num_bytes) +{ + u16 ihs; - if (skb->encapsulation) { - ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb); - sq->stats.tso_inner_packets++; - sq->stats.tso_inner_bytes += skb->len - ihs; - } else { - ihs = skb_transport_offset(skb) + tcp_hdrlen(skb); - sq->stats.tso_packets++; - sq->stats.tso_bytes += skb->len - ihs; - } + eseg->mss = cpu_to_be16(skb_shinfo(skb)->gso_size); - sq->stats.packets += skb_shinfo(skb)->gso_segs; - num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; + if (skb->encapsulation) { + ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb); + sq->stats.tso_inner_packets++; + sq->stats.tso_inner_bytes += skb->len - ihs; } else { - bf = sq->bf_budget && - !skb->xmit_more && - !skb_shinfo(skb)->nr_frags; - ihs = mlx5e_get_inline_hdr_size(sq, skb, bf); - sq->stats.packets++; - num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); - } - - sq->stats.bytes += num_bytes; - wi->num_bytes = num_bytes; - - ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; - if (ihs) { - if (skb_vlan_tag_present(skb)) { - mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs, &skb_data, &skb_len); - ihs += VLAN_HLEN; - } else { - memcpy(eseg->inline_hdr.start, skb_data, ihs); - mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); - } - eseg->inline_hdr.sz = cpu_to_be16(ihs); - ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start), MLX5_SEND_WQE_DS); - } else if (skb_vlan_tag_present(skb)) { - eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); - eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); + ihs = skb_transport_offset(skb) + tcp_hdrlen(skb); + sq->stats.tso_packets++; + sq->stats.tso_bytes += skb->len - ihs; } - dseg = (struct mlx5_wqe_data_seg *)cseg + ds_cnt; + *num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; + return ihs; +} - wi->num_dma = 0; +static inline int +mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, + unsigned char *skb_data, u16 headlen, + struct mlx5_wqe_data_seg *dseg) +{ + dma_addr_t dma_addr = 0; + u8 num_dma = 0; + int i; - headlen = skb_len - skb->data_len; if (headlen) { dma_addr = dma_map_single(sq->pdev, skb_data, headlen, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) - goto dma_unmap_wqe_err; + return -ENOMEM; dseg->addr = cpu_to_be64(dma_addr); dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32(headlen); mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE); - wi->num_dma++; - + num_dma++; dseg++; } @@ -330,59 +245,120 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) int fsz = skb_frag_size(frag); dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, - DMA_TO_DEVICE); + DMA_TO_DEVICE); if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) - goto dma_unmap_wqe_err; + return -ENOMEM; dseg->addr = cpu_to_be64(dma_addr); dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32(fsz); mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); - wi->num_dma++; - + num_dma++; dseg++; } - ds_cnt += wi->num_dma; - - cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + return num_dma; +} - sq->db.txq.skb[pi] = skb; +static inline void +mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, + u8 opcode, 
u16 ds_cnt, u32 num_bytes, u8 num_dma, + struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi; + wi->num_bytes = num_bytes; + wi->num_dma = num_dma; wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); - sq->pc += wi->num_wqebbs; + wi->skb = skb; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - netdev_tx_sent_queue(sq->txq, wi->num_bytes); + netdev_tx_sent_queue(sq->txq, num_bytes); if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM))) { + sq->pc += wi->num_wqebbs; + if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM))) { netif_tx_stop_queue(sq->txq); sq->stats.stopped++; } - sq->stats.xmit_more += skb->xmit_more; - if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) { - int bf_sz = 0; + if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) + mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg); - if (bf && test_bit(MLX5E_SQ_STATE_BF_ENABLE, &sq->state)) - bf_sz = wi->num_wqebbs << 3; + /* fill sq edge with nops to avoid wqe wrap around */ + while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { + sq->db.wqe_info[pi].skb = NULL; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + sq->stats.nop++; + } +} - cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, &wqe->ctrl, bf_sz); +static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + + u16 pi = sq->pc & wq->sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; + + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + + unsigned char *skb_data = skb->data; + unsigned int skb_len = skb->len; + u8 opcode = MLX5_OPCODE_SEND; + unsigned int num_bytes; + int num_dma; + u16 headlen; + u16 ds_cnt; + u16 ihs; + + memset(wqe, 0, sizeof(*wqe)); + + mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); + + if (skb_is_gso(skb)) { + opcode = MLX5_OPCODE_LSO; + ihs = mlx5e_txwqe_build_eseg_gso(sq, skb, eseg, &num_bytes); + sq->stats.packets += skb_shinfo(skb)->gso_segs; + } else { + ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); + num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); + sq->stats.packets++; } + sq->stats.bytes += num_bytes; + sq->stats.xmit_more += skb->xmit_more; - /* fill sq edge with nops to avoid wqe wrap around */ - while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { - sq->db.txq.skb[pi] = NULL; - mlx5e_send_nop(sq, false); + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + if (ihs) { + if (skb_vlan_tag_present(skb)) { + mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs, &skb_data, &skb_len); + ihs += VLAN_HLEN; + } else { + memcpy(eseg->inline_hdr.start, skb_data, ihs); + mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); + } + eseg->inline_hdr.sz = cpu_to_be16(ihs); + ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start), MLX5_SEND_WQE_DS); + } else if (skb_vlan_tag_present(skb)) { + eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); + eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); } - if (bf) - sq->bf_budget--; + headlen = skb_len - skb->data_len; + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen, + (struct mlx5_wqe_data_seg *)cseg + ds_cnt); + if (unlikely(num_dma < 0)) + goto dma_unmap_wqe_err; + + mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma, + num_bytes, 
num_dma, wi, cseg); return NETDEV_TX_OK; @@ -398,21 +374,21 @@ dma_unmap_wqe_err: netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_sq *sq = priv->txq_to_sq_map[skb_get_queue_mapping(skb)]; + struct mlx5e_txqsq *sq = priv->txq2sq[skb_get_queue_mapping(skb)]; return mlx5e_sq_xmit(sq, skb); } bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) { - struct mlx5e_sq *sq; + struct mlx5e_txqsq *sq; u32 dma_fifo_cc; u32 nbytes; u16 npkts; u16 sqcc; int i; - sq = container_of(cq, struct mlx5e_sq, cq); + sq = container_of(cq, struct mlx5e_txqsq, cq); if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) return false; @@ -450,8 +426,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) last_wqe = (sqcc == wqe_counter); ci = sqcc & sq->wq.sz_m1; - skb = sq->db.txq.skb[ci]; - wi = &sq->db.txq.wqe_info[ci]; + wi = &sq->db.wqe_info[ci]; + skb = wi->skb; if (unlikely(!skb)) { /* nop */ sqcc++; @@ -492,7 +468,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) netdev_tx_completed_queue(sq->txq, npkts, nbytes); if (netif_tx_queue_stopped(sq->txq) && - mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM)) { + mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM)) { netif_tx_wake_queue(sq->txq); sq->stats.wake++; } @@ -500,7 +476,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) return (i == MLX5E_TX_CQ_POLL_BUDGET); } -static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq) +void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) { struct mlx5e_tx_wqe_info *wi; struct sk_buff *skb; @@ -509,8 +485,8 @@ static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq) while (sq->cc != sq->pc) { ci = sq->cc & sq->wq.sz_m1; - skb = sq->db.txq.skb[ci]; - wi = &sq->db.txq.wqe_info[ci]; + wi = &sq->db.wqe_info[ci]; + skb = wi->skb; if (!skb) { /* nop */ sq->cc++; @@ -529,36 +505,89 @@ static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq) } } -static void mlx5e_free_xdp_sq_descs(struct mlx5e_sq *sq) +#ifdef CONFIG_MLX5_CORE_IPOIB + +struct mlx5_wqe_eth_pad { + u8 rsvd0[16]; +}; + +struct mlx5i_tx_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_datagram_seg datagram; + struct mlx5_wqe_eth_pad pad; + struct mlx5_wqe_eth_seg eth; +}; + +static inline void +mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey, + struct mlx5_wqe_datagram_seg *dseg) { - struct mlx5e_sq_wqe_info *wi; - struct mlx5e_dma_info *di; - u16 ci; + memcpy(&dseg->av, av, sizeof(struct mlx5_av)); + dseg->av.dqp_dct = cpu_to_be32(dqpn | MLX5_EXTENDED_UD_AV); + dseg->av.key.qkey.qkey = cpu_to_be32(dqkey); +} - while (sq->cc != sq->pc) { - ci = sq->cc & sq->wq.sz_m1; - di = &sq->db.xdp.di[ci]; - wi = &sq->db.xdp.wqe_info[ci]; +netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_av *av, u32 dqpn, u32 dqkey) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = sq->pc & wq->sz_m1; + struct mlx5i_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; - if (wi->opcode == MLX5_OPCODE_NOP) { - sq->cc++; - continue; - } + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_datagram_seg *datagram = &wqe->datagram; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; - sq->cc += wi->num_wqebbs; + unsigned char *skb_data = skb->data; + unsigned int skb_len = skb->len; + u8 opcode = MLX5_OPCODE_SEND; + unsigned int num_bytes; + int num_dma; + u16 headlen; + u16 ds_cnt; + u16 ihs; - mlx5e_page_release(&sq->channel->rq, di, false); + 
memset(wqe, 0, sizeof(*wqe)); + + mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram); + + mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); + + if (skb_is_gso(skb)) { + opcode = MLX5_OPCODE_LSO; + ihs = mlx5e_txwqe_build_eseg_gso(sq, skb, eseg, &num_bytes); + } else { + ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); + num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); } -} -void mlx5e_free_sq_descs(struct mlx5e_sq *sq) -{ - switch (sq->type) { - case MLX5E_SQ_TXQ: - mlx5e_free_txq_sq_descs(sq); - break; - case MLX5E_SQ_XDP: - mlx5e_free_xdp_sq_descs(sq); - break; + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + if (ihs) { + memcpy(eseg->inline_hdr.start, skb_data, ihs); + mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); + eseg->inline_hdr.sz = cpu_to_be16(ihs); + ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start), MLX5_SEND_WQE_DS); } + + headlen = skb_len - skb->data_len; + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen, + (struct mlx5_wqe_data_seg *)cseg + ds_cnt); + if (unlikely(num_dma < 0)) + goto dma_unmap_wqe_err; + + mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma, + num_bytes, num_dma, wi, cseg); + + return NETDEV_TX_OK; + +dma_unmap_wqe_err: + sq->stats.dropped++; + mlx5e_dma_unmap_wqe_err(sq, wi->num_dma); + + dev_kfree_skb_any(skb); + + return NETDEV_TX_OK; } + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index e5c12a732aa1..5ca6714e3e02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -37,124 +37,69 @@ struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq) struct mlx5_cqwq *wq = &cq->wq; u32 ci = mlx5_cqwq_get_ci(wq); struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); - int cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK; - int sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1; + u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK; + u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1; if (cqe_ownership_bit != sw_ownership_val) return NULL; /* ensure cqe content is read after cqe ownership bit */ - rmb(); + dma_rmb(); return cqe; } -static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) +static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, + struct mlx5e_icosq *sq, + struct mlx5_cqe64 *cqe, + u16 *sqcc) { - struct mlx5e_sq *sq = container_of(cq, struct mlx5e_sq, cq); - struct mlx5_wq_cyc *wq; - struct mlx5_cqe64 *cqe; - u16 sqcc; - - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + struct mlx5_wq_cyc *wq = &sq->wq; + u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; + struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; + struct mlx5e_rq *rq = &sq->channel->rq; + + prefetch(rq); + mlx5_cqwq_pop(&cq->wq); + *sqcc += icowi->num_wqebbs; + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { + WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", + cqe->op_own); return; + } - cqe = mlx5e_get_cqe(cq); - if (likely(!cqe)) + if (likely(icowi->opcode == MLX5_OPCODE_UMR)) { + mlx5e_post_rx_mpwqe(rq); return; + } - wq = &sq->wq; - - /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), - * otherwise a cq overrun may occur - */ - sqcc = sq->cc; - - do { - u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; - struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; - - mlx5_cqwq_pop(&cq->wq); - sqcc += icowi->num_wqebbs; - - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { - WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", - cqe->op_own); - 
break; - } - - switch (icowi->opcode) { - case MLX5_OPCODE_NOP: - break; - case MLX5_OPCODE_UMR: - mlx5e_post_rx_mpwqe(&sq->channel->rq); - break; - default: - WARN_ONCE(true, - "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", - icowi->opcode); - } - - } while ((cqe = mlx5e_get_cqe(cq))); - - mlx5_cqwq_update_db_record(&cq->wq); - - /* ensure cq space is freed before enabling more cqes */ - wmb(); - - sq->cc = sqcc; + if (unlikely(icowi->opcode != MLX5_OPCODE_NOP)) + WARN_ONCE(true, + "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", + icowi->opcode); } -static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq) +static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) { - struct mlx5e_sq *sq; + struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); + struct mlx5_cqe64 *cqe; u16 sqcc; - int i; - - sq = container_of(cq, struct mlx5e_sq, cq); if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) - return false; + return; + + cqe = mlx5e_get_cqe(cq); + if (likely(!cqe)) + return; /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), * otherwise a cq overrun may occur */ sqcc = sq->cc; - for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { - struct mlx5_cqe64 *cqe; - u16 wqe_counter; - bool last_wqe; - - cqe = mlx5e_get_cqe(cq); - if (!cqe) - break; - - mlx5_cqwq_pop(&cq->wq); - - wqe_counter = be16_to_cpu(cqe->wqe_counter); - - do { - struct mlx5e_sq_wqe_info *wi; - struct mlx5e_dma_info *di; - u16 ci; - - last_wqe = (sqcc == wqe_counter); - - ci = sqcc & sq->wq.sz_m1; - di = &sq->db.xdp.di[ci]; - wi = &sq->db.xdp.wqe_info[ci]; - - if (unlikely(wi->opcode == MLX5_OPCODE_NOP)) { - sqcc++; - continue; - } - - sqcc += wi->num_wqebbs; - /* Recycle RX page */ - mlx5e_page_release(&sq->channel->rq, di, true); - } while (!last_wqe); - } + /* by design, there's only a single cqe */ + mlx5e_poll_ico_single_cqe(cq, sq, cqe, &sqcc); mlx5_cqwq_update_db_record(&cq->wq); @@ -162,7 +107,6 @@ static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq) wmb(); sq->cc = sqcc; - return (i == MLX5E_TX_CQ_POLL_BUDGET); } int mlx5e_napi_poll(struct napi_struct *napi, int budget) @@ -178,12 +122,12 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); + if (c->xdp) + busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq); + work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); busy |= work_done == budget; - if (c->xdp) - busy |= mlx5e_poll_xdp_tx_cq(&c->xdp_sq.cq); - mlx5e_poll_ico_cq(&c->icosq.cq); busy |= mlx5e_post_rx_wqes(&c->rq); @@ -224,8 +168,7 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event) { struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); struct mlx5e_channel *c = cq->channel; - struct mlx5e_priv *priv = c->priv; - struct net_device *netdev = priv->netdev; + struct net_device *netdev = c->netdev; netdev_err(netdev, "%s: cqn=0x%.6x event=0x%.2x\n", __func__, mcq->cqn, event); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index fcd5bc7e31db..2e34d95ea776 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -53,13 +53,6 @@ struct esw_uc_addr { u32 vport; }; -/* E-Switch MC FDB table hash node */ -struct esw_mc_addr { /* SRIOV only */ - struct l2addr_node node; - struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */ - u32 refcnt; -}; - /* Vport UC/MC hash node */ struct vport_addr { struct l2addr_node node; @@ -337,6 +330,7 @@ 
esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb; @@ -362,7 +356,9 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports) memset(flow_group_in, 0, inlen); table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0, 0); + + ft_attr.max_fte = table_size; + fdb = mlx5_create_flow_table(root_ns, &ft_attr); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create FDB Table err %d\n", err); @@ -814,7 +810,7 @@ static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num, bool promisc, bool mc_promisc) { - struct esw_mc_addr *allmulti_addr = esw->mc_promisc; + struct esw_mc_addr *allmulti_addr = &esw->mc_promisc; struct mlx5_vport *vport = &esw->vports[vport_num]; if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc) @@ -1685,7 +1681,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n", esw->enabled_vports, esw->mode); - mc_promisc = esw->mc_promisc; + mc_promisc = &esw->mc_promisc; nvports = esw->enabled_vports; for (i = 0; i < esw->total_vports; i++) @@ -1729,7 +1725,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) { int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); int total_vports = MLX5_TOTAL_VPORTS(dev); - struct esw_mc_addr *mc_promisc; struct mlx5_eswitch *esw; int vport_num; int err; @@ -1758,13 +1753,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) } esw->l2_table.size = l2_table_size; - mc_promisc = kzalloc(sizeof(*mc_promisc), GFP_KERNEL); - if (!mc_promisc) { - err = -ENOMEM; - goto abort; - } - esw->mc_promisc = mc_promisc; - esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { err = -ENOMEM; @@ -1803,6 +1791,11 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->enabled_vports = 0; esw->mode = SRIOV_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) && + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC; + else + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; dev->priv.eswitch = esw; return 0; @@ -1827,7 +1820,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); kfree(esw->l2_table.bitmap); - kfree(esw->mc_promisc); kfree(esw->offloads.vport_reps); kfree(esw->vports); kfree(esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ad329b1680b4..b746f62c8c79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -36,7 +36,6 @@ #include <linux/if_ether.h> #include <linux/if_link.h> #include <net/devlink.h> -#include <net/ip_tunnels.h> #include <linux/mlx5/device.h> #define MLX5_MAX_UC_PER_VPORT(dev) \ @@ -210,6 +209,14 @@ struct mlx5_esw_offload { DECLARE_HASHTABLE(encap_tbl, 8); u8 inline_mode; u64 num_flows; + u8 encap; +}; + +/* E-Switch MC FDB table hash node */ +struct esw_mc_addr { /* SRIOV only */ + struct l2addr_node node; + struct 
mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */ + u32 refcnt; }; struct mlx5_eswitch { @@ -225,7 +232,7 @@ struct mlx5_eswitch { * and async SRIOV admin state changes */ struct mutex state_lock; - struct esw_mc_addr *mc_promisc; + struct esw_mc_addr mc_promisc; struct { bool enabled; @@ -285,20 +292,8 @@ enum { SET_VLAN_INSERT = BIT(1) }; -#define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x40 -#define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x80 - -struct mlx5_encap_entry { - struct hlist_node encap_hlist; - struct list_head flows; - u32 encap_id; - struct neighbour *n; - struct ip_tunnel_info tun_info; - unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ - - struct net_device *out_dev; - int tunnel_type; -}; +#define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x4000 +#define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x8000 struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; @@ -307,7 +302,9 @@ struct mlx5_esw_flow_attr { int action; u16 vlan; bool vlan_handled; - struct mlx5_encap_entry *encap; + u32 encap_id; + u32 mod_hdr_id; + struct mlx5e_tc_flow_parse_attr *parse_attr; }; int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, @@ -321,6 +318,8 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode); int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode); int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode); +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap); +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap); void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, int vport_index, struct mlx5_eswitch_rep *rep); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d111cebca9f1..f991f669047e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -68,8 +68,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(esw->dev, true); - if (IS_ERR(counter)) - return ERR_CAST(counter); + if (IS_ERR(counter)) { + rule = ERR_CAST(counter); + goto err_counter_alloc; + } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[i].counter = counter; i++; @@ -86,17 +88,25 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; - if (attr->encap) - flow_act.encap_id = attr->encap->encap_id; + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + flow_act.modify_id = attr->mod_hdr_id; + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + flow_act.encap_id = attr->encap_id; rule = mlx5_add_flow_rules((struct mlx5_flow_table *)esw->fdb_table.fdb, spec, &flow_act, dest, i); if (IS_ERR(rule)) - mlx5_fc_destroy(esw->dev, counter); + goto err_add_rule; else esw->offloads.num_flows++; return rule; + +err_add_rule: + mlx5_fc_destroy(esw->dev, counter); +err_counter_alloc: + return rule; } void @@ -106,12 +116,10 @@ mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, { struct mlx5_fc *counter = NULL; - if (!IS_ERR(rule)) { - counter = mlx5_flow_rule_counter(rule); - mlx5_del_flow_rules(rule); - mlx5_fc_destroy(esw->dev, counter); - esw->offloads.num_flows--; - } + counter = mlx5_flow_rule_counter(rule); + mlx5_del_flow_rules(rule); + 
mlx5_fc_destroy(esw->dev, counter); + esw->offloads.num_flows--; } static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) @@ -418,30 +426,21 @@ out: return err; } -#define MAX_PF_SQ 256 #define ESW_OFFLOADS_NUM_GROUPS 4 -static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) +static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw) { - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - int table_size, ix, esw_size, err = 0; struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; - struct mlx5_flow_group *g; - u32 *flow_group_in; - void *match_criteria; + int esw_size, err = 0; u32 flags = 0; - flow_group_in = mlx5_vzalloc(inlen); - if (!flow_group_in) - return -ENOMEM; - root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); if (!root_ns) { esw_warn(dev, "Failed to get FDB flow namespace\n"); err = -EOPNOTSUPP; - goto ns_err; + goto out; } esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max counters(%d)*groups(%d))\n", @@ -451,8 +450,7 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_size = min_t(int, MLX5_CAP_GEN(dev, max_flow_counter) * ESW_OFFLOADS_NUM_GROUPS, 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) && - MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) flags |= MLX5_FLOW_TABLE_TUNNEL_EN; fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, @@ -462,12 +460,55 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err); - goto fast_fdb_err; + goto out; } esw->fdb_table.fdb = fdb; +out: + return err; +} + +static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw) +{ + mlx5_destroy_flow_table(esw->fdb_table.fdb); +} + +#define MAX_PF_SQ 256 + +static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb = NULL; + int table_size, ix, err = 0; + struct mlx5_flow_group *g; + void *match_criteria; + u32 *flow_group_in; + + esw_debug(esw->dev, "Create offloads FDB Tables\n"); + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return -ENOMEM; + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + err = -EOPNOTSUPP; + goto ns_err; + } + + err = esw_create_offloads_fast_fdb_table(esw); + if (err) + goto fast_fdb_err; + table_size = nvports + MAX_PF_SQ + 1; - fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0, 0); + + ft_attr.max_fte = table_size; + ft_attr.prio = FDB_SLOW_PATH; + + fdb = mlx5_create_flow_table(root_ns, &ft_attr); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err); @@ -532,25 +573,26 @@ ns_err: return err; } -static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) +static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) { if (!esw->fdb_table.fdb) return; - esw_debug(esw->dev, "Destroy offloads FDB Table\n"); + esw_debug(esw->dev, "Destroy offloads FDB Tables\n"); 
mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb); - mlx5_destroy_flow_table(esw->fdb_table.fdb); + esw_destroy_offloads_fast_fdb_table(esw); } static int esw_create_offloads_table(struct mlx5_eswitch *esw) { - struct mlx5_flow_namespace *ns; - struct mlx5_flow_table *ft_offloads; + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_table *ft_offloads; + struct mlx5_flow_namespace *ns; int err = 0; ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); @@ -559,7 +601,9 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw) return -EOPNOTSUPP; } - ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0, 0); + ft_attr.max_fte = dev->priv.sriov.num_vfs + 2; + + ft_offloads = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft_offloads)) { err = PTR_ERR(ft_offloads); esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err); @@ -700,7 +744,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); mlx5_dev_list_unlock(); - err = esw_create_offloads_fdb_table(esw, nvports); + err = esw_create_offloads_fdb_tables(esw, nvports); if (err) goto create_fdb_err; @@ -737,7 +781,7 @@ create_fg_err: esw_destroy_offloads_table(esw); create_ft_err: - esw_destroy_offloads_fdb_table(esw); + esw_destroy_offloads_fdb_tables(esw); create_fdb_err: /* enable back PF RoCE */ @@ -783,7 +827,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) esw_destroy_vport_rx_group(esw); esw_destroy_offloads_table(esw); - esw_destroy_offloads_fdb_table(esw); + esw_destroy_offloads_fdb_tables(esw); } static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) @@ -1012,6 +1056,66 @@ out: return 0; } +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + int err; + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + if (esw->mode == SRIOV_NONE) + return -EOPNOTSUPP; + + if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && + (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) + return -EOPNOTSUPP; + + if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) + return -EOPNOTSUPP; + + if (esw->mode == SRIOV_LEGACY) { + esw->offloads.encap = encap; + return 0; + } + + if (esw->offloads.encap == encap) + return 0; + + if (esw->offloads.num_flows > 0) { + esw_warn(dev, "Can't set encapsulation when flows are configured\n"); + return -EOPNOTSUPP; + } + + esw_destroy_offloads_fast_fdb_table(esw); + + esw->offloads.encap = encap; + err = esw_create_offloads_fast_fdb_table(esw); + if (err) { + esw_warn(esw->dev, "Failed re-creating fast FDB table, err %d\n", err); + esw->offloads.encap = !encap; + (void) esw_create_offloads_fast_fdb_table(esw); + } + return err; +} + +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + if (esw->mode == SRIOV_NONE) + return -EOPNOTSUPP; + + *encap = esw->offloads.encap; + return 0; +} + void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, int vport_index, 
struct mlx5_eswitch_rep *__rep) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index b64a781c7e85..19e3d2fc2099 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -45,6 +45,10 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0}; u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0}; + if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && + ft->underlay_qpn == 0) + return 0; + MLX5_SET(set_flow_table_root_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); @@ -54,6 +58,10 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, MLX5_SET(set_flow_table_root_in, in, other_vport, 1); } + if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && + ft->underlay_qpn != 0) + MLX5_SET(set_flow_table_root_in, in, underlay_qpn, ft->underlay_qpn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -249,6 +257,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag); MLX5_SET(flow_context, in_flow_context, action, fte->action); MLX5_SET(flow_context, in_flow_context, encap_id, fte->encap_id); + MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->modify_id); in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, match_value); memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param)); @@ -515,3 +524,69 @@ void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id) mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } + +int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, + u8 namespace, u8 num_actions, + void *modify_actions, u32 *modify_header_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)]; + int max_actions, actions_size, inlen, err; + void *actions_in; + u8 table_type; + u32 *in; + + switch (namespace) { + case MLX5_FLOW_NAMESPACE_FDB: + max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, max_modify_header_actions); + table_type = FS_FT_FDB; + break; + case MLX5_FLOW_NAMESPACE_KERNEL: + max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions); + table_type = FS_FT_NIC_RX; + break; + default: + return -EOPNOTSUPP; + } + + if (num_actions > max_actions) { + mlx5_core_warn(dev, "too many modify header actions %d, max supported %d\n", + num_actions, max_actions); + return -EOPNOTSUPP; + } + + actions_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) * num_actions; + inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) + actions_size; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(alloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(alloc_modify_header_context_in, in, table_type, table_type); + MLX5_SET(alloc_modify_header_context_in, in, num_of_actions, num_actions); + + actions_in = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions); + memcpy(actions_in, modify_actions, actions_size); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + + *modify_header_id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id); + kfree(in); + return err; +} + +void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)]; + + 
memset(in, 0, sizeof(in)); + MLX5_SET(dealloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id, + modify_header_id); + + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index ded27bb9a3b6..b8a176503d38 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -476,6 +476,7 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act, fte->index = index; fte->action = flow_act->action; fte->encap_id = flow_act->encap_id; + fte->modify_id = flow_act->modify_id; return fte; } @@ -777,18 +778,16 @@ static void list_add_flow_table(struct mlx5_flow_table *ft, } static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + struct mlx5_flow_table_attr *ft_attr, enum fs_flow_table_op_mod op_mod, - u16 vport, int prio, - int max_fte, u32 level, - u32 flags) + u16 vport) { + struct mlx5_flow_root_namespace *root = find_root(&ns->node); struct mlx5_flow_table *next_ft = NULL; + struct fs_prio *fs_prio = NULL; struct mlx5_flow_table *ft; - int err; int log_table_sz; - struct mlx5_flow_root_namespace *root = - find_root(&ns->node); - struct fs_prio *fs_prio = NULL; + int err; if (!root) { pr_err("mlx5: flow steering failed to find root of namespace\n"); @@ -796,29 +795,31 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa } mutex_lock(&root->chain_lock); - fs_prio = find_prio(ns, prio); + fs_prio = find_prio(ns, ft_attr->prio); if (!fs_prio) { err = -EINVAL; goto unlock_root; } - if (level >= fs_prio->num_levels) { + if (ft_attr->level >= fs_prio->num_levels) { err = -ENOSPC; goto unlock_root; } /* The level is related to the * priority level range. */ - level += fs_prio->start_level; - ft = alloc_flow_table(level, + ft_attr->level += fs_prio->start_level; + ft = alloc_flow_table(ft_attr->level, vport, - max_fte ? roundup_pow_of_two(max_fte) : 0, + ft_attr->max_fte ? roundup_pow_of_two(ft_attr->max_fte) : 0, root->table_type, - op_mod, flags); + op_mod, ft_attr->flags); if (!ft) { err = -ENOMEM; goto unlock_root; } + ft->underlay_qpn = ft_attr->underlay_qpn; + tree_init_node(&ft->node, 1, del_flow_table); log_table_sz = ft->max_fte ? 
ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); @@ -848,44 +849,56 @@ unlock_root: } struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, - int prio, int max_fte, - u32 level, - u32 flags) + struct mlx5_flow_table_attr *ft_attr) { - return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, 0, prio, - max_fte, level, flags); + return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, 0); } struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, int prio, int max_fte, u32 level, u16 vport) { - return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, vport, prio, - max_fte, level, 0); + struct mlx5_flow_table_attr ft_attr = {}; + + ft_attr.max_fte = max_fte; + ft_attr.level = level; + ft_attr.prio = prio; + + return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, 0); } -struct mlx5_flow_table *mlx5_create_lag_demux_flow_table( - struct mlx5_flow_namespace *ns, - int prio, u32 level) +struct mlx5_flow_table* +mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns, + int prio, u32 level) { - return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_LAG_DEMUX, 0, prio, 0, - level, 0); + struct mlx5_flow_table_attr ft_attr = {}; + + ft_attr.level = level; + ft_attr.prio = prio; + return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0); } EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table); -struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, - int prio, - int num_flow_table_entries, - int max_num_groups, - u32 level, - u32 flags) +struct mlx5_flow_table* +mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, + int prio, + int num_flow_table_entries, + int max_num_groups, + u32 level, + u32 flags) { + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_table *ft; if (max_num_groups > num_flow_table_entries) return ERR_PTR(-EINVAL); - ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries, level, flags); + ft_attr.max_fte = num_flow_table_entries; + ft_attr.prio = prio; + ft_attr.level = level; + ft_attr.flags = flags; + + ft = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft)) return ft; @@ -1827,12 +1840,18 @@ static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) static int create_anchor_flow_table(struct mlx5_flow_steering *steering) { struct mlx5_flow_namespace *ns = NULL; + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_table *ft; ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR); if (WARN_ON(!ns)) return -EINVAL; - ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL, 0); + + ft_attr.max_fte = ANCHOR_SIZE; + ft_attr.level = ANCHOR_LEVEL; + ft_attr.prio = ANCHOR_PRIO; + + ft = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft)) { mlx5_core_err(steering->dev, "Failed to create last anchor flow table"); return PTR_ERR(ft); @@ -1886,9 +1905,6 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering = dev->priv.steering; - if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) - return; - cleanup_root_ns(steering->root_ns); cleanup_root_ns(steering->esw_egress_root_ns); cleanup_root_ns(steering->esw_ingress_root_ns); @@ -1991,9 +2007,6 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) struct mlx5_flow_steering *steering; int err = 0; - if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) - return 0; - err = mlx5_init_fc_stats(dev); if (err) return err; @@ -2004,7 +2017,10 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) 
steering->dev = dev; dev->priv.steering = steering; - if (MLX5_CAP_GEN(dev, nic_flow_table) && + if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && + (MLX5_CAP_GEN(dev, nic_flow_table))) || + ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && + MLX5_CAP_GEN(dev, ipoib_enhanced_offloads))) && MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) { err = init_root_ns(steering); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 8e668c63f69e..81eafc7b9dd9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -118,6 +118,7 @@ struct mlx5_flow_table { /* FWD rules that point on this flow table */ struct list_head fwd_rules; u32 flags; + u32 underlay_qpn; }; struct mlx5_fc_cache { @@ -152,6 +153,7 @@ struct fs_fte { u32 index; u32 action; u32 encap_id; + u32 modify_id; enum fs_fte_status status; struct mlx5_fc *counter; }; @@ -197,6 +199,11 @@ struct mlx5_flow_root_namespace { int mlx5_init_fc_stats(struct mlx5_core_dev *dev); void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev); +void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, + struct delayed_work *dwork, + unsigned long delay); +void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, + unsigned long interval); int mlx5_init_fs(struct mlx5_core_dev *dev); void mlx5_cleanup_fs(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 7431f633de31..6507d8acc54d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -165,7 +165,8 @@ static void mlx5_fc_stats_work(struct work_struct *work) list_splice_tail_init(&fc_stats->addlist, &tmplist); if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters)) - queue_delayed_work(fc_stats->wq, &fc_stats->work, MLX5_FC_STATS_PERIOD); + queue_delayed_work(fc_stats->wq, &fc_stats->work, + fc_stats->sampling_interval); spin_unlock(&fc_stats->addlist_lock); @@ -200,7 +201,7 @@ static void mlx5_fc_stats_work(struct work_struct *work) node = mlx5_fc_stats_query(dev, counter, last->id); } - fc_stats->next_query = now + MLX5_FC_STATS_PERIOD; + fc_stats->next_query = now + fc_stats->sampling_interval; } struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) @@ -265,6 +266,7 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev) if (!fc_stats->wq) return -ENOMEM; + fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD; INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work); return 0; @@ -317,3 +319,21 @@ void mlx5_fc_query_cached(struct mlx5_fc *counter, counter->lastbytes = c.bytes; counter->lastpackets = c.packets; } + +void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, + struct delayed_work *dwork, + unsigned long delay) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + queue_delayed_work(fc_stats->wq, dwork, delay); +} + +void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, + unsigned long interval) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + fc_stats->sampling_interval = min_t(unsigned long, interval, + fc_stats->sampling_interval); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index d0bbefa08af7..1bc14d0fded8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -137,7 +137,8 @@ 
int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } - if (MLX5_CAP_GEN(dev, nic_flow_table)) { + if (MLX5_CAP_GEN(dev, nic_flow_table) || + MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c new file mode 100644 index 000000000000..3c84e36af018 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c @@ -0,0 +1,498 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/fs.h> +#include "en.h" +#include "ipoib.h" + +#define IB_DEFAULT_Q_KEY 0xb1b + +static int mlx5i_open(struct net_device *netdev); +static int mlx5i_close(struct net_device *netdev); +static int mlx5i_dev_init(struct net_device *dev); +static void mlx5i_dev_cleanup(struct net_device *dev); + +static const struct net_device_ops mlx5i_netdev_ops = { + .ndo_open = mlx5i_open, + .ndo_stop = mlx5i_close, + .ndo_init = mlx5i_dev_init, + .ndo_uninit = mlx5i_dev_cleanup, +}; + +/* IPoIB mlx5 netdev profile */ + +/* Called directly after IPoIB netdevice was created to initialize SW structs */ +static void mlx5i_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + priv->mdev = mdev; + priv->netdev = netdev; + priv->profile = profile; + priv->ppriv = ppriv; + + mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev)); + + mutex_init(&priv->state_lock); + + netdev->hw_features |= NETIF_F_SG; + netdev->hw_features |= NETIF_F_IP_CSUM; + netdev->hw_features |= NETIF_F_IPV6_CSUM; + netdev->hw_features |= NETIF_F_GRO; + netdev->hw_features |= NETIF_F_TSO; + netdev->hw_features |= NETIF_F_TSO6; + netdev->hw_features |= NETIF_F_RXCSUM; + netdev->hw_features |= NETIF_F_RXHASH; + + netdev->netdev_ops = &mlx5i_netdev_ops; +} + +/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */ +static void mlx5i_cleanup(struct mlx5e_priv *priv) +{ + /* Do nothing .. 
*/ +} + +#define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2 + +static int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) +{ + struct mlx5_qp_context *context = NULL; + u32 *in = NULL; + void *addr_path; + int ret = 0; + int inlen; + void *qpc; + + inlen = MLX5_ST_SZ_BYTES(create_qp_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, + MLX5_QP_ENHANCED_ULP_STATELESS_MODE); + + addr_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path); + MLX5_SET(ads, addr_path, port, 1); + MLX5_SET(ads, addr_path, grh, 1); + + ret = mlx5_core_create_qp(mdev, qp, in, inlen); + if (ret) { + mlx5_core_err(mdev, "Failed creating IPoIB QP err : %d\n", ret); + goto out; + } + + /* QP states */ + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) { + ret = -ENOMEM; + goto out; + } + + context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); + context->pri_path.port = 1; + context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY); + + ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp); + if (ret) { + mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret); + goto out; + } + memset(context, 0, sizeof(*context)); + + ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp); + if (ret) { + mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret); + goto out; + } + + ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp); + if (ret) { + mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret); + goto out; + } + +out: + kfree(context); + kvfree(in); + return ret; +} + +static void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) +{ + mlx5_core_destroy_qp(mdev, qp); +} + +static int mlx5i_init_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + int err; + + err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp); + if (err) { + mlx5_core_warn(priv->mdev, "create underlay QP failed, %d\n", err); + return err; + } + + err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]); + if (err) { + mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); + return err; + } + + return 0; +} + +static void mlx5i_cleanup_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + + mlx5e_destroy_tis(priv->mdev, priv->tisn[0]); + mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp); +} + +static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + int err; + + priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + + if (!priv->fs.ns) + return -EINVAL; + + err = mlx5e_arfs_create_tables(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n", + err); + priv->netdev->hw_features &= ~NETIF_F_NTUPLE; + } + + err = mlx5e_create_ttc_table(priv, ipriv->qp.qpn); + if (err) { + netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", + err); + goto err_destroy_arfs_tables; + } + + return 0; + +err_destroy_arfs_tables: + mlx5e_arfs_destroy_tables(priv); + + return err; +} + +static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) +{ + mlx5e_destroy_ttc_table(priv); + mlx5e_arfs_destroy_tables(priv); +} + +static int mlx5i_init_rx(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_create_indirect_rqt(priv); + if (err) + 
return err; + + err = mlx5e_create_direct_rqts(priv); + if (err) + goto err_destroy_indirect_rqts; + + err = mlx5e_create_indirect_tirs(priv); + if (err) + goto err_destroy_direct_rqts; + + err = mlx5e_create_direct_tirs(priv); + if (err) + goto err_destroy_indirect_tirs; + + err = mlx5i_create_flow_steering(priv); + if (err) + goto err_destroy_direct_tirs; + + return 0; + +err_destroy_direct_tirs: + mlx5e_destroy_direct_tirs(priv); +err_destroy_indirect_tirs: + mlx5e_destroy_indirect_tirs(priv); +err_destroy_direct_rqts: + mlx5e_destroy_direct_rqts(priv); +err_destroy_indirect_rqts: + mlx5e_destroy_rqt(priv, &priv->indir_rqt); + return err; +} + +static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) +{ + mlx5i_destroy_flow_steering(priv); + mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_indirect_tirs(priv); + mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_rqt(priv, &priv->indir_rqt); +} + +static const struct mlx5e_profile mlx5i_nic_profile = { + .init = mlx5i_init, + .cleanup = mlx5i_cleanup, + .init_tx = mlx5i_init_tx, + .cleanup_tx = mlx5i_cleanup_tx, + .init_rx = mlx5i_init_rx, + .cleanup_rx = mlx5i_cleanup_rx, + .enable = NULL, /* mlx5i_enable */ + .disable = NULL, /* mlx5i_disable */ + .update_stats = NULL, /* mlx5i_update_stats */ + .max_nch = mlx5e_get_max_num_channels, + .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, + .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ + .max_tc = MLX5I_MAX_NUM_TC, +}; + +/* mlx5i netdev NDos */ + +static int mlx5i_dev_init(struct net_device *dev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv = priv->ppriv; + + /* Set dev address using underlay QP */ + dev->dev_addr[1] = (ipriv->qp.qpn >> 16) & 0xff; + dev->dev_addr[2] = (ipriv->qp.qpn >> 8) & 0xff; + dev->dev_addr[3] = (ipriv->qp.qpn) & 0xff; + + return 0; +} + +static void mlx5i_dev_cleanup(struct net_device *dev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5i_priv *ipriv = priv->ppriv; + struct mlx5_qp_context context; + + /* detach qp from flow-steering by reset it */ + mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context, &ipriv->qp); +} + +static int mlx5i_open(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + int err; + + mutex_lock(&priv->state_lock); + + set_bit(MLX5E_STATE_OPENED, &priv->state); + + err = mlx5e_open_channels(priv, &priv->channels); + if (err) + goto err_clear_state_opened_flag; + + mlx5e_refresh_tirs(priv, false); + mlx5e_activate_priv_channels(priv); + mutex_unlock(&priv->state_lock); + return 0; + +err_clear_state_opened_flag: + clear_bit(MLX5E_STATE_OPENED, &priv->state); + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5i_close(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + /* May already be CLOSED in case a previous configuration operation + * (e.g RX/TX queue size change) that involves close&open failed. 
+ */ + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + clear_bit(MLX5E_STATE_OPENED, &priv->state); + + netif_carrier_off(priv->netdev); + mlx5e_deactivate_priv_channels(priv); + mlx5e_close_channels(&priv->channels); +unlock: + mutex_unlock(&priv->state_lock); + return 0; +} + +#ifdef notusedyet +/* IPoIB RDMA netdev callbacks */ +static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca, + union ib_gid *gid, u16 lid, int set_qkey) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = epriv->mdev; + struct mlx5i_priv *ipriv = epriv->ppriv; + int err; + + mlx5_core_dbg(mdev, "attaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw); + err = mlx5_core_attach_mcg(mdev, gid, ipriv->qp.qpn); + if (err) + mlx5_core_warn(mdev, "failed attaching QPN 0x%x, MGID %pI6\n", + ipriv->qp.qpn, gid->raw); + + return err; +} + +static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca, + union ib_gid *gid, u16 lid) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = epriv->mdev; + struct mlx5i_priv *ipriv = epriv->ppriv; + int err; + + mlx5_core_dbg(mdev, "detaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw); + + err = mlx5_core_detach_mcg(mdev, gid, ipriv->qp.qpn); + if (err) + mlx5_core_dbg(mdev, "failed dettaching QPN 0x%x, MGID %pI6\n", + ipriv->qp.qpn, gid->raw); + + return err; +} + +static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb, + struct ib_ah *address, u32 dqpn, u32 dqkey) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(dev); + struct mlx5e_txqsq *sq = epriv->txq2sq[skb_get_queue_mapping(skb)]; + struct mlx5_ib_ah *mah = to_mah(address); + + return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, dqkey); +} +#endif + +static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev) +{ + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_IB) + return -EOPNOTSUPP; + + if (!MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) { + mlx5_core_warn(mdev, "IPoIB enhanced offloads are not supported\n"); + return -ENOTSUPP; + } + + return 0; +} + +static struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, + struct ib_device *ibdev, + const char *name, + void (*setup)(struct net_device *)) +{ + const struct mlx5e_profile *profile = &mlx5i_nic_profile; + int nch = profile->max_nch(mdev); + struct net_device *netdev; + struct mlx5i_priv *ipriv; + struct mlx5e_priv *epriv; + int err; + + if (mlx5i_check_required_hca_cap(mdev)) { + mlx5_core_warn(mdev, "Accelerated mode is not supported\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + /* This function should only be called once per mdev */ + err = mlx5e_create_mdev_resources(mdev); + if (err) + return NULL; + + netdev = alloc_netdev_mqs(sizeof(struct mlx5i_priv) + sizeof(struct mlx5e_priv), + name, NET_NAME_UNKNOWN, + setup, + nch * MLX5E_MAX_NUM_TC, + nch); + if (!netdev) { + mlx5_core_warn(mdev, "alloc_netdev_mqs failed\n"); + goto free_mdev_resources; + } + + ipriv = netdev_priv(netdev); + epriv = mlx5i_epriv(netdev); + + epriv->wq = create_singlethread_workqueue("mlx5i"); + if (!epriv->wq) + goto err_free_netdev; + + profile->init(mdev, netdev, profile, ipriv); + + mlx5e_attach_netdev(epriv); + netif_carrier_off(netdev); + + /* TODO: set rdma_netdev func pointers + * rn = &ipriv->rn; + * rn->hca = ibdev; + * rn->send = mlx5i_xmit; + * rn->attach_mcast = mlx5i_attach_mcast; + * rn->detach_mcast = mlx5i_detach_mcast; + */ + return netdev; + +err_free_netdev: + 
free_netdev(netdev); +free_mdev_resources: + mlx5e_destroy_mdev_resources(mdev); + + return NULL; +} +EXPORT_SYMBOL(mlx5_rdma_netdev_alloc); + +static void mlx5_rdma_netdev_free(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + const struct mlx5e_profile *profile = priv->profile; + + mlx5e_detach_netdev(priv); + profile->cleanup(priv); + destroy_workqueue(priv->wq); + free_netdev(netdev); + + mlx5e_destroy_mdev_resources(priv->mdev); +} +EXPORT_SYMBOL(mlx5_rdma_netdev_free); + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h new file mode 100644 index 000000000000..bae0a5cbc8ad --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __MLX5E_IPOB_H__ +#define __MLX5E_IPOB_H__ + +#include <linux/mlx5/fs.h> +#include "en.h" + +#define MLX5I_MAX_NUM_TC 1 + +/* ipoib rdma netdev's private data structure */ +struct mlx5i_priv { + struct mlx5_core_qp qp; + char *mlx5e_priv[0]; +}; + +/* Extract mlx5e_priv from IPoIB netdev */ +#define mlx5i_epriv(netdev) ((void *)(((struct mlx5i_priv *)netdev_priv(netdev))->mlx5e_priv)) + +netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_av *av, u32 dqpn, u32 dqkey); +void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); + +#endif /* __MLX5E_IPOB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 0ad66324247f..0c123d571b4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1280,6 +1280,8 @@ static const struct devlink_ops mlx5_devlink_ops = { .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set, .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get, + .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set, + .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, #endif }; @@ -1514,8 +1516,10 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5, PCIe 3.0 */ { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 VF */ - { PCI_VDEVICE(MELLANOX, 0x1019) }, /* ConnectX-5, PCIe 4.0 */ - { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5, PCIe 4.0 VF */ + { PCI_VDEVICE(MELLANOX, 0x1019) }, /* ConnectX-5 Ex */ + { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 Ex VF */ + { PCI_VDEVICE(MELLANOX, 0x101b) }, /* ConnectX-6 */ + { PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF}, /* ConnectX-6 VF */ { 0, } }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index b3dabe6e8836..fbc6e9e9e305 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -141,6 +141,11 @@ int mlx5_encap_alloc(struct mlx5_core_dev *dev, u32 *encap_id); void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id); +int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, + u8 namespace, u8 num_actions, + void *modify_actions, u32 *modify_header_id); +void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id); + bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv); int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size); diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 6b6c30deee83..2fb8c6585ac7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -15,7 +15,8 @@ obj-$(CONFIG_MLXSW_SPECTRUM) += mlxsw_spectrum.o mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_switchdev.o spectrum_router.o \ spectrum_kvdl.o spectrum_acl_tcam.o \ - spectrum_acl.o spectrum_flower.o + spectrum_acl.o spectrum_flower.o \ + spectrum_cnt.o spectrum_dpipe.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o mlxsw_minimal-objs := minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h 
b/drivers/net/ethernet/mellanox/mlxsw/cmd.h index a1b48421648a..479511cf79bc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h +++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h @@ -1043,13 +1043,6 @@ MLXSW_ITEM32(cmd_mbox, sw2hw_cq, cv, 0x00, 28, 4); */ MLXSW_ITEM32(cmd_mbox, sw2hw_cq, c_eqn, 0x00, 24, 1); -/* cmd_mbox_sw2hw_cq_oi - * When set, overrun ignore is enabled. When set, updates of - * CQ consumer counter (poll for completion) or Request completion - * notifications (Arm CQ) DoorBells should not be rung on that CQ. - */ -MLXSW_ITEM32(cmd_mbox, sw2hw_cq, oi, 0x00, 12, 1); - /* cmd_mbox_sw2hw_cq_st * Event delivery state machine * 0x0 - FIRED @@ -1132,11 +1125,6 @@ static inline int mlxsw_cmd_sw2hw_eq(struct mlxsw_core *mlxsw_core, */ MLXSW_ITEM32(cmd_mbox, sw2hw_eq, int_msix, 0x00, 24, 1); -/* cmd_mbox_sw2hw_eq_oi - * When set, overrun ignore is enabled. - */ -MLXSW_ITEM32(cmd_mbox, sw2hw_eq, oi, 0x00, 12, 1); - /* cmd_mbox_sw2hw_eq_st * Event delivery state machine * 0x0 - FIRED diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index a4c07841aaf6..affe84eb4bff 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -40,9 +40,6 @@ #include <linux/export.h> #include <linux/err.h> #include <linux/if_link.h> -#include <linux/debugfs.h> -#include <linux/seq_file.h> -#include <linux/u64_stats_sync.h> #include <linux/netdevice.h> #include <linux/completion.h> #include <linux/skbuff.h> @@ -74,23 +71,9 @@ static DEFINE_SPINLOCK(mlxsw_core_driver_list_lock); static const char mlxsw_core_driver_name[] = "mlxsw_core"; -static struct dentry *mlxsw_core_dbg_root; - static struct workqueue_struct *mlxsw_wq; static struct workqueue_struct *mlxsw_owq; -struct mlxsw_core_pcpu_stats { - u64 trap_rx_packets[MLXSW_TRAP_ID_MAX]; - u64 trap_rx_bytes[MLXSW_TRAP_ID_MAX]; - u64 port_rx_packets[MLXSW_PORT_MAX_PORTS]; - u64 port_rx_bytes[MLXSW_PORT_MAX_PORTS]; - struct u64_stats_sync syncp; - u32 trap_rx_dropped[MLXSW_TRAP_ID_MAX]; - u32 port_rx_dropped[MLXSW_PORT_MAX_PORTS]; - u32 trap_rx_invalid; - u32 port_rx_invalid; -}; - struct mlxsw_core_port { struct devlink_port devlink_port; void *port_driver_priv; @@ -121,23 +104,48 @@ struct mlxsw_core { spinlock_t trans_list_lock; /* protects trans_list writes */ bool use_emad; } emad; - struct mlxsw_core_pcpu_stats __percpu *pcpu_stats; - struct dentry *dbg_dir; - struct { - struct debugfs_blob_wrapper vsd_blob; - struct debugfs_blob_wrapper psid_blob; - } dbg; struct { u8 *mapping; /* lag_id+port_index to local_port mapping */ } lag; struct mlxsw_res res; struct mlxsw_hwmon *hwmon; struct mlxsw_thermal *thermal; - struct mlxsw_core_port ports[MLXSW_PORT_MAX_PORTS]; + struct mlxsw_core_port *ports; + unsigned int max_ports; unsigned long driver_priv[0]; /* driver_priv has to be always the last item */ }; +#define MLXSW_PORT_MAX_PORTS_DEFAULT 0x40 + +static int mlxsw_ports_init(struct mlxsw_core *mlxsw_core) +{ + /* Switch ports are numbered from 1 to queried value */ + if (MLXSW_CORE_RES_VALID(mlxsw_core, MAX_SYSTEM_PORT)) + mlxsw_core->max_ports = MLXSW_CORE_RES_GET(mlxsw_core, + MAX_SYSTEM_PORT) + 1; + else + mlxsw_core->max_ports = MLXSW_PORT_MAX_PORTS_DEFAULT + 1; + + mlxsw_core->ports = kcalloc(mlxsw_core->max_ports, + sizeof(struct mlxsw_core_port), GFP_KERNEL); + if (!mlxsw_core->ports) + return -ENOMEM; + + return 0; +} + +static void mlxsw_ports_fini(struct mlxsw_core *mlxsw_core) +{ + kfree(mlxsw_core->ports); +} + +unsigned int 
mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->max_ports; +} +EXPORT_SYMBOL(mlxsw_core_max_ports); + void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core) { return mlxsw_core->driver_priv; @@ -703,91 +711,6 @@ err_out: * Core functions *****************/ -static int mlxsw_core_rx_stats_dbg_read(struct seq_file *file, void *data) -{ - struct mlxsw_core *mlxsw_core = file->private; - struct mlxsw_core_pcpu_stats *p; - u64 rx_packets, rx_bytes; - u64 tmp_rx_packets, tmp_rx_bytes; - u32 rx_dropped, rx_invalid; - unsigned int start; - int i; - int j; - static const char hdr[] = - " NUM RX_PACKETS RX_BYTES RX_DROPPED\n"; - - seq_printf(file, hdr); - for (i = 0; i < MLXSW_TRAP_ID_MAX; i++) { - rx_packets = 0; - rx_bytes = 0; - rx_dropped = 0; - for_each_possible_cpu(j) { - p = per_cpu_ptr(mlxsw_core->pcpu_stats, j); - do { - start = u64_stats_fetch_begin(&p->syncp); - tmp_rx_packets = p->trap_rx_packets[i]; - tmp_rx_bytes = p->trap_rx_bytes[i]; - } while (u64_stats_fetch_retry(&p->syncp, start)); - - rx_packets += tmp_rx_packets; - rx_bytes += tmp_rx_bytes; - rx_dropped += p->trap_rx_dropped[i]; - } - seq_printf(file, "trap %3d %12llu %12llu %10u\n", - i, rx_packets, rx_bytes, rx_dropped); - } - rx_invalid = 0; - for_each_possible_cpu(j) { - p = per_cpu_ptr(mlxsw_core->pcpu_stats, j); - rx_invalid += p->trap_rx_invalid; - } - seq_printf(file, "trap INV %10u\n", - rx_invalid); - - for (i = 0; i < MLXSW_PORT_MAX_PORTS; i++) { - rx_packets = 0; - rx_bytes = 0; - rx_dropped = 0; - for_each_possible_cpu(j) { - p = per_cpu_ptr(mlxsw_core->pcpu_stats, j); - do { - start = u64_stats_fetch_begin(&p->syncp); - tmp_rx_packets = p->port_rx_packets[i]; - tmp_rx_bytes = p->port_rx_bytes[i]; - } while (u64_stats_fetch_retry(&p->syncp, start)); - - rx_packets += tmp_rx_packets; - rx_bytes += tmp_rx_bytes; - rx_dropped += p->port_rx_dropped[i]; - } - seq_printf(file, "port %3d %12llu %12llu %10u\n", - i, rx_packets, rx_bytes, rx_dropped); - } - rx_invalid = 0; - for_each_possible_cpu(j) { - p = per_cpu_ptr(mlxsw_core->pcpu_stats, j); - rx_invalid += p->port_rx_invalid; - } - seq_printf(file, "port INV %10u\n", - rx_invalid); - return 0; -} - -static int mlxsw_core_rx_stats_dbg_open(struct inode *inode, struct file *f) -{ - struct mlxsw_core *mlxsw_core = inode->i_private; - - return single_open(f, mlxsw_core_rx_stats_dbg_read, mlxsw_core); -} - -static const struct file_operations mlxsw_core_rx_stats_dbg_ops = { - .owner = THIS_MODULE, - .open = mlxsw_core_rx_stats_dbg_open, - .release = single_release, - .read = seq_read, - .llseek = seq_lseek -}; - int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver) { spin_lock(&mlxsw_core_driver_list_lock); @@ -835,39 +758,13 @@ static void mlxsw_core_driver_put(const char *kind) spin_unlock(&mlxsw_core_driver_list_lock); } -static int mlxsw_core_debugfs_init(struct mlxsw_core *mlxsw_core) -{ - const struct mlxsw_bus_info *bus_info = mlxsw_core->bus_info; - - mlxsw_core->dbg_dir = debugfs_create_dir(bus_info->device_name, - mlxsw_core_dbg_root); - if (!mlxsw_core->dbg_dir) - return -ENOMEM; - debugfs_create_file("rx_stats", S_IRUGO, mlxsw_core->dbg_dir, - mlxsw_core, &mlxsw_core_rx_stats_dbg_ops); - mlxsw_core->dbg.vsd_blob.data = (void *) &bus_info->vsd; - mlxsw_core->dbg.vsd_blob.size = sizeof(bus_info->vsd); - debugfs_create_blob("vsd", S_IRUGO, mlxsw_core->dbg_dir, - &mlxsw_core->dbg.vsd_blob); - mlxsw_core->dbg.psid_blob.data = (void *) &bus_info->psid; - mlxsw_core->dbg.psid_blob.size = sizeof(bus_info->psid); - 
debugfs_create_blob("psid", S_IRUGO, mlxsw_core->dbg_dir, - &mlxsw_core->dbg.psid_blob); - return 0; -} - -static void mlxsw_core_debugfs_fini(struct mlxsw_core *mlxsw_core) -{ - debugfs_remove_recursive(mlxsw_core->dbg_dir); -} - static int mlxsw_devlink_port_split(struct devlink *devlink, unsigned int port_index, unsigned int count) { struct mlxsw_core *mlxsw_core = devlink_priv(devlink); - if (port_index >= MLXSW_PORT_MAX_PORTS) + if (port_index >= mlxsw_core->max_ports) return -EINVAL; if (!mlxsw_core->driver->port_split) return -EOPNOTSUPP; @@ -879,7 +776,7 @@ static int mlxsw_devlink_port_unsplit(struct devlink *devlink, { struct mlxsw_core *mlxsw_core = devlink_priv(devlink); - if (port_index >= MLXSW_PORT_MAX_PORTS) + if (port_index >= mlxsw_core->max_ports) return -EINVAL; if (!mlxsw_core->driver->port_unsplit) return -EOPNOTSUPP; @@ -1101,18 +998,15 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, mlxsw_core->bus_priv = bus_priv; mlxsw_core->bus_info = mlxsw_bus_info; - mlxsw_core->pcpu_stats = - netdev_alloc_pcpu_stats(struct mlxsw_core_pcpu_stats); - if (!mlxsw_core->pcpu_stats) { - err = -ENOMEM; - goto err_alloc_stats; - } - err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, &mlxsw_core->res); if (err) goto err_bus_init; + err = mlxsw_ports_init(mlxsw_core); + if (err) + goto err_ports_init; + if (MLXSW_CORE_RES_VALID(mlxsw_core, MAX_LAG) && MLXSW_CORE_RES_VALID(mlxsw_core, MAX_LAG_MEMBERS)) { alloc_size = sizeof(u8) * @@ -1148,15 +1042,8 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_driver_init; } - err = mlxsw_core_debugfs_init(mlxsw_core); - if (err) - goto err_debugfs_init; - return 0; -err_debugfs_init: - if (mlxsw_core->driver->fini) - mlxsw_core->driver->fini(mlxsw_core); err_driver_init: mlxsw_thermal_fini(mlxsw_core->thermal); err_thermal_init: @@ -1167,10 +1054,10 @@ err_devlink_register: err_emad_init: kfree(mlxsw_core->lag.mapping); err_alloc_lag_mapping: + mlxsw_ports_fini(mlxsw_core); +err_ports_init: mlxsw_bus->fini(bus_priv); err_bus_init: - free_percpu(mlxsw_core->pcpu_stats); -err_alloc_stats: devlink_free(devlink); err_devlink_alloc: mlxsw_core_driver_put(device_kind); @@ -1183,15 +1070,14 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core) const char *device_kind = mlxsw_core->bus_info->device_kind; struct devlink *devlink = priv_to_devlink(mlxsw_core); - mlxsw_core_debugfs_fini(mlxsw_core); if (mlxsw_core->driver->fini) mlxsw_core->driver->fini(mlxsw_core); mlxsw_thermal_fini(mlxsw_core->thermal); devlink_unregister(devlink); mlxsw_emad_fini(mlxsw_core); kfree(mlxsw_core->lag.mapping); + mlxsw_ports_fini(mlxsw_core); mlxsw_core->bus->fini(mlxsw_core->bus_priv); - free_percpu(mlxsw_core->pcpu_stats); devlink_free(devlink); mlxsw_core_driver_put(device_kind); } @@ -1639,7 +1525,6 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, { struct mlxsw_rx_listener_item *rxl_item; const struct mlxsw_rx_listener *rxl; - struct mlxsw_core_pcpu_stats *pcpu_stats; u8 local_port; bool found = false; @@ -1661,7 +1546,7 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, __func__, local_port, rx_info->trap_id); if ((rx_info->trap_id >= MLXSW_TRAP_ID_MAX) || - (local_port >= MLXSW_PORT_MAX_PORTS)) + (local_port >= mlxsw_core->max_ports)) goto drop; rcu_read_lock(); @@ -1678,26 +1563,10 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, if (!found) goto drop; - 
pcpu_stats = this_cpu_ptr(mlxsw_core->pcpu_stats); - u64_stats_update_begin(&pcpu_stats->syncp); - pcpu_stats->port_rx_packets[local_port]++; - pcpu_stats->port_rx_bytes[local_port] += skb->len; - pcpu_stats->trap_rx_packets[rx_info->trap_id]++; - pcpu_stats->trap_rx_bytes[rx_info->trap_id] += skb->len; - u64_stats_update_end(&pcpu_stats->syncp); - rxl->func(skb, local_port, rxl_item->priv); return; drop: - if (rx_info->trap_id >= MLXSW_TRAP_ID_MAX) - this_cpu_inc(mlxsw_core->pcpu_stats->trap_rx_invalid); - else - this_cpu_inc(mlxsw_core->pcpu_stats->trap_rx_dropped[rx_info->trap_id]); - if (local_port >= MLXSW_PORT_MAX_PORTS) - this_cpu_inc(mlxsw_core->pcpu_stats->port_rx_invalid); - else - this_cpu_inc(mlxsw_core->pcpu_stats->port_rx_dropped[local_port]); dev_kfree_skb(skb); } EXPORT_SYMBOL(mlxsw_core_skb_receive); @@ -1926,15 +1795,8 @@ static int __init mlxsw_core_module_init(void) err = -ENOMEM; goto err_alloc_ordered_workqueue; } - mlxsw_core_dbg_root = debugfs_create_dir(mlxsw_core_driver_name, NULL); - if (!mlxsw_core_dbg_root) { - err = -ENOMEM; - goto err_debugfs_create_dir; - } return 0; -err_debugfs_create_dir: - destroy_workqueue(mlxsw_owq); err_alloc_ordered_workqueue: destroy_workqueue(mlxsw_wq); return err; @@ -1942,7 +1804,6 @@ err_alloc_ordered_workqueue: static void __exit mlxsw_core_module_exit(void) { - debugfs_remove_recursive(mlxsw_core_dbg_root); destroy_workqueue(mlxsw_owq); destroy_workqueue(mlxsw_wq); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index cf38cf9027f8..7fb35395adf5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -57,6 +57,8 @@ struct mlxsw_driver; struct mlxsw_bus; struct mlxsw_bus_info; +unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core); + void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core); int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 5f337715a4da..46304ffb9449 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -567,6 +567,89 @@ static char *mlxsw_afa_block_append_action(struct mlxsw_afa_block *block, return oneact + MLXSW_AFA_PAYLOAD_OFFSET; } +/* VLAN Action + * ----------- + * VLAN action is used for manipulating VLANs. It can be used to implement QinQ, + * VLAN translation, change of PCP bits of the VLAN tag, push, pop as swap VLANs + * and more. + */ + +#define MLXSW_AFA_VLAN_CODE 0x02 +#define MLXSW_AFA_VLAN_SIZE 1 + +enum mlxsw_afa_vlan_vlan_tag_cmd { + MLXSW_AFA_VLAN_VLAN_TAG_CMD_NOP, + MLXSW_AFA_VLAN_VLAN_TAG_CMD_PUSH_TAG, + MLXSW_AFA_VLAN_VLAN_TAG_CMD_POP_TAG, +}; + +enum mlxsw_afa_vlan_cmd { + MLXSW_AFA_VLAN_CMD_NOP, + MLXSW_AFA_VLAN_CMD_SET_OUTER, + MLXSW_AFA_VLAN_CMD_SET_INNER, + MLXSW_AFA_VLAN_CMD_COPY_OUTER_TO_INNER, + MLXSW_AFA_VLAN_CMD_COPY_INNER_TO_OUTER, + MLXSW_AFA_VLAN_CMD_SWAP, +}; + +/* afa_vlan_vlan_tag_cmd + * Tag command: push, pop, nop VLAN header. + */ +MLXSW_ITEM32(afa, vlan, vlan_tag_cmd, 0x00, 29, 3); + +/* afa_vlan_vid_cmd */ +MLXSW_ITEM32(afa, vlan, vid_cmd, 0x04, 29, 3); + +/* afa_vlan_vid */ +MLXSW_ITEM32(afa, vlan, vid, 0x04, 0, 12); + +/* afa_vlan_ethertype_cmd */ +MLXSW_ITEM32(afa, vlan, ethertype_cmd, 0x08, 29, 3); + +/* afa_vlan_ethertype + * Index to EtherTypes in Switch VLAN EtherType Register (SVER). 
+ */ +MLXSW_ITEM32(afa, vlan, ethertype, 0x08, 24, 3); + +/* afa_vlan_pcp_cmd */ +MLXSW_ITEM32(afa, vlan, pcp_cmd, 0x08, 13, 3); + +/* afa_vlan_pcp */ +MLXSW_ITEM32(afa, vlan, pcp, 0x08, 8, 3); + +static inline void +mlxsw_afa_vlan_pack(char *payload, + enum mlxsw_afa_vlan_vlan_tag_cmd vlan_tag_cmd, + enum mlxsw_afa_vlan_cmd vid_cmd, u16 vid, + enum mlxsw_afa_vlan_cmd pcp_cmd, u8 pcp, + enum mlxsw_afa_vlan_cmd ethertype_cmd, u8 ethertype) +{ + mlxsw_afa_vlan_vlan_tag_cmd_set(payload, vlan_tag_cmd); + mlxsw_afa_vlan_vid_cmd_set(payload, vid_cmd); + mlxsw_afa_vlan_vid_set(payload, vid); + mlxsw_afa_vlan_pcp_cmd_set(payload, pcp_cmd); + mlxsw_afa_vlan_pcp_set(payload, pcp); + mlxsw_afa_vlan_ethertype_cmd_set(payload, ethertype_cmd); + mlxsw_afa_vlan_ethertype_set(payload, ethertype); +} + +int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, + u16 vid, u8 pcp, u8 et) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_VLAN_CODE, + MLXSW_AFA_VLAN_SIZE); + + if (!act) + return -ENOBUFS; + mlxsw_afa_vlan_pack(act, MLXSW_AFA_VLAN_VLAN_TAG_CMD_NOP, + MLXSW_AFA_VLAN_CMD_SET_OUTER, vid, + MLXSW_AFA_VLAN_CMD_SET_OUTER, pcp, + MLXSW_AFA_VLAN_CMD_SET_OUTER, et); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_vlan_modify); + /* Trap / Discard Action * --------------------- * The Trap / Discard action enables trapping / mirroring packets to the CPU @@ -677,3 +760,98 @@ err_append_action: return err; } EXPORT_SYMBOL(mlxsw_afa_block_append_fwd); + +/* Policing and Counting Action + * ---------------------------- + * Policing and Counting action is used for binding policer and counter + * to ACL rules. + */ + +#define MLXSW_AFA_POLCNT_CODE 0x08 +#define MLXSW_AFA_POLCNT_SIZE 1 + +enum mlxsw_afa_polcnt_counter_set_type { + /* No count */ + MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_NO_COUNT = 0x00, + /* Count packets and bytes */ + MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, + /* Count only packets */ + MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS = 0x05, +}; + +/* afa_polcnt_counter_set_type + * Counter set type for flow counters. + */ +MLXSW_ITEM32(afa, polcnt, counter_set_type, 0x04, 24, 8); + +/* afa_polcnt_counter_index + * Counter index for flow counters. + */ +MLXSW_ITEM32(afa, polcnt, counter_index, 0x04, 0, 24); + +static inline void +mlxsw_afa_polcnt_pack(char *payload, + enum mlxsw_afa_polcnt_counter_set_type set_type, + u32 counter_index) +{ + mlxsw_afa_polcnt_counter_set_type_set(payload, set_type); + mlxsw_afa_polcnt_counter_index_set(payload, counter_index); +} + +int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block, + u32 counter_index) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_POLCNT_CODE, + MLXSW_AFA_POLCNT_SIZE); + if (!act) + return -ENOBUFS; + mlxsw_afa_polcnt_pack(act, MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS_BYTES, + counter_index); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_counter); + +/* Virtual Router and Forwarding Domain Action + * ------------------------------------------- + * Virtual Switch action is used to manipulate the Virtual Router (VR), + * MPLS label space and the Forwarding Identifier (FID). + */ + +#define MLXSW_AFA_VIRFWD_CODE 0x0E +#define MLXSW_AFA_VIRFWD_SIZE 1 + +enum mlxsw_afa_virfwd_fid_cmd { + /* Do nothing */ + MLXSW_AFA_VIRFWD_FID_CMD_NOOP, + /* Set the Forwarding Identifier (FID) to fid */ + MLXSW_AFA_VIRFWD_FID_CMD_SET, +}; + +/* afa_virfwd_fid_cmd */ +MLXSW_ITEM32(afa, virfwd, fid_cmd, 0x08, 29, 3); + +/* afa_virfwd_fid + * The FID value.
+ */ +MLXSW_ITEM32(afa, virfwd, fid, 0x08, 0, 16); + +static inline void mlxsw_afa_virfwd_pack(char *payload, + enum mlxsw_afa_virfwd_fid_cmd fid_cmd, + u16 fid) +{ + mlxsw_afa_virfwd_fid_cmd_set(payload, fid_cmd); + mlxsw_afa_virfwd_fid_set(payload, fid); +} + +int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_VIRFWD_CODE, + MLXSW_AFA_VIRFWD_SIZE); + if (!act) + return -ENOBUFS; + mlxsw_afa_virfwd_pack(act, MLXSW_AFA_VIRFWD_FID_CMD_SET, fid); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_fid_set); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 43f78dcfe394..bd8b91d02880 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -62,5 +62,10 @@ void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block); int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, u8 local_port, bool in_port); +int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, + u16 vid, u8 pcp, u8 et); +int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block, + u32 counter_index); +int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h index e4fcba7c2af2..c75e9141e3ec 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -54,6 +54,8 @@ enum mlxsw_afk_element { MLXSW_AFK_ELEMENT_DST_IP6_LO, MLXSW_AFK_ELEMENT_DST_L4_PORT, MLXSW_AFK_ELEMENT_SRC_L4_PORT, + MLXSW_AFK_ELEMENT_VID, + MLXSW_AFK_ELEMENT_PCP, MLXSW_AFK_ELEMENT_MAX, }; @@ -88,7 +90,7 @@ struct mlxsw_afk_element_info { MLXSW_AFK_ELEMENT_INFO(MLXSW_AFK_ELEMENT_TYPE_BUF, \ _element, _offset, 0, _size) -/* For the purpose of the driver, define a internal storage scratchpad +/* For the purpose of the driver, define an internal storage scratchpad * that will be used to store key/mask values. For each defined element type * define an internal storage geometry. 
*/ @@ -98,6 +100,8 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { MLXSW_AFK_ELEMENT_INFO_BUF(SMAC, 0x0A, 6), MLXSW_AFK_ELEMENT_INFO_U32(ETHERTYPE, 0x00, 0, 16), MLXSW_AFK_ELEMENT_INFO_U32(IP_PROTO, 0x10, 0, 8), + MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12), + MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3), MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x18, 0, 32), MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x1C, 0, 32), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x18, 8), diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index a223c85dfde0..23f7d828cf67 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -44,8 +44,6 @@ #include <linux/skbuff.h> #include <linux/if_vlan.h> #include <linux/log2.h> -#include <linux/debugfs.h> -#include <linux/seq_file.h> #include <linux/string.h> #include "pci_hw.h" @@ -57,8 +55,6 @@ static const char mlxsw_pci_driver_name[] = "mlxsw_pci"; -static struct dentry *mlxsw_pci_dbg_root; - #define mlxsw_pci_write32(mlxsw_pci, reg, val) \ iowrite32be(val, (mlxsw_pci)->hw_addr + (MLXSW_PCI_ ## reg)) #define mlxsw_pci_read32(mlxsw_pci, reg) \ @@ -71,21 +67,6 @@ enum mlxsw_pci_queue_type { MLXSW_PCI_QUEUE_TYPE_EQ, }; -static const char *mlxsw_pci_queue_type_str(enum mlxsw_pci_queue_type q_type) -{ - switch (q_type) { - case MLXSW_PCI_QUEUE_TYPE_SDQ: - return "sdq"; - case MLXSW_PCI_QUEUE_TYPE_RDQ: - return "rdq"; - case MLXSW_PCI_QUEUE_TYPE_CQ: - return "cq"; - case MLXSW_PCI_QUEUE_TYPE_EQ: - return "eq"; - } - BUG(); -} - #define MLXSW_PCI_QUEUE_TYPE_COUNT 4 static const u16 mlxsw_pci_doorbell_type_offset[] = { @@ -155,7 +136,6 @@ struct mlxsw_pci { u8 __iomem *hw_addr; struct mlxsw_pci_queue_type_group queues[MLXSW_PCI_QUEUE_TYPE_COUNT]; u32 doorbell_offset; - struct msix_entry msix_entry; struct mlxsw_core *core; struct { struct mlxsw_pci_mem_item *items; @@ -174,7 +154,6 @@ struct mlxsw_pci { } comp; } cmd; struct mlxsw_bus_info bus_info; - struct dentry *dbg_dir; }; static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q) @@ -261,21 +240,11 @@ static u8 mlxsw_pci_sdq_count(struct mlxsw_pci *mlxsw_pci) return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_SDQ); } -static u8 mlxsw_pci_rdq_count(struct mlxsw_pci *mlxsw_pci) -{ - return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_RDQ); -} - static u8 mlxsw_pci_cq_count(struct mlxsw_pci *mlxsw_pci) { return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_CQ); } -static u8 mlxsw_pci_eq_count(struct mlxsw_pci *mlxsw_pci) -{ - return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_EQ); -} - static struct mlxsw_pci_queue * __mlxsw_pci_queue_get(struct mlxsw_pci *mlxsw_pci, enum mlxsw_pci_queue_type q_type, u8 q_num) @@ -390,26 +359,6 @@ static void mlxsw_pci_sdq_fini(struct mlxsw_pci *mlxsw_pci, mlxsw_cmd_hw2sw_sdq(mlxsw_pci->core, q->num); } -static int mlxsw_pci_sdq_dbg_read(struct seq_file *file, void *data) -{ - struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private); - struct mlxsw_pci_queue *q; - int i; - static const char hdr[] = - "NUM PROD_COUNT CONS_COUNT COUNT\n"; - - seq_printf(file, hdr); - for (i = 0; i < mlxsw_pci_sdq_count(mlxsw_pci); i++) { - q = mlxsw_pci_sdq_get(mlxsw_pci, i); - spin_lock_bh(&q->lock); - seq_printf(file, "%3d %10d %10d %5d\n", - i, q->producer_counter, q->consumer_counter, - q->count); - spin_unlock_bh(&q->lock); - } - return 0; -} - static int mlxsw_pci_wqe_frag_map(struct mlxsw_pci *mlxsw_pci, char *wqe, int index, char 
*frag_data, size_t frag_len, int direction) @@ -544,26 +493,6 @@ static void mlxsw_pci_rdq_fini(struct mlxsw_pci *mlxsw_pci, } } -static int mlxsw_pci_rdq_dbg_read(struct seq_file *file, void *data) -{ - struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private); - struct mlxsw_pci_queue *q; - int i; - static const char hdr[] = - "NUM PROD_COUNT CONS_COUNT COUNT\n"; - - seq_printf(file, hdr); - for (i = 0; i < mlxsw_pci_rdq_count(mlxsw_pci); i++) { - q = mlxsw_pci_rdq_get(mlxsw_pci, i); - spin_lock_bh(&q->lock); - seq_printf(file, "%3d %10d %10d %5d\n", - i, q->producer_counter, q->consumer_counter, - q->count); - spin_unlock_bh(&q->lock); - } - return 0; -} - static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, struct mlxsw_pci_queue *q) { @@ -580,7 +509,6 @@ static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_sw2hw_cq_cv_set(mbox, 0); /* CQE ver 0 */ mlxsw_cmd_mbox_sw2hw_cq_c_eqn_set(mbox, MLXSW_PCI_EQ_COMP_NUM); - mlxsw_cmd_mbox_sw2hw_cq_oi_set(mbox, 0); mlxsw_cmd_mbox_sw2hw_cq_st_set(mbox, 0); mlxsw_cmd_mbox_sw2hw_cq_log_cq_size_set(mbox, ilog2(q->count)); for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { @@ -602,27 +530,6 @@ static void mlxsw_pci_cq_fini(struct mlxsw_pci *mlxsw_pci, mlxsw_cmd_hw2sw_cq(mlxsw_pci->core, q->num); } -static int mlxsw_pci_cq_dbg_read(struct seq_file *file, void *data) -{ - struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private); - - struct mlxsw_pci_queue *q; - int i; - static const char hdr[] = - "NUM CONS_INDEX SDQ_COUNT RDQ_COUNT COUNT\n"; - - seq_printf(file, hdr); - for (i = 0; i < mlxsw_pci_cq_count(mlxsw_pci); i++) { - q = mlxsw_pci_cq_get(mlxsw_pci, i); - spin_lock_bh(&q->lock); - seq_printf(file, "%3d %10d %10d %10d %5d\n", - i, q->consumer_counter, q->u.cq.comp_sdq_count, - q->u.cq.comp_rdq_count, q->count); - spin_unlock_bh(&q->lock); - } - return 0; -} - static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci, struct mlxsw_pci_queue *q, u16 consumer_counter_limit, @@ -755,7 +662,6 @@ static int mlxsw_pci_eq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, } mlxsw_cmd_mbox_sw2hw_eq_int_msix_set(mbox, 1); /* MSI-X used */ - mlxsw_cmd_mbox_sw2hw_eq_oi_set(mbox, 0); mlxsw_cmd_mbox_sw2hw_eq_st_set(mbox, 1); /* armed */ mlxsw_cmd_mbox_sw2hw_eq_log_eq_size_set(mbox, ilog2(q->count)); for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { @@ -777,27 +683,6 @@ static void mlxsw_pci_eq_fini(struct mlxsw_pci *mlxsw_pci, mlxsw_cmd_hw2sw_eq(mlxsw_pci->core, q->num); } -static int mlxsw_pci_eq_dbg_read(struct seq_file *file, void *data) -{ - struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private); - struct mlxsw_pci_queue *q; - int i; - static const char hdr[] = - "NUM CONS_COUNT EV_CMD EV_COMP EV_OTHER COUNT\n"; - - seq_printf(file, hdr); - for (i = 0; i < mlxsw_pci_eq_count(mlxsw_pci); i++) { - q = mlxsw_pci_eq_get(mlxsw_pci, i); - spin_lock_bh(&q->lock); - seq_printf(file, "%3d %10d %10d %10d %10d %5d\n", - i, q->consumer_counter, q->u.eq.ev_cmd_count, - q->u.eq.ev_comp_count, q->u.eq.ev_other_count, - q->count); - spin_unlock_bh(&q->lock); - } - return 0; -} - static void mlxsw_pci_eq_cmd_event(struct mlxsw_pci *mlxsw_pci, char *eqe) { mlxsw_pci->cmd.comp.status = mlxsw_pci_eqe_cmd_status_get(eqe); @@ -868,7 +753,6 @@ struct mlxsw_pci_queue_ops { void (*fini)(struct mlxsw_pci *mlxsw_pci, struct mlxsw_pci_queue *q); void (*tasklet)(unsigned long data); - int (*dbg_read)(struct seq_file *s, void *data); u16 elem_count; u8 elem_size; }; @@ -877,7 +761,6 @@ static const struct mlxsw_pci_queue_ops 
mlxsw_pci_sdq_ops = { .type = MLXSW_PCI_QUEUE_TYPE_SDQ, .init = mlxsw_pci_sdq_init, .fini = mlxsw_pci_sdq_fini, - .dbg_read = mlxsw_pci_sdq_dbg_read, .elem_count = MLXSW_PCI_WQE_COUNT, .elem_size = MLXSW_PCI_WQE_SIZE, }; @@ -886,7 +769,6 @@ static const struct mlxsw_pci_queue_ops mlxsw_pci_rdq_ops = { .type = MLXSW_PCI_QUEUE_TYPE_RDQ, .init = mlxsw_pci_rdq_init, .fini = mlxsw_pci_rdq_fini, - .dbg_read = mlxsw_pci_rdq_dbg_read, .elem_count = MLXSW_PCI_WQE_COUNT, .elem_size = MLXSW_PCI_WQE_SIZE }; @@ -896,7 +778,6 @@ static const struct mlxsw_pci_queue_ops mlxsw_pci_cq_ops = { .init = mlxsw_pci_cq_init, .fini = mlxsw_pci_cq_fini, .tasklet = mlxsw_pci_cq_tasklet, - .dbg_read = mlxsw_pci_cq_dbg_read, .elem_count = MLXSW_PCI_CQE_COUNT, .elem_size = MLXSW_PCI_CQE_SIZE }; @@ -906,7 +787,6 @@ static const struct mlxsw_pci_queue_ops mlxsw_pci_eq_ops = { .init = mlxsw_pci_eq_init, .fini = mlxsw_pci_eq_fini, .tasklet = mlxsw_pci_eq_tasklet, - .dbg_read = mlxsw_pci_eq_dbg_read, .elem_count = MLXSW_PCI_EQE_COUNT, .elem_size = MLXSW_PCI_EQE_SIZE }; @@ -984,9 +864,7 @@ static int mlxsw_pci_queue_group_init(struct mlxsw_pci *mlxsw_pci, char *mbox, const struct mlxsw_pci_queue_ops *q_ops, u8 num_qs) { - struct pci_dev *pdev = mlxsw_pci->pdev; struct mlxsw_pci_queue_type_group *queue_group; - char tmp[16]; int i; int err; @@ -1003,10 +881,6 @@ static int mlxsw_pci_queue_group_init(struct mlxsw_pci *mlxsw_pci, char *mbox, } queue_group->count = num_qs; - sprintf(tmp, "%s_stats", mlxsw_pci_queue_type_str(q_ops->type)); - debugfs_create_devm_seqfile(&pdev->dev, tmp, mlxsw_pci->dbg_dir, - q_ops->dbg_read); - return 0; err_queue_init: @@ -1534,7 +1408,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, if (err) goto err_aqs_init; - err = request_irq(mlxsw_pci->msix_entry.vector, + err = request_irq(pci_irq_vector(pdev, 0), mlxsw_pci_eq_irq_handler, 0, mlxsw_pci->bus_info.device_kind, mlxsw_pci); if (err) { @@ -1567,7 +1441,7 @@ static void mlxsw_pci_fini(void *bus_priv) { struct mlxsw_pci *mlxsw_pci = bus_priv; - free_irq(mlxsw_pci->msix_entry.vector, mlxsw_pci); + free_irq(pci_irq_vector(mlxsw_pci->pdev, 0), mlxsw_pci); mlxsw_pci_aqs_fini(mlxsw_pci); mlxsw_pci_fw_area_fini(mlxsw_pci); mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.out_mbox); @@ -1842,8 +1716,8 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_sw_reset; } - err = pci_enable_msix_exact(pdev, &mlxsw_pci->msix_entry, 1); - if (err) { + err = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX); + if (err < 0) { dev_err(&pdev->dev, "MSI-X init failed\n"); goto err_msix_init; } @@ -1852,14 +1726,6 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev); mlxsw_pci->bus_info.dev = &pdev->dev; - mlxsw_pci->dbg_dir = debugfs_create_dir(mlxsw_pci->bus_info.device_name, - mlxsw_pci_dbg_root); - if (!mlxsw_pci->dbg_dir) { - dev_err(&pdev->dev, "Failed to create debugfs dir\n"); - err = -ENOMEM; - goto err_dbg_create_dir; - } - err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info, &mlxsw_pci_bus, mlxsw_pci); if (err) { @@ -1870,9 +1736,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_bus_device_register: - debugfs_remove_recursive(mlxsw_pci->dbg_dir); -err_dbg_create_dir: - pci_disable_msix(mlxsw_pci->pdev); + pci_free_irq_vectors(mlxsw_pci->pdev); err_msix_init: err_sw_reset: iounmap(mlxsw_pci->hw_addr); @@ -1892,8 +1756,7 @@ static void 
mlxsw_pci_remove(struct pci_dev *pdev) struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev); mlxsw_core_bus_device_unregister(mlxsw_pci->core); - debugfs_remove_recursive(mlxsw_pci->dbg_dir); - pci_disable_msix(mlxsw_pci->pdev); + pci_free_irq_vectors(mlxsw_pci->pdev); iounmap(mlxsw_pci->hw_addr); pci_release_regions(mlxsw_pci->pdev); pci_disable_device(mlxsw_pci->pdev); @@ -1916,15 +1779,11 @@ EXPORT_SYMBOL(mlxsw_pci_driver_unregister); static int __init mlxsw_pci_module_init(void) { - mlxsw_pci_dbg_root = debugfs_create_dir(mlxsw_pci_driver_name, NULL); - if (!mlxsw_pci_dbg_root) - return -ENOMEM; return 0; } static void __exit mlxsw_pci_module_exit(void) { - debugfs_remove_recursive(mlxsw_pci_dbg_root); } module_init(mlxsw_pci_module_init); diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h index 3d42146473b3..c580abba8d34 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/port.h +++ b/drivers/net/ethernet/mellanox/mlxsw/port.h @@ -49,20 +49,12 @@ #define MLXSW_PORT_MID 0xd000 -#define MLXSW_PORT_MAX_PHY_PORTS 0x40 -#define MLXSW_PORT_MAX_PORTS (MLXSW_PORT_MAX_PHY_PORTS + 1) - #define MLXSW_PORT_MAX_IB_PHY_PORTS 36 #define MLXSW_PORT_MAX_IB_PORTS (MLXSW_PORT_MAX_IB_PHY_PORTS + 1) -#define MLXSW_PORT_DEVID_BITS_OFFSET 10 -#define MLXSW_PORT_PHY_BITS_OFFSET 4 -#define MLXSW_PORT_PHY_BITS_MASK (MLXSW_PORT_MAX_PHY_PORTS - 1) - #define MLXSW_PORT_CPU_PORT 0x0 -#define MLXSW_PORT_ROUTER_PORT (MLXSW_PORT_MAX_PHY_PORTS + 2) -#define MLXSW_PORT_DONT_CARE (MLXSW_PORT_MAX_PORTS) +#define MLXSW_PORT_DONT_CARE 0xFF #define MLXSW_PORT_MODULE_MAX_WIDTH 4 diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index d9616daf8a70..83b277c8090e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4125,6 +4125,60 @@ MLXSW_ITEM32(reg, ritr, sp_if_system_port, 0x08, 0, 16); */ MLXSW_ITEM32(reg, ritr, sp_if_vid, 0x18, 0, 12); +/* Shared between ingress/egress */ +enum mlxsw_reg_ritr_counter_set_type { + /* No Count. */ + MLXSW_REG_RITR_COUNTER_SET_TYPE_NO_COUNT = 0x0, + /* Basic. Used for router interfaces, counting the following: + * - Error and Discard counters. + * - Unicast, Multicast and Broadcast counters. Sharing the + * same set of counters for the different type of traffic + * (IPv4, IPv6 and mpls). + */ + MLXSW_REG_RITR_COUNTER_SET_TYPE_BASIC = 0x9, +}; + +/* reg_ritr_ingress_counter_index + * Counter Index for flow counter. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ingress_counter_index, 0x38, 0, 24); + +/* reg_ritr_ingress_counter_set_type + * Ingress Counter Set Type for router interface counter. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ingress_counter_set_type, 0x38, 24, 8); + +/* reg_ritr_egress_counter_index + * Counter Index for flow counter. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, egress_counter_index, 0x3C, 0, 24); + +/* reg_ritr_egress_counter_set_type + * Egress Counter Set Type for router interface counter.
+ * Access: RW + */ +MLXSW_ITEM32(reg, ritr, egress_counter_set_type, 0x3C, 24, 8); + +static inline void mlxsw_reg_ritr_counter_pack(char *payload, u32 index, + bool enable, bool egress) +{ + enum mlxsw_reg_ritr_counter_set_type set_type; + + if (enable) + set_type = MLXSW_REG_RITR_COUNTER_SET_TYPE_BASIC; + else + set_type = MLXSW_REG_RITR_COUNTER_SET_TYPE_NO_COUNT; + mlxsw_reg_ritr_egress_counter_set_type_set(payload, set_type); + + if (egress) + mlxsw_reg_ritr_egress_counter_index_set(payload, index); + else + mlxsw_reg_ritr_ingress_counter_index_set(payload, index); +} + static inline void mlxsw_reg_ritr_rif_pack(char *payload, u16 rif) { MLXSW_REG_ZERO(ritr, payload); @@ -4141,7 +4195,8 @@ static inline void mlxsw_reg_ritr_sp_if_pack(char *payload, bool lag, static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, enum mlxsw_reg_ritr_if_type type, - u16 rif, u16 mtu, const char *mac) + u16 rif, u16 vr_id, u16 mtu, + const char *mac) { bool op = enable ? MLXSW_REG_RITR_RIF_CREATE : MLXSW_REG_RITR_RIF_DEL; @@ -4153,6 +4208,7 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, mlxsw_reg_ritr_rif_set(payload, rif); mlxsw_reg_ritr_ipv4_fe_set(payload, 1); mlxsw_reg_ritr_lb_en_set(payload, 1); + mlxsw_reg_ritr_virtual_router_set(payload, vr_id); mlxsw_reg_ritr_mtu_set(payload, mtu); mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac); } @@ -4285,6 +4341,129 @@ static inline void mlxsw_reg_ratr_eth_entry_pack(char *payload, mlxsw_reg_ratr_eth_destination_mac_memcpy_to(payload, dest_mac); } +/* RICNT - Router Interface Counter Register + * ----------------------------------------- + * The RICNT register retrieves per port performance counters + */ +#define MLXSW_REG_RICNT_ID 0x800B +#define MLXSW_REG_RICNT_LEN 0x100 + +MLXSW_REG_DEFINE(ricnt, MLXSW_REG_RICNT_ID, MLXSW_REG_RICNT_LEN); + +/* reg_ricnt_counter_index + * Counter index + * Access: RW + */ +MLXSW_ITEM32(reg, ricnt, counter_index, 0x04, 0, 24); + +enum mlxsw_reg_ricnt_counter_set_type { + /* No Count. */ + MLXSW_REG_RICNT_COUNTER_SET_TYPE_NO_COUNT = 0x00, + /* Basic. Used for router interfaces, counting the following: + * - Error and Discard counters. + * - Unicast, Multicast and Broadcast counters. Sharing the + * same set of counters for the different type of traffic + * (IPv4, IPv6 and mpls). + */ + MLXSW_REG_RICNT_COUNTER_SET_TYPE_BASIC = 0x09, +}; + +/* reg_ricnt_counter_set_type + * Counter Set Type for router interface counter + * Access: RW + */ +MLXSW_ITEM32(reg, ricnt, counter_set_type, 0x04, 24, 8); + +enum mlxsw_reg_ricnt_opcode { + /* Nop. Supported only for read access*/ + MLXSW_REG_RICNT_OPCODE_NOP = 0x00, + /* Clear. Setting the clr bit will reset the counter value for + * all counters of the specified Router Interface. + */ + MLXSW_REG_RICNT_OPCODE_CLEAR = 0x08, +}; + +/* reg_ricnt_opcode + * Opcode + * Access: RW + */ +MLXSW_ITEM32(reg, ricnt, op, 0x00, 28, 4); + +/* reg_ricnt_good_unicast_packets + * good unicast packets. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, good_unicast_packets, 0x08, 0, 64); + +/* reg_ricnt_good_multicast_packets + * good multicast packets. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, good_multicast_packets, 0x10, 0, 64); + +/* reg_ricnt_good_broadcast_packets + * good broadcast packets + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, good_broadcast_packets, 0x18, 0, 64); + +/* reg_ricnt_good_unicast_bytes + * A count of L3 data and padding octets not including L2 headers + * for good unicast frames. 
+ * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, good_unicast_bytes, 0x20, 0, 64); + +/* reg_ricnt_good_multicast_bytes + * A count of L3 data and padding octets not including L2 headers + * for good multicast frames. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, good_multicast_bytes, 0x28, 0, 64); + +/* reg_ricnt_good_broadcast_bytes + * A count of L3 data and padding octets not including L2 headers + * for good broadcast frames. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, good_broadcast_bytes, 0x30, 0, 64); + +/* reg_ricnt_error_packets + * A count of errored frames that do not pass the router checks. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, error_packets, 0x38, 0, 64); + +/* reg_ricnt_discard_packets + * A count of non-errored frames that do not pass the router checks. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, discard_packets, 0x40, 0, 64); + +/* reg_ricnt_error_bytes + * A count of L3 data and padding octets not including L2 headers + * for errored frames. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, error_bytes, 0x48, 0, 64); + +/* reg_ricnt_discard_bytes + * A count of L3 data and padding octets not including L2 headers + * for non-errored frames that do not pass the router checks. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, discard_bytes, 0x50, 0, 64); + +static inline void mlxsw_reg_ricnt_pack(char *payload, u32 index, + enum mlxsw_reg_ricnt_opcode op) +{ + MLXSW_REG_ZERO(ricnt, payload); + mlxsw_reg_ricnt_op_set(payload, op); + mlxsw_reg_ricnt_counter_index_set(payload, index); + mlxsw_reg_ricnt_counter_set_type_set(payload, + MLXSW_REG_RICNT_COUNTER_SET_TYPE_BASIC); +} + /* RALTA - Router Algorithmic LPM Tree Allocation Register * ------------------------------------------------------- * RALTA is used to allocate the LPM trees of the SHSPM method. @@ -5504,6 +5683,70 @@ static inline void mlxsw_reg_mpsc_pack(char *payload, u8 local_port, bool e, mlxsw_reg_mpsc_rate_set(payload, rate); } +/* MGPC - Monitoring General Purpose Counter Set Register + * The MGPC register retrieves and sets the General Purpose Counter Set. + */ +#define MLXSW_REG_MGPC_ID 0x9081 +#define MLXSW_REG_MGPC_LEN 0x18 + +MLXSW_REG_DEFINE(mgpc, MLXSW_REG_MGPC_ID, MLXSW_REG_MGPC_LEN); + +enum mlxsw_reg_mgpc_counter_set_type { + /* No count */ + MLXSW_REG_MGPC_COUNTER_SET_TYPE_NO_COUT = 0x00, + /* Count packets and bytes */ + MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, + /* Count only packets */ + MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS = 0x05, +}; + +/* reg_mgpc_counter_set_type + * Counter set type. + * Access: OP + */ +MLXSW_ITEM32(reg, mgpc, counter_set_type, 0x00, 24, 8); + +/* reg_mgpc_counter_index + * Counter index. + * Access: Index + */ +MLXSW_ITEM32(reg, mgpc, counter_index, 0x00, 0, 24); + +enum mlxsw_reg_mgpc_opcode { + /* Nop */ + MLXSW_REG_MGPC_OPCODE_NOP = 0x00, + /* Clear counters */ + MLXSW_REG_MGPC_OPCODE_CLEAR = 0x08, +}; + +/* reg_mgpc_opcode + * Opcode. + * Access: OP + */ +MLXSW_ITEM32(reg, mgpc, opcode, 0x04, 28, 4); + +/* reg_mgpc_byte_counter + * Byte counter value. + * Access: RW + */ +MLXSW_ITEM64(reg, mgpc, byte_counter, 0x08, 0, 64); + +/* reg_mgpc_packet_counter + * Packet counter value.
+ * Access: RW + */ +MLXSW_ITEM64(reg, mgpc, packet_counter, 0x10, 0, 64); + +static inline void mlxsw_reg_mgpc_pack(char *payload, u32 counter_index, + enum mlxsw_reg_mgpc_opcode opcode, + enum mlxsw_reg_mgpc_counter_set_type set_type) +{ + MLXSW_REG_ZERO(mgpc, payload); + mlxsw_reg_mgpc_counter_index_set(payload, counter_index); + mlxsw_reg_mgpc_counter_set_type_set(payload, set_type); + mlxsw_reg_mgpc_opcode_set(payload, opcode); +} + /* SBPR - Shared Buffer Pools Register * ----------------------------------- * The SBPR configures and retrieves the shared buffer pools and configuration. @@ -5960,6 +6203,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(rgcr), MLXSW_REG(ritr), MLXSW_REG(ratr), + MLXSW_REG(ricnt), MLXSW_REG(ralta), MLXSW_REG(ralst), MLXSW_REG(raltb), @@ -5977,6 +6221,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mpar), MLXSW_REG(mlcr), MLXSW_REG(mpsc), + MLXSW_REG(mgpc), MLXSW_REG(sbpr), MLXSW_REG(sbcm), MLXSW_REG(sbpm), diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index bce8c2e00630..9556d934714b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -43,11 +43,15 @@ enum mlxsw_res_id { MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE, MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE, MLXSW_RES_ID_MAX_TRAP_GROUPS, + MLXSW_RES_ID_COUNTER_POOL_SIZE, MLXSW_RES_ID_MAX_SPAN, + MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES, + MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC, MLXSW_RES_ID_MAX_SYSTEM_PORT, MLXSW_RES_ID_MAX_LAG, MLXSW_RES_ID_MAX_LAG_MEMBERS, MLXSW_RES_ID_MAX_BUFFER_SIZE, + MLXSW_RES_ID_CELL_SIZE, MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS, MLXSW_RES_ID_ACL_MAX_TCAM_RULES, MLXSW_RES_ID_ACL_MAX_REGIONS, @@ -59,6 +63,7 @@ enum mlxsw_res_id { MLXSW_RES_ID_MAX_CPU_POLICERS, MLXSW_RES_ID_MAX_VRS, MLXSW_RES_ID_MAX_RIFS, + MLXSW_RES_ID_MAX_LPM_TREES, /* Internal resources. * Determined by the SW, not queried from the HW. 
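[Editor's note: the new resource identifiers above (COUNTER_POOL_SIZE, COUNTER_SIZE_PACKETS_BYTES, CELL_SIZE, MAX_LPM_TREES) are queried from firmware through the existing MLXSW_CORE_RES_VALID()/MLXSW_CORE_RES_GET() helpers, the same pattern this patch already uses for MAX_LAG and MAX_RIFS. A minimal sketch of how a consumer might size a flow-counter pool from them follows; it is illustrative only, is not part of this patch, and the function name is hypothetical.]

static int example_counter_pool_size(struct mlxsw_core *mlxsw_core,
				     unsigned int *p_pool_size,
				     unsigned int *p_entry_size)
{
	/* Both resources must be reported by the device before use. */
	if (!MLXSW_CORE_RES_VALID(mlxsw_core, COUNTER_POOL_SIZE) ||
	    !MLXSW_CORE_RES_VALID(mlxsw_core, COUNTER_SIZE_PACKETS_BYTES))
		return -EIO;
	*p_pool_size = MLXSW_CORE_RES_GET(mlxsw_core, COUNTER_POOL_SIZE);
	*p_entry_size = MLXSW_CORE_RES_GET(mlxsw_core,
					   COUNTER_SIZE_PACKETS_BYTES);
	return 0;
}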
@@ -75,11 +80,15 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE] = 0x1002, [MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE] = 0x1003, [MLXSW_RES_ID_MAX_TRAP_GROUPS] = 0x2201, + [MLXSW_RES_ID_COUNTER_POOL_SIZE] = 0x2410, [MLXSW_RES_ID_MAX_SPAN] = 0x2420, + [MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES] = 0x2443, + [MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC] = 0x2449, [MLXSW_RES_ID_MAX_SYSTEM_PORT] = 0x2502, [MLXSW_RES_ID_MAX_LAG] = 0x2520, [MLXSW_RES_ID_MAX_LAG_MEMBERS] = 0x2521, [MLXSW_RES_ID_MAX_BUFFER_SIZE] = 0x2802, /* Bytes */ + [MLXSW_RES_ID_CELL_SIZE] = 0x2803, /* Bytes */ [MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS] = 0x2901, [MLXSW_RES_ID_ACL_MAX_TCAM_RULES] = 0x2902, [MLXSW_RES_ID_ACL_MAX_REGIONS] = 0x2903, @@ -91,6 +100,7 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_MAX_CPU_POLICERS] = 0x2A13, [MLXSW_RES_ID_MAX_VRS] = 0x2C01, [MLXSW_RES_ID_MAX_RIFS] = 0x2C02, + [MLXSW_RES_ID_MAX_LPM_TREES] = 0x2C30, }; struct mlxsw_res { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 16484f24b7db..88357cee7679 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -66,6 +66,8 @@ #include "port.h" #include "trap.h" #include "txheader.h" +#include "spectrum_cnt.h" +#include "spectrum_dpipe.h" static const char mlxsw_sp_driver_name[] = "mlxsw_spectrum"; static const char mlxsw_sp_driver_version[] = "1.0"; @@ -138,6 +140,60 @@ MLXSW_ITEM32(tx, hdr, fid, 0x08, 0, 16); */ MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4); +int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index, u64 *packets, + u64 *bytes) +{ + char mgpc_pl[MLXSW_REG_MGPC_LEN]; + int err; + + mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_NOP, + MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl); + if (err) + return err; + *packets = mlxsw_reg_mgpc_packet_counter_get(mgpc_pl); + *bytes = mlxsw_reg_mgpc_byte_counter_get(mgpc_pl); + return 0; +} + +static int mlxsw_sp_flow_counter_clear(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index) +{ + char mgpc_pl[MLXSW_REG_MGPC_LEN]; + + mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_CLEAR, + MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl); +} + +int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp, + unsigned int *p_counter_index) +{ + int err; + + err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_FLOW, + p_counter_index); + if (err) + return err; + err = mlxsw_sp_flow_counter_clear(mlxsw_sp, *p_counter_index); + if (err) + goto err_counter_clear; + return 0; + +err_counter_clear: + mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_FLOW, + *p_counter_index); + return err; +} + +void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index) +{ + mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_FLOW, + counter_index); +} + static void mlxsw_sp_txhdr_construct(struct sk_buff *skb, const struct mlxsw_tx_info *tx_info) { @@ -304,9 +360,10 @@ static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port) return false; } -static int mlxsw_sp_span_mtu_to_buffsize(int mtu) +static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp, + int mtu) { - return MLXSW_SP_BYTES_TO_CELLS(mtu * 5 / 2) + 1; + return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1; } static int mlxsw_sp_span_port_mtu_update(struct 
mlxsw_sp_port *port, u16 mtu) @@ -319,8 +376,9 @@ static int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu) * updated according to the mtu value */ if (mlxsw_sp_span_is_egress_mirror(port)) { - mlxsw_reg_sbib_pack(sbib_pl, port->local_port, - mlxsw_sp_span_mtu_to_buffsize(mtu)); + u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu); + + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); if (err) { netdev_err(port->dev, "Could not update shared buffer for mirroring\n"); @@ -357,8 +415,10 @@ mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port, /* if it is an egress SPAN, bind a shared buffer to it */ if (type == MLXSW_SP_SPAN_EGRESS) { - mlxsw_reg_sbib_pack(sbib_pl, port->local_port, - mlxsw_sp_span_mtu_to_buffsize(port->dev->mtu)); + u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, + port->dev->mtu); + + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); if (err) { netdev_err(port->dev, "Could not create shared buffer for mirroring\n"); @@ -745,19 +805,47 @@ static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p) return 0; } -static void mlxsw_sp_pg_buf_pack(char *pbmc_pl, int pg_index, int mtu, - bool pause_en, bool pfc_en, u16 delay) +static u16 mlxsw_sp_pg_buf_threshold_get(const struct mlxsw_sp *mlxsw_sp, + int mtu) { - u16 pg_size = 2 * MLXSW_SP_BYTES_TO_CELLS(mtu); + return 2 * mlxsw_sp_bytes_cells(mlxsw_sp, mtu); +} - delay = pfc_en ? mlxsw_sp_pfc_delay_get(mtu, delay) : - MLXSW_SP_PAUSE_DELAY; +#define MLXSW_SP_CELL_FACTOR 2 /* 2 * cell_size / (IPG + cell_size + 1) */ + +static u16 mlxsw_sp_pfc_delay_get(const struct mlxsw_sp *mlxsw_sp, int mtu, + u16 delay) +{ + delay = mlxsw_sp_bytes_cells(mlxsw_sp, DIV_ROUND_UP(delay, + BITS_PER_BYTE)); + return MLXSW_SP_CELL_FACTOR * delay + mlxsw_sp_bytes_cells(mlxsw_sp, + mtu); +} + +/* Maximum delay buffer needed in case of PAUSE frames, in bytes. + * Assumes 100m cable and maximum MTU. 
+ */ +#define MLXSW_SP_PAUSE_DELAY 58752 - if (pause_en || pfc_en) - mlxsw_reg_pbmc_lossless_buffer_pack(pbmc_pl, pg_index, - pg_size + delay, pg_size); +static u16 mlxsw_sp_pg_buf_delay_get(const struct mlxsw_sp *mlxsw_sp, int mtu, + u16 delay, bool pfc, bool pause) +{ + if (pfc) + return mlxsw_sp_pfc_delay_get(mlxsw_sp, mtu, delay); + else if (pause) + return mlxsw_sp_bytes_cells(mlxsw_sp, MLXSW_SP_PAUSE_DELAY); + else + return 0; +} + +static void mlxsw_sp_pg_buf_pack(char *pbmc_pl, int index, u16 size, u16 thres, + bool lossy) +{ + if (lossy) + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, index, size); else - mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pg_index, pg_size); + mlxsw_reg_pbmc_lossless_buffer_pack(pbmc_pl, index, size, + thres); } int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, @@ -778,6 +866,8 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { bool configure = false; bool pfc = false; + bool lossy; + u16 thres; for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) { if (prio_tc[j] == i) { @@ -789,7 +879,12 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, if (!configure) continue; - mlxsw_sp_pg_buf_pack(pbmc_pl, i, mtu, pause_en, pfc, delay); + + lossy = !(pfc || pause_en); + thres = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu); + delay = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay, pfc, + pause_en); + mlxsw_sp_pg_buf_pack(pbmc_pl, i, thres + delay, thres, lossy); } return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); @@ -966,8 +1061,9 @@ mlxsw_sp_port_get_stats64(struct net_device *dev, memcpy(stats, mlxsw_sp_port->hw_stats.cache, sizeof(*stats)); } -int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, - u16 vid_end, bool is_member, bool untagged) +static int __mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid_begin, u16 vid_end, + bool is_member, bool untagged) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char *spvm_pl; @@ -984,6 +1080,26 @@ int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, return err; } +int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, + u16 vid_end, bool is_member, bool untagged) +{ + u16 vid, vid_e; + int err; + + for (vid = vid_begin; vid <= vid_end; + vid += MLXSW_REG_SPVM_REC_MAX_COUNT) { + vid_e = min((u16) (vid + MLXSW_REG_SPVM_REC_MAX_COUNT - 1), + vid_end); + + err = __mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid_e, + is_member, untagged); + if (err) + return err; + } + + return 0; +} + static int mlxsw_sp_port_vp_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) { enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; @@ -1368,7 +1484,7 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, u32 handle, tc->cls_mall); return 0; default: - return -EINVAL; + return -EOPNOTSUPP; } case TC_SETUP_CLSFLOWER: switch (tc->cls_flower->command) { @@ -1379,6 +1495,9 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, u32 handle, mlxsw_sp_flower_destroy(mlxsw_sp_port, ingress, tc->cls_flower); return 0; + case TC_CLSFLOWER_STATS: + return mlxsw_sp_flower_stats(mlxsw_sp_port, ingress, + tc->cls_flower); default: return -EOPNOTSUPP; } @@ -1492,6 +1611,7 @@ err_port_pause_configure: struct mlxsw_sp_port_hw_stats { char str[ETH_GSTRING_LEN]; u64 (*getter)(const char *payload); + bool cells_bytes; }; static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_stats[] = { @@ -1612,17 +1732,11 @@ static 
struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_prio_stats[] = { #define MLXSW_SP_PORT_HW_PRIO_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_prio_stats) -static u64 mlxsw_reg_ppcnt_tc_transmit_queue_bytes_get(const char *ppcnt_pl) -{ - u64 transmit_queue = mlxsw_reg_ppcnt_tc_transmit_queue_get(ppcnt_pl); - - return MLXSW_SP_CELLS_TO_BYTES(transmit_queue); -} - static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_tc_stats[] = { { .str = "tc_transmit_queue_tc", - .getter = mlxsw_reg_ppcnt_tc_transmit_queue_bytes_get, + .getter = mlxsw_reg_ppcnt_tc_transmit_queue_get, + .cells_bytes = true, }, { .str = "tc_no_buffer_discard_uc_tc", @@ -1734,6 +1848,8 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, enum mlxsw_reg_ppcnt_grp grp, int prio, u64 *data, int data_index) { + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_port_hw_stats *hw_stats; char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; int i, len; @@ -1743,8 +1859,13 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, if (err) return; mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl); - for (i = 0; i < len; i++) + for (i = 0; i < len; i++) { data[data_index + i] = hw_stats[i].getter(ppcnt_pl); + if (!hw_stats[i].cells_bytes) + continue; + data[data_index + i] = mlxsw_sp_cells_bytes(mlxsw_sp, + data[data_index + i]); + } } static void mlxsw_sp_port_get_stats(struct net_device *dev, @@ -2537,25 +2658,33 @@ static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp) { int i; - for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) + for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) if (mlxsw_sp_port_created(mlxsw_sp, i)) mlxsw_sp_port_remove(mlxsw_sp, i); + kfree(mlxsw_sp->port_to_module); kfree(mlxsw_sp->ports); } static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) { + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); u8 module, width, lane; size_t alloc_size; int i; int err; - alloc_size = sizeof(struct mlxsw_sp_port *) * MLXSW_PORT_MAX_PORTS; + alloc_size = sizeof(struct mlxsw_sp_port *) * max_ports; mlxsw_sp->ports = kzalloc(alloc_size, GFP_KERNEL); if (!mlxsw_sp->ports) return -ENOMEM; - for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) { + mlxsw_sp->port_to_module = kcalloc(max_ports, sizeof(u8), GFP_KERNEL); + if (!mlxsw_sp->port_to_module) { + err = -ENOMEM; + goto err_port_to_module_alloc; + } + + for (i = 1; i < max_ports; i++) { err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, &module, &width, &lane); if (err) @@ -2575,6 +2704,8 @@ err_port_module_info_get: for (i--; i >= 1; i--) if (mlxsw_sp_port_created(mlxsw_sp, i)) mlxsw_sp_port_remove(mlxsw_sp, i); + kfree(mlxsw_sp->port_to_module); +err_port_to_module_alloc: kfree(mlxsw_sp->ports); return err; } @@ -2877,6 +3008,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_NO_MARK(IGMP_V3_REPORT, TRAP_TO_CPU, IGMP, false), MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, ARP, false), MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, ARP, false), + MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, IP2ME, false), /* L3 traps */ MLXSW_SP_RXL_NO_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false), MLXSW_SP_RXL_NO_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false), @@ -3158,6 +3290,18 @@ static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); } +static int mlxsw_sp_vfid_op(struct mlxsw_sp *mlxsw_sp, u16 fid, bool create); + +static int mlxsw_sp_dummy_fid_init(struct mlxsw_sp *mlxsw_sp) +{ + return 
mlxsw_sp_vfid_op(mlxsw_sp, MLXSW_SP_DUMMY_FID, true); +} + +static void mlxsw_sp_dummy_fid_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_vfid_op(mlxsw_sp, MLXSW_SP_DUMMY_FID, false); +} + static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info) { @@ -3224,6 +3368,24 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_acl_init; } + err = mlxsw_sp_counter_pool_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init counter pool\n"); + goto err_counter_pool_init; + } + + err = mlxsw_sp_dpipe_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init pipeline debug\n"); + goto err_dpipe_init; + } + + err = mlxsw_sp_dummy_fid_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init dummy FID\n"); + goto err_dummy_fid_init; + } + err = mlxsw_sp_ports_create(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n"); @@ -3233,6 +3395,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, return 0; err_ports_create: + mlxsw_sp_dummy_fid_fini(mlxsw_sp); +err_dummy_fid_init: + mlxsw_sp_dpipe_fini(mlxsw_sp); +err_dpipe_init: + mlxsw_sp_counter_pool_fini(mlxsw_sp); +err_counter_pool_init: mlxsw_sp_acl_fini(mlxsw_sp); err_acl_init: mlxsw_sp_span_fini(mlxsw_sp); @@ -3255,6 +3423,9 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); mlxsw_sp_ports_remove(mlxsw_sp); + mlxsw_sp_dummy_fid_fini(mlxsw_sp); + mlxsw_sp_dpipe_fini(mlxsw_sp); + mlxsw_sp_counter_pool_fini(mlxsw_sp); mlxsw_sp_acl_fini(mlxsw_sp); mlxsw_sp_span_fini(mlxsw_sp); mlxsw_sp_router_fini(mlxsw_sp); @@ -3326,13 +3497,13 @@ bool mlxsw_sp_port_dev_check(const struct net_device *dev) return dev->netdev_ops == &mlxsw_sp_port_netdev_ops; } -static int mlxsw_lower_dev_walk(struct net_device *lower_dev, void *data) +static int mlxsw_sp_lower_dev_walk(struct net_device *lower_dev, void *data) { - struct mlxsw_sp_port **port = data; + struct mlxsw_sp_port **p_mlxsw_sp_port = data; int ret = 0; if (mlxsw_sp_port_dev_check(lower_dev)) { - *port = netdev_priv(lower_dev); + *p_mlxsw_sp_port = netdev_priv(lower_dev); ret = 1; } @@ -3341,18 +3512,18 @@ static int mlxsw_lower_dev_walk(struct net_device *lower_dev, void *data) static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev) { - struct mlxsw_sp_port *port; + struct mlxsw_sp_port *mlxsw_sp_port; if (mlxsw_sp_port_dev_check(dev)) return netdev_priv(dev); - port = NULL; - netdev_walk_all_lower_dev(dev, mlxsw_lower_dev_walk, &port); + mlxsw_sp_port = NULL; + netdev_walk_all_lower_dev(dev, mlxsw_sp_lower_dev_walk, &mlxsw_sp_port); - return port; + return mlxsw_sp_port; } -static struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev) +struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev) { struct mlxsw_sp_port *mlxsw_sp_port; @@ -3362,15 +3533,16 @@ static struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev) static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev) { - struct mlxsw_sp_port *port; + struct mlxsw_sp_port *mlxsw_sp_port; if (mlxsw_sp_port_dev_check(dev)) return netdev_priv(dev); - port = NULL; - netdev_walk_all_lower_dev_rcu(dev, mlxsw_lower_dev_walk, &port); + mlxsw_sp_port = NULL; + netdev_walk_all_lower_dev_rcu(dev, mlxsw_sp_lower_dev_walk, + &mlxsw_sp_port); - return port; + return mlxsw_sp_port; } struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev) @@ -3390,546 +3562,6 
@@ void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port) dev_put(mlxsw_sp_port->dev); } -static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *r, - unsigned long event) -{ - switch (event) { - case NETDEV_UP: - if (!r) - return true; - r->ref_count++; - return false; - case NETDEV_DOWN: - if (r && --r->ref_count == 0) - return true; - /* It is possible we already removed the RIF ourselves - * if it was assigned to a netdev that is now a bridge - * or LAG slave. - */ - return false; - } - - return false; -} - -static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp) -{ - int i; - - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) - if (!mlxsw_sp->rifs[i]) - return i; - - return MLXSW_SP_INVALID_RIF; -} - -static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport, - bool *p_lagged, u16 *p_system_port) -{ - u8 local_port = mlxsw_sp_vport->local_port; - - *p_lagged = mlxsw_sp_vport->lagged; - *p_system_port = *p_lagged ? mlxsw_sp_vport->lag_id : local_port; -} - -static int mlxsw_sp_vport_rif_sp_op(struct mlxsw_sp_port *mlxsw_sp_vport, - struct net_device *l3_dev, u16 rif, - bool create) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; - bool lagged = mlxsw_sp_vport->lagged; - char ritr_pl[MLXSW_REG_RITR_LEN]; - u16 system_port; - - mlxsw_reg_ritr_pack(ritr_pl, create, MLXSW_REG_RITR_SP_IF, rif, - l3_dev->mtu, l3_dev->dev_addr); - - mlxsw_sp_vport_rif_sp_attr_get(mlxsw_sp_vport, &lagged, &system_port); - mlxsw_reg_ritr_sp_if_pack(ritr_pl, lagged, system_port, - mlxsw_sp_vport_vid_get(mlxsw_sp_vport)); - - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); -} - -static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport); - -static struct mlxsw_sp_fid * -mlxsw_sp_rfid_alloc(u16 fid, struct net_device *l3_dev) -{ - struct mlxsw_sp_fid *f; - - f = kzalloc(sizeof(*f), GFP_KERNEL); - if (!f) - return NULL; - - f->leave = mlxsw_sp_vport_rif_sp_leave; - f->ref_count = 0; - f->dev = l3_dev; - f->fid = fid; - - return f; -} - -static struct mlxsw_sp_rif * -mlxsw_sp_rif_alloc(u16 rif, struct net_device *l3_dev, struct mlxsw_sp_fid *f) -{ - struct mlxsw_sp_rif *r; - - r = kzalloc(sizeof(*r), GFP_KERNEL); - if (!r) - return NULL; - - INIT_LIST_HEAD(&r->nexthop_list); - INIT_LIST_HEAD(&r->neigh_list); - ether_addr_copy(r->addr, l3_dev->dev_addr); - r->mtu = l3_dev->mtu; - r->ref_count = 1; - r->dev = l3_dev; - r->rif = rif; - r->f = f; - - return r; -} - -static struct mlxsw_sp_rif * -mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport, - struct net_device *l3_dev) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; - struct mlxsw_sp_fid *f; - struct mlxsw_sp_rif *r; - u16 fid, rif; - int err; - - rif = mlxsw_sp_avail_rif_get(mlxsw_sp); - if (rif == MLXSW_SP_INVALID_RIF) - return ERR_PTR(-ERANGE); - - err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, true); - if (err) - return ERR_PTR(err); - - fid = mlxsw_sp_rif_sp_to_fid(rif); - err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, true); - if (err) - goto err_rif_fdb_op; - - f = mlxsw_sp_rfid_alloc(fid, l3_dev); - if (!f) { - err = -ENOMEM; - goto err_rfid_alloc; - } - - r = mlxsw_sp_rif_alloc(rif, l3_dev, f); - if (!r) { - err = -ENOMEM; - goto err_rif_alloc; - } - - f->r = r; - mlxsw_sp->rifs[rif] = r; - - return r; - -err_rif_alloc: - kfree(f); -err_rfid_alloc: - mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false); -err_rif_fdb_op: - mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, false); - return 
ERR_PTR(err); -} - -static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport, - struct mlxsw_sp_rif *r) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; - struct net_device *l3_dev = r->dev; - struct mlxsw_sp_fid *f = r->f; - u16 fid = f->fid; - u16 rif = r->rif; - - mlxsw_sp_router_rif_gone_sync(mlxsw_sp, r); - - mlxsw_sp->rifs[rif] = NULL; - f->r = NULL; - - kfree(r); - - kfree(f); - - mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false); - - mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, false); -} - -static int mlxsw_sp_vport_rif_sp_join(struct mlxsw_sp_port *mlxsw_sp_vport, - struct net_device *l3_dev) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; - struct mlxsw_sp_rif *r; - - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); - if (!r) { - r = mlxsw_sp_vport_rif_sp_create(mlxsw_sp_vport, l3_dev); - if (IS_ERR(r)) - return PTR_ERR(r); - } - - mlxsw_sp_vport_fid_set(mlxsw_sp_vport, r->f); - r->f->ref_count++; - - netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", r->f->fid); - - return 0; -} - -static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport) -{ - struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); - - netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid); - - mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL); - if (--f->ref_count == 0) - mlxsw_sp_vport_rif_sp_destroy(mlxsw_sp_vport, f->r); -} - -static int mlxsw_sp_inetaddr_vport_event(struct net_device *l3_dev, - struct net_device *port_dev, - unsigned long event, u16 vid) -{ - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev); - struct mlxsw_sp_port *mlxsw_sp_vport; - - mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); - if (WARN_ON(!mlxsw_sp_vport)) - return -EINVAL; - - switch (event) { - case NETDEV_UP: - return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, l3_dev); - case NETDEV_DOWN: - mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport); - break; - } - - return 0; -} - -static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev, - unsigned long event) -{ - if (netif_is_bridge_port(port_dev) || netif_is_lag_port(port_dev)) - return 0; - - return mlxsw_sp_inetaddr_vport_event(port_dev, port_dev, event, 1); -} - -static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, - struct net_device *lag_dev, - unsigned long event, u16 vid) -{ - struct net_device *port_dev; - struct list_head *iter; - int err; - - netdev_for_each_lower_dev(lag_dev, port_dev, iter) { - if (mlxsw_sp_port_dev_check(port_dev)) { - err = mlxsw_sp_inetaddr_vport_event(l3_dev, port_dev, - event, vid); - if (err) - return err; - } - } - - return 0; -} - -static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev, - unsigned long event) -{ - if (netif_is_bridge_port(lag_dev)) - return 0; - - return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1); -} - -static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, - struct net_device *l3_dev) -{ - u16 fid; - - if (is_vlan_dev(l3_dev)) - fid = vlan_dev_vlan_id(l3_dev); - else if (mlxsw_sp->master_bridge.dev == l3_dev) - fid = 1; - else - return mlxsw_sp_vfid_find(mlxsw_sp, l3_dev); - - return mlxsw_sp_fid_find(mlxsw_sp, fid); -} - -static enum mlxsw_flood_table_type mlxsw_sp_flood_table_type_get(u16 fid) -{ - return mlxsw_sp_fid_is_vfid(fid) ? MLXSW_REG_SFGC_TABLE_TYPE_FID : - MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; -} - -static u16 mlxsw_sp_flood_table_index_get(u16 fid) -{ - return mlxsw_sp_fid_is_vfid(fid) ? 
mlxsw_sp_fid_to_vfid(fid) : fid; -} - -static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid, - bool set) -{ - enum mlxsw_flood_table_type table_type; - char *sftr_pl; - u16 index; - int err; - - sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); - if (!sftr_pl) - return -ENOMEM; - - table_type = mlxsw_sp_flood_table_type_get(fid); - index = mlxsw_sp_flood_table_index_get(fid); - mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BC, index, table_type, - 1, MLXSW_PORT_ROUTER_PORT, set); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); - - kfree(sftr_pl); - return err; -} - -static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid) -{ - if (mlxsw_sp_fid_is_vfid(fid)) - return MLXSW_REG_RITR_FID_IF; - else - return MLXSW_REG_RITR_VLAN_IF; -} - -static int mlxsw_sp_rif_bridge_op(struct mlxsw_sp *mlxsw_sp, - struct net_device *l3_dev, - u16 fid, u16 rif, - bool create) -{ - enum mlxsw_reg_ritr_if_type rif_type; - char ritr_pl[MLXSW_REG_RITR_LEN]; - - rif_type = mlxsw_sp_rif_type_get(fid); - mlxsw_reg_ritr_pack(ritr_pl, create, rif_type, rif, l3_dev->mtu, - l3_dev->dev_addr); - mlxsw_reg_ritr_fid_set(ritr_pl, rif_type, fid); - - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); -} - -static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp, - struct net_device *l3_dev, - struct mlxsw_sp_fid *f) -{ - struct mlxsw_sp_rif *r; - u16 rif; - int err; - - rif = mlxsw_sp_avail_rif_get(mlxsw_sp); - if (rif == MLXSW_SP_INVALID_RIF) - return -ERANGE; - - err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true); - if (err) - return err; - - err = mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, true); - if (err) - goto err_rif_bridge_op; - - err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true); - if (err) - goto err_rif_fdb_op; - - r = mlxsw_sp_rif_alloc(rif, l3_dev, f); - if (!r) { - err = -ENOMEM; - goto err_rif_alloc; - } - - f->r = r; - mlxsw_sp->rifs[rif] = r; - - netdev_dbg(l3_dev, "RIF=%d created\n", rif); - - return 0; - -err_rif_alloc: - mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false); -err_rif_fdb_op: - mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false); -err_rif_bridge_op: - mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false); - return err; -} - -void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *r) -{ - struct net_device *l3_dev = r->dev; - struct mlxsw_sp_fid *f = r->f; - u16 rif = r->rif; - - mlxsw_sp_router_rif_gone_sync(mlxsw_sp, r); - - mlxsw_sp->rifs[rif] = NULL; - f->r = NULL; - - kfree(r); - - mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false); - - mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false); - - mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false); - - netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif); -} - -static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev, - struct net_device *br_dev, - unsigned long event) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev); - struct mlxsw_sp_fid *f; - - /* FID can either be an actual FID if the L3 device is the - * VLAN-aware bridge or a VLAN device on top. Otherwise, the - * L3 device is a VLAN-unaware bridge and we get a vFID. 
- */ - f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev); - if (WARN_ON(!f)) - return -EINVAL; - - switch (event) { - case NETDEV_UP: - return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f); - case NETDEV_DOWN: - mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); - break; - } - - return 0; -} - -static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev, - unsigned long event) -{ - struct net_device *real_dev = vlan_dev_real_dev(vlan_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev); - u16 vid = vlan_dev_vlan_id(vlan_dev); - - if (mlxsw_sp_port_dev_check(real_dev)) - return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event, - vid); - else if (netif_is_lag_master(real_dev)) - return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event, - vid); - else if (netif_is_bridge_master(real_dev) && - mlxsw_sp->master_bridge.dev == real_dev) - return mlxsw_sp_inetaddr_bridge_event(vlan_dev, real_dev, - event); - - return 0; -} - -static int mlxsw_sp_inetaddr_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct in_ifaddr *ifa = (struct in_ifaddr *) ptr; - struct net_device *dev = ifa->ifa_dev->dev; - struct mlxsw_sp *mlxsw_sp; - struct mlxsw_sp_rif *r; - int err = 0; - - mlxsw_sp = mlxsw_sp_lower_get(dev); - if (!mlxsw_sp) - goto out; - - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!mlxsw_sp_rif_should_config(r, event)) - goto out; - - if (mlxsw_sp_port_dev_check(dev)) - err = mlxsw_sp_inetaddr_port_event(dev, event); - else if (netif_is_lag_master(dev)) - err = mlxsw_sp_inetaddr_lag_event(dev, event); - else if (netif_is_bridge_master(dev)) - err = mlxsw_sp_inetaddr_bridge_event(dev, dev, event); - else if (is_vlan_dev(dev)) - err = mlxsw_sp_inetaddr_vlan_event(dev, event); - -out: - return notifier_from_errno(err); -} - -static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif, - const char *mac, int mtu) -{ - char ritr_pl[MLXSW_REG_RITR_LEN]; - int err; - - mlxsw_reg_ritr_rif_pack(ritr_pl, rif); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); - if (err) - return err; - - mlxsw_reg_ritr_mtu_set(ritr_pl, mtu); - mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac); - mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); -} - -static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev) -{ - struct mlxsw_sp *mlxsw_sp; - struct mlxsw_sp_rif *r; - int err; - - mlxsw_sp = mlxsw_sp_lower_get(dev); - if (!mlxsw_sp) - return 0; - - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!r) - return 0; - - err = mlxsw_sp_rif_fdb_op(mlxsw_sp, r->addr, r->f->fid, false); - if (err) - return err; - - err = mlxsw_sp_rif_edit(mlxsw_sp, r->rif, dev->dev_addr, dev->mtu); - if (err) - goto err_rif_edit; - - err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, r->f->fid, true); - if (err) - goto err_rif_fdb_op; - - ether_addr_copy(r->addr, dev->dev_addr); - r->mtu = dev->mtu; - - netdev_dbg(dev, "Updated RIF=%d\n", r->rif); - - return 0; - -err_rif_fdb_op: - mlxsw_sp_rif_edit(mlxsw_sp, r->rif, r->addr, r->mtu); -err_rif_edit: - mlxsw_sp_rif_fdb_op(mlxsw_sp, r->addr, r->f->fid, true); - return err; -} - static bool mlxsw_sp_lag_port_fid_member(struct mlxsw_sp_port *lag_port, u16 fid) { @@ -4220,7 +3852,7 @@ static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_port_pvid_vport_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, - u16 lag_id) + struct net_device *lag_dev, u16 lag_id) { struct mlxsw_sp_port *mlxsw_sp_vport; struct 
mlxsw_sp_fid *f; @@ -4238,6 +3870,7 @@ mlxsw_sp_port_pvid_vport_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_vport->lag_id = lag_id; mlxsw_sp_vport->lagged = 1; + mlxsw_sp_vport->dev = lag_dev; } static void @@ -4254,6 +3887,7 @@ mlxsw_sp_port_pvid_vport_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port) if (f) f->leave(mlxsw_sp_vport); + mlxsw_sp_vport->dev = mlxsw_sp_port->dev; mlxsw_sp_vport->lagged = 0; } @@ -4293,7 +3927,7 @@ static int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port->lagged = 1; lag->ref_count++; - mlxsw_sp_port_pvid_vport_lag_join(mlxsw_sp_port, lag_id); + mlxsw_sp_port_pvid_vport_lag_join(mlxsw_sp_port, lag_dev, lag_id); return 0; @@ -4403,6 +4037,56 @@ static void mlxsw_sp_port_vlan_unlink(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_vport->dev = mlxsw_sp_port->dev; } +static int mlxsw_sp_port_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool enable) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + enum mlxsw_reg_spms_state spms_state; + char *spms_pl; + u16 vid; + int err; + + spms_state = enable ? MLXSW_REG_SPMS_STATE_FORWARDING : + MLXSW_REG_SPMS_STATE_DISCARDING; + + spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL); + if (!spms_pl) + return -ENOMEM; + mlxsw_reg_spms_pack(spms_pl, mlxsw_sp_port->local_port); + + for (vid = 0; vid < VLAN_N_VID; vid++) + mlxsw_reg_spms_vid_pack(spms_pl, vid, spms_state); + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spms), spms_pl); + kfree(spms_pl); + return err; +} + +static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = mlxsw_sp_port_stp_set(mlxsw_sp_port, true); + if (err) + return err; + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, 2, VLAN_N_VID - 1, + true, false); + if (err) + goto err_port_vlan_set; + return 0; + +err_port_vlan_set: + mlxsw_sp_port_stp_set(mlxsw_sp_port, false); + return err; +} + +static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port_vlan_set(mlxsw_sp_port, 2, VLAN_N_VID - 1, + false, false); + mlxsw_sp_port_stp_set(mlxsw_sp_port, false); +} + static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, unsigned long event, void *ptr) { @@ -4421,7 +4105,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, upper_dev = info->upper_dev; if (!is_vlan_dev(upper_dev) && !netif_is_lag_master(upper_dev) && - !netif_is_bridge_master(upper_dev)) + !netif_is_bridge_master(upper_dev) && + !netif_is_ovs_master(upper_dev)) return -EINVAL; if (!info->linking) break; @@ -4438,6 +4123,10 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) && !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) return -EINVAL; + if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) + return -EINVAL; + if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) + return -EINVAL; break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; @@ -4446,8 +4135,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, err = mlxsw_sp_port_vlan_link(mlxsw_sp_port, upper_dev); else - mlxsw_sp_port_vlan_unlink(mlxsw_sp_port, - upper_dev); + mlxsw_sp_port_vlan_unlink(mlxsw_sp_port, + upper_dev); } else if (netif_is_bridge_master(upper_dev)) { if (info->linking) err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, @@ -4461,6 +4150,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, else mlxsw_sp_port_lag_leave(mlxsw_sp_port, upper_dev); + } else if (netif_is_ovs_master(upper_dev)) { 
+ if (info->linking) + err = mlxsw_sp_port_ovs_join(mlxsw_sp_port); + else + mlxsw_sp_port_ovs_leave(mlxsw_sp_port); } else { err = -EINVAL; WARN_ON(1); @@ -4552,8 +4246,8 @@ static void mlxsw_sp_master_bridge_vlan_unlink(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f; f = mlxsw_sp_fid_find(mlxsw_sp, fid); - if (f && f->r) - mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); + if (f && f->rif) + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif); if (f && --f->ref_count == 0) mlxsw_sp_fid_destroy(mlxsw_sp, f); } @@ -4564,33 +4258,40 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev, struct netdev_notifier_changeupper_info *info; struct net_device *upper_dev; struct mlxsw_sp *mlxsw_sp; - int err; + int err = 0; mlxsw_sp = mlxsw_sp_lower_get(br_dev); if (!mlxsw_sp) return 0; - if (br_dev != mlxsw_sp->master_bridge.dev) - return 0; info = ptr; switch (event) { - case NETDEV_CHANGEUPPER: + case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; if (!is_vlan_dev(upper_dev)) - break; - if (info->linking) { - err = mlxsw_sp_master_bridge_vlan_link(mlxsw_sp, - upper_dev); - if (err) - return err; + return -EINVAL; + if (is_vlan_dev(upper_dev) && + br_dev != mlxsw_sp->master_bridge.dev) + return -EINVAL; + break; + case NETDEV_CHANGEUPPER: + upper_dev = info->upper_dev; + if (is_vlan_dev(upper_dev)) { + if (info->linking) + err = mlxsw_sp_master_bridge_vlan_link(mlxsw_sp, + upper_dev); + else + mlxsw_sp_master_bridge_vlan_unlink(mlxsw_sp, + upper_dev); } else { - mlxsw_sp_master_bridge_vlan_unlink(mlxsw_sp, upper_dev); + err = -EINVAL; + WARN_ON(1); } break; } - return 0; + return err; } static u16 mlxsw_sp_avail_vfid_get(const struct mlxsw_sp *mlxsw_sp) @@ -4657,8 +4358,8 @@ static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, clear_bit(vfid, mlxsw_sp->vfids.mapped); list_del(&f->list); - if (f->r) - mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); + if (f->rif) + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif); kfree(f); @@ -4810,6 +4511,8 @@ static int mlxsw_sp_netdevice_vport_event(struct net_device *dev, int err = 0; mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); + if (!mlxsw_sp_vport) + return 0; switch (event) { case NETDEV_PRECHANGEUPPER: @@ -4821,22 +4524,24 @@ static int mlxsw_sp_netdevice_vport_event(struct net_device *dev, /* We can't have multiple VLAN interfaces configured on * the same port and being members in the same bridge. 
*/ - if (!mlxsw_sp_port_master_bridge_check(mlxsw_sp_port, + if (netif_is_bridge_master(upper_dev) && + !mlxsw_sp_port_master_bridge_check(mlxsw_sp_port, upper_dev)) return -EINVAL; break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; - if (info->linking) { - if (WARN_ON(!mlxsw_sp_vport)) - return -EINVAL; - err = mlxsw_sp_vport_bridge_join(mlxsw_sp_vport, - upper_dev); + if (netif_is_bridge_master(upper_dev)) { + if (info->linking) + err = mlxsw_sp_vport_bridge_join(mlxsw_sp_vport, + upper_dev); + else + mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport); } else { - if (!mlxsw_sp_vport) - return 0; - mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport); + err = -EINVAL; + WARN_ON(1); } + break; } return err; @@ -4878,6 +4583,15 @@ static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev, return 0; } +static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr) +{ + struct netdev_notifier_changeupper_info *info = ptr; + + if (event != NETDEV_PRECHANGEUPPER && event != NETDEV_CHANGEUPPER) + return false; + return netif_is_l3_master(info->upper_dev); +} + static int mlxsw_sp_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) { @@ -4886,6 +4600,8 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU) err = mlxsw_sp_netdevice_router_port_event(dev); + else if (mlxsw_sp_is_vrf_event(event, ptr)) + err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); else if (mlxsw_sp_port_dev_check(dev)) err = mlxsw_sp_netdevice_port_event(dev, event, ptr); else if (netif_is_lag_master(dev)) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 13ec85e7c392..0c23bc1e946d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -57,41 +57,21 @@ #define MLXSW_SP_VFID_BASE VLAN_N_VID #define MLXSW_SP_VFID_MAX 1024 /* Bridged VLAN interfaces */ +#define MLXSW_SP_DUMMY_FID 15359 + #define MLXSW_SP_RFID_BASE 15360 -#define MLXSW_SP_INVALID_RIF 0xffff #define MLXSW_SP_MID_MAX 7000 #define MLXSW_SP_PORTS_PER_CLUSTER_MAX 4 -#define MLXSW_SP_LPM_TREE_MIN 2 /* trees 0 and 1 are reserved */ -#define MLXSW_SP_LPM_TREE_MAX 22 -#define MLXSW_SP_LPM_TREE_COUNT (MLXSW_SP_LPM_TREE_MAX - MLXSW_SP_LPM_TREE_MIN) - #define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */ -#define MLXSW_SP_BYTES_PER_CELL 96 - -#define MLXSW_SP_BYTES_TO_CELLS(b) DIV_ROUND_UP(b, MLXSW_SP_BYTES_PER_CELL) -#define MLXSW_SP_CELLS_TO_BYTES(c) (c * MLXSW_SP_BYTES_PER_CELL) - #define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */ #define MLXSW_SP_KVD_GRANULARITY 128 -/* Maximum delay buffer needed in case of PAUSE frames, in cells. - * Assumes 100m cable and maximum MTU. 
- */ -#define MLXSW_SP_PAUSE_DELAY 612 - -#define MLXSW_SP_CELL_FACTOR 2 /* 2 * cell_size / (IPG + cell_size + 1) */ - -static inline u16 mlxsw_sp_pfc_delay_get(int mtu, u16 delay) -{ - delay = MLXSW_SP_BYTES_TO_CELLS(DIV_ROUND_UP(delay, BITS_PER_BYTE)); - return MLXSW_SP_CELL_FACTOR * delay + MLXSW_SP_BYTES_TO_CELLS(mtu); -} - struct mlxsw_sp_port; +struct mlxsw_sp_rif; struct mlxsw_sp_upper { struct net_device *dev; @@ -103,21 +83,10 @@ struct mlxsw_sp_fid { struct list_head list; unsigned int ref_count; struct net_device *dev; - struct mlxsw_sp_rif *r; + struct mlxsw_sp_rif *rif; u16 fid; }; -struct mlxsw_sp_rif { - struct list_head nexthop_list; - struct list_head neigh_list; - struct net_device *dev; - unsigned int ref_count; - struct mlxsw_sp_fid *f; - unsigned char addr[ETH_ALEN]; - int mtu; - u16 rif; -}; - struct mlxsw_sp_mid { struct list_head list; unsigned char addr[ETH_ALEN]; @@ -138,17 +107,7 @@ static inline u16 mlxsw_sp_fid_to_vfid(u16 fid) static inline bool mlxsw_sp_fid_is_vfid(u16 fid) { - return fid >= MLXSW_SP_VFID_BASE && fid < MLXSW_SP_RFID_BASE; -} - -static inline bool mlxsw_sp_fid_is_rfid(u16 fid) -{ - return fid >= MLXSW_SP_RFID_BASE; -} - -static inline u16 mlxsw_sp_rif_sp_to_fid(u16 rif) -{ - return MLXSW_SP_RFID_BASE + rif; + return fid >= MLXSW_SP_VFID_BASE && fid < MLXSW_SP_DUMMY_FID; } struct mlxsw_sp_sb_pr { @@ -177,12 +136,15 @@ struct mlxsw_sp_sb_pm { #define MLXSW_SP_SB_POOL_COUNT 4 #define MLXSW_SP_SB_TC_COUNT 8 +struct mlxsw_sp_sb_port { + struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT]; + struct mlxsw_sp_sb_pm pms[2][MLXSW_SP_SB_POOL_COUNT]; +}; + struct mlxsw_sp_sb { struct mlxsw_sp_sb_pr prs[2][MLXSW_SP_SB_POOL_COUNT]; - struct { - struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT]; - struct mlxsw_sp_sb_pm pms[2][MLXSW_SP_SB_POOL_COUNT]; - } ports[MLXSW_PORT_MAX_PORTS]; + struct mlxsw_sp_sb_port *ports; + u32 cell_size; }; #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE) @@ -207,11 +169,9 @@ struct mlxsw_sp_fib; struct mlxsw_sp_vr { u16 id; /* virtual router ID */ - bool used; - enum mlxsw_sp_l3proto proto; u32 tb_id; /* kernel fib table id */ - struct mlxsw_sp_lpm_tree *lpm_tree; - struct mlxsw_sp_fib *fib; + unsigned int rif_count; + struct mlxsw_sp_fib *fib4; }; enum mlxsw_sp_span_type { @@ -253,12 +213,15 @@ struct mlxsw_sp_port_mall_tc_entry { }; struct mlxsw_sp_router { - struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT]; struct mlxsw_sp_vr *vrs; struct rhashtable neigh_ht; struct rhashtable nexthop_group_ht; struct rhashtable nexthop_ht; struct { + struct mlxsw_sp_lpm_tree *trees; + unsigned int tree_count; + } lpm; + struct { struct delayed_work dw; unsigned long interval; /* ms */ } neighs_update; @@ -269,6 +232,7 @@ struct mlxsw_sp_router { }; struct mlxsw_sp_acl; +struct mlxsw_sp_counter_pool; struct mlxsw_sp { struct { @@ -296,7 +260,7 @@ struct mlxsw_sp { u32 ageing_time; struct mlxsw_sp_upper master_bridge; struct mlxsw_sp_upper *lags; - u8 port_to_module[MLXSW_PORT_MAX_PORTS]; + u8 *port_to_module; struct mlxsw_sp_sb sb; struct mlxsw_sp_router router; struct mlxsw_sp_acl *acl; @@ -304,6 +268,7 @@ struct mlxsw_sp { DECLARE_BITMAP(usage, MLXSW_SP_KVD_LINEAR_SIZE); } kvdl; + struct mlxsw_sp_counter_pool *counter_pool; struct { struct mlxsw_sp_span_entry *entries; int entries_count; @@ -317,6 +282,18 @@ mlxsw_sp_lag_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id) return &mlxsw_sp->lags[lag_id]; } +static inline u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, + u32 cells) +{ + return 
mlxsw_sp->sb.cell_size * cells; +} + +static inline u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, + u32 bytes) +{ + return DIV_ROUND_UP(bytes, mlxsw_sp->sb.cell_size); +} + struct mlxsw_sp_port_pcpu_stats { u64 rx_packets; u64 rx_bytes; @@ -386,6 +363,7 @@ struct mlxsw_sp_port { }; bool mlxsw_sp_port_dev_check(const struct net_device *dev); +struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev); struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev); void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port); @@ -497,19 +475,6 @@ mlxsw_sp_vfid_find(const struct mlxsw_sp *mlxsw_sp, return NULL; } -static inline struct mlxsw_sp_rif * -mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev) -{ - int i; - - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) - if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev) - return mlxsw_sp->rifs[i]; - - return NULL; -} - enum mlxsw_sp_flood_table { MLXSW_SP_FLOOD_TABLE_UC, MLXSW_SP_FLOOD_TABLE_BC, @@ -570,8 +535,6 @@ int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, bool adding); struct mlxsw_sp_fid *mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid); void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f); -void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *r); int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, bool dwrr, u8 dwrr_weight); @@ -608,10 +571,16 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_router_netevent_event(struct notifier_block *unused, unsigned long event, void *ptr); -void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *r); +int mlxsw_sp_netdevice_router_port_event(struct net_device *dev); +int mlxsw_sp_inetaddr_event(struct notifier_block *unused, + unsigned long event, void *ptr); +void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif); +int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, + struct netdev_notifier_changeupper_info *info); -int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count); +int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count, + u32 *p_entry_index); void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index); struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl); @@ -620,6 +589,8 @@ struct mlxsw_sp_acl_rule_info { unsigned int priority; struct mlxsw_afk_element_values values; struct mlxsw_afa_block *act_block; + unsigned int counter_index; + bool counter_valid; }; enum mlxsw_sp_acl_profile { @@ -639,6 +610,8 @@ struct mlxsw_sp_acl_profile_ops { void *ruleset_priv, void *rule_priv, struct mlxsw_sp_acl_rule_info *rulei); void (*rule_del)(struct mlxsw_sp *mlxsw_sp, void *rule_priv); + int (*rule_activity_get)(struct mlxsw_sp *mlxsw_sp, void *rule_priv, + bool *activity); }; struct mlxsw_sp_acl_ops { @@ -679,6 +652,14 @@ int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct net_device *out_dev); +int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 action, u16 vid, u16 proto, u8 prio); +int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info 
*rulei); +int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u16 fid); struct mlxsw_sp_acl_rule; @@ -698,6 +679,9 @@ mlxsw_sp_acl_rule_lookup(struct mlxsw_sp *mlxsw_sp, unsigned long cookie); struct mlxsw_sp_acl_rule_info * mlxsw_sp_acl_rule_rulei(struct mlxsw_sp_acl_rule *rule); +int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule, + u64 *packets, u64 *bytes, u64 *last_use); int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp); @@ -708,5 +692,14 @@ int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, __be16 protocol, struct tc_cls_flower_offload *f); void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, struct tc_cls_flower_offload *f); +int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, + struct tc_cls_flower_offload *f); +int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index, u64 *packets, + u64 *bytes); +int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp, + unsigned int *p_counter_index); +void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 8a18b3aa70dc..317f7b14627f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -39,6 +39,7 @@ #include <linux/string.h> #include <linux/rhashtable.h> #include <linux/netdevice.h> +#include <net/tc_act/tc_vlan.h> #include "reg.h" #include "core.h" @@ -49,10 +50,17 @@ #include "spectrum_acl_flex_keys.h" struct mlxsw_sp_acl { + struct mlxsw_sp *mlxsw_sp; struct mlxsw_afk *afk; struct mlxsw_afa *afa; const struct mlxsw_sp_acl_ops *ops; struct rhashtable ruleset_ht; + struct list_head rules; + struct { + struct delayed_work dw; + unsigned long interval; /* ms */ +#define MLXSW_SP_ACL_RULE_ACTIVITY_UPDATE_PERIOD_MS 1000 + } rule_activity_update; unsigned long priv[0]; /* priv has to be always the last item */ }; @@ -79,9 +87,13 @@ struct mlxsw_sp_acl_ruleset { struct mlxsw_sp_acl_rule { struct rhash_head ht_node; /* Member of rule HT */ + struct list_head list; unsigned long cookie; /* HT key */ struct mlxsw_sp_acl_ruleset *ruleset; struct mlxsw_sp_acl_rule_info *rulei; + u64 last_used; + u64 last_packets; + u64 last_bytes; unsigned long priv[0]; /* priv has to be always the last item */ }; @@ -237,6 +249,27 @@ void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset); } +static int +mlxsw_sp_acl_rulei_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei) +{ + int err; + + err = mlxsw_sp_flow_counter_alloc(mlxsw_sp, &rulei->counter_index); + if (err) + return err; + rulei->counter_valid = true; + return 0; +} + +static void +mlxsw_sp_acl_rulei_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei) +{ + rulei->counter_valid = false; + mlxsw_sp_flow_counter_free(mlxsw_sp, rulei->counter_index); +} + struct mlxsw_sp_acl_rule_info * mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl) { @@ -335,6 +368,48 @@ int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, local_port, in_port); } +int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 action, u16 vid, u16 proto, u8 prio) +{ + u8 ethertype; + + if (action == 
TCA_VLAN_ACT_MODIFY) { + switch (proto) { + case ETH_P_8021Q: + ethertype = 0; + break; + case ETH_P_8021AD: + ethertype = 1; + break; + default: + dev_err(mlxsw_sp->bus_info->dev, "Unsupported VLAN protocol %#04x\n", + proto); + return -EINVAL; + } + + return mlxsw_afa_block_append_vlan_modify(rulei->act_block, + vid, prio, ethertype); + } else { + dev_err(mlxsw_sp->bus_info->dev, "Unsupported VLAN action\n"); + return -EINVAL; + } +} + +int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei) +{ + return mlxsw_afa_block_append_counter(rulei->act_block, + rulei->counter_index); +} + +int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u16 fid) +{ + return mlxsw_afa_block_append_fid_set(rulei->act_block, fid); +} + struct mlxsw_sp_acl_rule * mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ruleset *ruleset, @@ -358,8 +433,14 @@ mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp, err = PTR_ERR(rule->rulei); goto err_rulei_create; } + + err = mlxsw_sp_acl_rulei_counter_alloc(mlxsw_sp, rule->rulei); + if (err) + goto err_counter_alloc; return rule; +err_counter_alloc: + mlxsw_sp_acl_rulei_destroy(rule->rulei); err_rulei_create: kfree(rule); err_alloc: @@ -372,6 +453,7 @@ void mlxsw_sp_acl_rule_destroy(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; + mlxsw_sp_acl_rulei_counter_free(mlxsw_sp, rule->rulei); mlxsw_sp_acl_rulei_destroy(rule->rulei); kfree(rule); mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset); @@ -393,6 +475,7 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp, if (err) goto err_rhashtable_insert; + list_add_tail(&rule->list, &mlxsw_sp->acl->rules); return 0; err_rhashtable_insert: @@ -406,6 +489,7 @@ void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + list_del(&rule->list); rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node, mlxsw_sp_acl_rule_ht_params); ops->rule_del(mlxsw_sp, rule->priv); @@ -426,6 +510,90 @@ mlxsw_sp_acl_rule_rulei(struct mlxsw_sp_acl_rule *rule) return rule->rulei; } +static int mlxsw_sp_acl_rule_activity_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule) +{ + struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + bool active; + int err; + + err = ops->rule_activity_get(mlxsw_sp, rule->priv, &active); + if (err) + return err; + if (active) + rule->last_used = jiffies; + return 0; +} + +static int mlxsw_sp_acl_rules_activity_update(struct mlxsw_sp_acl *acl) +{ + struct mlxsw_sp_acl_rule *rule; + int err; + + /* Protect internal structures from changes */ + rtnl_lock(); + list_for_each_entry(rule, &acl->rules, list) { + err = mlxsw_sp_acl_rule_activity_update(acl->mlxsw_sp, + rule); + if (err) + goto err_rule_update; + } + rtnl_unlock(); + return 0; + +err_rule_update: + rtnl_unlock(); + return err; +} + +static void mlxsw_sp_acl_rule_activity_work_schedule(struct mlxsw_sp_acl *acl) +{ + unsigned long interval = acl->rule_activity_update.interval; + + mlxsw_core_schedule_dw(&acl->rule_activity_update.dw, + msecs_to_jiffies(interval)); +} + +static void mlxsw_sp_acl_rul_activity_update_work(struct work_struct *work) +{ + struct mlxsw_sp_acl *acl = container_of(work, struct mlxsw_sp_acl, + rule_activity_update.dw.work); + int err; + + err = mlxsw_sp_acl_rules_activity_update(acl); + if (err) + 
dev_err(acl->mlxsw_sp->bus_info->dev, "Could not update acl activity"); + + mlxsw_sp_acl_rule_activity_work_schedule(acl); +} + +int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule, + u64 *packets, u64 *bytes, u64 *last_use) + +{ + struct mlxsw_sp_acl_rule_info *rulei; + u64 current_packets; + u64 current_bytes; + int err; + + rulei = mlxsw_sp_acl_rule_rulei(rule); + err = mlxsw_sp_flow_counter_get(mlxsw_sp, rulei->counter_index, + &current_packets, &current_bytes); + if (err) + return err; + + *packets = current_packets - rule->last_packets; + *bytes = current_bytes - rule->last_bytes; + *last_use = rule->last_used; + + rule->last_bytes = current_bytes; + rule->last_packets = current_packets; + + return 0; +} + #define MLXSW_SP_KDVL_ACT_EXT_SIZE 1 static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index, @@ -434,7 +602,6 @@ static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index, struct mlxsw_sp *mlxsw_sp = priv; char pefa_pl[MLXSW_REG_PEFA_LEN]; u32 kvdl_index; - int ret; int err; /* The first action set of a TCAM entry is stored directly in TCAM, @@ -443,10 +610,10 @@ static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index, if (is_first) return 0; - ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KDVL_ACT_EXT_SIZE); - if (ret < 0) - return ret; - kvdl_index = ret; + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KDVL_ACT_EXT_SIZE, + &kvdl_index); + if (err) + return err; mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, enc_actions); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl); if (err) @@ -475,13 +642,11 @@ static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index, struct mlxsw_sp *mlxsw_sp = priv; char ppbs_pl[MLXSW_REG_PPBS_LEN]; u32 kvdl_index; - int ret; int err; - ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1); - if (ret < 0) - return ret; - kvdl_index = ret; + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &kvdl_index); + if (err) + return err; mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbs), ppbs_pl); if (err) @@ -518,7 +683,7 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) if (!acl) return -ENOMEM; mlxsw_sp->acl = acl; - + acl->mlxsw_sp = mlxsw_sp; acl->afk = mlxsw_afk_create(MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_FLEX_KEYS), mlxsw_sp_afk_blocks, @@ -541,11 +706,18 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_rhashtable_init; + INIT_LIST_HEAD(&acl->rules); err = acl_ops->init(mlxsw_sp, acl->priv); if (err) goto err_acl_ops_init; acl->ops = acl_ops; + + /* Create the delayed work for the rule activity_update */ + INIT_DELAYED_WORK(&acl->rule_activity_update.dw, + mlxsw_sp_acl_rul_activity_update_work); + acl->rule_activity_update.interval = MLXSW_SP_ACL_RULE_ACTIVITY_UPDATE_PERIOD_MS; + mlxsw_core_schedule_dw(&acl->rule_activity_update.dw, 0); return 0; err_acl_ops_init: @@ -564,7 +736,9 @@ void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp) struct mlxsw_sp_acl *acl = mlxsw_sp->acl; const struct mlxsw_sp_acl_ops *acl_ops = acl->ops; + cancel_delayed_work_sync(&mlxsw_sp->acl->rule_activity_update.dw); acl_ops->fini(mlxsw_sp, acl->priv); + WARN_ON(!list_empty(&acl->rules)); rhashtable_destroy(&acl->ruleset_ht); mlxsw_afa_destroy(acl->afa); mlxsw_afk_destroy(acl->afk); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h index 82b81cf7f4a7..af7b7bad48df 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h +++ 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h @@ -39,11 +39,15 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_dmac[] = { MLXSW_AFK_ELEMENT_INST_BUF(DMAC, 0x00, 6), + MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3), + MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12), MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = { MLXSW_AFK_ELEMENT_INST_BUF(SMAC, 0x00, 6), + MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3), + MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12), MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; @@ -65,6 +69,8 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = { }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_ex[] = { + MLXSW_AFK_ELEMENT_INST_U32(VID, 0x00, 0, 12), + MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 29, 3), MLXSW_AFK_ELEMENT_INST_U32(SRC_L4_PORT, 0x08, 0, 16), MLXSW_AFK_ELEMENT_INST_U32(DST_L4_PORT, 0x0C, 0, 16), }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 7382832215fa..3a24289979d9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -561,6 +561,24 @@ mlxsw_sp_acl_tcam_region_entry_remove(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); } +static int +mlxsw_sp_acl_tcam_region_entry_activity_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region, + unsigned int offset, + bool *activity) +{ + char ptce2_pl[MLXSW_REG_PTCE2_LEN]; + int err; + + mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_QUERY_CLEAR_ON_READ, + region->tcam_region_info, offset); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); + if (err) + return err; + *activity = mlxsw_reg_ptce2_a_get(ptce2_pl); + return 0; +} + #define MLXSW_SP_ACL_TCAM_CATCHALL_PRIO (~0U) static int @@ -940,6 +958,19 @@ static void mlxsw_sp_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk); } +static int +mlxsw_sp_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_entry *entry, + bool *activity) +{ + struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk; + struct mlxsw_sp_acl_tcam_region *region = chunk->region; + + return mlxsw_sp_acl_tcam_region_entry_activity_get(mlxsw_sp, region, + entry->parman_item.index, + activity); +} + static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = { MLXSW_AFK_ELEMENT_SRC_SYS_PORT, MLXSW_AFK_ELEMENT_DMAC, @@ -950,6 +981,8 @@ static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = { MLXSW_AFK_ELEMENT_DST_IP4, MLXSW_AFK_ELEMENT_DST_L4_PORT, MLXSW_AFK_ELEMENT_SRC_L4_PORT, + MLXSW_AFK_ELEMENT_VID, + MLXSW_AFK_ELEMENT_PCP, }; static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv6[] = { @@ -1046,6 +1079,16 @@ mlxsw_sp_acl_tcam_flower_rule_del(struct mlxsw_sp *mlxsw_sp, void *rule_priv) mlxsw_sp_acl_tcam_entry_del(mlxsw_sp, &rule->entry); } +static int +mlxsw_sp_acl_tcam_flower_rule_activity_get(struct mlxsw_sp *mlxsw_sp, + void *rule_priv, bool *activity) +{ + struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv; + + return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, &rule->entry, + activity); +} + static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { .ruleset_priv_size = sizeof(struct mlxsw_sp_acl_tcam_flower_ruleset), .ruleset_add = 
mlxsw_sp_acl_tcam_flower_ruleset_add, @@ -1055,6 +1098,7 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { .rule_priv_size = sizeof(struct mlxsw_sp_acl_tcam_flower_rule), .rule_add = mlxsw_sp_acl_tcam_flower_rule_add, .rule_del = mlxsw_sp_acl_tcam_flower_rule_del, + .rule_activity_get = mlxsw_sp_acl_tcam_flower_rule_activity_get, }; static const struct mlxsw_sp_acl_profile_ops * diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index a7468262f118..997189cfe7fd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -162,8 +162,8 @@ static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port, } static const u16 mlxsw_sp_pbs[] = { - [0] = 2 * MLXSW_SP_BYTES_TO_CELLS(ETH_FRAME_LEN), - [9] = 2 * MLXSW_SP_BYTES_TO_CELLS(MLXSW_PORT_MAX_MTU), + [0] = 2 * ETH_FRAME_LEN, + [9] = 2 * MLXSW_PORT_MAX_MTU, }; #define MLXSW_SP_PBS_LEN ARRAY_SIZE(mlxsw_sp_pbs) @@ -171,20 +171,22 @@ static const u16 mlxsw_sp_pbs[] = { static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port) { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char pbmc_pl[MLXSW_REG_PBMC_LEN]; int i; mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0xffff, 0xffff / 2); for (i = 0; i < MLXSW_SP_PBS_LEN; i++) { + u16 size = mlxsw_sp_bytes_cells(mlxsw_sp, mlxsw_sp_pbs[i]); + if (i == MLXSW_SP_PB_UNUSED) continue; - mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, i, mlxsw_sp_pbs[i]); + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, i, size); } mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX, 0); - return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, - MLXSW_REG(pbmc), pbmc_pl); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); } static int mlxsw_sp_port_pb_prio_init(struct mlxsw_sp_port *mlxsw_sp_port) @@ -209,11 +211,25 @@ static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port) return mlxsw_sp_port_pb_prio_init(mlxsw_sp_port); } -#define MLXSW_SP_SB_PR_INGRESS_SIZE \ - (15000000 - (2 * 20000 * MLXSW_PORT_MAX_PORTS)) +static int mlxsw_sp_sb_ports_init(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); + + mlxsw_sp->sb.ports = kcalloc(max_ports, sizeof(struct mlxsw_sp_sb_port), + GFP_KERNEL); + if (!mlxsw_sp->sb.ports) + return -ENOMEM; + return 0; +} + +static void mlxsw_sp_sb_ports_fini(struct mlxsw_sp *mlxsw_sp) +{ + kfree(mlxsw_sp->sb.ports); +} + +#define MLXSW_SP_SB_PR_INGRESS_SIZE 12440000 #define MLXSW_SP_SB_PR_INGRESS_MNG_SIZE (200 * 1000) -#define MLXSW_SP_SB_PR_EGRESS_SIZE \ - (14000000 - (8 * 1500 * MLXSW_PORT_MAX_PORTS)) +#define MLXSW_SP_SB_PR_EGRESS_SIZE 13232000 #define MLXSW_SP_SB_PR(_mode, _size) \ { \ @@ -223,18 +239,17 @@ static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port) static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_ingress[] = { MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, - MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_PR_INGRESS_SIZE)), + MLXSW_SP_SB_PR_INGRESS_SIZE), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, - MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_PR_INGRESS_MNG_SIZE)), + MLXSW_SP_SB_PR_INGRESS_MNG_SIZE), }; #define MLXSW_SP_SB_PRS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_ingress) static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_egress[] = { - MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, - 
MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_PR_EGRESS_SIZE)), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_PR_EGRESS_SIZE), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), @@ -251,11 +266,9 @@ static int __mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, int err; for (i = 0; i < prs_len; i++) { - const struct mlxsw_sp_sb_pr *pr; + u32 size = mlxsw_sp_bytes_cells(mlxsw_sp, prs[i].size); - pr = &prs[i]; - err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, dir, - pr->mode, pr->size); + err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, dir, prs[i].mode, size); if (err) return err; } @@ -284,7 +297,7 @@ static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp) } static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = { - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(10000), 8, 0), + MLXSW_SP_SB_CM(10000, 8, 0), MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), @@ -293,20 +306,20 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = { MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), MLXSW_SP_SB_CM(0, 0, 0), /* dummy, this PG does not exist */ - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(20000), 1, 3), + MLXSW_SP_SB_CM(20000, 1, 3), }; #define MLXSW_SP_SB_CMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_ingress) static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = { - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(1500, 9, 0), + MLXSW_SP_SB_CM(1500, 9, 0), + MLXSW_SP_SB_CM(1500, 9, 0), + MLXSW_SP_SB_CM(1500, 9, 0), + MLXSW_SP_SB_CM(1500, 9, 0), + MLXSW_SP_SB_CM(1500, 9, 0), + MLXSW_SP_SB_CM(1500, 9, 0), + MLXSW_SP_SB_CM(1500, 9, 0), MLXSW_SP_SB_CM(0, 0, 0), MLXSW_SP_SB_CM(0, 0, 0), MLXSW_SP_SB_CM(0, 0, 0), @@ -330,7 +343,7 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(10000), 0, 0), + MLXSW_SP_SB_CM(10000, 0, 0), MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, @@ -370,13 +383,17 @@ static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, for (i = 0; i < cms_len; i++) { const struct mlxsw_sp_sb_cm *cm; + u32 min_buff; if (i == 8 && dir == MLXSW_REG_SBXX_DIR_INGRESS) continue; /* PG number 8 does not exist, skip it */ cm = &cms[i]; + /* All pools are initialized using dynamic thresholds, + * therefore 'max_buff' isn't specified in cells. 
+ */ + min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, cm->min_buff); err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, dir, - cm->min_buff, cm->max_buff, - cm->pool); + min_buff, cm->max_buff, cm->pool); if (err) return err; } @@ -484,21 +501,21 @@ struct mlxsw_sp_sb_mm { } static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = { - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), }; #define MLXSW_SP_SB_MMS_LEN ARRAY_SIZE(mlxsw_sp_sb_mms) @@ -511,10 +528,15 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) for (i = 0; i < MLXSW_SP_SB_MMS_LEN; i++) { const struct mlxsw_sp_sb_mm *mc; + u32 min_buff; mc = &mlxsw_sp_sb_mms[i]; - mlxsw_reg_sbmm_pack(sbmm_pl, i, mc->min_buff, - mc->max_buff, mc->pool); + /* All pools are initialized using dynamic thresholds, + * therefore 'max_buff' isn't specified in cells. 
+ */ + min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, mc->min_buff); + mlxsw_reg_sbmm_pack(sbmm_pl, i, min_buff, mc->max_buff, + mc->pool); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbmm), sbmm_pl); if (err) return err; @@ -522,32 +544,53 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) return 0; } -#define MLXSW_SP_SB_SIZE (16 * 1024 * 1024) - int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) { + u64 sb_size; int err; - err = mlxsw_sp_sb_prs_init(mlxsw_sp); + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, CELL_SIZE)) + return -EIO; + mlxsw_sp->sb.cell_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, CELL_SIZE); + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_BUFFER_SIZE)) + return -EIO; + sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE); + + err = mlxsw_sp_sb_ports_init(mlxsw_sp); if (err) return err; + err = mlxsw_sp_sb_prs_init(mlxsw_sp); + if (err) + goto err_sb_prs_init; err = mlxsw_sp_cpu_port_sb_cms_init(mlxsw_sp); if (err) - return err; + goto err_sb_cpu_port_sb_cms_init; err = mlxsw_sp_sb_mms_init(mlxsw_sp); if (err) - return err; - return devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, - MLXSW_SP_SB_SIZE, - MLXSW_SP_SB_POOL_COUNT, - MLXSW_SP_SB_POOL_COUNT, - MLXSW_SP_SB_TC_COUNT, - MLXSW_SP_SB_TC_COUNT); + goto err_sb_mms_init; + err = devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, sb_size, + MLXSW_SP_SB_POOL_COUNT, + MLXSW_SP_SB_POOL_COUNT, + MLXSW_SP_SB_TC_COUNT, + MLXSW_SP_SB_TC_COUNT); + if (err) + goto err_devlink_sb_register; + + return 0; + +err_devlink_sb_register: +err_sb_mms_init: +err_sb_cpu_port_sb_cms_init: +err_sb_prs_init: + mlxsw_sp_sb_ports_fini(mlxsw_sp); + return err; } void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp) { devlink_sb_unregister(priv_to_devlink(mlxsw_sp->core), 0); + mlxsw_sp_sb_ports_fini(mlxsw_sp); } int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port) @@ -596,7 +639,7 @@ int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); pool_info->pool_type = (enum devlink_sb_pool_type) dir; - pool_info->size = MLXSW_SP_CELLS_TO_BYTES(pr->size); + pool_info->size = mlxsw_sp_cells_bytes(mlxsw_sp, pr->size); pool_info->threshold_type = (enum devlink_sb_threshold_type) pr->mode; return 0; } @@ -606,9 +649,9 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, enum devlink_sb_threshold_type threshold_type) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + u32 pool_size = mlxsw_sp_bytes_cells(mlxsw_sp, size); u8 pool = pool_get(pool_index); enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); - u32 pool_size = MLXSW_SP_BYTES_TO_CELLS(size); enum mlxsw_reg_sbpr_mode mode; if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE)) @@ -627,7 +670,7 @@ static u32 mlxsw_sp_sb_threshold_out(struct mlxsw_sp *mlxsw_sp, u8 pool, if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC) return max_buff - MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET; - return MLXSW_SP_CELLS_TO_BYTES(max_buff); + return mlxsw_sp_cells_bytes(mlxsw_sp, max_buff); } static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u8 pool, @@ -645,7 +688,7 @@ static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u8 pool, return -EINVAL; *p_max_buff = val; } else { - *p_max_buff = MLXSW_SP_BYTES_TO_CELLS(threshold); + *p_max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, threshold); } return 0; } @@ -761,7 +804,7 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core, masked_count = 0; for (local_port = cb_ctx.local_port_1; - local_port < 
MLXSW_PORT_MAX_PORTS; local_port++) { + local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { @@ -775,7 +818,7 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core, } masked_count = 0; for (local_port = cb_ctx.local_port_1; - local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { @@ -817,7 +860,7 @@ next_batch: mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1); mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1); } - for (; local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); @@ -847,7 +890,7 @@ do_query: cb_priv); if (err) goto out; - if (local_port < MLXSW_PORT_MAX_PORTS) + if (local_port < mlxsw_core_max_ports(mlxsw_core)) goto next_batch; out: @@ -882,7 +925,7 @@ next_batch: mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1); mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1); } - for (; local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); @@ -908,7 +951,7 @@ do_query: &bulk_list, NULL, 0); if (err) goto out; - if (local_port < MLXSW_PORT_MAX_PORTS) + if (local_port < mlxsw_core_max_ports(mlxsw_core)) goto next_batch; out: @@ -932,8 +975,8 @@ int mlxsw_sp_sb_occ_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir); - *p_cur = MLXSW_SP_CELLS_TO_BYTES(pm->occ.cur); - *p_max = MLXSW_SP_CELLS_TO_BYTES(pm->occ.max); + *p_cur = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.cur); + *p_max = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.max); return 0; } @@ -951,7 +994,7 @@ int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir); - *p_cur = MLXSW_SP_CELLS_TO_BYTES(cm->occ.cur); - *p_max = MLXSW_SP_CELLS_TO_BYTES(cm->occ.max); + *p_cur = mlxsw_sp_cells_bytes(mlxsw_sp, cm->occ.cur); + *p_max = mlxsw_sp_cells_bytes(mlxsw_sp, cm->occ.max); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c new file mode 100644 index 000000000000..0f46775e0307 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c @@ -0,0 +1,207 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Arkadi Sharshevsky <arkadis@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/bitops.h> + +#include "spectrum_cnt.h" + +#define MLXSW_SP_COUNTER_POOL_BANK_SIZE 4096 + +struct mlxsw_sp_counter_sub_pool { + unsigned int base_index; + unsigned int size; + unsigned int entry_size; + unsigned int bank_count; +}; + +struct mlxsw_sp_counter_pool { + unsigned int pool_size; + unsigned long *usage; /* Usage bitmap */ + struct mlxsw_sp_counter_sub_pool *sub_pools; +}; + +static struct mlxsw_sp_counter_sub_pool mlxsw_sp_counter_sub_pools[] = { + [MLXSW_SP_COUNTER_SUB_POOL_FLOW] = { + .bank_count = 6, + }, + [MLXSW_SP_COUNTER_SUB_POOL_RIF] = { + .bank_count = 2, + } +}; + +static int mlxsw_sp_counter_pool_validate(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int total_bank_config = 0; + unsigned int pool_size; + int i; + + pool_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, COUNTER_POOL_SIZE); + /* Check config is valid, no bank over subscription */ + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) + total_bank_config += mlxsw_sp_counter_sub_pools[i].bank_count; + if (total_bank_config > pool_size / MLXSW_SP_COUNTER_POOL_BANK_SIZE + 1) + return -EINVAL; + return 0; +} + +static int mlxsw_sp_counter_sub_pools_prepare(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_counter_sub_pool *sub_pool; + + /* Prepare generic flow pool*/ + sub_pool = &mlxsw_sp_counter_sub_pools[MLXSW_SP_COUNTER_SUB_POOL_FLOW]; + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_SIZE_PACKETS_BYTES)) + return -EIO; + sub_pool->entry_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, + COUNTER_SIZE_PACKETS_BYTES); + /* Prepare erif pool*/ + sub_pool = &mlxsw_sp_counter_sub_pools[MLXSW_SP_COUNTER_SUB_POOL_RIF]; + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_SIZE_ROUTER_BASIC)) + return -EIO; + sub_pool->entry_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, + COUNTER_SIZE_ROUTER_BASIC); + return 0; +} + +int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_counter_sub_pool *sub_pool; + struct mlxsw_sp_counter_pool *pool; + unsigned int base_index; + unsigned int map_size; + int i; + int err; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_POOL_SIZE)) + return -EIO; + + err = mlxsw_sp_counter_pool_validate(mlxsw_sp); + if (err) + return err; + + err = mlxsw_sp_counter_sub_pools_prepare(mlxsw_sp); + if (err) + return err; + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + 
return -ENOMEM; + + pool->pool_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, COUNTER_POOL_SIZE); + map_size = BITS_TO_LONGS(pool->pool_size) * sizeof(unsigned long); + + pool->usage = kzalloc(map_size, GFP_KERNEL); + if (!pool->usage) { + err = -ENOMEM; + goto err_usage_alloc; + } + + pool->sub_pools = mlxsw_sp_counter_sub_pools; + /* Allocation is based on bank count which should be + * specified for each sub pool statically. + */ + base_index = 0; + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) { + sub_pool = &pool->sub_pools[i]; + sub_pool->size = sub_pool->bank_count * + MLXSW_SP_COUNTER_POOL_BANK_SIZE; + sub_pool->base_index = base_index; + base_index += sub_pool->size; + /* The last bank can't be fully used */ + if (sub_pool->base_index + sub_pool->size > pool->pool_size) + sub_pool->size = pool->pool_size - sub_pool->base_index; + } + + mlxsw_sp->counter_pool = pool; + return 0; + +err_usage_alloc: + kfree(pool); + return err; +} + +void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + + WARN_ON(find_first_bit(pool->usage, pool->pool_size) != + pool->pool_size); + kfree(pool->usage); + kfree(pool); +} + +int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_counter_sub_pool_id sub_pool_id, + unsigned int *p_counter_index) +{ + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct mlxsw_sp_counter_sub_pool *sub_pool; + unsigned int entry_index; + unsigned int stop_index; + int i; + + sub_pool = &mlxsw_sp_counter_sub_pools[sub_pool_id]; + stop_index = sub_pool->base_index + sub_pool->size; + entry_index = sub_pool->base_index; + + entry_index = find_next_zero_bit(pool->usage, stop_index, entry_index); + if (entry_index == stop_index) + return -ENOBUFS; + /* The sub-pools can contain non-integer number of entries + * so we must check for overflow + */ + if (entry_index + sub_pool->entry_size > stop_index) + return -ENOBUFS; + for (i = 0; i < sub_pool->entry_size; i++) + __set_bit(entry_index + i, pool->usage); + + *p_counter_index = entry_index; + return 0; +} + +void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_counter_sub_pool_id sub_pool_id, + unsigned int counter_index) +{ + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct mlxsw_sp_counter_sub_pool *sub_pool; + int i; + + if (WARN_ON(counter_index >= pool->pool_size)) + return; + sub_pool = &mlxsw_sp_counter_sub_pools[sub_pool_id]; + for (i = 0; i < sub_pool->entry_size; i++) + __clear_bit(counter_index + i, pool->usage); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h new file mode 100644 index 000000000000..fd34d0a01073 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h @@ -0,0 +1,54 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Arkadi Sharshevsky <arkdis@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_SPECTRUM_CNT_H +#define _MLXSW_SPECTRUM_CNT_H + +#include "spectrum.h" + +enum mlxsw_sp_counter_sub_pool_id { + MLXSW_SP_COUNTER_SUB_POOL_FLOW, + MLXSW_SP_COUNTER_SUB_POOL_RIF, +}; + +int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_counter_sub_pool_id sub_pool_id, + unsigned int *p_counter_index); +void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_counter_sub_pool_id sub_pool_id, + unsigned int counter_index); +int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c new file mode 100644 index 000000000000..ea56f6ade6b4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -0,0 +1,351 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Arkadi Sharshevsky <arakdis@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <net/devlink.h> + +#include "spectrum.h" +#include "spectrum_dpipe.h" +#include "spectrum_router.h" + +enum mlxsw_sp_field_metadata_id { + MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT, + MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, + MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, +}; + +static struct devlink_dpipe_field mlxsw_sp_dpipe_fields_metadata[] = { + { .name = "erif_port", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT, + .bitwidth = 32, + .mapping_type = DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX, + }, + { .name = "l3_forward", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, + .bitwidth = 1, + }, + { .name = "l3_drop", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, + .bitwidth = 1, + }, +}; + +enum mlxsw_sp_dpipe_header_id { + MLXSW_SP_DPIPE_HEADER_METADATA, +}; + +static struct devlink_dpipe_header mlxsw_sp_dpipe_header_metadata = { + .name = "mlxsw_meta", + .id = MLXSW_SP_DPIPE_HEADER_METADATA, + .fields = mlxsw_sp_dpipe_fields_metadata, + .fields_count = ARRAY_SIZE(mlxsw_sp_dpipe_fields_metadata), +}; + +static struct devlink_dpipe_header *mlxsw_dpipe_headers[] = { + &mlxsw_sp_dpipe_header_metadata, +}; + +static struct devlink_dpipe_headers mlxsw_sp_dpipe_headers = { + .headers = mlxsw_dpipe_headers, + .headers_count = ARRAY_SIZE(mlxsw_dpipe_headers), +}; + +static int mlxsw_sp_dpipe_table_erif_actions_dump(void *priv, + struct sk_buff *skb) +{ + struct devlink_dpipe_action action = {0}; + int err; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &mlxsw_sp_dpipe_header_metadata; + action.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD; + + err = devlink_dpipe_action_put(skb, &action); + if (err) + return err; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &mlxsw_sp_dpipe_header_metadata; + action.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP; + + return devlink_dpipe_action_put(skb, &action); +} + +static int mlxsw_sp_dpipe_table_erif_matches_dump(void *priv, + struct sk_buff *skb) +{ + struct devlink_dpipe_match match = {0}; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; + + return devlink_dpipe_match_put(skb, &match); +} + +static void mlxsw_sp_erif_entry_clear(struct devlink_dpipe_entry *entry) +{ + unsigned int value_count, value_index; + struct devlink_dpipe_value *value; + + value = entry->action_values; + value_count = entry->action_values_count; + for (value_index = 0; value_index < value_count; value_index++) { + kfree(value[value_index].value); + kfree(value[value_index].mask); + } + + value = entry->match_values; + value_count = entry->match_values_count; + for (value_index = 0; value_index < value_count; value_index++) { + kfree(value[value_index].value); + kfree(value[value_index].mask); + } +} + +static void +mlxsw_sp_erif_match_action_prepare(struct devlink_dpipe_match *match, + struct devlink_dpipe_action *action) +{ + 
action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action->header = &mlxsw_sp_dpipe_header_metadata; + action->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD; + + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; +} + +static int mlxsw_sp_erif_entry_prepare(struct devlink_dpipe_entry *entry, + struct devlink_dpipe_value *match_value, + struct devlink_dpipe_match *match, + struct devlink_dpipe_value *action_value, + struct devlink_dpipe_action *action) +{ + entry->match_values = match_value; + entry->match_values_count = 1; + + entry->action_values = action_value; + entry->action_values_count = 1; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + action_value->action = action; + action_value->value_size = sizeof(u32); + action_value->value = kmalloc(action_value->value_size, GFP_KERNEL); + if (!action_value->value) + goto err_action_alloc; + return 0; + +err_action_alloc: + kfree(match_value->value); + return -ENOMEM; +} + +static int mlxsw_sp_erif_entry_get(struct mlxsw_sp *mlxsw_sp, + struct devlink_dpipe_entry *entry, + struct mlxsw_sp_rif *rif, + bool counters_enabled) +{ + u32 *action_value; + u32 *rif_value; + u64 cnt; + int err; + + /* Set Match RIF index */ + rif_value = entry->match_values->value; + *rif_value = mlxsw_sp_rif_index(rif); + entry->match_values->mapping_value = mlxsw_sp_rif_dev_ifindex(rif); + entry->match_values->mapping_valid = true; + + /* Set Action Forwarding */ + action_value = entry->action_values->value; + *action_value = 1; + + entry->counter_valid = false; + entry->counter = 0; + if (!counters_enabled) + return 0; + + entry->index = mlxsw_sp_rif_index(rif); + err = mlxsw_sp_rif_counter_value_get(mlxsw_sp, rif, + MLXSW_SP_RIF_COUNTER_EGRESS, + &cnt); + if (!err) { + entry->counter = cnt; + entry->counter_valid = true; + } + return 0; +} + +static int +mlxsw_sp_table_erif_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct devlink_dpipe_value match_value = {{0}}, action_value = {{0}}; + struct devlink_dpipe_action action = {0}; + struct devlink_dpipe_match match = {0}; + struct devlink_dpipe_entry entry = {0}; + struct mlxsw_sp *mlxsw_sp = priv; + unsigned int rif_count; + int i, j; + int err; + + mlxsw_sp_erif_match_action_prepare(&match, &action); + err = mlxsw_sp_erif_entry_prepare(&entry, &match_value, &match, + &action_value, &action); + if (err) + return err; + + rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + rtnl_lock(); + i = 0; +start_again: + err = devlink_dpipe_entry_ctx_prepare(dump_ctx); + if (err) + return err; + j = 0; + for (; i < rif_count; i++) { + if (!mlxsw_sp->rifs[i]) + continue; + err = mlxsw_sp_erif_entry_get(mlxsw_sp, &entry, + mlxsw_sp->rifs[i], + counters_enabled); + if (err) + goto err_entry_get; + err = devlink_dpipe_entry_ctx_append(dump_ctx, &entry); + if (err) { + if (err == -EMSGSIZE) { + if (!j) + goto err_entry_append; + break; + } + goto err_entry_append; + } + j++; + } + + devlink_dpipe_entry_ctx_close(dump_ctx); + if (i != rif_count) + goto start_again; + rtnl_unlock(); + + mlxsw_sp_erif_entry_clear(&entry); + return 0; +err_entry_append: +err_entry_get: + rtnl_unlock(); + mlxsw_sp_erif_entry_clear(&entry); + return err; +} + +static int mlxsw_sp_table_erif_counters_update(void *priv, bool enable) 
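+/* Descriptive note (editorial sketch grounded in the body below): invoked when
+ * counters are toggled for the eRIF table via devlink; it walks all
+ * instantiated RIFs and allocates an egress RIF counter for each when
+ * enabling, or frees those counters when disabling.
+ */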
+{ + struct mlxsw_sp *mlxsw_sp = priv; + int i; + + rtnl_lock(); + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { + if (!mlxsw_sp->rifs[i]) + continue; + if (enable) + mlxsw_sp_rif_counter_alloc(mlxsw_sp, + mlxsw_sp->rifs[i], + MLXSW_SP_RIF_COUNTER_EGRESS); + else + mlxsw_sp_rif_counter_free(mlxsw_sp, + mlxsw_sp->rifs[i], + MLXSW_SP_RIF_COUNTER_EGRESS); + } + rtnl_unlock(); + return 0; +} + +static struct devlink_dpipe_table_ops mlxsw_sp_erif_ops = { + .matches_dump = mlxsw_sp_dpipe_table_erif_matches_dump, + .actions_dump = mlxsw_sp_dpipe_table_erif_actions_dump, + .entries_dump = mlxsw_sp_table_erif_entries_dump, + .counters_set_update = mlxsw_sp_table_erif_counters_update, +}; + +static int mlxsw_sp_dpipe_erif_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + u64 table_size; + + table_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + return devlink_dpipe_table_register(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ERIF, + &mlxsw_sp_erif_ops, + mlxsw_sp, table_size, + false); +} + +static void mlxsw_sp_dpipe_erif_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devlink_dpipe_table_unregister(devlink, MLXSW_SP_DPIPE_TABLE_NAME_ERIF); +} + +int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + int err; + + err = devlink_dpipe_headers_register(devlink, + &mlxsw_sp_dpipe_headers); + if (err) + return err; + err = mlxsw_sp_dpipe_erif_table_init(mlxsw_sp); + if (err) + goto err_erif_register; + return 0; + +err_erif_register: + devlink_dpipe_headers_unregister(priv_to_devlink(mlxsw_sp->core)); + return err; +} + +void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp); + devlink_dpipe_headers_unregister(devlink); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h new file mode 100644 index 000000000000..d2089298cba3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h @@ -0,0 +1,43 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Arkadi Sharshevsky <arkadis@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_PIPELINE_H_ +#define _MLXSW_PIPELINE_H_ + +int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp); + +#define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif" + +#endif /* _MLXSW_PIPELINE_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index ae6cccc666e4..7d87e23578a3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -39,6 +39,7 @@ #include <net/pkt_cls.h> #include <net/tc_act/tc_gact.h> #include <net/tc_act/tc_mirred.h> +#include <net/tc_act/tc_vlan.h> #include "spectrum.h" #include "core_acl_flex_keys.h" @@ -55,6 +56,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, if (tc_no_actions(exts)) return 0; + /* Count action is inserted first */ + err = mlxsw_sp_acl_rulei_act_count(mlxsw_sp, rulei); + if (err) + return err; + tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { if (is_tcf_gact_shot(a)) { @@ -65,6 +71,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, int ifindex = tcf_mirred_ifindex(a); struct net_device *out_dev; + err = mlxsw_sp_acl_rulei_act_fid_set(mlxsw_sp, rulei, + MLXSW_SP_DUMMY_FID); + if (err) + return err; + out_dev = __dev_get_by_index(dev_net(dev), ifindex); if (out_dev == dev) out_dev = NULL; @@ -73,6 +84,15 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, out_dev); if (err) return err; + } else if (is_tcf_vlan(a)) { + u16 proto = be16_to_cpu(tcf_vlan_push_proto(a)); + u32 action = tcf_vlan_action(a); + u8 prio = tcf_vlan_push_prio(a); + u16 vid = tcf_vlan_push_vid(a); + + return mlxsw_sp_acl_rulei_act_vlan(mlxsw_sp, rulei, + action, vid, + proto, prio); } else { dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n"); return -EOPNOTSUPP; @@ -173,7 +193,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | - BIT(FLOW_DISSECTOR_KEY_PORTS))) { + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_VLAN))) { dev_err(mlxsw_sp->bus_info->dev, "Unsupported key\n"); return -EOPNOTSUPP; } @@ -234,6 +255,27 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, sizeof(key->src)); } + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_dissector_key_vlan *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->key); + struct flow_dissector_key_vlan *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->mask); + if (mask->vlan_id != 0) + mlxsw_sp_acl_rulei_keymask_u32(rulei, + MLXSW_AFK_ELEMENT_VID, + key->vlan_id, + mask->vlan_id); + if (mask->vlan_priority != 0) + mlxsw_sp_acl_rulei_keymask_u32(rulei, + MLXSW_AFK_ELEMENT_PCP, + key->vlan_priority, + mask->vlan_priority); + } + if (addr_type == 
FLOW_DISSECTOR_KEY_IPV4_ADDRS) mlxsw_sp_flower_parse_ipv4(rulei, f); @@ -314,3 +356,47 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); } + +int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, + struct tc_cls_flower_offload *f) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_acl_ruleset *ruleset; + struct mlxsw_sp_acl_rule *rule; + struct tc_action *a; + LIST_HEAD(actions); + u64 packets; + u64 lastuse; + u64 bytes; + int err; + + ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev, + ingress, + MLXSW_SP_ACL_PROFILE_FLOWER); + if (WARN_ON(IS_ERR(ruleset))) + return -EINVAL; + + rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, f->cookie); + if (!rule) + return -EINVAL; + + err = mlxsw_sp_acl_rule_get_stats(mlxsw_sp, rule, &packets, &bytes, + &lastuse); + if (err) + goto err_rule_get_stats; + + preempt_disable(); + + tcf_exts_to_list(f->exts, &actions); + list_for_each_entry(a, &actions, list) + tcf_action_stats_update(a, bytes, packets, lastuse); + + preempt_enable(); + + mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); + return 0; + +err_rule_get_stats: + mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c index ac321e8e5c1a..26c26cd30c3d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c @@ -45,7 +45,8 @@ (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_CHUNKS_BASE) #define MLXSW_SP_CHUNK_MAX 32 -int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count) +int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count, + u32 *p_entry_index) { int entry_index; int size; @@ -72,7 +73,8 @@ int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count) for (i = 0; i < type_entries; i++) set_bit(entry_index + i, mlxsw_sp->kvdl.usage); - return entry_index; + *p_entry_index = entry_index; + return 0; } return -ENOBUFS; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index bd8de6b9be71..33cec1cc1642 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -41,14 +41,184 @@ #include <linux/in6.h> #include <linux/notifier.h> #include <linux/inetdevice.h> +#include <linux/netdevice.h> #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> #include <net/ip_fib.h> +#include <net/fib_rules.h> +#include <net/l3mdev.h> #include "spectrum.h" #include "core.h" #include "reg.h" +#include "spectrum_cnt.h" +#include "spectrum_dpipe.h" +#include "spectrum_router.h" + +struct mlxsw_sp_rif { + struct list_head nexthop_list; + struct list_head neigh_list; + struct net_device *dev; + struct mlxsw_sp_fid *f; + unsigned char addr[ETH_ALEN]; + int mtu; + u16 rif_index; + u16 vr_id; + unsigned int counter_ingress; + bool counter_ingress_valid; + unsigned int counter_egress; + bool counter_egress_valid; +}; + +static unsigned int * +mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir) +{ + switch (dir) { + case MLXSW_SP_RIF_COUNTER_EGRESS: + return &rif->counter_egress; + case MLXSW_SP_RIF_COUNTER_INGRESS: + return &rif->counter_ingress; + } + return NULL; +} + +static bool +mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif, + enum 
mlxsw_sp_rif_counter_dir dir) +{ + switch (dir) { + case MLXSW_SP_RIF_COUNTER_EGRESS: + return rif->counter_egress_valid; + case MLXSW_SP_RIF_COUNTER_INGRESS: + return rif->counter_ingress_valid; + } + return false; +} + +static void +mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir, + bool valid) +{ + switch (dir) { + case MLXSW_SP_RIF_COUNTER_EGRESS: + rif->counter_egress_valid = valid; + break; + case MLXSW_SP_RIF_COUNTER_INGRESS: + rif->counter_ingress_valid = valid; + break; + } +} + +static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index, + unsigned int counter_index, bool enable, + enum mlxsw_sp_rif_counter_dir dir) +{ + char ritr_pl[MLXSW_REG_RITR_LEN]; + bool is_egress = false; + int err; + + if (dir == MLXSW_SP_RIF_COUNTER_EGRESS) + is_egress = true; + mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); + if (err) + return err; + + mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable, + is_egress); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir, u64 *cnt) +{ + char ricnt_pl[MLXSW_REG_RICNT_LEN]; + unsigned int *p_counter_index; + bool valid; + int err; + + valid = mlxsw_sp_rif_counter_valid_get(rif, dir); + if (!valid) + return -EINVAL; + + p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir); + if (!p_counter_index) + return -EINVAL; + mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index, + MLXSW_REG_RICNT_OPCODE_NOP); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl); + if (err) + return err; + *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl); + return 0; +} + +static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index) +{ + char ricnt_pl[MLXSW_REG_RICNT_LEN]; + + mlxsw_reg_ricnt_pack(ricnt_pl, counter_index, + MLXSW_REG_RICNT_OPCODE_CLEAR); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl); +} + +int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir) +{ + unsigned int *p_counter_index; + int err; + + p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir); + if (!p_counter_index) + return -EINVAL; + err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF, + p_counter_index); + if (err) + return err; + + err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index); + if (err) + goto err_counter_clear; + + err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index, + *p_counter_index, true, dir); + if (err) + goto err_counter_edit; + mlxsw_sp_rif_counter_valid_set(rif, dir, true); + return 0; + +err_counter_edit: +err_counter_clear: + mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF, + *p_counter_index); + return err; +} + +void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir) +{ + unsigned int *p_counter_index; + + p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir); + if (WARN_ON(!p_counter_index)) + return; + mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index, + *p_counter_index, false, dir); + mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF, + *p_counter_index); + mlxsw_sp_rif_counter_valid_set(rif, dir, false); +} + +static struct mlxsw_sp_rif * +mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev); #define 
mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \ for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT) @@ -89,12 +259,6 @@ mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1, } static void -mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage) -{ - memset(prefix_usage, 0, sizeof(*prefix_usage)); -} - -static void mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage, unsigned char prefix_len) { @@ -125,7 +289,7 @@ struct mlxsw_sp_fib_node { struct list_head entry_list; struct list_head list; struct rhash_head ht_node; - struct mlxsw_sp_vr *vr; + struct mlxsw_sp_fib *fib; struct mlxsw_sp_fib_key key; }; @@ -149,13 +313,17 @@ struct mlxsw_sp_fib_entry { struct mlxsw_sp_fib { struct rhashtable ht; struct list_head node_list; + struct mlxsw_sp_vr *vr; + struct mlxsw_sp_lpm_tree *lpm_tree; unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT]; struct mlxsw_sp_prefix_usage prefix_usage; + enum mlxsw_sp_l3proto proto; }; static const struct rhashtable_params mlxsw_sp_fib_ht_params; -static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void) +static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr, + enum mlxsw_sp_l3proto proto) { struct mlxsw_sp_fib *fib; int err; @@ -167,6 +335,8 @@ static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void) if (err) goto err_rhashtable_init; INIT_LIST_HEAD(&fib->node_list); + fib->proto = proto; + fib->vr = vr; return fib; err_rhashtable_init: @@ -177,24 +347,21 @@ err_rhashtable_init: static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib) { WARN_ON(!list_empty(&fib->node_list)); + WARN_ON(fib->lpm_tree); rhashtable_destroy(&fib->ht); kfree(fib); } static struct mlxsw_sp_lpm_tree * -mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved) +mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp) { static struct mlxsw_sp_lpm_tree *lpm_tree; int i; - for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) { - lpm_tree = &mlxsw_sp->router.lpm_trees[i]; - if (lpm_tree->ref_count == 0) { - if (one_reserved) - one_reserved = false; - else - return lpm_tree; - } + for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) { + lpm_tree = &mlxsw_sp->router.lpm.trees[i]; + if (lpm_tree->ref_count == 0) + return lpm_tree; } return NULL; } @@ -248,12 +415,12 @@ mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_lpm_tree * mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_prefix_usage *prefix_usage, - enum mlxsw_sp_l3proto proto, bool one_reserved) + enum mlxsw_sp_l3proto proto) { struct mlxsw_sp_lpm_tree *lpm_tree; int err; - lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved); + lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp); if (!lpm_tree) return ERR_PTR(-EBUSY); lpm_tree->proto = proto; @@ -283,13 +450,13 @@ static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_lpm_tree * mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_prefix_usage *prefix_usage, - enum mlxsw_sp_l3proto proto, bool one_reserved) + enum mlxsw_sp_l3proto proto) { struct mlxsw_sp_lpm_tree *lpm_tree; int i; - for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) { - lpm_tree = &mlxsw_sp->router.lpm_trees[i]; + for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) { + lpm_tree = &mlxsw_sp->router.lpm.trees[i]; if (lpm_tree->ref_count != 0 && lpm_tree->proto == proto && mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage, @@ -297,7 +464,7 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, goto inc_ref_count; } lpm_tree = 
mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, - proto, one_reserved); + proto); if (IS_ERR(lpm_tree)) return lpm_tree; @@ -314,15 +481,41 @@ static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, return 0; } -static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp) +#define MLXSW_SP_LPM_TREE_MIN 2 /* trees 0 and 1 are reserved */ + +static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_lpm_tree *lpm_tree; + u64 max_trees; int i; - for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) { - lpm_tree = &mlxsw_sp->router.lpm_trees[i]; + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES)) + return -EIO; + + max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES); + mlxsw_sp->router.lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN; + mlxsw_sp->router.lpm.trees = kcalloc(mlxsw_sp->router.lpm.tree_count, + sizeof(struct mlxsw_sp_lpm_tree), + GFP_KERNEL); + if (!mlxsw_sp->router.lpm.trees) + return -ENOMEM; + + for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) { + lpm_tree = &mlxsw_sp->router.lpm.trees[i]; lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN; } + + return 0; +} + +static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp) +{ + kfree(mlxsw_sp->router.lpm.trees); +} + +static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr) +{ + return !!vr->fib4; } static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) @@ -332,31 +525,31 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { vr = &mlxsw_sp->router.vrs[i]; - if (!vr->used) + if (!mlxsw_sp_vr_is_used(vr)) return vr; } return NULL; } static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_vr *vr) + const struct mlxsw_sp_fib *fib) { char raltb_pl[MLXSW_REG_RALTB_LEN]; - mlxsw_reg_raltb_pack(raltb_pl, vr->id, - (enum mlxsw_reg_ralxx_protocol) vr->proto, - vr->lpm_tree->id); + mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id, + (enum mlxsw_reg_ralxx_protocol) fib->proto, + fib->lpm_tree->id); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); } static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_vr *vr) + const struct mlxsw_sp_fib *fib) { char raltb_pl[MLXSW_REG_RALTB_LEN]; /* Bind to tree 0 which is default */ - mlxsw_reg_raltb_pack(raltb_pl, vr->id, - (enum mlxsw_reg_ralxx_protocol) vr->proto, 0); + mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id, + (enum mlxsw_reg_ralxx_protocol) fib->proto, 0); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); } @@ -369,8 +562,7 @@ static u32 mlxsw_sp_fix_tb_id(u32 tb_id) } static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, - u32 tb_id, - enum mlxsw_sp_l3proto proto) + u32 tb_id) { struct mlxsw_sp_vr *vr; int i; @@ -379,69 +571,50 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { vr = &mlxsw_sp->router.vrs[i]; - if (vr->used && vr->proto == proto && vr->tb_id == tb_id) + if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id) return vr; } return NULL; } +static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr, + enum mlxsw_sp_l3proto proto) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return vr->fib4; + case MLXSW_SP_L3_PROTO_IPV6: + BUG_ON(1); + } + return NULL; +} + static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, - unsigned char prefix_len, - u32 tb_id, - enum mlxsw_sp_l3proto proto) + u32 tb_id) { - struct 
mlxsw_sp_prefix_usage req_prefix_usage; - struct mlxsw_sp_lpm_tree *lpm_tree; struct mlxsw_sp_vr *vr; - int err; vr = mlxsw_sp_vr_find_unused(mlxsw_sp); if (!vr) return ERR_PTR(-EBUSY); - vr->fib = mlxsw_sp_fib_create(); - if (IS_ERR(vr->fib)) - return ERR_CAST(vr->fib); - - vr->proto = proto; + vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4); + if (IS_ERR(vr->fib4)) + return ERR_CAST(vr->fib4); vr->tb_id = tb_id; - mlxsw_sp_prefix_usage_zero(&req_prefix_usage); - mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len); - lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, - proto, true); - if (IS_ERR(lpm_tree)) { - err = PTR_ERR(lpm_tree); - goto err_tree_get; - } - vr->lpm_tree = lpm_tree; - err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr); - if (err) - goto err_tree_bind; - - vr->used = true; return vr; - -err_tree_bind: - mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree); -err_tree_get: - mlxsw_sp_fib_destroy(vr->fib); - - return ERR_PTR(err); } -static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_vr *vr) +static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr) { - mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr); - mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree); - mlxsw_sp_fib_destroy(vr->fib); - vr->used = false; + mlxsw_sp_fib_destroy(vr->fib4); + vr->fib4 = NULL; } static int -mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, +mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib *fib, struct mlxsw_sp_prefix_usage *req_prefix_usage) { - struct mlxsw_sp_lpm_tree *lpm_tree = vr->lpm_tree; + struct mlxsw_sp_lpm_tree *lpm_tree = fib->lpm_tree; struct mlxsw_sp_lpm_tree *new_tree; int err; @@ -449,7 +622,7 @@ mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, return 0; new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage, - vr->proto, false); + fib->proto); if (IS_ERR(new_tree)) { /* We failed to get a tree according to the required * prefix usage. However, the current tree might be still good @@ -463,8 +636,8 @@ mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, } /* Prevent packet loss by overwriting existing binding */ - vr->lpm_tree = new_tree; - err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr); + fib->lpm_tree = new_tree; + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib); if (err) goto err_tree_bind; mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); @@ -472,53 +645,26 @@ mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, return 0; err_tree_bind: - vr->lpm_tree = lpm_tree; + fib->lpm_tree = lpm_tree; mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree); return err; } -static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, - unsigned char prefix_len, - u32 tb_id, - enum mlxsw_sp_l3proto proto) +static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id) { struct mlxsw_sp_vr *vr; - int err; tb_id = mlxsw_sp_fix_tb_id(tb_id); - vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto); - if (!vr) { - vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto); - if (IS_ERR(vr)) - return vr; - } else { - struct mlxsw_sp_prefix_usage req_prefix_usage; - - mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, - &vr->fib->prefix_usage); - mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len); - /* Need to replace LPM tree in case new prefix is required. 
*/ - err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr, - &req_prefix_usage); - if (err) - return ERR_PTR(err); - } + vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id); + if (!vr) + vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id); return vr; } -static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr) +static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr) { - /* Destroy virtual router entity in case the associated FIB is empty - * and allow it to be used for other tables in future. Otherwise, - * check if some prefix usage did not disappear and change tree if - * that is the case. Note that in case new, smaller tree cannot be - * allocated, the original one will be kept being used. - */ - if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage)) - mlxsw_sp_vr_destroy(mlxsw_sp, vr); - else - mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr, - &vr->fib->prefix_usage); + if (!vr->rif_count && list_empty(&vr->fib4->node_list)) + mlxsw_sp_vr_destroy(vr); } static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) @@ -627,14 +773,14 @@ static struct mlxsw_sp_neigh_entry * mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) { struct mlxsw_sp_neigh_entry *neigh_entry; - struct mlxsw_sp_rif *r; + struct mlxsw_sp_rif *rif; int err; - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev); - if (!r) + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev); + if (!rif) return ERR_PTR(-EINVAL); - neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, r->rif); + neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index); if (!neigh_entry) return ERR_PTR(-ENOMEM); @@ -642,7 +788,7 @@ mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) if (err) goto err_neigh_entry_insert; - list_add(&neigh_entry->rif_list_node, &r->neigh_list); + list_add(&neigh_entry->rif_list_node, &rif->neigh_list); return neigh_entry; @@ -1050,22 +1196,22 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp) } static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_rif *r) + const struct mlxsw_sp_rif *rif) { char rauht_pl[MLXSW_REG_RAUHT_LEN]; mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL, - r->rif, r->addr); + rif->rif_index, rif->addr); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); } static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *r) + struct mlxsw_sp_rif *rif) { struct mlxsw_sp_neigh_entry *neigh_entry, *tmp; - mlxsw_sp_neigh_rif_flush(mlxsw_sp, r); - list_for_each_entry_safe(neigh_entry, tmp, &r->neigh_list, + mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif); + list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list, rif_list_node) mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); } @@ -1082,7 +1228,7 @@ struct mlxsw_sp_nexthop { */ struct rhash_head ht_node; struct mlxsw_sp_nexthop_key key; - struct mlxsw_sp_rif *r; + struct mlxsw_sp_rif *rif; u8 should_offload:1, /* set indicates this neigh is connected and * should be put to KVD linear area of this group. 
*/ @@ -1109,7 +1255,7 @@ struct mlxsw_sp_nexthop_group { u16 ecmp_size; u16 count; struct mlxsw_sp_nexthop nexthops[0]; -#define nh_rif nexthops[0].r +#define nh_rif nexthops[0].rif }; static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = { @@ -1171,7 +1317,7 @@ mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_vr *vr, + const struct mlxsw_sp_fib *fib, u32 adj_index, u16 ecmp_size, u32 new_adj_index, u16 new_ecmp_size) @@ -1179,8 +1325,8 @@ static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp, char raleu_pl[MLXSW_REG_RALEU_LEN]; mlxsw_reg_raleu_pack(raleu_pl, - (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id, - adj_index, ecmp_size, new_adj_index, + (enum mlxsw_reg_ralxx_protocol) fib->proto, + fib->vr->id, adj_index, ecmp_size, new_adj_index, new_ecmp_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl); } @@ -1190,14 +1336,14 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp, u32 old_adj_index, u16 old_ecmp_size) { struct mlxsw_sp_fib_entry *fib_entry; - struct mlxsw_sp_vr *vr = NULL; + struct mlxsw_sp_fib *fib = NULL; int err; list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { - if (vr == fib_entry->fib_node->vr) + if (fib == fib_entry->fib_node->fib) continue; - vr = fib_entry->fib_node->vr; - err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr, + fib = fib_entry->fib_node->fib; + err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib, old_adj_index, old_ecmp_size, nh_grp->adj_index, @@ -1280,7 +1426,6 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, bool old_adj_index_valid; u32 old_adj_index; u16 old_ecmp_size; - int ret; int i; int err; @@ -1318,15 +1463,14 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, */ goto set_trap; - ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size); - if (ret < 0) { + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index); + if (err) { /* We ran out of KVD linear space, just set the * trap and let everything flow through kernel. 
*/ dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n"); goto set_trap; } - adj_index = ret; old_adj_index_valid = nh_grp->adj_index_valid; old_adj_index = nh_grp->adj_index; old_ecmp_size = nh_grp->ecmp_size; @@ -1399,22 +1543,22 @@ mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, } static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh, - struct mlxsw_sp_rif *r) + struct mlxsw_sp_rif *rif) { - if (nh->r) + if (nh->rif) return; - nh->r = r; - list_add(&nh->rif_list_node, &r->nexthop_list); + nh->rif = rif; + list_add(&nh->rif_list_node, &rif->nexthop_list); } static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh) { - if (!nh->r) + if (!nh->rif) return; list_del(&nh->rif_list_node); - nh->r = NULL; + nh->rif = NULL; } static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, @@ -1505,7 +1649,7 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, { struct net_device *dev = fib_nh->nh_dev; struct in_device *in_dev; - struct mlxsw_sp_rif *r; + struct mlxsw_sp_rif *rif; int err; nh->nh_grp = nh_grp; @@ -1514,15 +1658,18 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, if (err) return err; + if (!dev) + return 0; + in_dev = __in_dev_get_rtnl(dev); if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && fib_nh->nh_flags & RTNH_F_LINKDOWN) return 0; - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!r) + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) return 0; - mlxsw_sp_nexthop_rif_init(nh, r); + mlxsw_sp_nexthop_rif_init(nh, rif); err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); if (err) @@ -1548,7 +1695,7 @@ static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_nexthop_key key; struct mlxsw_sp_nexthop *nh; - struct mlxsw_sp_rif *r; + struct mlxsw_sp_rif *rif; if (mlxsw_sp->router.aborted) return; @@ -1558,13 +1705,13 @@ static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp, if (WARN_ON_ONCE(!nh)) return; - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev); - if (!r) + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev); + if (!rif) return; switch (event) { case FIB_EVENT_NH_ADD: - mlxsw_sp_nexthop_rif_init(nh, r); + mlxsw_sp_nexthop_rif_init(nh, rif); mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); break; case FIB_EVENT_NH_DEL: @@ -1577,11 +1724,11 @@ static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp, } static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *r) + struct mlxsw_sp_rif *rif) { struct mlxsw_sp_nexthop *nh, *tmp; - list_for_each_entry_safe(nh, tmp, &r->nexthop_list, rif_list_node) { + list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) { mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); mlxsw_sp_nexthop_rif_fini(nh); mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); @@ -1699,7 +1846,7 @@ static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) { fib_entry->offloaded = true; - switch (fib_entry->fib_node->vr->proto) { + switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: fib_info_offload_inc(fib_entry->nh_group->key.fi); break; @@ -1711,7 +1858,7 @@ static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) static void mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) { - switch (fib_entry->fib_node->vr->proto) { + switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: fib_info_offload_dec(fib_entry->nh_group->key.fi); break; @@ -1751,8 +1898,8 @@ static int 
mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, enum mlxsw_reg_ralue_op op) { char ralue_pl[MLXSW_REG_RALUE_LEN]; + struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; - struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr; enum mlxsw_reg_ralue_trap_action trap_action; u16 trap_id = 0; u32 adjacency_index = 0; @@ -1772,8 +1919,8 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, } mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) vr->proto, op, - vr->id, fib_entry->fib_node->key.prefix_len, + (enum mlxsw_reg_ralxx_protocol) fib->proto, op, + fib->vr->id, fib_entry->fib_node->key.prefix_len, *p_dip); mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, adjacency_index, ecmp_size); @@ -1784,27 +1931,28 @@ static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) { - struct mlxsw_sp_rif *r = fib_entry->nh_group->nh_rif; + struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif; + struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; enum mlxsw_reg_ralue_trap_action trap_action; char ralue_pl[MLXSW_REG_RALUE_LEN]; u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; - struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr; u16 trap_id = 0; - u16 rif = 0; + u16 rif_index = 0; if (mlxsw_sp_fib_entry_should_offload(fib_entry)) { trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; - rif = r->rif; + rif_index = rif->rif_index; } else { trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP; trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) vr->proto, op, - vr->id, fib_entry->fib_node->key.prefix_len, + (enum mlxsw_reg_ralxx_protocol) fib->proto, op, + fib->vr->id, fib_entry->fib_node->key.prefix_len, *p_dip); - mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, rif); + mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, + rif_index); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } @@ -1812,13 +1960,13 @@ static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) { + struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; char ralue_pl[MLXSW_REG_RALUE_LEN]; u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; - struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr; mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) vr->proto, op, - vr->id, fib_entry->fib_node->key.prefix_len, + (enum mlxsw_reg_ralxx_protocol) fib->proto, op, + fib->vr->id, fib_entry->fib_node->key.prefix_len, *p_dip); mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); @@ -1845,7 +1993,7 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, { int err = -EINVAL; - switch (fib_entry->fib_node->vr->proto) { + switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op); break; @@ -1877,17 +2025,29 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, { struct fib_info *fi = fen_info->fi; - if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) { + switch (fen_info->type) { + case RTN_BROADCAST: /* fall through */ + case RTN_LOCAL: fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; return 0; - } - if (fen_info->type != RTN_UNICAST) - return -EINVAL; - if (fi->fib_nh->nh_scope != RT_SCOPE_LINK) + case RTN_UNREACHABLE: /* fall through */ + case RTN_BLACKHOLE: /* 
fall through */ + case RTN_PROHIBIT: + /* Packets hitting these routes need to be trapped, but + * can do so with a lower priority than packets directed + * at the host, so use action type local instead of trap. + */ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; - else - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; - return 0; + return 0; + case RTN_UNICAST: + if (fi->fib_nh->nh_scope != RT_SCOPE_LINK) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + else + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + return 0; + default: + return -EINVAL; + } } static struct mlxsw_sp_fib_entry * @@ -1996,7 +2156,7 @@ mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr, } static struct mlxsw_sp_fib_node * -mlxsw_sp_fib_node_create(struct mlxsw_sp_vr *vr, const void *addr, +mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr, size_t addr_len, unsigned char prefix_len) { struct mlxsw_sp_fib_node *fib_node; @@ -2006,18 +2166,15 @@ mlxsw_sp_fib_node_create(struct mlxsw_sp_vr *vr, const void *addr, return NULL; INIT_LIST_HEAD(&fib_node->entry_list); - list_add(&fib_node->list, &vr->fib->node_list); + list_add(&fib_node->list, &fib->node_list); memcpy(fib_node->key.addr, addr, addr_len); fib_node->key.prefix_len = prefix_len; - mlxsw_sp_fib_node_insert(vr->fib, fib_node); - fib_node->vr = vr; return fib_node; } static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node) { - mlxsw_sp_fib_node_remove(fib_node->vr->fib, fib_node); list_del(&fib_node->list); WARN_ON(!list_empty(&fib_node->entry_list)); kfree(fib_node); @@ -2034,7 +2191,7 @@ mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node, static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node) { unsigned char prefix_len = fib_node->key.prefix_len; - struct mlxsw_sp_fib *fib = fib_node->vr->fib; + struct mlxsw_sp_fib *fib = fib_node->fib; if (fib->prefix_ref_count[prefix_len]++ == 0) mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len); @@ -2043,32 +2200,98 @@ static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node) static void mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node) { unsigned char prefix_len = fib_node->key.prefix_len; - struct mlxsw_sp_fib *fib = fib_node->vr->fib; + struct mlxsw_sp_fib *fib = fib_node->fib; if (--fib->prefix_ref_count[prefix_len] == 0) mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len); } +static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node, + struct mlxsw_sp_fib *fib) +{ + struct mlxsw_sp_prefix_usage req_prefix_usage; + struct mlxsw_sp_lpm_tree *lpm_tree; + int err; + + err = mlxsw_sp_fib_node_insert(fib, fib_node); + if (err) + return err; + fib_node->fib = fib; + + mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &fib->prefix_usage); + mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len); + + if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) { + err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib, + &req_prefix_usage); + if (err) + goto err_tree_check; + } else { + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, + fib->proto); + if (IS_ERR(lpm_tree)) + return PTR_ERR(lpm_tree); + fib->lpm_tree = lpm_tree; + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib); + if (err) + goto err_tree_bind; + } + + mlxsw_sp_fib_node_prefix_inc(fib_node); + + return 0; + +err_tree_bind: + fib->lpm_tree = NULL; + mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); +err_tree_check: + fib_node->fib = NULL; + mlxsw_sp_fib_node_remove(fib, 
fib_node); + return err; +} + +static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) +{ + struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree; + struct mlxsw_sp_fib *fib = fib_node->fib; + + mlxsw_sp_fib_node_prefix_dec(fib_node); + + if (mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) { + mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib); + fib->lpm_tree = NULL; + mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); + } else { + mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib, &fib->prefix_usage); + } + + fib_node->fib = NULL; + mlxsw_sp_fib_node_remove(fib, fib_node); +} + static struct mlxsw_sp_fib_node * mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_fib_node *fib_node; + struct mlxsw_sp_fib *fib; struct mlxsw_sp_vr *vr; int err; - vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id, - MLXSW_SP_L3_PROTO_IPV4); + vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->tb_id); if (IS_ERR(vr)) return ERR_CAST(vr); + fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4); - fib_node = mlxsw_sp_fib_node_lookup(vr->fib, &fen_info->dst, + fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst, sizeof(fen_info->dst), fen_info->dst_len); if (fib_node) return fib_node; - fib_node = mlxsw_sp_fib_node_create(vr, &fen_info->dst, + fib_node = mlxsw_sp_fib_node_create(fib, &fen_info->dst, sizeof(fen_info->dst), fen_info->dst_len); if (!fib_node) { @@ -2076,22 +2299,29 @@ mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp, goto err_fib_node_create; } + err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib); + if (err) + goto err_fib_node_init; + return fib_node; +err_fib_node_init: + mlxsw_sp_fib_node_destroy(fib_node); err_fib_node_create: - mlxsw_sp_vr_put(mlxsw_sp, vr); + mlxsw_sp_vr_put(vr); return ERR_PTR(err); } static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_vr *vr = fib_node->vr; + struct mlxsw_sp_vr *vr = fib_node->fib->vr; if (!list_empty(&fib_node->entry_list)) return; + mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node); mlxsw_sp_fib_node_destroy(fib_node); - mlxsw_sp_vr_put(mlxsw_sp, vr); + mlxsw_sp_vr_put(vr); } static struct mlxsw_sp_fib_entry * @@ -2236,8 +2466,6 @@ static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp, if (err) goto err_fib4_node_entry_add; - mlxsw_sp_fib_node_prefix_inc(fib_node); - return 0; err_fib4_node_entry_add: @@ -2251,7 +2479,6 @@ mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node; - mlxsw_sp_fib_node_prefix_dec(fib_node); mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry); mlxsw_sp_fib4_node_list_remove(fib_entry); } @@ -2340,9 +2567,7 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) { char ralta_pl[MLXSW_REG_RALTA_LEN]; char ralst_pl[MLXSW_REG_RALST_LEN]; - char raltb_pl[MLXSW_REG_RALTB_LEN]; - char ralue_pl[MLXSW_REG_RALUE_LEN]; - int err; + int i, err; mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4, MLXSW_SP_LPM_TREE_MIN); @@ -2355,16 +2580,33 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) if (err) return err; - mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4, - MLXSW_SP_LPM_TREE_MIN); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); - if (err) - return err; + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[i]; + char 
raltb_pl[MLXSW_REG_RALTB_LEN]; + char ralue_pl[MLXSW_REG_RALUE_LEN]; - mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4, - MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0); - mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); + if (!mlxsw_sp_vr_is_used(vr)) + continue; + + mlxsw_reg_raltb_pack(raltb_pl, vr->id, + MLXSW_REG_RALXX_PROTOCOL_IPV4, + MLXSW_SP_LPM_TREE_MIN); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), + raltb_pl); + if (err) + return err; + + mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4, + MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0, + 0); + mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), + ralue_pl); + if (err) + return err; + } + + return 0; } static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, @@ -2390,7 +2632,7 @@ static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { - switch (fib_node->vr->proto) { + switch (fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node); break; @@ -2400,26 +2642,32 @@ static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp, } } -static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr, + enum mlxsw_sp_l3proto proto) { + struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto); struct mlxsw_sp_fib_node *fib_node, *tmp; - struct mlxsw_sp_vr *vr; + + list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) { + bool do_break = &tmp->list == &fib->node_list; + + mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node); + if (do_break) + break; + } +} + +static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) +{ int i; for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { - vr = &mlxsw_sp->router.vrs[i]; + struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[i]; - if (!vr->used) + if (!mlxsw_sp_vr_is_used(vr)) continue; - - list_for_each_entry_safe(fib_node, tmp, &vr->fib->node_list, - list) { - bool do_break = &tmp->list == &vr->fib->node_list; - - mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node); - if (do_break) - break; - } + mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); } } @@ -2437,74 +2685,11 @@ static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n"); } -static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) -{ - char ritr_pl[MLXSW_REG_RITR_LEN]; - int err; - - mlxsw_reg_ritr_rif_pack(ritr_pl, rif); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); - if (WARN_ON_ONCE(err)) - return err; - - mlxsw_reg_ritr_enable_set(ritr_pl, false); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); -} - -void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *r) -{ - mlxsw_sp_router_rif_disable(mlxsw_sp, r->rif); - mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, r); - mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, r); -} - -static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) -{ - char rgcr_pl[MLXSW_REG_RGCR_LEN]; - u64 max_rifs; - int err; - - if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS)) - return -EIO; - - max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); - mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *), - GFP_KERNEL); - if (!mlxsw_sp->rifs) - return -ENOMEM; - - 
mlxsw_reg_rgcr_pack(rgcr_pl, true); - mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); - if (err) - goto err_rgcr_fail; - - return 0; - -err_rgcr_fail: - kfree(mlxsw_sp->rifs); - return err; -} - -static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) -{ - char rgcr_pl[MLXSW_REG_RGCR_LEN]; - int i; - - mlxsw_reg_rgcr_pack(rgcr_pl, false); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); - - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) - WARN_ON_ONCE(mlxsw_sp->rifs[i]); - - kfree(mlxsw_sp->rifs); -} - struct mlxsw_sp_fib_event_work { struct work_struct work; union { struct fib_entry_notifier_info fen_info; + struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; }; struct mlxsw_sp *mlxsw_sp; @@ -2516,6 +2701,7 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; + struct fib_rule *rule; bool replace, append; int err; @@ -2539,7 +2725,10 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) break; case FIB_EVENT_RULE_ADD: /* fall through */ case FIB_EVENT_RULE_DEL: - mlxsw_sp_router_fib4_abort(mlxsw_sp); + rule = fib_work->fr_info.rule; + if (!fib4_rule_default(rule) && !rule->l3mdev) + mlxsw_sp_router_fib4_abort(mlxsw_sp); + fib_rule_put(rule); break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: @@ -2582,6 +2771,11 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, */ fib_info_hold(fib_work->fen_info.fi); break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info)); + fib_rule_get(fib_work->fr_info.rule); + break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info)); @@ -2594,6 +2788,707 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, return NOTIFY_DONE; } +static struct mlxsw_sp_rif * +mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) +{ + int i; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) + if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev) + return mlxsw_sp->rifs[i]; + + return NULL; +} + +static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) +{ + char ritr_pl[MLXSW_REG_RITR_LEN]; + int err; + + mlxsw_reg_ritr_rif_pack(ritr_pl, rif); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); + if (WARN_ON_ONCE(err)) + return err; + + mlxsw_reg_ritr_enable_set(ritr_pl, false); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif) +{ + mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index); + mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif); + mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); +} + +static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, + const struct in_device *in_dev, + unsigned long event) +{ + switch (event) { + case NETDEV_UP: + if (!rif) + return true; + return false; + case NETDEV_DOWN: + if (rif && !in_dev->ifa_list && + !netif_is_l3_slave(rif->dev)) + return true; + /* It is possible we already removed the RIF ourselves + * if it was assigned to a netdev that is now a bridge + * or LAG slave. 
+ */ + return false; + } + + return false; +} + +#define MLXSW_SP_INVALID_INDEX_RIF 0xffff +static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) + if (!mlxsw_sp->rifs[i]) + return i; + + return MLXSW_SP_INVALID_INDEX_RIF; +} + +static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport, + bool *p_lagged, u16 *p_system_port) +{ + u8 local_port = mlxsw_sp_vport->local_port; + + *p_lagged = mlxsw_sp_vport->lagged; + *p_system_port = *p_lagged ? mlxsw_sp_vport->lag_id : local_port; +} + +static int mlxsw_sp_vport_rif_sp_op(struct mlxsw_sp_port *mlxsw_sp_vport, + u16 vr_id, struct net_device *l3_dev, + u16 rif_index, bool create) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + bool lagged = mlxsw_sp_vport->lagged; + char ritr_pl[MLXSW_REG_RITR_LEN]; + u16 system_port; + + mlxsw_reg_ritr_pack(ritr_pl, create, MLXSW_REG_RITR_SP_IF, rif_index, + vr_id, l3_dev->mtu, l3_dev->dev_addr); + + mlxsw_sp_vport_rif_sp_attr_get(mlxsw_sp_vport, &lagged, &system_port); + mlxsw_reg_ritr_sp_if_pack(ritr_pl, lagged, system_port, + mlxsw_sp_vport_vid_get(mlxsw_sp_vport)); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport); + +static u16 mlxsw_sp_rif_sp_to_fid(u16 rif_index) +{ + return MLXSW_SP_RFID_BASE + rif_index; +} + +static struct mlxsw_sp_fid * +mlxsw_sp_rfid_alloc(u16 fid, struct net_device *l3_dev) +{ + struct mlxsw_sp_fid *f; + + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (!f) + return NULL; + + f->leave = mlxsw_sp_vport_rif_sp_leave; + f->ref_count = 0; + f->dev = l3_dev; + f->fid = fid; + + return f; +} + +static struct mlxsw_sp_rif * +mlxsw_sp_rif_alloc(u16 rif_index, u16 vr_id, struct net_device *l3_dev, + struct mlxsw_sp_fid *f) +{ + struct mlxsw_sp_rif *rif; + + rif = kzalloc(sizeof(*rif), GFP_KERNEL); + if (!rif) + return NULL; + + INIT_LIST_HEAD(&rif->nexthop_list); + INIT_LIST_HEAD(&rif->neigh_list); + ether_addr_copy(rif->addr, l3_dev->dev_addr); + rif->mtu = l3_dev->mtu; + rif->vr_id = vr_id; + rif->dev = l3_dev; + rif->rif_index = rif_index; + rif->f = f; + + return rif; +} + +u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif) +{ + return rif->rif_index; +} + +int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif) +{ + return rif->dev->ifindex; +} + +static struct mlxsw_sp_rif * +mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *l3_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + u32 tb_id = l3mdev_fib_table(l3_dev); + struct mlxsw_sp_vr *vr; + struct mlxsw_sp_fid *f; + struct mlxsw_sp_rif *rif; + u16 fid, rif_index; + int err; + + rif_index = mlxsw_sp_avail_rif_get(mlxsw_sp); + if (rif_index == MLXSW_SP_INVALID_INDEX_RIF) + return ERR_PTR(-ERANGE); + + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? 
: RT_TABLE_MAIN); + if (IS_ERR(vr)) + return ERR_CAST(vr); + + err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, + rif_index, true); + if (err) + goto err_vport_rif_sp_op; + + fid = mlxsw_sp_rif_sp_to_fid(rif_index); + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, true); + if (err) + goto err_rif_fdb_op; + + f = mlxsw_sp_rfid_alloc(fid, l3_dev); + if (!f) { + err = -ENOMEM; + goto err_rfid_alloc; + } + + rif = mlxsw_sp_rif_alloc(rif_index, vr->id, l3_dev, f); + if (!rif) { + err = -ENOMEM; + goto err_rif_alloc; + } + + if (devlink_dpipe_table_counter_enabled(priv_to_devlink(mlxsw_sp->core), + MLXSW_SP_DPIPE_TABLE_NAME_ERIF)) { + err = mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, + MLXSW_SP_RIF_COUNTER_EGRESS); + if (err) + netdev_dbg(mlxsw_sp_vport->dev, + "Counter alloc Failed err=%d\n", err); + } + + f->rif = rif; + mlxsw_sp->rifs[rif_index] = rif; + vr->rif_count++; + + return rif; + +err_rif_alloc: + kfree(f); +err_rfid_alloc: + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false); +err_rif_fdb_op: + mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, rif_index, + false); +err_vport_rif_sp_op: + mlxsw_sp_vr_put(vr); + return ERR_PTR(err); +} + +static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport, + struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[rif->vr_id]; + struct net_device *l3_dev = rif->dev; + struct mlxsw_sp_fid *f = rif->f; + u16 rif_index = rif->rif_index; + u16 fid = f->fid; + + mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); + + mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS); + mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_INGRESS); + + vr->rif_count--; + mlxsw_sp->rifs[rif_index] = NULL; + f->rif = NULL; + + kfree(rif); + + kfree(f); + + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false); + + mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, rif_index, + false); + mlxsw_sp_vr_put(vr); +} + +static int mlxsw_sp_vport_rif_sp_join(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *l3_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + struct mlxsw_sp_rif *rif; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); + if (!rif) { + rif = mlxsw_sp_vport_rif_sp_create(mlxsw_sp_vport, l3_dev); + if (IS_ERR(rif)) + return PTR_ERR(rif); + } + + mlxsw_sp_vport_fid_set(mlxsw_sp_vport, rif->f); + rif->f->ref_count++; + + netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", rif->f->fid); + + return 0; +} + +static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport) +{ + struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + + netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid); + + mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL); + if (--f->ref_count == 0) + mlxsw_sp_vport_rif_sp_destroy(mlxsw_sp_vport, f->rif); +} + +static int mlxsw_sp_inetaddr_vport_event(struct net_device *l3_dev, + struct net_device *port_dev, + unsigned long event, u16 vid) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev); + struct mlxsw_sp_port *mlxsw_sp_vport; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); + if (WARN_ON(!mlxsw_sp_vport)) + return -EINVAL; + + switch (event) { + case NETDEV_UP: + return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, l3_dev); + case NETDEV_DOWN: + mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport); + break; + } + + return 0; +} + +static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev, + unsigned 
long event) +{ + if (netif_is_bridge_port(port_dev) || + netif_is_lag_port(port_dev) || + netif_is_ovs_port(port_dev)) + return 0; + + return mlxsw_sp_inetaddr_vport_event(port_dev, port_dev, event, 1); +} + +static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, + struct net_device *lag_dev, + unsigned long event, u16 vid) +{ + struct net_device *port_dev; + struct list_head *iter; + int err; + + netdev_for_each_lower_dev(lag_dev, port_dev, iter) { + if (mlxsw_sp_port_dev_check(port_dev)) { + err = mlxsw_sp_inetaddr_vport_event(l3_dev, port_dev, + event, vid); + if (err) + return err; + } + } + + return 0; +} + +static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev, + unsigned long event) +{ + if (netif_is_bridge_port(lag_dev)) + return 0; + + return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1); +} + +static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev) +{ + u16 fid; + + if (is_vlan_dev(l3_dev)) + fid = vlan_dev_vlan_id(l3_dev); + else if (mlxsw_sp->master_bridge.dev == l3_dev) + fid = 1; + else + return mlxsw_sp_vfid_find(mlxsw_sp, l3_dev); + + return mlxsw_sp_fid_find(mlxsw_sp, fid); +} + +static u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp) +{ + return mlxsw_core_max_ports(mlxsw_sp->core) + 1; +} + +static enum mlxsw_flood_table_type mlxsw_sp_flood_table_type_get(u16 fid) +{ + return mlxsw_sp_fid_is_vfid(fid) ? MLXSW_REG_SFGC_TABLE_TYPE_FID : + MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; +} + +static u16 mlxsw_sp_flood_table_index_get(u16 fid) +{ + return mlxsw_sp_fid_is_vfid(fid) ? mlxsw_sp_fid_to_vfid(fid) : fid; +} + +static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid, + bool set) +{ + u8 router_port = mlxsw_sp_router_port(mlxsw_sp); + enum mlxsw_flood_table_type table_type; + char *sftr_pl; + u16 index; + int err; + + sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); + if (!sftr_pl) + return -ENOMEM; + + table_type = mlxsw_sp_flood_table_type_get(fid); + index = mlxsw_sp_flood_table_index_get(fid); + mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BC, index, table_type, + 1, router_port, set); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); + + kfree(sftr_pl); + return err; +} + +static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid) +{ + if (mlxsw_sp_fid_is_vfid(fid)) + return MLXSW_REG_RITR_FID_IF; + else + return MLXSW_REG_RITR_VLAN_IF; +} + +static int mlxsw_sp_rif_bridge_op(struct mlxsw_sp *mlxsw_sp, u16 vr_id, + struct net_device *l3_dev, + u16 fid, u16 rif, + bool create) +{ + enum mlxsw_reg_ritr_if_type rif_type; + char ritr_pl[MLXSW_REG_RITR_LEN]; + + rif_type = mlxsw_sp_rif_type_get(fid); + mlxsw_reg_ritr_pack(ritr_pl, create, rif_type, rif, vr_id, l3_dev->mtu, + l3_dev->dev_addr); + mlxsw_reg_ritr_fid_set(ritr_pl, rif_type, fid); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + struct mlxsw_sp_fid *f) +{ + u32 tb_id = l3mdev_fib_table(l3_dev); + struct mlxsw_sp_rif *rif; + struct mlxsw_sp_vr *vr; + u16 rif_index; + int err; + + rif_index = mlxsw_sp_avail_rif_get(mlxsw_sp); + if (rif_index == MLXSW_SP_INVALID_INDEX_RIF) + return -ERANGE; + + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? 
: RT_TABLE_MAIN); + if (IS_ERR(vr)) + return PTR_ERR(vr); + + err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true); + if (err) + goto err_port_flood_set; + + err = mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, + rif_index, true); + if (err) + goto err_rif_bridge_op; + + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true); + if (err) + goto err_rif_fdb_op; + + rif = mlxsw_sp_rif_alloc(rif_index, vr->id, l3_dev, f); + if (!rif) { + err = -ENOMEM; + goto err_rif_alloc; + } + + f->rif = rif; + mlxsw_sp->rifs[rif_index] = rif; + vr->rif_count++; + + netdev_dbg(l3_dev, "RIF=%d created\n", rif_index); + + return 0; + +err_rif_alloc: + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false); +err_rif_fdb_op: + mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, rif_index, + false); +err_rif_bridge_op: + mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false); +err_port_flood_set: + mlxsw_sp_vr_put(vr); + return err; +} + +void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[rif->vr_id]; + struct net_device *l3_dev = rif->dev; + struct mlxsw_sp_fid *f = rif->f; + u16 rif_index = rif->rif_index; + + mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); + + vr->rif_count--; + mlxsw_sp->rifs[rif_index] = NULL; + f->rif = NULL; + + kfree(rif); + + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false); + + mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, rif_index, + false); + + mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false); + + mlxsw_sp_vr_put(vr); + + netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif_index); +} + +static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev, + struct net_device *br_dev, + unsigned long event) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev); + struct mlxsw_sp_fid *f; + + /* FID can either be an actual FID if the L3 device is the + * VLAN-aware bridge or a VLAN device on top. Otherwise, the + * L3 device is a VLAN-unaware bridge and we get a vFID. 
+ */ + f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev); + if (WARN_ON(!f)) + return -EINVAL; + + switch (event) { + case NETDEV_UP: + return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f); + case NETDEV_DOWN: + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif); + break; + } + + return 0; +} + +static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev, + unsigned long event) +{ + struct net_device *real_dev = vlan_dev_real_dev(vlan_dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev); + u16 vid = vlan_dev_vlan_id(vlan_dev); + + if (mlxsw_sp_port_dev_check(real_dev)) + return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event, + vid); + else if (netif_is_lag_master(real_dev)) + return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event, + vid); + else if (netif_is_bridge_master(real_dev) && + mlxsw_sp->master_bridge.dev == real_dev) + return mlxsw_sp_inetaddr_bridge_event(vlan_dev, real_dev, + event); + + return 0; +} + +static int __mlxsw_sp_inetaddr_event(struct net_device *dev, + unsigned long event) +{ + if (mlxsw_sp_port_dev_check(dev)) + return mlxsw_sp_inetaddr_port_event(dev, event); + else if (netif_is_lag_master(dev)) + return mlxsw_sp_inetaddr_lag_event(dev, event); + else if (netif_is_bridge_master(dev)) + return mlxsw_sp_inetaddr_bridge_event(dev, dev, event); + else if (is_vlan_dev(dev)) + return mlxsw_sp_inetaddr_vlan_event(dev, event); + else + return 0; +} + +int mlxsw_sp_inetaddr_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr *) ptr; + struct net_device *dev = ifa->ifa_dev->dev; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + int err = 0; + + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + goto out; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!mlxsw_sp_rif_should_config(rif, ifa->ifa_dev, event)) + goto out; + + err = __mlxsw_sp_inetaddr_event(dev, event); +out: + return notifier_from_errno(err); +} + +static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index, + const char *mac, int mtu) +{ + char ritr_pl[MLXSW_REG_RITR_LEN]; + int err; + + mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); + if (err) + return err; + + mlxsw_reg_ritr_mtu_set(ritr_pl, mtu); + mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac); + mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +int mlxsw_sp_netdevice_router_port_event(struct net_device *dev) +{ + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + int err; + + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + return 0; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + return 0; + + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, rif->f->fid, false); + if (err) + return err; + + err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr, + dev->mtu); + if (err) + goto err_rif_edit; + + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, rif->f->fid, true); + if (err) + goto err_rif_fdb_op; + + ether_addr_copy(rif->addr, dev->dev_addr); + rif->mtu = dev->mtu; + + netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index); + + return 0; + +err_rif_fdb_op: + mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu); +err_rif_edit: + mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, rif->f->fid, true); + return err; +} + +static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev) +{ + struct mlxsw_sp_rif *rif; + + /* If 
netdev is already associated with a RIF, then we need to + * destroy it and create a new one with the new virtual router ID. + */ + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); + if (rif) + __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN); + + return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP); +} + +static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev) +{ + struct mlxsw_sp_rif *rif; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); + if (!rif) + return; + __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN); +} + +int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, + struct netdev_notifier_changeupper_info *info) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev); + int err = 0; + + if (!mlxsw_sp) + return 0; + + switch (event) { + case NETDEV_PRECHANGEUPPER: + return 0; + case NETDEV_CHANGEUPPER: + if (info->linking) + err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev); + else + mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev); + break; + } + + return err; +} + static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb) { struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb); @@ -2606,6 +3501,48 @@ static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb) mlxsw_sp_router_fib_flush(mlxsw_sp); } +static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +{ + char rgcr_pl[MLXSW_REG_RGCR_LEN]; + u64 max_rifs; + int err; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS)) + return -EIO; + + max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *), + GFP_KERNEL); + if (!mlxsw_sp->rifs) + return -ENOMEM; + + mlxsw_reg_rgcr_pack(rgcr_pl, true); + mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + if (err) + goto err_rgcr_fail; + + return 0; + +err_rgcr_fail: + kfree(mlxsw_sp->rifs); + return err; +} + +static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) +{ + char rgcr_pl[MLXSW_REG_RGCR_LEN]; + int i; + + mlxsw_reg_rgcr_pack(rgcr_pl, false); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) + WARN_ON_ONCE(mlxsw_sp->rifs[i]); + + kfree(mlxsw_sp->rifs); +} + int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { int err; @@ -2625,7 +3562,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_nexthop_group_ht_init; - mlxsw_sp_lpm_init(mlxsw_sp); + err = mlxsw_sp_lpm_init(mlxsw_sp); + if (err) + goto err_lpm_init; + err = mlxsw_sp_vrs_init(mlxsw_sp); if (err) goto err_vrs_init; @@ -2647,6 +3587,8 @@ err_register_fib_notifier: err_neigh_init: mlxsw_sp_vrs_fini(mlxsw_sp); err_vrs_init: + mlxsw_sp_lpm_fini(mlxsw_sp); +err_lpm_init: rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht); err_nexthop_group_ht_init: rhashtable_destroy(&mlxsw_sp->router.nexthop_ht); @@ -2660,6 +3602,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) unregister_fib_notifier(&mlxsw_sp->fib_nb); mlxsw_sp_neigh_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); + mlxsw_sp_lpm_fini(mlxsw_sp); rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht); rhashtable_destroy(&mlxsw_sp->router.nexthop_ht); __mlxsw_sp_router_fini(mlxsw_sp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h new file mode 100644 index 000000000000..c3095fef6697 --- /dev/null +++ 
b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -0,0 +1,58 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Arkadi Sharshevsky <arkadis@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _MLXSW_ROUTER_H_ +#define _MLXSW_ROUTER_H_ + +#include "spectrum.h" + +enum mlxsw_sp_rif_counter_dir { + MLXSW_SP_RIF_COUNTER_INGRESS, + MLXSW_SP_RIF_COUNTER_EGRESS, +}; + +u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); +int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); +int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir, + u64 *cnt); +void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir); +int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir); + +#endif /* _MLXSW_ROUTER_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 598727d578c1..0d8411f1f954 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -568,8 +568,8 @@ void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f) list_del(&f->list); - if (f->r) - mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); + if (f->rif) + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif); kfree(f); @@ -745,27 +745,6 @@ err_port_allow_untagged_set: return err; } -static int __mlxsw_sp_port_vlans_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid_begin, u16 vid_end, bool is_member, - bool untagged) -{ - u16 vid, vid_e; - int err; - - for (vid = vid_begin; vid <= vid_end; - vid += MLXSW_REG_SPVM_REC_MAX_COUNT) { - vid_e = min((u16) (vid + MLXSW_REG_SPVM_REC_MAX_COUNT - 1), - vid_end); - - err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid_e, - is_member, untagged); - if (err) - return err; - } - - return 0; -} - static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool learn_enable) @@ -804,8 +783,8 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, return err; } - err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, - true, flag_untagged); + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid_begin, vid_end, + true, flag_untagged); if (err) { netdev_err(dev, "Unable to add VIDs %d-%d\n", vid_begin, vid_end); @@ -863,8 +842,8 @@ err_port_vid_learning_set: if (old_pvid != mlxsw_sp_port->pvid) mlxsw_sp_port_pvid_set(mlxsw_sp_port, old_pvid); err_port_pvid_set: - __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, false, - false); + mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid_begin, vid_end, + false, false); err_port_vlans_set: mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end); return err; @@ -1012,7 +991,7 @@ static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid, mlxsw_reg_smid_pack(smid_pl, mid, mlxsw_sp_port->local_port, add); if (clear_all_ports) { - for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) + for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) if (mlxsw_sp->ports[i]) mlxsw_reg_smid_port_mask_set(smid_pl, i, 1); } @@ -1171,8 +1150,8 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, if (pvid >= vid_begin && pvid <= vid_end) mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0); - __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, false, - false); + mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid_begin, vid_end, + false, false); mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end); diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c 
b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index ec1e886d4566..3b0f72455681 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -1321,7 +1321,7 @@ static void mlxsw_sx_ports_remove(struct mlxsw_sx *mlxsw_sx) { int i; - for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) + for (i = 1; i < mlxsw_core_max_ports(mlxsw_sx->core); i++) if (mlxsw_sx_port_created(mlxsw_sx, i)) mlxsw_sx_port_remove(mlxsw_sx, i); kfree(mlxsw_sx->ports); @@ -1329,17 +1329,18 @@ static void mlxsw_sx_ports_remove(struct mlxsw_sx *mlxsw_sx) static int mlxsw_sx_ports_create(struct mlxsw_sx *mlxsw_sx) { + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sx->core); size_t alloc_size; u8 module, width; int i; int err; - alloc_size = sizeof(struct mlxsw_sx_port *) * MLXSW_PORT_MAX_PORTS; + alloc_size = sizeof(struct mlxsw_sx_port *) * max_ports; mlxsw_sx->ports = kzalloc(alloc_size, GFP_KERNEL); if (!mlxsw_sx->ports) return -ENOMEM; - for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) { + for (i = 1; i < max_ports; i++) { err = mlxsw_sx_port_module_info_get(mlxsw_sx, i, &module, &width); if (err) diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 02ea48b15eb5..e008fdbed20f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -55,6 +55,7 @@ enum { MLXSW_TRAP_ID_IGMP_V2_LEAVE = 0x33, MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34, MLXSW_TRAP_ID_PKT_SAMPLE = 0x38, + MLXSW_TRAP_ID_FID_MISS = 0x3D, MLXSW_TRAP_ID_ARPBC = 0x50, MLXSW_TRAP_ID_ARPUC = 0x51, MLXSW_TRAP_ID_MTUERROR = 0x52, |
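The mlxsw_sp_router_fib_event_work() hunk earlier in this patch changes the FIB rule notifier path: instead of unconditionally calling mlxsw_sp_router_fib4_abort() on FIB_EVENT_RULE_ADD/DEL, the work item now takes a reference on the rule and aborts hardware offload only for rules the device cannot express, i.e. rules that are neither one of the default IPv4 rules (fib4_rule_default()) nor l3mdev/VRF rules. The stand-alone sketch below models just that predicate outside the kernel; struct fake_fib_rule and its fields are illustrative stand-ins, not driver or kernel types.

/*
 * Hedged user-space model of the rule-abort decision; not part of the
 * patch. "is_default" stands in for fib4_rule_default(rule) and "l3mdev"
 * mirrors rule->l3mdev.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_fib_rule {
        bool is_default;        /* one of the default IPv4 rules */
        bool l3mdev;            /* VRF (l3mdev) rule */
};

static bool rule_needs_abort(const struct fake_fib_rule *rule)
{
        /* Only rules the hardware cannot reflect force the abort path. */
        return !rule->is_default && !rule->l3mdev;
}

int main(void)
{
        struct fake_fib_rule def    = { .is_default = true,  .l3mdev = false };
        struct fake_fib_rule vrf    = { .is_default = false, .l3mdev = true  };
        struct fake_fib_rule custom = { .is_default = false, .l3mdev = false };

        printf("default rule aborts offload: %d\n", rule_needs_abort(&def));
        printf("l3mdev rule aborts offload:  %d\n", rule_needs_abort(&vrf));
        printf("custom rule aborts offload:  %d\n", rule_needs_abort(&custom));
        return 0;
}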
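The new inetaddr notifier support hinges on mlxsw_sp_rif_should_config(): a router interface (RIF) is created on the first NETDEV_UP seen for a netdev that has none, and torn down on NETDEV_DOWN only once the last IPv4 address is removed and the netdev is not an L3 (VRF) slave, since the RIF may already have been removed when the netdev became a bridge or LAG slave. The user-space sketch below models that decision table with simplified stand-in types (struct fake_rif, struct fake_in_device, the FAKE_NETDEV_* constants); it is a reading aid, not the driver's API.

/*
 * Hedged model of the should-config decision with stand-in types; not
 * part of the patch.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

enum fake_netdev_event { FAKE_NETDEV_UP, FAKE_NETDEV_DOWN };

struct fake_rif { bool is_l3_slave; };  /* stand-in for struct mlxsw_sp_rif */
struct fake_in_device { int num_ifa; }; /* stand-in for struct in_device */

static bool rif_should_config(const struct fake_rif *rif,
                              const struct fake_in_device *in_dev,
                              enum fake_netdev_event event)
{
        switch (event) {
        case FAKE_NETDEV_UP:
                /* Configure a RIF only for the first address on the netdev. */
                return rif == NULL;
        case FAKE_NETDEV_DOWN:
                /* Tear down only when the last address goes away and the
                 * netdev is not an L3 (VRF) slave; the RIF may already be
                 * gone if the netdev became a bridge or LAG slave.
                 */
                return rif && in_dev->num_ifa == 0 && !rif->is_l3_slave;
        }
        return false;
}

int main(void)
{
        struct fake_rif rif = { .is_l3_slave = false };
        struct fake_in_device in_dev = { .num_ifa = 0 };

        printf("first UP, no RIF yet -> %d\n",
               rif_should_config(NULL, &in_dev, FAKE_NETDEV_UP));
        printf("last DOWN, RIF present -> %d\n",
               rif_should_config(&rif, &in_dev, FAKE_NETDEV_DOWN));
        return 0;
}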
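The switchx2.c and spectrum_switchdev.c hunks drop the compile-time MLXSW_PORT_MAX_PORTS bound in favour of the per-device limit returned by mlxsw_core_max_ports(), so port arrays are sized and walked at run time. The small model below shows the same allocate-then-iterate-from-1 pattern; max_ports, struct fake_port and the sample value 64 are placeholders for illustration only.

/*
 * Hedged model of run-time port array sizing; not part of the patch.
 */
#include <stdio.h>
#include <stdlib.h>

struct fake_port { int local_port; };

int main(void)
{
        unsigned int max_ports = 64;    /* would come from mlxsw_core_max_ports() */
        struct fake_port p1 = { .local_port = 1 };
        struct fake_port **ports;
        unsigned int i;

        /* One slot per possible local port; slot 0 stays unused, matching
         * the driver loops that start at 1.
         */
        ports = calloc(max_ports, sizeof(*ports));
        if (!ports)
                return 1;
        ports[1] = &p1;

        for (i = 1; i < max_ports; i++)
                if (ports[i])
                        printf("port %u present\n", ports[i]->local_port);

        free(ports);
        return 0;
}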