diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-03 02:40:27 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-05-03 02:40:27 +0300 |
commit | 8d65b08debc7e62b2c6032d7fe7389d895b92cbc (patch) | |
tree | 0c3141b60c3a03cc32742b5750c5e763b9dae489 /drivers/net/ethernet/mellanox | |
parent | 5a0387a8a8efb90ae7fea1e2e5c62de3efa74691 (diff) | |
parent | 5d15af6778b8e4ed1fd41b040283af278e7a9a72 (diff) | |
download | linux-8d65b08debc7e62b2c6032d7fe7389d895b92cbc.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Here are some highlights from the 2065 networking commits that
happened this development cycle:
1) XDP support for IXGBE (John Fastabend) and thunderx (Sunil Kowuri)
2) Add a generic XDP driver, so that anyone can test XDP even if they
lack a networking device whose driver has explicit XDP support
(me).
3) Sparc64 now has an eBPF JIT too (me)
4) Add a BPF program testing framework via BPF_PROG_TEST_RUN (Alexei
Starovoitov)
5) Make netfilter network namespace teardown less expensive (Florian
Westphal)
6) Add symmetric hashing support to nft_hash (Laura Garcia Liebana)
7) Implement NAPI and GRO in netvsc driver (Stephen Hemminger)
8) Support TC flower offload statistics in mlxsw (Arkadi Sharshevsky)
9) Multiqueue support in stmmac driver (Joao Pinto)
10) Remove TCP timewait recycling, it never really could possibly work
well in the real world and timestamp randomization really zaps any
hint of usability this feature had (Soheil Hassas Yeganeh)
11) Support level3 vs level4 ECMP route hashing in ipv4 (Nikolay
Aleksandrov)
12) Add socket busy poll support to epoll (Sridhar Samudrala)
13) Netlink extended ACK support (Johannes Berg, Pablo Neira Ayuso,
and several others)
14) IPSEC hw offload infrastructure (Steffen Klassert)"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (2065 commits)
tipc: refactor function tipc_sk_recv_stream()
tipc: refactor function tipc_sk_recvmsg()
net: thunderx: Optimize page recycling for XDP
net: thunderx: Support for XDP header adjustment
net: thunderx: Add support for XDP_TX
net: thunderx: Add support for XDP_DROP
net: thunderx: Add basic XDP support
net: thunderx: Cleanup receive buffer allocation
net: thunderx: Optimize CQE_TX handling
net: thunderx: Optimize RBDR descriptor handling
net: thunderx: Support for page recycling
ipx: call ipxitf_put() in ioctl error path
net: sched: add helpers to handle extended actions
qed*: Fix issues in the ptp filter config implementation.
qede: Fix concurrency issue in PTP Tx path processing.
stmmac: Add support for SIMATIC IOT2000 platform
net: hns: fix ethtool_get_strings overflow in hns driver
tcp: fix wraparound issue in tcp_lp
bpf, arm64: fix jit branch offset related to ldimm64
bpf, arm64: implement jiting of BPF_XADD
...
Diffstat (limited to 'drivers/net/ethernet/mellanox')
69 files changed, 8028 insertions, 3854 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index c4d714fcc7da..ffbcb27c05e5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -117,7 +117,7 @@ static const char main_strings[][ETH_GSTRING_LEN] = { /* port statistics */ "tso_packets", "xmit_more", - "queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_failed", + "queue_stopped", "wake_queue", "tx_timeout", "rx_alloc_pages", "rx_csum_good", "rx_csum_none", "rx_csum_complete", "tx_chksum_offload", /* pf statistics */ diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 61420473fe5f..94fab20ef146 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -92,7 +92,9 @@ static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, __be16 proto, if (tc->type != TC_SETUP_MQPRIO) return -EINVAL; - return mlx4_en_setup_tc(dev, tc->tc); + tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + + return mlx4_en_setup_tc(dev, tc->mqprio->num_tc); } #ifdef CONFIG_RFS_ACCEL diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c index 9166d90e7328..e0eb695318e6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_port.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c @@ -213,6 +213,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) priv->port_stats.rx_chksum_good = 0; priv->port_stats.rx_chksum_none = 0; priv->port_stats.rx_chksum_complete = 0; + priv->port_stats.rx_alloc_pages = 0; priv->xdp_stats.rx_xdp_drop = 0; priv->xdp_stats.rx_xdp_tx = 0; priv->xdp_stats.rx_xdp_tx_full = 0; @@ -223,6 +224,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset) priv->port_stats.rx_chksum_good += READ_ONCE(ring->csum_ok); priv->port_stats.rx_chksum_none += READ_ONCE(ring->csum_none); 
priv->port_stats.rx_chksum_complete += READ_ONCE(ring->csum_complete); + priv->port_stats.rx_alloc_pages += READ_ONCE(ring->rx_alloc_pages); priv->xdp_stats.rx_xdp_drop += READ_ONCE(ring->xdp_drop); priv->xdp_stats.rx_xdp_tx += READ_ONCE(ring->xdp_tx); priv->xdp_stats.rx_xdp_tx_full += READ_ONCE(ring->xdp_tx_full); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 867292880c07..aa074e57ce06 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -50,173 +50,62 @@ #include "mlx4_en.h" -static int mlx4_alloc_pages(struct mlx4_en_priv *priv, - struct mlx4_en_rx_alloc *page_alloc, - const struct mlx4_en_frag_info *frag_info, - gfp_t _gfp) +static int mlx4_alloc_page(struct mlx4_en_priv *priv, + struct mlx4_en_rx_alloc *frag, + gfp_t gfp) { - int order; struct page *page; dma_addr_t dma; - for (order = frag_info->order; ;) { - gfp_t gfp = _gfp; - - if (order) - gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NOMEMALLOC; - page = alloc_pages(gfp, order); - if (likely(page)) - break; - if (--order < 0 || - ((PAGE_SIZE << order) < frag_info->frag_size)) - return -ENOMEM; - } - dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order, - frag_info->dma_dir); + page = alloc_page(gfp); + if (unlikely(!page)) + return -ENOMEM; + dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE, priv->dma_dir); if (unlikely(dma_mapping_error(priv->ddev, dma))) { - put_page(page); + __free_page(page); return -ENOMEM; } - page_alloc->page_size = PAGE_SIZE << order; - page_alloc->page = page; - page_alloc->dma = dma; - page_alloc->page_offset = 0; - /* Not doing get_page() for each frag is a big win - * on asymetric workloads. Note we can not use atomic_set(). 
- */ - page_ref_add(page, page_alloc->page_size / frag_info->frag_stride - 1); + frag->page = page; + frag->dma = dma; + frag->page_offset = priv->rx_headroom; return 0; } static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, + struct mlx4_en_rx_ring *ring, struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_alloc *frags, - struct mlx4_en_rx_alloc *ring_alloc, gfp_t gfp) { - struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS]; - const struct mlx4_en_frag_info *frag_info; - struct page *page; int i; - for (i = 0; i < priv->num_frags; i++) { - frag_info = &priv->frag_info[i]; - page_alloc[i] = ring_alloc[i]; - page_alloc[i].page_offset += frag_info->frag_stride; - - if (page_alloc[i].page_offset + frag_info->frag_stride <= - ring_alloc[i].page_size) - continue; - - if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i], - frag_info, gfp))) - goto out; - } - - for (i = 0; i < priv->num_frags; i++) { - frags[i] = ring_alloc[i]; - frags[i].page_offset += priv->frag_info[i].rx_headroom; - rx_desc->data[i].addr = cpu_to_be64(frags[i].dma + - frags[i].page_offset); - ring_alloc[i] = page_alloc[i]; - } - - return 0; - -out: - while (i--) { - if (page_alloc[i].page != ring_alloc[i].page) { - dma_unmap_page(priv->ddev, page_alloc[i].dma, - page_alloc[i].page_size, - priv->frag_info[i].dma_dir); - page = page_alloc[i].page; - /* Revert changes done by mlx4_alloc_pages */ - page_ref_sub(page, page_alloc[i].page_size / - priv->frag_info[i].frag_stride - 1); - put_page(page); + for (i = 0; i < priv->num_frags; i++, frags++) { + if (!frags->page) { + if (mlx4_alloc_page(priv, frags, gfp)) + return -ENOMEM; + ring->rx_alloc_pages++; } - } - return -ENOMEM; -} - -static void mlx4_en_free_frag(struct mlx4_en_priv *priv, - struct mlx4_en_rx_alloc *frags, - int i) -{ - const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; - u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride; - - - if (next_frag_end > frags[i].page_size) - dma_unmap_page(priv->ddev, 
frags[i].dma, frags[i].page_size, - frag_info->dma_dir); - - if (frags[i].page) - put_page(frags[i].page); -} - -static int mlx4_en_init_allocator(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring) -{ - int i; - struct mlx4_en_rx_alloc *page_alloc; - - for (i = 0; i < priv->num_frags; i++) { - const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; - - if (mlx4_alloc_pages(priv, &ring->page_alloc[i], - frag_info, GFP_KERNEL | __GFP_COLD)) - goto out; - - en_dbg(DRV, priv, " frag %d allocator: - size:%d frags:%d\n", - i, ring->page_alloc[i].page_size, - page_ref_count(ring->page_alloc[i].page)); + rx_desc->data[i].addr = cpu_to_be64(frags->dma + + frags->page_offset); } return 0; - -out: - while (i--) { - struct page *page; - - page_alloc = &ring->page_alloc[i]; - dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->page_size, - priv->frag_info[i].dma_dir); - page = page_alloc->page; - /* Revert changes done by mlx4_alloc_pages */ - page_ref_sub(page, page_alloc->page_size / - priv->frag_info[i].frag_stride - 1); - put_page(page); - page_alloc->page = NULL; - } - return -ENOMEM; } -static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv, - struct mlx4_en_rx_ring *ring) +static void mlx4_en_free_frag(const struct mlx4_en_priv *priv, + struct mlx4_en_rx_alloc *frag) { - struct mlx4_en_rx_alloc *page_alloc; - int i; - - for (i = 0; i < priv->num_frags; i++) { - const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i]; - - page_alloc = &ring->page_alloc[i]; - en_dbg(DRV, priv, "Freeing allocator:%d count:%d\n", - i, page_count(page_alloc->page)); - - dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->page_size, frag_info->dma_dir); - while (page_alloc->page_offset + frag_info->frag_stride < - page_alloc->page_size) { - put_page(page_alloc->page); - page_alloc->page_offset += frag_info->frag_stride; - } - page_alloc->page = NULL; + if (frag->page) { + dma_unmap_page(priv->ddev, frag->dma, + PAGE_SIZE, priv->dma_dir); + 
__free_page(frag->page); } + /* We need to clear all fields, otherwise a change of priv->log_rx_info + * could lead to see garbage later in frag->page. + */ + memset(frag, 0, sizeof(*frag)); } -static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv, +static void mlx4_en_init_rx_desc(const struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index; @@ -248,18 +137,23 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride); struct mlx4_en_rx_alloc *frags = ring->rx_info + (index << priv->log_rx_info); - if (ring->page_cache.index > 0) { - frags[0] = ring->page_cache.buf[--ring->page_cache.index]; - rx_desc->data[0].addr = cpu_to_be64(frags[0].dma + - frags[0].page_offset); + /* XDP uses a single page per frame */ + if (!frags->page) { + ring->page_cache.index--; + frags->page = ring->page_cache.buf[ring->page_cache.index].page; + frags->dma = ring->page_cache.buf[ring->page_cache.index].dma; + } + frags->page_offset = XDP_PACKET_HEADROOM; + rx_desc->data[0].addr = cpu_to_be64(frags->dma + + XDP_PACKET_HEADROOM); return 0; } - return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc, gfp); + return mlx4_en_alloc_frags(priv, ring, rx_desc, frags, gfp); } -static inline bool mlx4_en_is_ring_empty(struct mlx4_en_rx_ring *ring) +static bool mlx4_en_is_ring_empty(const struct mlx4_en_rx_ring *ring) { return ring->prod == ring->cons; } @@ -269,7 +163,8 @@ static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring) *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff); } -static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, +/* slow path */ +static void mlx4_en_free_rx_desc(const struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring, int index) { @@ -279,7 +174,7 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, frags = ring->rx_info + (index << priv->log_rx_info); for (nr 
= 0; nr < priv->num_frags; nr++) { en_dbg(DRV, priv, "Freeing fragment:%d\n", nr); - mlx4_en_free_frag(priv, frags, nr); + mlx4_en_free_frag(priv, frags + nr); } } @@ -335,12 +230,12 @@ static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv, ring->cons, ring->prod); /* Unmap and free Rx buffers */ - while (!mlx4_en_is_ring_empty(ring)) { - index = ring->cons & ring->size_mask; + for (index = 0; index < ring->size; index++) { en_dbg(DRV, priv, "Processing descriptor:%d\n", index); mlx4_en_free_rx_desc(priv, ring, index); - ++ring->cons; } + ring->cons = 0; + ring->prod = 0; } void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev) @@ -392,9 +287,9 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS * sizeof(struct mlx4_en_rx_alloc)); - ring->rx_info = vmalloc_node(tmp, node); + ring->rx_info = vzalloc_node(tmp, node); if (!ring->rx_info) { - ring->rx_info = vmalloc(tmp); + ring->rx_info = vzalloc(tmp); if (!ring->rx_info) { err = -ENOMEM; goto err_ring; @@ -464,16 +359,6 @@ int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv) /* Initialize all descriptors */ for (i = 0; i < ring->size; i++) mlx4_en_init_rx_desc(priv, ring, i); - - /* Initialize page allocators */ - err = mlx4_en_init_allocator(priv, ring); - if (err) { - en_err(priv, "Failed initializing ring allocator\n"); - if (ring->stride <= TXBB_SIZE) - ring->buf -= TXBB_SIZE; - ring_ind--; - goto err_allocator; - } } err = mlx4_en_fill_rx_buffers(priv); if (err) @@ -493,11 +378,9 @@ err_buffers: mlx4_en_free_rx_buf(priv, priv->rx_ring[ring_ind]); ring_ind = priv->rx_ring_num - 1; -err_allocator: while (ring_ind >= 0) { if (priv->rx_ring[ring_ind]->stride <= TXBB_SIZE) priv->rx_ring[ring_ind]->buf -= TXBB_SIZE; - mlx4_en_destroy_allocator(priv, priv->rx_ring[ring_ind]); ring_ind--; } return err; @@ -537,7 +420,9 @@ bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, if (cache->index >= MLX4_EN_CACHE_SIZE) return false; - 
cache->buf[cache->index++] = *frame; + cache->buf[cache->index].page = frame->page; + cache->buf[cache->index].dma = frame->dma; + cache->index++; return true; } @@ -567,136 +452,91 @@ void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, int i; for (i = 0; i < ring->page_cache.index; i++) { - struct mlx4_en_rx_alloc *frame = &ring->page_cache.buf[i]; - - dma_unmap_page(priv->ddev, frame->dma, frame->page_size, - priv->frag_info[0].dma_dir); - put_page(frame->page); + dma_unmap_page(priv->ddev, ring->page_cache.buf[i].dma, + PAGE_SIZE, priv->dma_dir); + put_page(ring->page_cache.buf[i].page); } ring->page_cache.index = 0; mlx4_en_free_rx_buf(priv, ring); if (ring->stride <= TXBB_SIZE) ring->buf -= TXBB_SIZE; - mlx4_en_destroy_allocator(priv, ring); } static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, - struct mlx4_en_rx_desc *rx_desc, struct mlx4_en_rx_alloc *frags, struct sk_buff *skb, int length) { - struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags; - struct mlx4_en_frag_info *frag_info; - int nr; + const struct mlx4_en_frag_info *frag_info = priv->frag_info; + unsigned int truesize = 0; + int nr, frag_size; + struct page *page; dma_addr_t dma; + bool release; /* Collect used fragments while replacing them in the HW descriptors */ - for (nr = 0; nr < priv->num_frags; nr++) { - frag_info = &priv->frag_info[nr]; - if (length <= frag_info->frag_prefix_size) - break; - if (unlikely(!frags[nr].page)) + for (nr = 0;; frags++) { + frag_size = min_t(int, length, frag_info->frag_size); + + page = frags->page; + if (unlikely(!page)) goto fail; - dma = be64_to_cpu(rx_desc->data[nr].addr); - dma_sync_single_for_cpu(priv->ddev, dma, frag_info->frag_size, - DMA_FROM_DEVICE); + dma = frags->dma; + dma_sync_single_range_for_cpu(priv->ddev, dma, frags->page_offset, + frag_size, priv->dma_dir); + + __skb_fill_page_desc(skb, nr, page, frags->page_offset, + frag_size); - __skb_fill_page_desc(skb, nr, frags[nr].page, - frags[nr].page_offset, - 
frag_info->frag_size); + truesize += frag_info->frag_stride; + if (frag_info->frag_stride == PAGE_SIZE / 2) { + frags->page_offset ^= PAGE_SIZE / 2; + release = page_count(page) != 1 || + page_is_pfmemalloc(page) || + page_to_nid(page) != numa_mem_id(); + } else { + u32 sz_align = ALIGN(frag_size, SMP_CACHE_BYTES); - skb->truesize += frag_info->frag_stride; - frags[nr].page = NULL; + frags->page_offset += sz_align; + release = frags->page_offset + frag_info->frag_size > PAGE_SIZE; + } + if (release) { + dma_unmap_page(priv->ddev, dma, PAGE_SIZE, priv->dma_dir); + frags->page = NULL; + } else { + page_ref_inc(page); + } + + nr++; + length -= frag_size; + if (!length) + break; + frag_info++; } - /* Adjust size of last fragment to match actual length */ - if (nr > 0) - skb_frag_size_set(&skb_frags_rx[nr - 1], - length - priv->frag_info[nr - 1].frag_prefix_size); + skb->truesize += truesize; return nr; fail: while (nr > 0) { nr--; - __skb_frag_unref(&skb_frags_rx[nr]); + __skb_frag_unref(skb_shinfo(skb)->frags + nr); } return 0; } - -static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, - struct mlx4_en_rx_desc *rx_desc, - struct mlx4_en_rx_alloc *frags, - unsigned int length) -{ - struct sk_buff *skb; - void *va; - int used_frags; - dma_addr_t dma; - - skb = netdev_alloc_skb(priv->dev, SMALL_PACKET_SIZE + NET_IP_ALIGN); - if (unlikely(!skb)) { - en_dbg(RX_ERR, priv, "Failed allocating skb\n"); - return NULL; - } - skb_reserve(skb, NET_IP_ALIGN); - skb->len = length; - - /* Get pointer to first fragment so we could copy the headers into the - * (linear part of the) skb */ - va = page_address(frags[0].page) + frags[0].page_offset; - - if (length <= SMALL_PACKET_SIZE) { - /* We are copying all relevant data to the skb - temporarily - * sync buffers for the copy */ - dma = be64_to_cpu(rx_desc->data[0].addr); - dma_sync_single_for_cpu(priv->ddev, dma, length, - DMA_FROM_DEVICE); - skb_copy_to_linear_data(skb, va, length); - skb->tail += length; - } else { - 
unsigned int pull_len; - - /* Move relevant fragments to skb */ - used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, frags, - skb, length); - if (unlikely(!used_frags)) { - kfree_skb(skb); - return NULL; - } - skb_shinfo(skb)->nr_frags = used_frags; - - pull_len = eth_get_headlen(va, SMALL_PACKET_SIZE); - /* Copy headers into the skb linear buffer */ - memcpy(skb->data, va, pull_len); - skb->tail += pull_len; - - /* Skip headers in first fragment */ - skb_shinfo(skb)->frags[0].page_offset += pull_len; - - /* Adjust size of first fragment */ - skb_frag_size_sub(&skb_shinfo(skb)->frags[0], pull_len); - skb->data_len = length - pull_len; - } - return skb; -} - -static void validate_loopback(struct mlx4_en_priv *priv, struct sk_buff *skb) +static void validate_loopback(struct mlx4_en_priv *priv, void *va) { + const unsigned char *data = va + ETH_HLEN; int i; - int offset = ETH_HLEN; - for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) { - if (*(skb->data + offset) != (unsigned char) (i & 0xff)) - goto out_loopback; + for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++) { + if (data[i] != (unsigned char)i) + return; } /* Loopback found */ priv->loopback_ok = 1; - -out_loopback: - dev_kfree_skb_any(skb); } static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv, @@ -801,7 +641,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud struct mlx4_cqe *cqe; struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring]; struct mlx4_en_rx_alloc *frags; - struct mlx4_en_rx_desc *rx_desc; struct bpf_prog *xdp_prog; int doorbell_pending; struct sk_buff *skb; @@ -834,10 +673,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud /* Process all completed CQEs */ while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK, cq->mcq.cons_index & cq->size)) { + void *va; frags = ring->rx_info + (index << priv->log_rx_info); - rx_desc = ring->buf + (index << ring->log_stride); - + va = page_address(frags[0].page) + 
frags[0].page_offset; /* * make sure we read the CQE after we read the ownership bit */ @@ -860,16 +699,14 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud * and not performing the selftest or flb disabled */ if (priv->flags & MLX4_EN_FLAG_RX_FILTER_NEEDED) { - struct ethhdr *ethh; + const struct ethhdr *ethh = va; dma_addr_t dma; /* Get pointer to first fragment since we haven't * skb yet and cast it to ethhdr struct */ - dma = be64_to_cpu(rx_desc->data[0].addr); + dma = frags[0].dma + frags[0].page_offset; dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh), DMA_FROM_DEVICE); - ethh = (struct ethhdr *)(page_address(frags[0].page) + - frags[0].page_offset); if (is_multicast_ether_addr(ethh->h_dest)) { struct mlx4_mac_entry *entry; @@ -887,13 +724,16 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud } } + if (unlikely(priv->validate_loopback)) { + validate_loopback(priv, va); + goto next; + } + /* * Packet is OK - process it. */ length = be32_to_cpu(cqe->byte_cnt); length -= ring->fcs_del; - l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) && - (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); /* A bpf program gets first chance to drop the packet. It may * read bytes but not past the end of the frag. 
@@ -904,13 +744,13 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud void *orig_data; u32 act; - dma = be64_to_cpu(rx_desc->data[0].addr); + dma = frags[0].dma + frags[0].page_offset; dma_sync_single_for_cpu(priv->ddev, dma, priv->frag_info[0].frag_size, DMA_FROM_DEVICE); - xdp.data_hard_start = page_address(frags[0].page); - xdp.data = xdp.data_hard_start + frags[0].page_offset; + xdp.data_hard_start = va - frags[0].page_offset; + xdp.data = va; xdp.data_end = xdp.data + length; orig_data = xdp.data; @@ -920,6 +760,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud length = xdp.data_end - xdp.data; frags[0].page_offset = xdp.data - xdp.data_hard_start; + va = xdp.data; } switch (act) { @@ -928,8 +769,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_TX: if (likely(!mlx4_en_xmit_frame(ring, frags, dev, length, cq->ring, - &doorbell_pending))) - goto consumed; + &doorbell_pending))) { + frags[0].page = NULL; + goto next; + } trace_xdp_exception(dev, xdp_prog, act); goto xdp_drop_no_cnt; /* Drop on xmit failure */ default: @@ -939,8 +782,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_DROP: ring->xdp_drop++; xdp_drop_no_cnt: - if (likely(mlx4_en_rx_recycle(ring, frags))) - goto consumed; goto next; } } @@ -948,129 +789,51 @@ xdp_drop_no_cnt: ring->bytes += length; ring->packets++; + skb = napi_get_frags(&cq->napi); + if (!skb) + goto next; + + if (unlikely(ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL)) { + timestamp = mlx4_en_get_cqe_ts(cqe); + mlx4_en_fill_hwtstamps(mdev, skb_hwtstamps(skb), + timestamp); + } + skb_record_rx_queue(skb, cq->ring); + if (likely(dev->features & NETIF_F_RXCSUM)) { if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP)) { if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && cqe->checksum == cpu_to_be16(0xffff)) { ip_summed = CHECKSUM_UNNECESSARY; + 
l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) && + (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); + if (l2_tunnel) + skb->csum_level = 1; ring->csum_ok++; } else { - ip_summed = CHECKSUM_NONE; - ring->csum_none++; + goto csum_none; } } else { if (priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP && (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV6))) { - ip_summed = CHECKSUM_COMPLETE; - ring->csum_complete++; + if (check_csum(cqe, skb, va, dev->features)) { + goto csum_none; + } else { + ip_summed = CHECKSUM_COMPLETE; + ring->csum_complete++; + } } else { - ip_summed = CHECKSUM_NONE; - ring->csum_none++; + goto csum_none; } } } else { +csum_none: ip_summed = CHECKSUM_NONE; ring->csum_none++; } - - /* This packet is eligible for GRO if it is: - * - DIX Ethernet (type interpretation) - * - TCP/IP (v4) - * - without IP options - * - not an IP fragment - */ - if (dev->features & NETIF_F_GRO) { - struct sk_buff *gro_skb = napi_get_frags(&cq->napi); - if (!gro_skb) - goto next; - - nr = mlx4_en_complete_rx_desc(priv, - rx_desc, frags, gro_skb, - length); - if (!nr) - goto next; - - if (ip_summed == CHECKSUM_COMPLETE) { - void *va = skb_frag_address(skb_shinfo(gro_skb)->frags); - if (check_csum(cqe, gro_skb, va, - dev->features)) { - ip_summed = CHECKSUM_NONE; - ring->csum_none++; - ring->csum_complete--; - } - } - - skb_shinfo(gro_skb)->nr_frags = nr; - gro_skb->len = length; - gro_skb->data_len = length; - gro_skb->ip_summed = ip_summed; - - if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY) - gro_skb->csum_level = 1; - - if ((cqe->vlan_my_qpn & - cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK)) && - (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) { - u16 vid = be16_to_cpu(cqe->sl_vid); - - __vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid); - } else if ((be32_to_cpu(cqe->vlan_my_qpn) & - MLX4_CQE_SVLAN_PRESENT_MASK) && - (dev->features & NETIF_F_HW_VLAN_STAG_RX)) { - __vlan_hwaccel_put_tag(gro_skb, - htons(ETH_P_8021AD), - 
be16_to_cpu(cqe->sl_vid)); - } - - if (dev->features & NETIF_F_RXHASH) - skb_set_hash(gro_skb, - be32_to_cpu(cqe->immed_rss_invalid), - (ip_summed == CHECKSUM_UNNECESSARY) ? - PKT_HASH_TYPE_L4 : - PKT_HASH_TYPE_L3); - - skb_record_rx_queue(gro_skb, cq->ring); - - if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { - timestamp = mlx4_en_get_cqe_ts(cqe); - mlx4_en_fill_hwtstamps(mdev, - skb_hwtstamps(gro_skb), - timestamp); - } - - napi_gro_frags(&cq->napi); - goto next; - } - - /* GRO not possible, complete processing here */ - skb = mlx4_en_rx_skb(priv, rx_desc, frags, length); - if (unlikely(!skb)) { - ring->dropped++; - goto next; - } - - if (unlikely(priv->validate_loopback)) { - validate_loopback(priv, skb); - goto next; - } - - if (ip_summed == CHECKSUM_COMPLETE) { - if (check_csum(cqe, skb, skb->data, dev->features)) { - ip_summed = CHECKSUM_NONE; - ring->csum_complete--; - ring->csum_none++; - } - } - skb->ip_summed = ip_summed; - skb->protocol = eth_type_trans(skb, dev); - skb_record_rx_queue(skb, cq->ring); - - if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY) - skb->csum_level = 1; - if (dev->features & NETIF_F_RXHASH) skb_set_hash(skb, be32_to_cpu(cqe->immed_rss_invalid), @@ -1078,36 +841,36 @@ xdp_drop_no_cnt: PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3); - if ((be32_to_cpu(cqe->vlan_my_qpn) & - MLX4_CQE_CVLAN_PRESENT_MASK) && + + if ((cqe->vlan_my_qpn & + cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK)) && (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(cqe->sl_vid)); - else if ((be32_to_cpu(cqe->vlan_my_qpn) & - MLX4_CQE_SVLAN_PRESENT_MASK) && + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), + be16_to_cpu(cqe->sl_vid)); + else if ((cqe->vlan_my_qpn & + cpu_to_be32(MLX4_CQE_SVLAN_PRESENT_MASK)) && (dev->features & NETIF_F_HW_VLAN_STAG_RX)) __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD), be16_to_cpu(cqe->sl_vid)); - if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) { - timestamp = 
mlx4_en_get_cqe_ts(cqe); - mlx4_en_fill_hwtstamps(mdev, skb_hwtstamps(skb), - timestamp); + nr = mlx4_en_complete_rx_desc(priv, frags, skb, length); + if (likely(nr)) { + skb_shinfo(skb)->nr_frags = nr; + skb->len = length; + skb->data_len = length; + napi_gro_frags(&cq->napi); + } else { + skb->vlan_tci = 0; + skb_clear_hash(skb); } - - napi_gro_receive(&cq->napi, skb); next: - for (nr = 0; nr < priv->num_frags; nr++) - mlx4_en_free_frag(priv, frags, nr); - -consumed: ++cq->mcq.cons_index; index = (cq->mcq.cons_index) & ring->size_mask; cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor; if (++polled == budget) - goto out; + break; } -out: rcu_read_unlock(); if (polled) { @@ -1178,13 +941,6 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget) return done; } -static const int frag_sizes[] = { - FRAG_SZ0, - FRAG_SZ1, - FRAG_SZ2, - FRAG_SZ3 -}; - void mlx4_en_calc_rx_buf(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -1195,33 +951,43 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) * This only works when num_frags == 1. */ if (priv->tx_ring_num[TX_XDP]) { - priv->frag_info[0].order = 0; priv->frag_info[0].frag_size = eff_mtu; - priv->frag_info[0].frag_prefix_size = 0; /* This will gain efficient xdp frame recycling at the * expense of more costly truesize accounting */ priv->frag_info[0].frag_stride = PAGE_SIZE; - priv->frag_info[0].dma_dir = PCI_DMA_BIDIRECTIONAL; - priv->frag_info[0].rx_headroom = XDP_PACKET_HEADROOM; + priv->dma_dir = PCI_DMA_BIDIRECTIONAL; + priv->rx_headroom = XDP_PACKET_HEADROOM; i = 1; } else { - int buf_size = 0; + int frag_size_max = 2048, buf_size = 0; + + /* should not happen, right ? */ + if (eff_mtu > PAGE_SIZE + (MLX4_EN_MAX_RX_FRAGS - 1) * 2048) + frag_size_max = PAGE_SIZE; while (buf_size < eff_mtu) { - priv->frag_info[i].order = MLX4_EN_ALLOC_PREFER_ORDER; - priv->frag_info[i].frag_size = - (eff_mtu > buf_size + frag_sizes[i]) ? 
- frag_sizes[i] : eff_mtu - buf_size; - priv->frag_info[i].frag_prefix_size = buf_size; - priv->frag_info[i].frag_stride = - ALIGN(priv->frag_info[i].frag_size, - SMP_CACHE_BYTES); - priv->frag_info[i].dma_dir = PCI_DMA_FROMDEVICE; - priv->frag_info[i].rx_headroom = 0; - buf_size += priv->frag_info[i].frag_size; + int frag_stride, frag_size = eff_mtu - buf_size; + int pad, nb; + + if (i < MLX4_EN_MAX_RX_FRAGS - 1) + frag_size = min(frag_size, frag_size_max); + + priv->frag_info[i].frag_size = frag_size; + frag_stride = ALIGN(frag_size, SMP_CACHE_BYTES); + /* We can only pack 2 1536-bytes frames in on 4K page + * Therefore, each frame would consume more bytes (truesize) + */ + nb = PAGE_SIZE / frag_stride; + pad = (PAGE_SIZE - nb * frag_stride) / nb; + pad &= ~(SMP_CACHE_BYTES - 1); + priv->frag_info[i].frag_stride = frag_stride + pad; + + buf_size += frag_size; i++; } + priv->dma_dir = PCI_DMA_FROMDEVICE; + priv->rx_headroom = 0; } priv->num_frags = i; @@ -1232,10 +998,9 @@ void mlx4_en_calc_rx_buf(struct net_device *dev) eff_mtu, priv->num_frags); for (i = 0; i < priv->num_frags; i++) { en_err(priv, - " frag:%d - size:%d prefix:%d stride:%d\n", + " frag:%d - size:%d stride:%d\n", i, priv->frag_info[i].frag_size, - priv->frag_info[i].frag_prefix_size, priv->frag_info[i].frag_stride); } } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c index 95290e1fc9fe..17112faafbcc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c @@ -81,14 +81,11 @@ static int mlx4_en_test_loopback(struct mlx4_en_priv *priv) { u32 loopback_ok = 0; int i; - bool gro_enabled; priv->loopback_ok = 0; priv->validate_loopback = 1; - gro_enabled = priv->dev->features & NETIF_F_GRO; mlx4_en_update_loopback_state(priv->dev, priv->dev->features); - priv->dev->features &= ~NETIF_F_GRO; /* xmit */ if (mlx4_en_test_loopback_xmit(priv)) { @@ -111,9 +108,6 @@ 
mlx4_en_test_loopback_exit: priv->validate_loopback = 0; - if (gro_enabled) - priv->dev->features |= NETIF_F_GRO; - mlx4_en_update_loopback_state(priv->dev, priv->dev->features); return !loopback_ok; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 3ed42199d3f1..3ba89bc43d74 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -354,13 +354,11 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_alloc frame = { .page = tx_info->page, .dma = tx_info->map0_dma, - .page_offset = XDP_PACKET_HEADROOM, - .page_size = PAGE_SIZE, }; if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) { dma_unmap_page(priv->ddev, tx_info->map0_dma, - PAGE_SIZE, priv->frag_info[0].dma_dir); + PAGE_SIZE, priv->dma_dir); put_page(tx_info->page); } @@ -980,8 +978,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) ring->tso_packets++; - i = ((skb->len - lso_header_size) / shinfo->gso_size) + - !!((skb->len - lso_header_size) % shinfo->gso_size); + i = shinfo->gso_segs; tx_info->nr_bytes = skb->len + (i - 1) * lso_header_size; ring->packets += i; } else { diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 3629ce11a68b..39f401aa3047 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -102,17 +102,6 @@ /* Use the maximum between 16384 and a single page */ #define MLX4_EN_ALLOC_SIZE PAGE_ALIGN(16384) -#define MLX4_EN_ALLOC_PREFER_ORDER min_t(int, get_order(32768), \ - PAGE_ALLOC_COSTLY_ORDER) - -/* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU - * and 4K allocations) */ -enum { - FRAG_SZ0 = 1536 - NET_IP_ALIGN, - FRAG_SZ1 = 4096, - FRAG_SZ2 = 4096, - FRAG_SZ3 = MLX4_EN_ALLOC_SIZE -}; #define MLX4_EN_MAX_RX_FRAGS 4 /* Maximum ring sizes */ @@ -264,13 +253,16 @@ struct mlx4_en_rx_alloc { struct page 
*page; dma_addr_t dma; u32 page_offset; - u32 page_size; }; #define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT) + struct mlx4_en_page_cache { u32 index; - struct mlx4_en_rx_alloc buf[MLX4_EN_CACHE_SIZE]; + struct { + struct page *page; + dma_addr_t dma; + } buf[MLX4_EN_CACHE_SIZE]; }; struct mlx4_en_priv; @@ -335,7 +327,6 @@ struct mlx4_en_rx_desc { struct mlx4_en_rx_ring { struct mlx4_hwq_resources wqres; - struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS]; u32 size ; /* number of Rx descs*/ u32 actual_size; u32 size_mask; @@ -355,6 +346,7 @@ struct mlx4_en_rx_ring { unsigned long csum_ok; unsigned long csum_none; unsigned long csum_complete; + unsigned long rx_alloc_pages; unsigned long xdp_drop; unsigned long xdp_tx; unsigned long xdp_tx_full; @@ -472,11 +464,7 @@ struct mlx4_en_mc_list { struct mlx4_en_frag_info { u16 frag_size; - u16 frag_prefix_size; u32 frag_stride; - enum dma_data_direction dma_dir; - u16 order; - u16 rx_headroom; }; #ifdef CONFIG_MLX4_EN_DCB @@ -584,8 +572,10 @@ struct mlx4_en_priv { u32 rx_ring_num; u32 rx_skb_size; struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS]; - u16 num_frags; - u16 log_rx_info; + u8 num_frags; + u8 log_rx_info; + u8 dma_dir; + u16 rx_headroom; struct mlx4_en_tx_ring **tx_ring[MLX4_EN_NUM_TX_TYPES]; struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS]; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h index 48641cb0367f..926f3c3f3665 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h @@ -37,7 +37,7 @@ struct mlx4_en_port_stats { unsigned long queue_stopped; unsigned long wake_queue; unsigned long tx_timeout; - unsigned long rx_alloc_failed; + unsigned long rx_alloc_pages; unsigned long rx_chksum_good; unsigned long rx_chksum_none; unsigned long rx_chksum_complete; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c 
index d8d5d161b8c7..4aa29ee93013 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2749,7 +2749,7 @@ int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave, int err; int index = vhcr->in_modifier; struct res_mtt *mtt; - struct res_mpt *mpt; + struct res_mpt *mpt = NULL; int mtt_base = mr_get_mtt_addr(inbox->buf) / dev->caps.mtt_entry_sz; int phys; int id; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 117170014e88..a84b652f9b54 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -31,3 +31,10 @@ config MLX5_CORE_EN_DCB This flag is depended on the kernel's DCB support. If unsure, set to Y + +config MLX5_CORE_IPOIB + bool "Mellanox Technologies ConnectX-4 IPoIB offloads support" + depends on MLX5_CORE_EN + default y + ---help--- + MLX5 IPoIB offloads & acceleration support. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9f43beb86250..9e644615f07a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -11,3 +11,5 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o en_selftest.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o + +mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index a380353a78c2..5bdaf3d545b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -279,6 +279,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_XRC_SRQ: case MLX5_CMD_OP_DESTROY_DCT: case MLX5_CMD_OP_DEALLOC_Q_COUNTER: + case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT: case MLX5_CMD_OP_DEALLOC_PD: case MLX5_CMD_OP_DEALLOC_UAR: case MLX5_CMD_OP_DETACH_FROM_MCG: @@ -305,8 +307,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER: - case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT: - case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT: + case MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -363,6 +364,10 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_Q_COUNTER: case MLX5_CMD_OP_SET_RATE_LIMIT: case MLX5_CMD_OP_QUERY_RATE_LIMIT: + case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: + case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT: case MLX5_CMD_OP_ALLOC_PD: case MLX5_CMD_OP_ALLOC_UAR: case 
MLX5_CMD_OP_CONFIG_INT_MODERATION: @@ -414,10 +419,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: case MLX5_CMD_OP_QUERY_FLOW_COUNTER: case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: - case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT: - case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT: - case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT: - case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT: + case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -501,6 +503,12 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT); MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT); + MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT); + MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT); + MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT); MLX5_COMMAND_STR_CASE(ALLOC_PD); MLX5_COMMAND_STR_CASE(DEALLOC_PD); MLX5_COMMAND_STR_CASE(ALLOC_UAR); @@ -576,12 +584,8 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE); MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER); MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER); - MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT); - MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT); - MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT); - MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT); - MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT); - MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT); + MLX5_COMMAND_STR_CASE(ALLOC_MODIFY_HEADER_CONTEXT); + MLX5_COMMAND_STR_CASE(DEALLOC_MODIFY_HEADER_CONTEXT); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 3d9490cd2db1..0099a3e397bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -37,6 +37,7 @@ #include <linux/timecounter.h> #include <linux/net_tstamp.h> #include <linux/ptp_clock_kernel.h> +#include <linux/crash_dump.h> #include <linux/mlx5/driver.h> #include <linux/mlx5/qp.h> #include <linux/mlx5/cq.h> @@ -111,18 +112,13 @@ #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ -#define MLX5E_SQ_BF_BUDGET 16 #define MLX5E_ICOSQ_MAX_WQEBBS \ (DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB)) #define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) -#define MLX5E_XDP_IHS_DS_COUNT \ - DIV_ROUND_UP(MLX5E_XDP_MIN_INLINE - 2, MLX5_SEND_WQE_DS) #define MLX5E_XDP_TX_DS_COUNT \ ((sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */) -#define MLX5E_XDP_TX_WQEBBS \ - DIV_ROUND_UP(MLX5E_XDP_TX_DS_COUNT, MLX5_SEND_WQEBB_NUM_DS) #define MLX5E_NUM_MAIN_GROUPS 9 @@ -158,6 +154,14 @@ static inline int mlx5_max_log_rq_size(int wq_type) } } +static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) +{ + return is_kdump_kernel() ? 
+ MLX5E_MIN_NUM_CHANNELS : + min_t(int, mdev->priv.eq_table.num_comp_vectors, + MLX5E_MAX_NUM_CHANNELS); +} + struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_eth_seg eth; @@ -187,15 +191,15 @@ enum mlx5e_priv_flag { MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 1), }; -#define MLX5E_SET_PFLAG(priv, pflag, enable) \ +#define MLX5E_SET_PFLAG(params, pflag, enable) \ do { \ if (enable) \ - (priv)->params.pflags |= (pflag); \ + (params)->pflags |= (pflag); \ else \ - (priv)->params.pflags &= ~(pflag); \ + (params)->pflags &= ~(pflag); \ } while (0) -#define MLX5E_GET_PFLAG(priv, pflag) (!!((priv)->params.pflags & (pflag))) +#define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (pflag))) #ifdef CONFIG_MLX5_CORE_EN_DCB #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ @@ -218,7 +222,6 @@ struct mlx5e_params { bool rx_cqe_compress_def; struct mlx5e_cq_moder rx_cq_moderation; struct mlx5e_cq_moder tx_cq_moderation; - u16 min_rx_wqes; bool lro_en; u32 lro_wqe_sz; u16 tx_max_inline; @@ -227,9 +230,11 @@ struct mlx5e_params { u8 toeplitz_hash_key[40]; u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; bool vlan_strip_disable; + bool scatter_fcs_en; bool rx_am_enabled; u32 lro_timeout; u32 pflags; + struct bpf_prog *xdp_prog; }; #ifdef CONFIG_MLX5_CORE_EN_DCB @@ -285,7 +290,6 @@ struct mlx5e_cq { struct napi_struct *napi; struct mlx5_core_cq mcq; struct mlx5e_channel *channel; - struct mlx5e_priv *priv; /* cqe decompression */ struct mlx5_cqe64 title; @@ -295,22 +299,163 @@ struct mlx5e_cq { u16 decmprs_wqe_counter; /* control */ + struct mlx5_core_dev *mdev; struct mlx5_frag_wq_ctrl wq_ctrl; } ____cacheline_aligned_in_smp; -struct mlx5e_rq; -typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq, - struct mlx5_cqe64 *cqe); -typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, - u16 ix); +struct mlx5e_tx_wqe_info { + struct sk_buff *skb; + u32 num_bytes; + u8 num_wqebbs; + u8 num_dma; +}; + +enum mlx5e_dma_map_type 
{ + MLX5E_DMA_MAP_SINGLE, + MLX5E_DMA_MAP_PAGE +}; + +struct mlx5e_sq_dma { + dma_addr_t addr; + u32 size; + enum mlx5e_dma_map_type type; +}; + +enum { + MLX5E_SQ_STATE_ENABLED, +}; + +struct mlx5e_sq_wqe_info { + u8 opcode; + u8 num_wqebbs; +}; + +struct mlx5e_txqsq { + /* data path */ + + /* dirtied @completion */ + u16 cc; + u32 dma_fifo_cc; + + /* dirtied @xmit */ + u16 pc ____cacheline_aligned_in_smp; + u32 dma_fifo_pc; + struct mlx5e_sq_stats stats; + + struct mlx5e_cq cq; + + /* write@xmit, read@completion */ + struct { + struct mlx5e_sq_dma *dma_fifo; + struct mlx5e_tx_wqe_info *wqe_info; + } db; + + /* read only */ + struct mlx5_wq_cyc wq; + u32 dma_fifo_mask; + void __iomem *uar_map; + struct netdev_queue *txq; + u32 sqn; + u16 max_inline; + u8 min_inline_mode; + u16 edge; + struct device *pdev; + struct mlx5e_tstamp *tstamp; + __be32 mkey_be; + unsigned long state; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5e_channel *channel; + int txq_ix; + u32 rate_limit; +} ____cacheline_aligned_in_smp; + +struct mlx5e_xdpsq { + /* data path */ + + /* dirtied @rx completion */ + u16 cc; + u16 pc; + + struct mlx5e_cq cq; + + /* write@xmit, read@completion */ + struct { + struct mlx5e_dma_info *di; + bool doorbell; + } db; + + /* read only */ + struct mlx5_wq_cyc wq; + void __iomem *uar_map; + u32 sqn; + struct device *pdev; + __be32 mkey_be; + u8 min_inline_mode; + unsigned long state; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5e_channel *channel; +} ____cacheline_aligned_in_smp; + +struct mlx5e_icosq { + /* data path */ + + /* dirtied @completion */ + u16 cc; + + /* dirtied @xmit */ + u16 pc ____cacheline_aligned_in_smp; + u32 dma_fifo_pc; + u16 prev_cc; -typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq *rq, u16 ix); + struct mlx5e_cq cq; + + /* write@xmit, read@completion */ + struct { + struct mlx5e_sq_wqe_info *ico_wqe; + } db; + + /* read only */ + struct mlx5_wq_cyc wq; + void __iomem *uar_map; + u32 sqn; + 
u16 edge; + struct device *pdev; + __be32 mkey_be; + unsigned long state; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + struct mlx5e_channel *channel; +} ____cacheline_aligned_in_smp; + +static inline bool +mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n) +{ + return (((wq->sz_m1 & (cc - pc)) >= n) || (cc == pc)); +} struct mlx5e_dma_info { struct page *page; dma_addr_t addr; }; +struct mlx5e_umr_dma_info { + __be64 *mtt; + dma_addr_t mtt_addr; + struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE]; + struct mlx5e_umr_wqe wqe; +}; + +struct mlx5e_mpw_info { + struct mlx5e_umr_dma_info umr; + u16 consumed_strides; + u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE]; +}; + struct mlx5e_rx_am_stats { int ppms; /* packets per msec */ int epms; /* events per msec */ @@ -347,6 +492,11 @@ struct mlx5e_page_cache { struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE]; }; +struct mlx5e_rq; +typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*); +typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq*, struct mlx5e_rx_wqe*, u16); +typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16); + struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; @@ -381,7 +531,10 @@ struct mlx5e_rq { u16 rx_headroom; struct mlx5e_rx_am am; /* Adaptive Moderation */ + + /* XDP */ struct bpf_prog *xdp_prog; + struct mlx5e_xdpsq xdpsq; /* control */ struct mlx5_wq_ctrl wq_ctrl; @@ -390,118 +543,10 @@ struct mlx5e_rq { u32 mpwqe_num_strides; u32 rqn; struct mlx5e_channel *channel; - struct mlx5e_priv *priv; + struct mlx5_core_dev *mdev; struct mlx5_core_mkey umr_mkey; } ____cacheline_aligned_in_smp; -struct mlx5e_umr_dma_info { - __be64 *mtt; - dma_addr_t mtt_addr; - struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE]; - struct mlx5e_umr_wqe wqe; -}; - -struct mlx5e_mpw_info { - struct mlx5e_umr_dma_info umr; - u16 consumed_strides; - u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE]; -}; - -struct mlx5e_tx_wqe_info { - u32 num_bytes; - u8 num_wqebbs; - u8 
num_dma; -}; - -enum mlx5e_dma_map_type { - MLX5E_DMA_MAP_SINGLE, - MLX5E_DMA_MAP_PAGE -}; - -struct mlx5e_sq_dma { - dma_addr_t addr; - u32 size; - enum mlx5e_dma_map_type type; -}; - -enum { - MLX5E_SQ_STATE_ENABLED, - MLX5E_SQ_STATE_BF_ENABLE, -}; - -struct mlx5e_sq_wqe_info { - u8 opcode; - u8 num_wqebbs; -}; - -enum mlx5e_sq_type { - MLX5E_SQ_TXQ, - MLX5E_SQ_ICO, - MLX5E_SQ_XDP -}; - -struct mlx5e_sq { - /* data path */ - - /* dirtied @completion */ - u16 cc; - u32 dma_fifo_cc; - - /* dirtied @xmit */ - u16 pc ____cacheline_aligned_in_smp; - u32 dma_fifo_pc; - u16 bf_offset; - u16 prev_cc; - u8 bf_budget; - struct mlx5e_sq_stats stats; - - struct mlx5e_cq cq; - - /* pointers to per tx element info: write@xmit, read@completion */ - union { - struct { - struct sk_buff **skb; - struct mlx5e_sq_dma *dma_fifo; - struct mlx5e_tx_wqe_info *wqe_info; - } txq; - struct mlx5e_sq_wqe_info *ico_wqe; - struct { - struct mlx5e_sq_wqe_info *wqe_info; - struct mlx5e_dma_info *di; - bool doorbell; - } xdp; - } db; - - /* read only */ - struct mlx5_wq_cyc wq; - u32 dma_fifo_mask; - void __iomem *uar_map; - struct netdev_queue *txq; - u32 sqn; - u16 bf_buf_size; - u16 max_inline; - u8 min_inline_mode; - u16 edge; - struct device *pdev; - struct mlx5e_tstamp *tstamp; - __be32 mkey_be; - unsigned long state; - - /* control path */ - struct mlx5_wq_ctrl wq_ctrl; - struct mlx5_sq_bfreg bfreg; - struct mlx5e_channel *channel; - int tc; - u32 rate_limit; - u8 type; -} ____cacheline_aligned_in_smp; - -static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) -{ - return (((sq->wq.sz_m1 & (sq->cc - sq->pc)) >= n) || - (sq->cc == sq->pc)); -} - enum channel_flags { MLX5E_CHANNEL_NAPI_SCHED = 1, }; @@ -509,9 +554,8 @@ enum channel_flags { struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; - struct mlx5e_sq xdp_sq; - struct mlx5e_sq sq[MLX5E_MAX_NUM_TC]; - struct mlx5e_sq icosq; /* internal control operations */ + struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC]; + struct 
mlx5e_icosq icosq; /* internal control operations */ bool xdp; struct napi_struct napi; struct device *pdev; @@ -522,10 +566,18 @@ struct mlx5e_channel { /* control */ struct mlx5e_priv *priv; + struct mlx5_core_dev *mdev; + struct mlx5e_tstamp *tstamp; int ix; int cpu; }; +struct mlx5e_channels { + struct mlx5e_channel **c; + unsigned int num; + struct mlx5e_params params; +}; + enum mlx5e_traffic_types { MLX5E_TT_IPV4_TCP, MLX5E_TT_IPV6_TCP, @@ -675,34 +727,17 @@ enum { MLX5E_NIC_PRIO }; -struct mlx5e_profile { - void (*init)(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, void *ppriv); - void (*cleanup)(struct mlx5e_priv *priv); - int (*init_rx)(struct mlx5e_priv *priv); - void (*cleanup_rx)(struct mlx5e_priv *priv); - int (*init_tx)(struct mlx5e_priv *priv); - void (*cleanup_tx)(struct mlx5e_priv *priv); - void (*enable)(struct mlx5e_priv *priv); - void (*disable)(struct mlx5e_priv *priv); - void (*update_stats)(struct mlx5e_priv *priv); - int (*max_nch)(struct mlx5_core_dev *mdev); - int max_tc; -}; - struct mlx5e_priv { /* priv data path fields - start */ - struct mlx5e_sq **txq_to_sq_map; - int channeltc_to_txq_map[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; - struct bpf_prog *xdp_prog; + struct mlx5e_txqsq *txq2sq[MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC]; + int channel_tc2txq[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; /* priv data path fields - end */ unsigned long state; struct mutex state_lock; /* Protects Interface state */ struct mlx5e_rq drop_rq; - struct mlx5e_channel **channel; + struct mlx5e_channels channels; u32 tisn[MLX5E_MAX_NUM_TC]; struct mlx5e_rqt indir_rqt; struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; @@ -712,7 +747,6 @@ struct mlx5e_priv { struct mlx5e_flow_steering fs; struct mlx5e_vxlan_db vxlan; - struct mlx5e_params params; struct workqueue_struct *wq; struct work_struct update_carrier_work; struct work_struct set_rx_mode_work; @@ -732,9 +766,28 @@ struct mlx5e_priv { void *ppriv; }; 
+struct mlx5e_profile { + void (*init)(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, void *ppriv); + void (*cleanup)(struct mlx5e_priv *priv); + int (*init_rx)(struct mlx5e_priv *priv); + void (*cleanup_rx)(struct mlx5e_priv *priv); + int (*init_tx)(struct mlx5e_priv *priv); + void (*cleanup_tx)(struct mlx5e_priv *priv); + void (*enable)(struct mlx5e_priv *priv); + void (*disable)(struct mlx5e_priv *priv); + void (*update_stats)(struct mlx5e_priv *priv); + int (*max_nch)(struct mlx5_core_dev *mdev); + struct { + mlx5e_fp_handle_rx_cqe handle_rx_cqe; + mlx5e_fp_handle_rx_cqe handle_rx_cqe_mpwqe; + } rx_handlers; + int max_tc; +}; + void mlx5e_build_ptys2ethtool_map(void); -void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw); u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback); netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev); @@ -744,7 +797,9 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); -void mlx5e_free_sq_descs(struct mlx5e_sq *sq); +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq); +void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq); +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq); void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, bool recycle); @@ -792,7 +847,7 @@ void mlx5e_pps_event_handler(struct mlx5e_priv *priv, struct ptp_clock_event *event); int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr); int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr); -void mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val); +int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val); int mlx5e_vlan_rx_add_vid(struct net_device *dev, 
__always_unused __be16 proto, u16 vid); @@ -801,14 +856,40 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); -int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd); +struct mlx5e_redirect_rqt_param { + bool is_rss; + union { + u32 rqn; /* Direct RQN (Non-RSS) */ + struct { + u8 hfunc; + struct mlx5e_channels *channels; + } rss; /* RSS data */ + }; +}; -int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix); -void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc, - enum mlx5e_traffic_types tt); +int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, + struct mlx5e_redirect_rqt_param rrp); +void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, + enum mlx5e_traffic_types tt, + void *tirc); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); + +int mlx5e_open_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *chs); +void mlx5e_close_channels(struct mlx5e_channels *chs); + +/* Function pointer to be used to modify WH settings while + * switching channels + */ +typedef int (*mlx5e_fp_hw_modify)(struct mlx5e_priv *priv); +void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_hw_modify hw_modify); +void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); +void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); + void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, u32 *indirection_rqt, int len, int num_channels); @@ -816,30 +897,43 @@ int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode); -void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type); +void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, u8 
rq_type); -static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, - struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz) +static inline +struct mlx5e_tx_wqe *mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc) { - u16 ofst = sq->bf_offset; + u16 pi = *pc & wq->sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + + memset(cseg, 0, sizeof(*cseg)); + + cseg->opmod_idx_opcode = cpu_to_be32((*pc << 8) | MLX5_OPCODE_NOP); + cseg->qpn_ds = cpu_to_be32((sqn << 8) | 0x01); + (*pc)++; + + return wqe; +} + +static inline +void mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, + void __iomem *uar_map, + struct mlx5_wqe_ctrl_seg *ctrl) +{ + ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; /* ensure wqe is visible to device before updating doorbell record */ dma_wmb(); - *sq->wq.db = cpu_to_be32(sq->pc); + *wq->db = cpu_to_be32(pc); /* ensure doorbell record is visible to device before ringing the * doorbell */ wmb(); - if (bf_sz) - __iowrite64_copy(sq->uar_map + ofst, ctrl, bf_sz); - else - mlx5_write64((__be32 *)ctrl, sq->uar_map + ofst, NULL); - /* flush the write-combining mapped buffer */ - wmb(); - sq->bf_offset ^= sq->bf_buf_size; + mlx5_write64((__be32 *)ctrl, uar_map, NULL); } static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) @@ -895,44 +989,43 @@ void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir); int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); -int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev, - bool enable_uc_lb); - -struct mlx5_eswitch_rep; -int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep); -void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep); -int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep); -void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep); -int 
mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); -void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); -int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); -void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); -void mlx5e_update_hw_rep_counters(struct mlx5e_priv *priv); +int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb); + +/* common netdev helpers */ +int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); + +int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv); +void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv); int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); -void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv); int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); +void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); + +int mlx5e_create_ttc_table(struct mlx5e_priv *priv, u32 underlay_qpn); +void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv); + +int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, + u32 underlay_qpn, u32 *tisn); +void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); + int mlx5e_create_tises(struct mlx5e_priv *priv); void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); void mlx5e_update_stats_work(struct work_struct *work); -struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, - const struct mlx5e_profile *profile, - void *ppriv); -void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv); -int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); -void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); -int mlx5e_get_offload_stats(int attr_id, const struct 
net_device *dev, - void *sp); -bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id); +/* mlx5e generic netdev management API */ +struct net_device* +mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, + void *ppriv); +int mlx5e_attach_netdev(struct mlx5e_priv *priv); +void mlx5e_detach_netdev(struct mlx5e_priv *priv); +void mlx5e_destroy_netdev(struct mlx5e_priv *priv); +void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u16 max_channels); -bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); -bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 68419a01db36..c8a005326e30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -174,13 +174,9 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, enum arfs_type type) { struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type]; - struct mlx5_flow_act flow_act = { - .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, - .encap_id = 0, - }; - struct mlx5_flow_destination dest; struct mlx5e_tir *tir = priv->indir_tir; + struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_spec *spec; int err = 0; @@ -325,10 +321,16 @@ static int arfs_create_table(struct mlx5e_priv *priv, { struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft; + struct mlx5_flow_table_attr ft_attr = {}; int err; - ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL, 0); + ft->num_groups = 0; + + ft_attr.max_fte = MLX5E_ARFS_TABLE_SIZE; + ft_attr.level = MLX5E_ARFS_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); if 
(IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -469,15 +471,11 @@ static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs, static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, struct arfs_rule *arfs_rule) { - struct mlx5_flow_act flow_act = { - .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, - .encap_id = 0, - }; struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; struct arfs_tuple *tuple = &arfs_rule->tuple; struct mlx5_flow_handle *rule = NULL; struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); struct arfs_table *arfs_table; struct mlx5_flow_spec *spec; struct mlx5_flow_table *ft; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index 37e66eef6fb5..e706a87fc8b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -90,6 +90,7 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) { struct mlx5e_priv *priv = netdev_priv(dev); struct hwtstamp_config config; + int err; if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz)) return -EOPNOTSUPP; @@ -111,7 +112,7 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) switch (config.rx_filter) { case HWTSTAMP_FILTER_NONE: /* Reset CQE compression to Admin default */ - mlx5e_modify_rx_cqe_compression_locked(priv, priv->params.rx_cqe_compress_def); + mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def); break; case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: @@ -129,7 +130,12 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: /* Disable CQE compression */ netdev_warn(dev, "Disabling cqe compression"); - mlx5e_modify_rx_cqe_compression_locked(priv, false); + err = mlx5e_modify_rx_cqe_compression_locked(priv, false); + if (err) { + netdev_err(dev, "Failed disabling cqe 
compression err=%d\n", err); + mutex_unlock(&priv->state_lock); + return err; + } config.rx_filter = HWTSTAMP_FILTER_ALL; break; default: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index bd898d8deda0..f1f17f7a3cd0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -107,10 +107,18 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) goto err_dealloc_transport_domain; } + err = mlx5_alloc_bfreg(mdev, &res->bfreg, false, false); + if (err) { + mlx5_core_err(mdev, "alloc bfreg failed, %d\n", err); + goto err_destroy_mkey; + } + INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list); return 0; +err_destroy_mkey: + mlx5_core_destroy_mkey(mdev, &res->mkey); err_dealloc_transport_domain: mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); err_dealloc_pd: @@ -122,23 +130,26 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) { struct mlx5e_resources *res = &mdev->mlx5e_res; + mlx5_free_bfreg(mdev, &res->bfreg); mlx5_core_destroy_mkey(mdev, &res->mkey); mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); mlx5_core_dealloc_pd(mdev, res->pdn); } -int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev, - bool enable_uc_lb) +int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb) { + struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_tir *tir; - void *in; + int err = -ENOMEM; + u32 tirn = 0; int inlen; - int err = 0; + void *in; + inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = mlx5_vzalloc(inlen); if (!in) - return -ENOMEM; + goto out; if (enable_uc_lb) MLX5_SET(modify_tir_in, in, ctx.self_lb_block, @@ -147,13 +158,16 @@ int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev, MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) { - err = mlx5_core_modify_tir(mdev, tir->tirn, in, inlen); + tirn = tir->tirn; + err = 
mlx5_core_modify_tir(mdev, tirn, in, inlen); if (err) goto out; } out: kvfree(in); + if (err) + netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index a004a5a1a4c2..ce7b09d72ff6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -42,8 +42,9 @@ static void mlx5e_get_drvinfo(struct net_device *dev, strlcpy(drvinfo->version, DRIVER_VERSION " (" DRIVER_RELDATE ")", sizeof(drvinfo->version)); snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), - "%d.%d.%d", - fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev)); + "%d.%d.%04d (%.16s)", + fw_rev_maj(mdev), fw_rev_min(mdev), fw_rev_sub(mdev), + mdev->board_id); strlcpy(drvinfo->bus_info, pci_name(mdev->pdev), sizeof(drvinfo->bus_info)); } @@ -152,12 +153,9 @@ static bool mlx5e_query_global_pause_combined(struct mlx5e_priv *priv) } #define MLX5E_NUM_Q_CNTRS(priv) (NUM_Q_COUNTERS * (!!priv->q_counter)) -#define MLX5E_NUM_RQ_STATS(priv) \ - (NUM_RQ_STATS * priv->params.num_channels * \ - test_bit(MLX5E_STATE_OPENED, &priv->state)) +#define MLX5E_NUM_RQ_STATS(priv) (NUM_RQ_STATS * (priv)->channels.num) #define MLX5E_NUM_SQ_STATS(priv) \ - (NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \ - test_bit(MLX5E_STATE_OPENED, &priv->state)) + (NUM_SQ_STATS * (priv)->channels.num * (priv)->channels.params.num_tc) #define MLX5E_NUM_PFC_COUNTERS(priv) \ ((mlx5e_query_global_pause_combined(priv) + hweight8(mlx5e_query_pfc_combined(priv))) * \ NUM_PPORT_PER_PRIO_PFC_COUNTERS) @@ -262,17 +260,17 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) return; /* per channel counters */ - for (i = 0; i < priv->params.num_channels; i++) + for (i = 0; i < priv->channels.num; i++) for (j = 0; j < NUM_RQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, 
rq_stats_desc[j].format, i); - for (tc = 0; tc < priv->params.num_tc; tc++) - for (i = 0; i < priv->params.num_channels; i++) + for (tc = 0; tc < priv->channels.params.num_tc; tc++) + for (i = 0; i < priv->channels.num; i++) for (j = 0; j < NUM_SQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, sq_stats_desc[j].format, - priv->channeltc_to_txq_map[i][tc]); + priv->channel_tc2txq[i][tc]); } static void mlx5e_get_strings(struct net_device *dev, @@ -303,6 +301,7 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_channels *channels; struct mlx5_priv *mlx5_priv; int i, j, tc, prio, idx = 0; unsigned long pfc_combined; @@ -313,6 +312,7 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, mutex_lock(&priv->state_lock); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_update_stats(priv); + channels = &priv->channels; mutex_unlock(&priv->state_lock); for (i = 0; i < NUM_SW_COUNTERS; i++) @@ -382,16 +382,16 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, return; /* per channel counters */ - for (i = 0; i < priv->params.num_channels; i++) + for (i = 0; i < channels->num; i++) for (j = 0; j < NUM_RQ_STATS; j++) data[idx++] = - MLX5E_READ_CTR64_CPU(&priv->channel[i]->rq.stats, + MLX5E_READ_CTR64_CPU(&channels->c[i]->rq.stats, rq_stats_desc, j); - for (tc = 0; tc < priv->params.num_tc; tc++) - for (i = 0; i < priv->params.num_channels; i++) + for (tc = 0; tc < priv->channels.params.num_tc; tc++) + for (i = 0; i < channels->num; i++) for (j = 0; j < NUM_SQ_STATS; j++) - data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel[i]->sq[tc].stats, + data[idx++] = MLX5E_READ_CTR64_CPU(&channels->c[i]->sq[tc].stats, sq_stats_desc, j); } @@ -406,8 +406,8 @@ static u32 mlx5e_rx_wqes_to_packets(struct mlx5e_priv *priv, int rq_wq_type, if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) return num_wqe; - stride_size = 1 << 
priv->params.mpwqe_log_stride_sz; - num_strides = 1 << priv->params.mpwqe_log_num_strides; + stride_size = 1 << priv->channels.params.mpwqe_log_stride_sz; + num_strides = 1 << priv->channels.params.mpwqe_log_num_strides; wqe_size = stride_size * num_strides; packets_per_wqe = wqe_size / @@ -427,8 +427,8 @@ static u32 mlx5e_packets_to_rx_wqes(struct mlx5e_priv *priv, int rq_wq_type, if (rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) return num_packets; - stride_size = 1 << priv->params.mpwqe_log_stride_sz; - num_strides = 1 << priv->params.mpwqe_log_num_strides; + stride_size = 1 << priv->channels.params.mpwqe_log_stride_sz; + num_strides = 1 << priv->channels.params.mpwqe_log_num_strides; wqe_size = stride_size * num_strides; num_packets = (1 << order_base_2(num_packets)); @@ -443,26 +443,25 @@ static void mlx5e_get_ringparam(struct net_device *dev, struct ethtool_ringparam *param) { struct mlx5e_priv *priv = netdev_priv(dev); - int rq_wq_type = priv->params.rq_wq_type; + int rq_wq_type = priv->channels.params.rq_wq_type; param->rx_max_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, 1 << mlx5_max_log_rq_size(rq_wq_type)); param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; param->rx_pending = mlx5e_rx_wqes_to_packets(priv, rq_wq_type, - 1 << priv->params.log_rq_size); - param->tx_pending = 1 << priv->params.log_sq_size; + 1 << priv->channels.params.log_rq_size); + param->tx_pending = 1 << priv->channels.params.log_sq_size; } static int mlx5e_set_ringparam(struct net_device *dev, struct ethtool_ringparam *param) { struct mlx5e_priv *priv = netdev_priv(dev); - bool was_opened; - int rq_wq_type = priv->params.rq_wq_type; + int rq_wq_type = priv->channels.params.rq_wq_type; + struct mlx5e_channels new_channels = {}; u32 rx_pending_wqes; u32 min_rq_size; u32 max_rq_size; - u16 min_rx_wqes; u8 log_rq_size; u8 log_sq_size; u32 num_mtts; @@ -500,7 +499,7 @@ static int mlx5e_set_ringparam(struct net_device *dev, } num_mtts = 
MLX5E_REQUIRED_MTTS(rx_pending_wqes); - if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + if (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && !MLX5E_VALID_NUM_MTTS(num_mtts)) { netdev_info(dev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n", __func__, param->rx_pending); @@ -522,26 +521,29 @@ static int mlx5e_set_ringparam(struct net_device *dev, log_rq_size = order_base_2(rx_pending_wqes); log_sq_size = order_base_2(param->tx_pending); - min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, rx_pending_wqes); - if (log_rq_size == priv->params.log_rq_size && - log_sq_size == priv->params.log_sq_size && - min_rx_wqes == priv->params.min_rx_wqes) + if (log_rq_size == priv->channels.params.log_rq_size && + log_sq_size == priv->channels.params.log_sq_size) return 0; mutex_lock(&priv->state_lock); - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) - mlx5e_close_locked(dev); + new_channels.params = priv->channels.params; + new_channels.params.log_rq_size = log_rq_size; + new_channels.params.log_sq_size = log_sq_size; - priv->params.log_rq_size = log_rq_size; - priv->params.log_sq_size = log_sq_size; - priv->params.min_rx_wqes = min_rx_wqes; + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto unlock; + } + + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto unlock; - if (was_opened) - err = mlx5e_open_locked(dev); + mlx5e_switch_priv_channels(priv, &new_channels, NULL); +unlock: mutex_unlock(&priv->state_lock); return err; @@ -553,7 +555,7 @@ static void mlx5e_get_channels(struct net_device *dev, struct mlx5e_priv *priv = netdev_priv(dev); ch->max_combined = priv->profile->max_nch(priv->mdev); - ch->combined_count = priv->params.num_channels; + ch->combined_count = priv->channels.params.num_channels; } static int mlx5e_set_channels(struct net_device *dev, @@ -561,8 +563,8 @@ static int mlx5e_set_channels(struct 
net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); unsigned int count = ch->combined_count; + struct mlx5e_channels new_channels = {}; bool arfs_enabled; - bool was_opened; int err = 0; if (!count) { @@ -571,27 +573,32 @@ static int mlx5e_set_channels(struct net_device *dev, return -EINVAL; } - if (priv->params.num_channels == count) + if (priv->channels.params.num_channels == count) return 0; mutex_lock(&priv->state_lock); - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) - mlx5e_close_locked(dev); + new_channels.params = priv->channels.params; + new_channels.params.num_channels = count; + mlx5e_build_default_indir_rqt(priv->mdev, new_channels.params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, count); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } + + /* Create fresh channels with new parameters */ + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto out; arfs_enabled = dev->features & NETIF_F_NTUPLE; if (arfs_enabled) mlx5e_arfs_disable(priv); - priv->params.num_channels = count; - mlx5e_build_default_indir_rqt(priv->mdev, priv->params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, count); - - if (was_opened) - err = mlx5e_open_locked(dev); - if (err) - goto out; + /* Switch to new channels, set new parameters and close old ones */ + mlx5e_switch_priv_channels(priv, &new_channels, NULL); if (arfs_enabled) { err = mlx5e_arfs_enable(priv); @@ -614,49 +621,24 @@ static int mlx5e_get_coalesce(struct net_device *netdev, if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) return -EOPNOTSUPP; - coal->rx_coalesce_usecs = priv->params.rx_cq_moderation.usec; - coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation.pkts; - coal->tx_coalesce_usecs = priv->params.tx_cq_moderation.usec; - coal->tx_max_coalesced_frames = priv->params.tx_cq_moderation.pkts; - coal->use_adaptive_rx_coalesce = priv->params.rx_am_enabled; + coal->rx_coalesce_usecs = 
priv->channels.params.rx_cq_moderation.usec; + coal->rx_max_coalesced_frames = priv->channels.params.rx_cq_moderation.pkts; + coal->tx_coalesce_usecs = priv->channels.params.tx_cq_moderation.usec; + coal->tx_max_coalesced_frames = priv->channels.params.tx_cq_moderation.pkts; + coal->use_adaptive_rx_coalesce = priv->channels.params.rx_am_enabled; return 0; } -static int mlx5e_set_coalesce(struct net_device *netdev, - struct ethtool_coalesce *coal) +static void +mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { - struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_channel *c; - bool restart = - !!coal->use_adaptive_rx_coalesce != priv->params.rx_am_enabled; - bool was_opened; - int err = 0; int tc; int i; - if (!MLX5_CAP_GEN(mdev, cq_moderation)) - return -EOPNOTSUPP; - - mutex_lock(&priv->state_lock); - - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened && restart) { - mlx5e_close_locked(netdev); - priv->params.rx_am_enabled = !!coal->use_adaptive_rx_coalesce; - } - - priv->params.tx_cq_moderation.usec = coal->tx_coalesce_usecs; - priv->params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames; - priv->params.rx_cq_moderation.usec = coal->rx_coalesce_usecs; - priv->params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames; - - if (!was_opened || restart) - goto out; - - for (i = 0; i < priv->params.num_channels; ++i) { - c = priv->channel[i]; + for (i = 0; i < priv->channels.num; ++i) { + struct mlx5e_channel *c = priv->channels.c[i]; for (tc = 0; tc < c->num_tc; tc++) { mlx5_core_modify_cq_moderation(mdev, @@ -669,11 +651,50 @@ static int mlx5e_set_coalesce(struct net_device *netdev, coal->rx_coalesce_usecs, coal->rx_max_coalesced_frames); } +} -out: - if (was_opened && restart) - err = mlx5e_open_locked(netdev); +static int mlx5e_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *coal) +{ + struct mlx5e_priv *priv = 
netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_channels new_channels = {}; + int err = 0; + bool reset; + + if (!MLX5_CAP_GEN(mdev, cq_moderation)) + return -EOPNOTSUPP; + + mutex_lock(&priv->state_lock); + new_channels.params = priv->channels.params; + + new_channels.params.tx_cq_moderation.usec = coal->tx_coalesce_usecs; + new_channels.params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames; + new_channels.params.rx_cq_moderation.usec = coal->rx_coalesce_usecs; + new_channels.params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames; + new_channels.params.rx_am_enabled = !!coal->use_adaptive_rx_coalesce; + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } + /* we are opened */ + + reset = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_am_enabled; + if (!reset) { + mlx5e_set_priv_channels_coalesce(priv, coal); + priv->channels.params = new_channels.params; + goto out; + } + + /* open fresh channels with new coal parameters */ + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto out; + + mlx5e_switch_priv_channels(priv, &new_channels, NULL); +out: mutex_unlock(&priv->state_lock); return err; } @@ -968,7 +989,7 @@ static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - return sizeof(priv->params.toeplitz_hash_key); + return sizeof(priv->channels.params.toeplitz_hash_key); } static u32 mlx5e_get_rxfh_indir_size(struct net_device *netdev) @@ -982,15 +1003,15 @@ static int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, struct mlx5e_priv *priv = netdev_priv(netdev); if (indir) - memcpy(indir, priv->params.indirection_rqt, - sizeof(priv->params.indirection_rqt)); + memcpy(indir, priv->channels.params.indirection_rqt, + sizeof(priv->channels.params.indirection_rqt)); if (key) - memcpy(key, priv->params.toeplitz_hash_key, - sizeof(priv->params.toeplitz_hash_key)); + 
memcpy(key, priv->channels.params.toeplitz_hash_key, + sizeof(priv->channels.params.toeplitz_hash_key)); if (hfunc) - *hfunc = priv->params.rss_hfunc; + *hfunc = priv->channels.params.rss_hfunc; return 0; } @@ -1006,7 +1027,7 @@ static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { memset(tirc, 0, ctxlen); - mlx5e_build_indir_tir_ctx_hash(priv, tirc, tt); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc); mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); } } @@ -1030,25 +1051,37 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, mutex_lock(&priv->state_lock); - if (indir) { - u32 rqtn = priv->indir_rqt.rqtn; - - memcpy(priv->params.indirection_rqt, indir, - sizeof(priv->params.indirection_rqt)); - mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); - } - if (hfunc != ETH_RSS_HASH_NO_CHANGE && - hfunc != priv->params.rss_hfunc) { - priv->params.rss_hfunc = hfunc; + hfunc != priv->channels.params.rss_hfunc) { + priv->channels.params.rss_hfunc = hfunc; hash_changed = true; } + if (indir) { + memcpy(priv->channels.params.indirection_rqt, indir, + sizeof(priv->channels.params.indirection_rqt)); + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + u32 rqtn = priv->indir_rqt.rqtn; + struct mlx5e_redirect_rqt_param rrp = { + .is_rss = true, + { + .rss = { + .hfunc = priv->channels.params.rss_hfunc, + .channels = &priv->channels, + }, + }, + }; + + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp); + } + } + if (key) { - memcpy(priv->params.toeplitz_hash_key, key, - sizeof(priv->params.toeplitz_hash_key)); + memcpy(priv->channels.params.toeplitz_hash_key, key, + sizeof(priv->channels.params.toeplitz_hash_key)); hash_changed = hash_changed || - priv->params.rss_hfunc == ETH_RSS_HASH_TOP; + priv->channels.params.rss_hfunc == ETH_RSS_HASH_TOP; } if (hash_changed) @@ -1069,7 +1102,7 @@ static int mlx5e_get_rxnfc(struct net_device 
*netdev, switch (info->cmd) { case ETHTOOL_GRXRINGS: - info->data = priv->params.num_channels; + info->data = priv->channels.params.num_channels; break; case ETHTOOL_GRXCLSRLCNT: info->rule_cnt = priv->fs.ethtool.tot_num_rules; @@ -1097,7 +1130,7 @@ static int mlx5e_get_tunable(struct net_device *dev, switch (tuna->id) { case ETHTOOL_TX_COPYBREAK: - *(u32 *)data = priv->params.tx_max_inline; + *(u32 *)data = priv->channels.params.tx_max_inline; break; default: err = -EINVAL; @@ -1113,9 +1146,11 @@ static int mlx5e_set_tunable(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; - bool was_opened; - u32 val; + struct mlx5e_channels new_channels = {}; int err = 0; + u32 val; + + mutex_lock(&priv->state_lock); switch (tuna->id) { case ETHTOOL_TX_COPYBREAK: @@ -1125,24 +1160,26 @@ static int mlx5e_set_tunable(struct net_device *dev, break; } - mutex_lock(&priv->state_lock); + new_channels.params = priv->channels.params; + new_channels.params.tx_max_inline = val; - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) - mlx5e_close_locked(dev); - - priv->params.tx_max_inline = val; + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + break; + } - if (was_opened) - err = mlx5e_open_locked(dev); + err = mlx5e_open_channels(priv, &new_channels); + if (err) + break; + mlx5e_switch_priv_channels(priv, &new_channels, NULL); - mutex_unlock(&priv->state_lock); break; default: err = -EINVAL; break; } + mutex_unlock(&priv->state_lock); return err; } @@ -1442,15 +1479,15 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_channels new_channels = {}; bool rx_mode_changed; u8 rx_cq_period_mode; int err = 0; - bool reset; rx_cq_period_mode = enable ? 
MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - rx_mode_changed = rx_cq_period_mode != priv->params.rx_cq_period_mode; + rx_mode_changed = rx_cq_period_mode != priv->channels.params.rx_cq_period_mode; if (rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE && !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)) @@ -1459,16 +1496,51 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) if (!rx_mode_changed) return 0; - reset = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (reset) - mlx5e_close_locked(netdev); + new_channels.params = priv->channels.params; + mlx5e_set_rx_cq_mode_params(&new_channels.params, rx_cq_period_mode); - mlx5e_set_rx_cq_mode_params(&priv->params, rx_cq_period_mode); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + return 0; + } - if (reset) - err = mlx5e_open_locked(netdev); + err = mlx5e_open_channels(priv, &new_channels); + if (err) + return err; - return err; + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + return 0; +} + +int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val) +{ + bool curr_val = MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS); + struct mlx5e_channels new_channels = {}; + int err = 0; + + if (!MLX5_CAP_GEN(priv->mdev, cqe_compression)) + return new_val ? 
-EOPNOTSUPP : 0; + + if (curr_val == new_val) + return 0; + + new_channels.params = priv->channels.params; + MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val); + + mlx5e_set_rq_type_params(priv->mdev, &new_channels.params, + new_channels.params.rq_wq_type); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + return 0; + } + + err = mlx5e_open_channels(priv, &new_channels); + if (err) + return err; + + mlx5e_switch_priv_channels(priv, &new_channels, NULL); + return 0; } static int set_pflag_rx_cqe_compress(struct net_device *netdev, @@ -1486,8 +1558,7 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, } mlx5e_modify_rx_cqe_compression_locked(priv, enable); - priv->params.rx_cqe_compress_def = enable; - mlx5e_set_rq_type_params(priv, priv->params.rq_wq_type); + priv->channels.params.rx_cqe_compress_def = enable; return 0; } @@ -1499,7 +1570,7 @@ static int mlx5e_handle_pflag(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); bool enable = !!(wanted_flags & flag); - u32 changes = wanted_flags ^ priv->params.pflags; + u32 changes = wanted_flags ^ priv->channels.params.pflags; int err; if (!(changes & flag)) @@ -1512,7 +1583,7 @@ static int mlx5e_handle_pflag(struct net_device *netdev, return err; } - MLX5E_SET_PFLAG(priv, flag, enable); + MLX5E_SET_PFLAG(&priv->channels.params, flag, enable); return 0; } @@ -1541,7 +1612,7 @@ static u32 mlx5e_get_priv_flags(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - return priv->params.pflags; + return priv->channels.params.pflags; } static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index f2762e45c8ae..576d6787b484 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -159,14 +159,10 
@@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, u16 vid, struct mlx5_flow_spec *spec) { - struct mlx5_flow_act flow_act = { - .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, - .encap_id = 0, - }; struct mlx5_flow_table *ft = priv->fs.vlan.ft.t; struct mlx5_flow_destination dest; struct mlx5_flow_handle **rule_p; + MLX5_DECLARE_FLOW_ACT(flow_act); int err = 0; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; @@ -659,11 +655,7 @@ mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, u16 etype, u8 proto) { - struct mlx5_flow_act flow_act = { - .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, - .encap_id = 0, - }; + MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; int err = 0; @@ -800,7 +792,7 @@ err: return err; } -static void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) +void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) { struct mlx5e_ttc_table *ttc = &priv->fs.ttc; @@ -808,14 +800,19 @@ static void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) mlx5e_destroy_flow_table(&ttc->ft); } -static int mlx5e_create_ttc_table(struct mlx5e_priv *priv) +int mlx5e_create_ttc_table(struct mlx5e_priv *priv, u32 underlay_qpn) { struct mlx5e_ttc_table *ttc = &priv->fs.ttc; + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5e_flow_table *ft = &ttc->ft; int err; - ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_TTC_TABLE_SIZE, MLX5E_TTC_FT_LEVEL, 0); + ft_attr.max_fte = MLX5E_TTC_TABLE_SIZE; + ft_attr.level = MLX5E_TTC_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + ft_attr.underlay_qpn = underlay_qpn; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -848,13 +845,9 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, struct mlx5e_l2_rule *ai, int type) { - 
struct mlx5_flow_act flow_act = { - .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - .flow_tag = MLX5_FS_DEFAULT_FLOW_TAG, - .encap_id = 0, - }; struct mlx5_flow_table *ft = priv->fs.l2.ft.t; struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_spec *spec; int err = 0; u8 *mc_dmac; @@ -985,12 +978,16 @@ static int mlx5e_create_l2_table(struct mlx5e_priv *priv) { struct mlx5e_l2_table *l2_table = &priv->fs.l2; struct mlx5e_flow_table *ft = &l2_table->ft; + struct mlx5_flow_table_attr ft_attr = {}; int err; ft->num_groups = 0; - ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_L2_TABLE_SIZE, MLX5E_L2_FT_LEVEL, 0); + ft_attr.max_fte = MLX5E_L2_TABLE_SIZE; + ft_attr.level = MLX5E_L2_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; @@ -1088,11 +1085,16 @@ static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft) static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) { struct mlx5e_flow_table *ft = &priv->fs.vlan.ft; + struct mlx5_flow_table_attr ft_attr = {}; int err; ft->num_groups = 0; - ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, - MLX5E_VLAN_TABLE_SIZE, MLX5E_VLAN_FT_LEVEL, 0); + + ft_attr.max_fte = MLX5E_VLAN_TABLE_SIZE; + ft_attr.level = MLX5E_VLAN_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + + ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); @@ -1145,7 +1147,7 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - err = mlx5e_create_ttc_table(priv); + err = mlx5e_create_ttc_table(priv, 0); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 26fc77e80f7b..85bf4a389295 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -390,7 +390,7 @@ static int validate_flow(struct mlx5e_priv *priv, if (fs->location >= MAX_NUM_OF_ETHTOOL_RULES) return -EINVAL; - if (fs->ring_cookie >= priv->params.num_channels && + if (fs->ring_cookie >= priv->channels.params.num_channels && fs->ring_cookie != RX_CLS_FLOW_DISC) return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 15cc7b469d2e..a61b71b6fff3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -31,28 +31,24 @@ */ #include <net/tc_act/tc_gact.h> -#include <linux/crash_dump.h> #include <net/pkt_cls.h> #include <linux/mlx5/fs.h> #include <net/vxlan.h> #include <linux/bpf.h> +#include "eswitch.h" #include "en.h" #include "en_tc.h" -#include "eswitch.h" +#include "en_rep.h" #include "vxlan.h" struct mlx5e_rq_param { u32 rqc[MLX5_ST_SZ_DW(rqc)]; struct mlx5_wq_param wq; - bool am_enabled; }; struct mlx5e_sq_param { u32 sqc[MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; - u16 max_inline; - u8 min_inline_mode; - enum mlx5e_sq_type type; }; struct mlx5e_cq_param { @@ -79,49 +75,47 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) MLX5_CAP_ETH(mdev, reg_umr_sq); } -void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) +void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, u8 rq_type) { - priv->params.rq_wq_type = rq_type; - priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; - switch (priv->params.rq_wq_type) { + params->rq_wq_type = rq_type; + params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - priv->params.log_rq_size = is_kdump_kernel() ? + params->log_rq_size = is_kdump_kernel() ? 
MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW : MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; - priv->params.mpwqe_log_stride_sz = - MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) ? - MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(priv->mdev) : - MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(priv->mdev); - priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - - priv->params.mpwqe_log_stride_sz; + params->mpwqe_log_stride_sz = + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ? + MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : + MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev); + params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - + params->mpwqe_log_stride_sz; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - priv->params.log_rq_size = is_kdump_kernel() ? + params->log_rq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE : MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; /* Extra room needed for build_skb */ - priv->params.lro_wqe_sz -= MLX5_RX_HEADROOM + + params->lro_wqe_sz -= MLX5_RX_HEADROOM + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); } - priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, - BIT(priv->params.log_rq_size)); - mlx5_core_info(priv->mdev, - "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", - priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, - BIT(priv->params.log_rq_size), - BIT(priv->params.mpwqe_log_stride_sz), - MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS)); + mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", + params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, + BIT(params->log_rq_size), + BIT(params->mpwqe_log_stride_sz), + MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)); } -static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv) +static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params) { - u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) && - !priv->xdp_prog ? 
+ u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) && + !params->xdp_prog ? MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : MLX5_WQ_TYPE_LINKED_LIST; - mlx5e_set_rq_type_params(priv, rq_type); + mlx5e_set_rq_type_params(mdev, params, rq_type); } static void mlx5e_update_carrier(struct mlx5e_priv *priv) @@ -181,8 +175,10 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) int i, j; memset(s, 0, sizeof(*s)); - for (i = 0; i < priv->params.num_channels; i++) { - rq_stats = &priv->channel[i]->rq.stats; + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + rq_stats = &c->rq.stats; s->rx_packets += rq_stats->packets; s->rx_bytes += rq_stats->bytes; @@ -204,8 +200,8 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_cache_empty += rq_stats->cache_empty; s->rx_cache_busy += rq_stats->cache_busy; - for (j = 0; j < priv->params.num_tc; j++) { - sq_stats = &priv->channel[i]->sq[j].stats; + for (j = 0; j < priv->channels.params.num_tc; j++) { + sq_stats = &c->sq[j].stats; s->tx_packets += sq_stats->packets; s->tx_bytes += sq_stats->bytes; @@ -402,8 +398,10 @@ static inline int mlx5e_get_wqe_mtt_sz(void) MLX5_UMR_MTT_ALIGNMENT); } -static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_sq *sq, - struct mlx5e_umr_wqe *wqe, u16 ix) +static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, + struct mlx5e_icosq *sq, + struct mlx5e_umr_wqe *wqe, + u16 ix) { struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; @@ -493,11 +491,10 @@ static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq) kfree(rq->mpwqe.info); } -static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv, +static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, u64 npages, u8 page_shift, struct mlx5_core_mkey *umr_mkey) { - struct mlx5_core_dev *mdev = priv->mdev; int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); void *mkc; u32 *in; @@ -531,21 +528,20 @@ static int 
mlx5e_create_umr_mkey(struct mlx5e_priv *priv, return err; } -static int mlx5e_create_rq_umr_mkey(struct mlx5e_rq *rq) +static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq) { - struct mlx5e_priv *priv = rq->priv; - u64 num_mtts = MLX5E_REQUIRED_MTTS(BIT(priv->params.log_rq_size)); + u64 num_mtts = MLX5E_REQUIRED_MTTS(mlx5_wq_ll_get_size(&rq->wq)); - return mlx5e_create_umr_mkey(priv, num_mtts, PAGE_SHIFT, &rq->umr_mkey); + return mlx5e_create_umr_mkey(mdev, num_mtts, PAGE_SHIFT, &rq->umr_mkey); } -static int mlx5e_create_rq(struct mlx5e_channel *c, - struct mlx5e_rq_param *param, - struct mlx5e_rq *rq) +static int mlx5e_alloc_rq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_rq_param *rqp, + struct mlx5e_rq *rq) { - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; - void *rqc = param->rqc; + struct mlx5_core_dev *mdev = c->mdev; + void *rqc = rqp->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); u32 byte_count; u32 frag_sz; @@ -554,9 +550,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, int err; int i; - param->wq.db_numa_node = cpu_to_node(c->cpu); + rqp->wq.db_numa_node = cpu_to_node(c->cpu); - err = mlx5_wq_ll_create(mdev, ¶m->wq, rqc_wq, &rq->wq, + err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq, &rq->wq_ctrl); if (err) return err; @@ -565,15 +561,15 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, wq_sz = mlx5_wq_ll_get_size(&rq->wq); - rq->wq_type = priv->params.rq_wq_type; + rq->wq_type = params->rq_wq_type; rq->pdev = c->pdev; rq->netdev = c->netdev; - rq->tstamp = &priv->tstamp; + rq->tstamp = c->tstamp; rq->channel = c; rq->ix = c->ix; - rq->priv = c->priv; + rq->mdev = mdev; - rq->xdp_prog = priv->xdp_prog ? bpf_prog_inc(priv->xdp_prog) : NULL; + rq->xdp_prog = params->xdp_prog ? 
bpf_prog_inc(params->xdp_prog) : NULL; if (IS_ERR(rq->xdp_prog)) { err = PTR_ERR(rq->xdp_prog); rq->xdp_prog = NULL; @@ -588,24 +584,26 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->rx_headroom = MLX5_RX_HEADROOM; } - switch (priv->params.rq_wq_type) { + switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - if (mlx5e_is_vf_vport_rep(priv)) { - err = -EINVAL; - goto err_rq_wq_destroy; - } - rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; - rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); - rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); + rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe; + if (!rq->handle_rx_cqe) { + err = -EINVAL; + netdev_err(c->netdev, "RX handler of MPWQE RQ is not set, err %d\n", err); + goto err_rq_wq_destroy; + } + + rq->mpwqe_stride_sz = BIT(params->mpwqe_log_stride_sz); + rq->mpwqe_num_strides = BIT(params->mpwqe_log_num_strides); rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; byte_count = rq->buff.wqe_sz; - err = mlx5e_create_rq_umr_mkey(rq); + err = mlx5e_create_rq_umr_mkey(mdev, rq); if (err) goto err_rq_wq_destroy; rq->mkey_be = cpu_to_be32(rq->umr_mkey.key); @@ -621,18 +619,20 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, err = -ENOMEM; goto err_rq_wq_destroy; } - - if (mlx5e_is_vf_vport_rep(priv)) - rq->handle_rx_cqe = mlx5e_handle_rx_cqe_rep; - else - rq->handle_rx_cqe = mlx5e_handle_rx_cqe; - rq->alloc_wqe = mlx5e_alloc_rx_wqe; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; - rq->buff.wqe_sz = (priv->params.lro_en) ? - priv->params.lro_wqe_sz : - MLX5E_SW2HW_MTU(priv->netdev->mtu); + rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe; + if (!rq->handle_rx_cqe) { + kfree(rq->dma_info); + err = -EINVAL; + netdev_err(c->netdev, "RX handler of RQ is not set, err %d\n", err); + goto err_rq_wq_destroy; + } + + rq->buff.wqe_sz = params->lro_en ? 
+ params->lro_wqe_sz : + MLX5E_SW2HW_MTU(c->netdev->mtu); byte_count = rq->buff.wqe_sz; /* calc the required page order */ @@ -656,8 +656,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, } INIT_WORK(&rq->am.work, mlx5e_rx_am_work); - rq->am.mode = priv->params.rx_cq_period_mode; - + rq->am.mode = params->rx_cq_period_mode; rq->page_cache.head = 0; rq->page_cache.tail = 0; @@ -674,7 +673,7 @@ err_rq_wq_destroy: return err; } -static void mlx5e_destroy_rq(struct mlx5e_rq *rq) +static void mlx5e_free_rq(struct mlx5e_rq *rq) { int i; @@ -684,7 +683,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: mlx5e_rq_free_mpwqe_info(rq); - mlx5_core_destroy_mkey(rq->priv->mdev, &rq->umr_mkey); + mlx5_core_destroy_mkey(rq->mdev, &rq->umr_mkey); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ kfree(rq->dma_info); @@ -699,10 +698,10 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq) mlx5_wq_destroy(&rq->wq_ctrl); } -static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) +static int mlx5e_create_rq(struct mlx5e_rq *rq, + struct mlx5e_rq_param *param) { - struct mlx5e_priv *priv = rq->priv; - struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_dev *mdev = rq->mdev; void *in; void *rqc; @@ -723,7 +722,6 @@ static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); - MLX5_SET(rqc, rqc, vsd, priv->params.vlan_strip_disable); MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); @@ -742,8 +740,7 @@ static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state) { struct mlx5e_channel *c = rq->channel; - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_dev *mdev = c->mdev; void *in; void *rqc; @@ -767,7 +764,7 @@ static int 
mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, return err; } -static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) +static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; @@ -787,6 +784,35 @@ static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); MLX5_SET64(modify_rq_in, in, modify_bitmask, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_SCATTER_FCS); + MLX5_SET(rqc, rqc, scatter_fcs, enable); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); + + err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen); + + kvfree(in); + + return err; +} + +static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5_core_dev *mdev = c->mdev; + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); + MLX5_SET64(modify_rq_in, in, modify_bitmask, MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD); MLX5_SET(rqc, rqc, vsd, vsd); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); @@ -798,25 +824,28 @@ static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) return err; } -static void mlx5e_disable_rq(struct mlx5e_rq *rq) +static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { - mlx5_core_destroy_rq(rq->priv->mdev, rq->rqn); + mlx5_core_destroy_rq(rq->mdev, rq->rqn); } static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq) { unsigned long exp_time = jiffies + msecs_to_jiffies(20000); struct mlx5e_channel *c = rq->channel; - struct mlx5e_priv *priv = c->priv; + struct mlx5_wq_ll *wq = &rq->wq; + u16 min_wqes = mlx5_min_rx_wqes(rq->wq_type, mlx5_wq_ll_get_size(wq)); while (time_before(jiffies, exp_time)) { - if (wq->cur_sz >= priv->params.min_rx_wqes) + if (wq->cur_sz >= min_wqes) 
return 0; msleep(20); } + netdev_warn(c->netdev, "Failed to get min RX wqes on RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n", + rq->rqn, wq->cur_sz, min_wqes); return -ETIMEDOUT; } @@ -842,83 +871,128 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) } static int mlx5e_open_rq(struct mlx5e_channel *c, + struct mlx5e_params *params, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) { - struct mlx5e_sq *sq = &c->icosq; - u16 pi = sq->pc & sq->wq.sz_m1; int err; - err = mlx5e_create_rq(c, param, rq); + err = mlx5e_alloc_rq(c, params, param, rq); if (err) return err; - err = mlx5e_enable_rq(rq, param); + err = mlx5e_create_rq(rq, param); if (err) - goto err_destroy_rq; + goto err_free_rq; - set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err) - goto err_disable_rq; + goto err_destroy_rq; - if (param->am_enabled) + if (params->rx_am_enabled) set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); - sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; - sq->db.ico_wqe[pi].num_wqebbs = 1; - mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */ - return 0; -err_disable_rq: - clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); - mlx5e_disable_rq(rq); err_destroy_rq: mlx5e_destroy_rq(rq); +err_free_rq: + mlx5e_free_rq(rq); return err; } -static void mlx5e_close_rq(struct mlx5e_rq *rq) +static void mlx5e_activate_rq(struct mlx5e_rq *rq) +{ + struct mlx5e_icosq *sq = &rq->channel->icosq; + u16 pi = sq->pc & sq->wq.sz_m1; + struct mlx5e_tx_wqe *nopwqe; + + set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; + sq->db.ico_wqe[pi].num_wqebbs = 1; + nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &nopwqe->ctrl); +} + +static void mlx5e_deactivate_rq(struct mlx5e_rq *rq) { clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ - cancel_work_sync(&rq->am.work); +} - 
mlx5e_disable_rq(rq); - mlx5e_free_rx_descs(rq); +static void mlx5e_close_rq(struct mlx5e_rq *rq) +{ + cancel_work_sync(&rq->am.work); mlx5e_destroy_rq(rq); + mlx5e_free_rx_descs(rq); + mlx5e_free_rq(rq); } -static void mlx5e_free_sq_xdp_db(struct mlx5e_sq *sq) +static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq) { - kfree(sq->db.xdp.di); - kfree(sq->db.xdp.wqe_info); + kfree(sq->db.di); } -static int mlx5e_alloc_sq_xdp_db(struct mlx5e_sq *sq, int numa) +static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - sq->db.xdp.di = kzalloc_node(sizeof(*sq->db.xdp.di) * wq_sz, + sq->db.di = kzalloc_node(sizeof(*sq->db.di) * wq_sz, GFP_KERNEL, numa); - sq->db.xdp.wqe_info = kzalloc_node(sizeof(*sq->db.xdp.wqe_info) * wq_sz, - GFP_KERNEL, numa); - if (!sq->db.xdp.di || !sq->db.xdp.wqe_info) { - mlx5e_free_sq_xdp_db(sq); + if (!sq->db.di) { + mlx5e_free_xdpsq_db(sq); return -ENOMEM; } return 0; } -static void mlx5e_free_sq_ico_db(struct mlx5e_sq *sq) +static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_xdpsq *sq) +{ + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; + int err; + + sq->pdev = c->pdev; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->min_inline_mode = params->tx_min_inline_mode; + + param->wq.db_numa_node = cpu_to_node(c->cpu); + err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); + if (err) + return err; + sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; + + err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu)); + if (err) + goto err_sq_wq_destroy; + + return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + + return err; +} + +static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq) +{ + mlx5e_free_xdpsq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); +} + +static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq) { 
kfree(sq->db.ico_wqe); } -static int mlx5e_alloc_sq_ico_db(struct mlx5e_sq *sq, int numa) +static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa) { u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); @@ -930,155 +1004,128 @@ static int mlx5e_alloc_sq_ico_db(struct mlx5e_sq *sq, int numa) return 0; } -static void mlx5e_free_sq_txq_db(struct mlx5e_sq *sq) +static int mlx5e_alloc_icosq(struct mlx5e_channel *c, + struct mlx5e_sq_param *param, + struct mlx5e_icosq *sq) { - kfree(sq->db.txq.wqe_info); - kfree(sq->db.txq.dma_fifo); - kfree(sq->db.txq.skb); -} + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; + int err; -static int mlx5e_alloc_sq_txq_db(struct mlx5e_sq *sq, int numa) -{ - int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + sq->pdev = c->pdev; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->uar_map = mdev->mlx5e_res.bfreg.map; - sq->db.txq.skb = kzalloc_node(wq_sz * sizeof(*sq->db.txq.skb), - GFP_KERNEL, numa); - sq->db.txq.dma_fifo = kzalloc_node(df_sz * sizeof(*sq->db.txq.dma_fifo), - GFP_KERNEL, numa); - sq->db.txq.wqe_info = kzalloc_node(wq_sz * sizeof(*sq->db.txq.wqe_info), - GFP_KERNEL, numa); - if (!sq->db.txq.skb || !sq->db.txq.dma_fifo || !sq->db.txq.wqe_info) { - mlx5e_free_sq_txq_db(sq); - return -ENOMEM; - } + param->wq.db_numa_node = cpu_to_node(c->cpu); + err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); + if (err) + return err; + sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - sq->dma_fifo_mask = df_sz - 1; + err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu)); + if (err) + goto err_sq_wq_destroy; + + sq->edge = (sq->wq.sz_m1 + 1) - MLX5E_ICOSQ_MAX_WQEBBS; return 0; + +err_sq_wq_destroy: + mlx5_wq_destroy(&sq->wq_ctrl); + + return err; } -static void mlx5e_free_sq_db(struct mlx5e_sq *sq) +static void mlx5e_free_icosq(struct mlx5e_icosq *sq) { - switch (sq->type) { - case MLX5E_SQ_TXQ: - mlx5e_free_sq_txq_db(sq); - break; - case MLX5E_SQ_ICO: - 
mlx5e_free_sq_ico_db(sq); - break; - case MLX5E_SQ_XDP: - mlx5e_free_sq_xdp_db(sq); - break; - } + mlx5e_free_icosq_db(sq); + mlx5_wq_destroy(&sq->wq_ctrl); } -static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) +static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq) { - switch (sq->type) { - case MLX5E_SQ_TXQ: - return mlx5e_alloc_sq_txq_db(sq, numa); - case MLX5E_SQ_ICO: - return mlx5e_alloc_sq_ico_db(sq, numa); - case MLX5E_SQ_XDP: - return mlx5e_alloc_sq_xdp_db(sq, numa); - } - - return 0; + kfree(sq->db.wqe_info); + kfree(sq->db.dma_fifo); } -static int mlx5e_sq_get_max_wqebbs(u8 sq_type) +static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa) { - switch (sq_type) { - case MLX5E_SQ_ICO: - return MLX5E_ICOSQ_MAX_WQEBBS; - case MLX5E_SQ_XDP: - return MLX5E_XDP_TX_WQEBBS; + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; + + sq->db.dma_fifo = kzalloc_node(df_sz * sizeof(*sq->db.dma_fifo), + GFP_KERNEL, numa); + sq->db.wqe_info = kzalloc_node(wq_sz * sizeof(*sq->db.wqe_info), + GFP_KERNEL, numa); + if (!sq->db.dma_fifo || !sq->db.wqe_info) { + mlx5e_free_txqsq_db(sq); + return -ENOMEM; } - return MLX5_SEND_WQE_MAX_WQEBBS; + + sq->dma_fifo_mask = df_sz - 1; + + return 0; } -static int mlx5e_create_sq(struct mlx5e_channel *c, - int tc, - struct mlx5e_sq_param *param, - struct mlx5e_sq *sq) +static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, + int txq_ix, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq) { - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; - - void *sqc = param->sqc; - void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); + void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); + struct mlx5_core_dev *mdev = c->mdev; int err; - sq->type = param->type; sq->pdev = c->pdev; - sq->tstamp = &priv->tstamp; + sq->tstamp = c->tstamp; sq->mkey_be = c->mkey_be; sq->channel = c; - sq->tc = tc; - - err = mlx5_alloc_bfreg(mdev, &sq->bfreg, 
MLX5_CAP_GEN(mdev, bf), false); - if (err) - return err; + sq->txq_ix = txq_ix; + sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->max_inline = params->tx_max_inline; + sq->min_inline_mode = params->tx_min_inline_mode; - sq->uar_map = sq->bfreg.map; param->wq.db_numa_node = cpu_to_node(c->cpu); - - err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, - &sq->wq_ctrl); + err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) - goto err_unmap_free_uar; - - sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - if (sq->bfreg.wc) - set_bit(MLX5E_SQ_STATE_BF_ENABLE, &sq->state); - - sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; - sq->max_inline = param->max_inline; - sq->min_inline_mode = param->min_inline_mode; + return err; + sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; - err = mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu)); + err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu)); if (err) goto err_sq_wq_destroy; - if (sq->type == MLX5E_SQ_TXQ) { - int txq_ix; - - txq_ix = c->ix + tc * priv->params.num_channels; - sq->txq = netdev_get_tx_queue(priv->netdev, txq_ix); - priv->txq_to_sq_map[txq_ix] = sq; - } - - sq->edge = (sq->wq.sz_m1 + 1) - mlx5e_sq_get_max_wqebbs(sq->type); - sq->bf_budget = MLX5E_SQ_BF_BUDGET; + sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS; return 0; err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); -err_unmap_free_uar: - mlx5_free_bfreg(mdev, &sq->bfreg); - return err; } -static void mlx5e_destroy_sq(struct mlx5e_sq *sq) +static void mlx5e_free_txqsq(struct mlx5e_txqsq *sq) { - struct mlx5e_channel *c = sq->channel; - struct mlx5e_priv *priv = c->priv; - - mlx5e_free_sq_db(sq); + mlx5e_free_txqsq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); - mlx5_free_bfreg(priv->mdev, &sq->bfreg); } -static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) -{ - struct mlx5e_channel *c = sq->channel; - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; +struct mlx5e_create_sq_param { + struct 
mlx5_wq_ctrl *wq_ctrl; + u32 cqn; + u32 tisn; + u8 tis_lst_sz; + u8 min_inline_mode; +}; +static int mlx5e_create_sq(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param, + struct mlx5e_create_sq_param *csp, + u32 *sqn) +{ void *in; void *sqc; void *wq; @@ -1086,7 +1133,7 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) int err; inlen = MLX5_ST_SZ_BYTES(create_sq_in) + - sizeof(u64) * sq->wq_ctrl.buf.npages; + sizeof(u64) * csp->wq_ctrl->buf.npages; in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; @@ -1095,40 +1142,40 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) wq = MLX5_ADDR_OF(sqc, sqc, wq); memcpy(sqc, param->sqc, sizeof(param->sqc)); - - MLX5_SET(sqc, sqc, tis_num_0, param->type == MLX5E_SQ_ICO ? - 0 : priv->tisn[sq->tc]); - MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); + MLX5_SET(sqc, sqc, tis_lst_sz, csp->tis_lst_sz); + MLX5_SET(sqc, sqc, tis_num_0, csp->tisn); + MLX5_SET(sqc, sqc, cqn, csp->cqn); if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT) - MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode); + MLX5_SET(sqc, sqc, min_wqe_inline_mode, csp->min_inline_mode); - MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); - MLX5_SET(sqc, sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 
0 : 1); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, uar_page, sq->bfreg.index); - MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - + MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.bfreg.index); + MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); - MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); + MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma); - mlx5_fill_page_array(&sq->wq_ctrl.buf, - (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + mlx5_fill_page_array(&csp->wq_ctrl->buf, (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); - err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn); + err = mlx5_core_create_sq(mdev, in, inlen, sqn); kvfree(in); return err; } -static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, - int next_state, bool update_rl, int rl_index) -{ - struct mlx5e_channel *c = sq->channel; - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; +struct mlx5e_modify_sq_param { + int curr_state; + int next_state; + bool rl_update; + int rl_index; +}; +static int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn, + struct mlx5e_modify_sq_param *p) +{ void *in; void *sqc; int inlen; @@ -1141,68 +1188,94 @@ static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); - MLX5_SET(modify_sq_in, in, sq_state, curr_state); - MLX5_SET(sqc, sqc, state, next_state); - if (update_rl && next_state == MLX5_SQC_STATE_RDY) { + MLX5_SET(modify_sq_in, in, sq_state, p->curr_state); + MLX5_SET(sqc, sqc, state, p->next_state); + if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) { MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); - MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); + MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index); } - err = mlx5_core_modify_sq(mdev, sq->sqn, in, inlen); + err = mlx5_core_modify_sq(mdev, sqn, in, inlen); kvfree(in); return err; } 
-static void mlx5e_disable_sq(struct mlx5e_sq *sq) +static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn) { - struct mlx5e_channel *c = sq->channel; - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; - - mlx5_core_destroy_sq(mdev, sq->sqn); - if (sq->rate_limit) - mlx5_rl_remove_rate(mdev, sq->rate_limit); + mlx5_core_destroy_sq(mdev, sqn); } -static int mlx5e_open_sq(struct mlx5e_channel *c, - int tc, - struct mlx5e_sq_param *param, - struct mlx5e_sq *sq) +static int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev, + struct mlx5e_sq_param *param, + struct mlx5e_create_sq_param *csp, + u32 *sqn) { + struct mlx5e_modify_sq_param msp = {0}; int err; - err = mlx5e_create_sq(c, tc, param, sq); + err = mlx5e_create_sq(mdev, param, csp, sqn); if (err) return err; - err = mlx5e_enable_sq(sq, param); + msp.curr_state = MLX5_SQC_STATE_RST; + msp.next_state = MLX5_SQC_STATE_RDY; + err = mlx5e_modify_sq(mdev, *sqn, &msp); if (err) - goto err_destroy_sq; + mlx5e_destroy_sq(mdev, *sqn); - set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY, - false, 0); + return err; +} + +static int mlx5e_set_sq_maxrate(struct net_device *dev, + struct mlx5e_txqsq *sq, u32 rate); + +static int mlx5e_open_txqsq(struct mlx5e_channel *c, + u32 tisn, + int txq_ix, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_txqsq *sq) +{ + struct mlx5e_create_sq_param csp = {}; + u32 tx_rate; + int err; + + err = mlx5e_alloc_txqsq(c, txq_ix, params, param, sq); if (err) - goto err_disable_sq; + return err; - if (sq->txq) { - netdev_tx_reset_queue(sq->txq); - netif_tx_start_queue(sq->txq); - } + csp.tisn = tisn; + csp.tis_lst_sz = 1; + csp.cqn = sq->cq.mcq.cqn; + csp.wq_ctrl = &sq->wq_ctrl; + csp.min_inline_mode = sq->min_inline_mode; + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + if (err) + goto err_free_txqsq; + + tx_rate = c->priv->tx_rates[sq->txq_ix]; + if 
(tx_rate) + mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate); return 0; -err_disable_sq: +err_free_txqsq: clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - mlx5e_disable_sq(sq); -err_destroy_sq: - mlx5e_destroy_sq(sq); + mlx5e_free_txqsq(sq); return err; } +static void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) +{ + sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix); + set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + netdev_tx_reset_queue(sq->txq); + netif_tx_start_queue(sq->txq); +} + static inline void netif_tx_disable_queue(struct netdev_queue *txq) { __netif_tx_lock_bh(txq); @@ -1210,43 +1283,153 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq) __netif_tx_unlock_bh(txq); } -static void mlx5e_close_sq(struct mlx5e_sq *sq) +static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) { + struct mlx5e_channel *c = sq->channel; + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); /* prevent netif_tx_wake_queue */ - napi_synchronize(&sq->channel->napi); + napi_synchronize(&c->napi); - if (sq->txq) { - netif_tx_disable_queue(sq->txq); + netif_tx_disable_queue(sq->txq); - /* last doorbell out, godspeed .. */ - if (mlx5e_sq_has_room_for(sq, 1)) { - sq->db.txq.skb[(sq->pc & sq->wq.sz_m1)] = NULL; - mlx5e_send_nop(sq, true); - } + /* last doorbell out, godspeed .. 
*/ + if (mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1)) { + struct mlx5e_tx_wqe *nop; + + sq->db.wqe_info[(sq->pc & sq->wq.sz_m1)].skb = NULL; + nop = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &nop->ctrl); } +} + +static void mlx5e_close_txqsq(struct mlx5e_txqsq *sq) +{ + struct mlx5e_channel *c = sq->channel; + struct mlx5_core_dev *mdev = c->mdev; - mlx5e_disable_sq(sq); - mlx5e_free_sq_descs(sq); - mlx5e_destroy_sq(sq); + mlx5e_destroy_sq(mdev, sq->sqn); + if (sq->rate_limit) + mlx5_rl_remove_rate(mdev, sq->rate_limit); + mlx5e_free_txqsq_descs(sq); + mlx5e_free_txqsq(sq); } -static int mlx5e_create_cq(struct mlx5e_channel *c, - struct mlx5e_cq_param *param, - struct mlx5e_cq *cq) +static int mlx5e_open_icosq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_icosq *sq) +{ + struct mlx5e_create_sq_param csp = {}; + int err; + + err = mlx5e_alloc_icosq(c, param, sq); + if (err) + return err; + + csp.cqn = sq->cq.mcq.cqn; + csp.wq_ctrl = &sq->wq_ctrl; + csp.min_inline_mode = params->tx_min_inline_mode; + set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + if (err) + goto err_free_icosq; + + return 0; + +err_free_icosq: + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + mlx5e_free_icosq(sq); + + return err; +} + +static void mlx5e_close_icosq(struct mlx5e_icosq *sq) +{ + struct mlx5e_channel *c = sq->channel; + + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + napi_synchronize(&c->napi); + + mlx5e_destroy_sq(c->mdev, sq->sqn); + mlx5e_free_icosq(sq); +} + +static int mlx5e_open_xdpsq(struct mlx5e_channel *c, + struct mlx5e_params *params, + struct mlx5e_sq_param *param, + struct mlx5e_xdpsq *sq) +{ + unsigned int ds_cnt = MLX5E_XDP_TX_DS_COUNT; + struct mlx5e_create_sq_param csp = {}; + unsigned int inline_hdr_sz = 0; + int err; + int i; + + err = mlx5e_alloc_xdpsq(c, params, param, sq); + if (err) + 
return err; + + csp.tis_lst_sz = 1; + csp.tisn = c->priv->tisn[0]; /* tc = 0 */ + csp.cqn = sq->cq.mcq.cqn; + csp.wq_ctrl = &sq->wq_ctrl; + csp.min_inline_mode = sq->min_inline_mode; + set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + err = mlx5e_create_sq_rdy(c->mdev, param, &csp, &sq->sqn); + if (err) + goto err_free_xdpsq; + + if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { + inline_hdr_sz = MLX5E_XDP_MIN_INLINE; + ds_cnt++; + } + + /* Pre initialize fixed WQE fields */ + for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) { + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(&sq->wq, i); + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + struct mlx5_wqe_data_seg *dseg; + + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz); + + dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1); + dseg->lkey = sq->mkey_be; + } + + return 0; + +err_free_xdpsq: + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + mlx5e_free_xdpsq(sq); + + return err; +} + +static void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) +{ + struct mlx5e_channel *c = sq->channel; + + clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); + napi_synchronize(&c->napi); + + mlx5e_destroy_sq(c->mdev, sq->sqn); + mlx5e_free_xdpsq_descs(sq); + mlx5e_free_xdpsq(sq); +} + +static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev, + struct mlx5e_cq_param *param, + struct mlx5e_cq *cq) { - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_core_cq *mcq = &cq->mcq; int eqn_not_used; unsigned int irqn; int err; u32 i; - param->wq.buf_numa_node = cpu_to_node(c->cpu); - param->wq.db_numa_node = cpu_to_node(c->cpu); - param->eq_ix = c->ix; - err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq, &cq->wq_ctrl); if (err) @@ -1254,8 +1437,6 @@ static int mlx5e_create_cq(struct mlx5e_channel *c, mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn); - cq->napi = &c->napi; - mcq->cqe_sz = 64; 
mcq->set_ci_db = cq->wq_ctrl.db.db; mcq->arm_db = cq->wq_ctrl.db.db + 1; @@ -1272,21 +1453,38 @@ static int mlx5e_create_cq(struct mlx5e_channel *c, cqe->op_own = 0xf1; } - cq->channel = c; - cq->priv = priv; + cq->mdev = mdev; return 0; } -static void mlx5e_destroy_cq(struct mlx5e_cq *cq) +static int mlx5e_alloc_cq(struct mlx5e_channel *c, + struct mlx5e_cq_param *param, + struct mlx5e_cq *cq) +{ + struct mlx5_core_dev *mdev = c->priv->mdev; + int err; + + param->wq.buf_numa_node = cpu_to_node(c->cpu); + param->wq.db_numa_node = cpu_to_node(c->cpu); + param->eq_ix = c->ix; + + err = mlx5e_alloc_cq_common(mdev, param, cq); + + cq->napi = &c->napi; + cq->channel = c; + + return err; +} + +static void mlx5e_free_cq(struct mlx5e_cq *cq) { mlx5_cqwq_destroy(&cq->wq_ctrl); } -static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) +static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) { - struct mlx5e_priv *priv = cq->priv; - struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_core_dev *mdev = cq->mdev; struct mlx5_core_cq *mcq = &cq->mcq; void *in; @@ -1330,47 +1528,41 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) return 0; } -static void mlx5e_disable_cq(struct mlx5e_cq *cq) +static void mlx5e_destroy_cq(struct mlx5e_cq *cq) { - struct mlx5e_priv *priv = cq->priv; - struct mlx5_core_dev *mdev = priv->mdev; - - mlx5_core_destroy_cq(mdev, &cq->mcq); + mlx5_core_destroy_cq(cq->mdev, &cq->mcq); } static int mlx5e_open_cq(struct mlx5e_channel *c, + struct mlx5e_cq_moder moder, struct mlx5e_cq_param *param, - struct mlx5e_cq *cq, - struct mlx5e_cq_moder moderation) + struct mlx5e_cq *cq) { + struct mlx5_core_dev *mdev = c->mdev; int err; - struct mlx5e_priv *priv = c->priv; - struct mlx5_core_dev *mdev = priv->mdev; - err = mlx5e_create_cq(c, param, cq); + err = mlx5e_alloc_cq(c, param, cq); if (err) return err; - err = mlx5e_enable_cq(cq, param); + err = mlx5e_create_cq(cq, param); if (err) 
- goto err_destroy_cq; + goto err_free_cq; if (MLX5_CAP_GEN(mdev, cq_moderation)) - mlx5_core_modify_cq_moderation(mdev, &cq->mcq, - moderation.usec, - moderation.pkts); + mlx5_core_modify_cq_moderation(mdev, &cq->mcq, moder.usec, moder.pkts); return 0; -err_destroy_cq: - mlx5e_destroy_cq(cq); +err_free_cq: + mlx5e_free_cq(cq); return err; } static void mlx5e_close_cq(struct mlx5e_cq *cq) { - mlx5e_disable_cq(cq); mlx5e_destroy_cq(cq); + mlx5e_free_cq(cq); } static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) @@ -1379,15 +1571,15 @@ static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix) } static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, + struct mlx5e_params *params, struct mlx5e_channel_param *cparam) { - struct mlx5e_priv *priv = c->priv; int err; int tc; for (tc = 0; tc < c->num_tc; tc++) { - err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq, - priv->params.tx_cq_moderation); + err = mlx5e_open_cq(c, params->tx_cq_moderation, + &cparam->tx_cq, &c->sq[tc].cq); if (err) goto err_close_tx_cqs; } @@ -1410,13 +1602,17 @@ static void mlx5e_close_tx_cqs(struct mlx5e_channel *c) } static int mlx5e_open_sqs(struct mlx5e_channel *c, + struct mlx5e_params *params, struct mlx5e_channel_param *cparam) { int err; int tc; - for (tc = 0; tc < c->num_tc; tc++) { - err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]); + for (tc = 0; tc < params->num_tc; tc++) { + int txq_ix = c->ix + tc * params->num_channels; + + err = mlx5e_open_txqsq(c, c->priv->tisn[tc], txq_ix, + params, &cparam->sq, &c->sq[tc]); if (err) goto err_close_sqs; } @@ -1425,7 +1621,7 @@ static int mlx5e_open_sqs(struct mlx5e_channel *c, err_close_sqs: for (tc--; tc >= 0; tc--) - mlx5e_close_sq(&c->sq[tc]); + mlx5e_close_txqsq(&c->sq[tc]); return err; } @@ -1435,23 +1631,15 @@ static void mlx5e_close_sqs(struct mlx5e_channel *c) int tc; for (tc = 0; tc < c->num_tc; tc++) - mlx5e_close_sq(&c->sq[tc]); -} - -static void mlx5e_build_channeltc_to_txq_map(struct mlx5e_priv *priv, int ix) -{ - int i; - - 
for (i = 0; i < priv->profile->max_tc; i++) - priv->channeltc_to_txq_map[ix][i] = - ix + i * priv->params.num_channels; + mlx5e_close_txqsq(&c->sq[tc]); } static int mlx5e_set_sq_maxrate(struct net_device *dev, - struct mlx5e_sq *sq, u32 rate) + struct mlx5e_txqsq *sq, u32 rate) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_modify_sq_param msp = {0}; u16 rl_index = 0; int err; @@ -1474,8 +1662,11 @@ static int mlx5e_set_sq_maxrate(struct net_device *dev, } } - err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, - MLX5_SQC_STATE_RDY, true, rl_index); + msp.curr_state = MLX5_SQC_STATE_RDY; + msp.next_state = MLX5_SQC_STATE_RDY; + msp.rl_index = rl_index; + msp.rl_update = true; + err = mlx5e_modify_sq(mdev, sq->sqn, &msp); if (err) { netdev_err(dev, "Failed configuring rate %u: %d\n", rate, err); @@ -1493,7 +1684,7 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_sq *sq = priv->txq_to_sq_map[index]; + struct mlx5e_txqsq *sq = priv->txq2sq[index]; int err = 0; if (!mlx5_rl_is_supported(mdev)) { @@ -1520,114 +1711,87 @@ static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) return err; } -static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) -{ - return is_kdump_kernel() ? 
- MLX5E_MIN_NUM_CHANNELS : - min_t(int, mdev->priv.eq_table.num_comp_vectors, - MLX5E_MAX_NUM_CHANNELS); -} - static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, + struct mlx5e_params *params, struct mlx5e_channel_param *cparam, struct mlx5e_channel **cp) { - struct mlx5e_cq_moder icosq_cq_moder = {0, 0}; + struct mlx5e_cq_moder icocq_moder = {0, 0}; struct net_device *netdev = priv->netdev; - struct mlx5e_cq_moder rx_cq_profile; int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; - struct mlx5e_sq *sq; int err; - int i; c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); if (!c) return -ENOMEM; c->priv = priv; + c->mdev = priv->mdev; + c->tstamp = &priv->tstamp; c->ix = ix; c->cpu = cpu; c->pdev = &priv->mdev->pdev->dev; c->netdev = priv->netdev; c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); - c->num_tc = priv->params.num_tc; - c->xdp = !!priv->xdp_prog; - - if (priv->params.rx_am_enabled) - rx_cq_profile = mlx5e_am_get_def_profile(priv->params.rx_cq_period_mode); - else - rx_cq_profile = priv->params.rx_cq_moderation; - - mlx5e_build_channeltc_to_txq_map(priv, ix); + c->num_tc = params->num_tc; + c->xdp = !!params->xdp_prog; netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); - err = mlx5e_open_cq(c, &cparam->icosq_cq, &c->icosq.cq, icosq_cq_moder); + err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq); if (err) goto err_napi_del; - err = mlx5e_open_tx_cqs(c, cparam); + err = mlx5e_open_tx_cqs(c, params, cparam); if (err) goto err_close_icosq_cq; - err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq, - rx_cq_profile); + err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->rq.cq); if (err) goto err_close_tx_cqs; /* XDP SQ CQ params are same as normal TXQ sq CQ params */ - err = c->xdp ? mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq, - priv->params.tx_cq_moderation) : 0; + err = c->xdp ? 
mlx5e_open_cq(c, params->tx_cq_moderation, + &cparam->tx_cq, &c->rq.xdpsq.cq) : 0; if (err) goto err_close_rx_cq; napi_enable(&c->napi); - err = mlx5e_open_sq(c, 0, &cparam->icosq, &c->icosq); + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq); if (err) goto err_disable_napi; - err = mlx5e_open_sqs(c, cparam); + err = mlx5e_open_sqs(c, params, cparam); if (err) goto err_close_icosq; - for (i = 0; i < priv->params.num_tc; i++) { - u32 txq_ix = priv->channeltc_to_txq_map[ix][i]; - - if (priv->tx_rates[txq_ix]) { - sq = priv->txq_to_sq_map[txq_ix]; - mlx5e_set_sq_maxrate(priv->netdev, sq, - priv->tx_rates[txq_ix]); - } - } - - err = c->xdp ? mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq) : 0; + err = c->xdp ? mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, &c->rq.xdpsq) : 0; if (err) goto err_close_sqs; - err = mlx5e_open_rq(c, &cparam->rq, &c->rq); + err = mlx5e_open_rq(c, params, &cparam->rq, &c->rq); if (err) goto err_close_xdp_sq; - netif_set_xps_queue(netdev, get_cpu_mask(c->cpu), ix); *cp = c; return 0; err_close_xdp_sq: if (c->xdp) - mlx5e_close_sq(&c->xdp_sq); + mlx5e_close_xdpsq(&c->rq.xdpsq); err_close_sqs: mlx5e_close_sqs(c); err_close_icosq: - mlx5e_close_sq(&c->icosq); + mlx5e_close_icosq(&c->icosq); err_disable_napi: napi_disable(&c->napi); if (c->xdp) - mlx5e_close_cq(&c->xdp_sq.cq); + mlx5e_close_cq(&c->rq.xdpsq.cq); err_close_rx_cq: mlx5e_close_cq(&c->rq.cq); @@ -1645,16 +1809,35 @@ err_napi_del: return err; } +static void mlx5e_activate_channel(struct mlx5e_channel *c) +{ + int tc; + + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_activate_txqsq(&c->sq[tc]); + mlx5e_activate_rq(&c->rq); + netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix); +} + +static void mlx5e_deactivate_channel(struct mlx5e_channel *c) +{ + int tc; + + mlx5e_deactivate_rq(&c->rq); + for (tc = 0; tc < c->num_tc; tc++) + mlx5e_deactivate_txqsq(&c->sq[tc]); +} + static void mlx5e_close_channel(struct mlx5e_channel *c) { mlx5e_close_rq(&c->rq); if (c->xdp) - 
mlx5e_close_sq(&c->xdp_sq); + mlx5e_close_xdpsq(&c->rq.xdpsq); mlx5e_close_sqs(c); - mlx5e_close_sq(&c->icosq); + mlx5e_close_icosq(&c->icosq); napi_disable(&c->napi); if (c->xdp) - mlx5e_close_cq(&c->xdp_sq.cq); + mlx5e_close_cq(&c->rq.xdpsq.cq); mlx5e_close_cq(&c->rq.cq); mlx5e_close_tx_cqs(c); mlx5e_close_cq(&c->icosq.cq); @@ -1664,17 +1847,16 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) } static void mlx5e_build_rq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, struct mlx5e_rq_param *param) { void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - switch (priv->params.rq_wq_type) { + switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - MLX5_SET(wq, wq, log_wqe_num_of_strides, - priv->params.mpwqe_log_num_strides - 9); - MLX5_SET(wq, wq, log_wqe_stride_size, - priv->params.mpwqe_log_stride_sz - 6); + MLX5_SET(wq, wq, log_wqe_num_of_strides, params->mpwqe_log_num_strides - 9); + MLX5_SET(wq, wq, log_wqe_stride_size, params->mpwqe_log_stride_sz - 6); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ @@ -1683,14 +1865,14 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); - MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); + MLX5_SET(wq, wq, log_wq_sz, params->log_rq_size); MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); + MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); + MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); param->wq.linear = 1; - - param->am_enabled = priv->params.rx_am_enabled; } static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param) @@ -1715,17 +1897,14 @@ static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, } static void 
mlx5e_build_sq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); mlx5e_build_sq_param_common(priv, param); - MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); - - param->max_inline = priv->params.tx_max_inline; - param->min_inline_mode = priv->params.tx_min_inline_mode; - param->type = MLX5E_SQ_TXQ; + MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, @@ -1737,37 +1916,36 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, } static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, struct mlx5e_cq_param *param) { void *cqc = param->cqc; u8 log_cq_size; - switch (priv->params.rq_wq_type) { + switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - log_cq_size = priv->params.log_rq_size + - priv->params.mpwqe_log_num_strides; + log_cq_size = params->log_rq_size + params->mpwqe_log_num_strides; break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - log_cq_size = priv->params.log_rq_size; + log_cq_size = params->log_rq_size; } MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); - if (MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS)) { + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM); MLX5_SET(cqc, cqc, cqe_comp_en, 1); } mlx5e_build_common_cq_param(priv, param); - - param->cq_period_mode = priv->params.rx_cq_period_mode; } static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, struct mlx5e_cq_param *param) { void *cqc = param->cqc; - MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); + MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size); mlx5e_build_common_cq_param(priv, param); @@ -1775,8 +1953,8 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, } static void mlx5e_build_ico_cq_param(struct mlx5e_priv 
*priv, - struct mlx5e_cq_param *param, - u8 log_wq_size) + u8 log_wq_size, + struct mlx5e_cq_param *param) { void *cqc = param->cqc; @@ -1788,8 +1966,8 @@ static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, } static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, - struct mlx5e_sq_param *param, - u8 log_wq_size) + u8 log_wq_size, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); @@ -1798,162 +1976,119 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_sz, log_wq_size); MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); - - param->type = MLX5E_SQ_ICO; } static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); mlx5e_build_sq_param_common(priv, param); - MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); - - param->max_inline = priv->params.tx_max_inline; - param->min_inline_mode = priv->params.tx_min_inline_mode; - param->type = MLX5E_SQ_XDP; + MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); } -static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) +static void mlx5e_build_channel_param(struct mlx5e_priv *priv, + struct mlx5e_params *params, + struct mlx5e_channel_param *cparam) { u8 icosq_log_wq_sz = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; - mlx5e_build_rq_param(priv, &cparam->rq); - mlx5e_build_sq_param(priv, &cparam->sq); - mlx5e_build_xdpsq_param(priv, &cparam->xdp_sq); - mlx5e_build_icosq_param(priv, &cparam->icosq, icosq_log_wq_sz); - mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); - mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); - mlx5e_build_ico_cq_param(priv, &cparam->icosq_cq, icosq_log_wq_sz); + mlx5e_build_rq_param(priv, params, &cparam->rq); + mlx5e_build_sq_param(priv, params, &cparam->sq); + mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq); + 
mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq); + mlx5e_build_rx_cq_param(priv, params, &cparam->rx_cq); + mlx5e_build_tx_cq_param(priv, params, &cparam->tx_cq); + mlx5e_build_ico_cq_param(priv, icosq_log_wq_sz, &cparam->icosq_cq); } -static int mlx5e_open_channels(struct mlx5e_priv *priv) +int mlx5e_open_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *chs) { struct mlx5e_channel_param *cparam; - int nch = priv->params.num_channels; int err = -ENOMEM; int i; - int j; - - priv->channel = kcalloc(nch, sizeof(struct mlx5e_channel *), - GFP_KERNEL); - priv->txq_to_sq_map = kcalloc(nch * priv->params.num_tc, - sizeof(struct mlx5e_sq *), GFP_KERNEL); + chs->num = chs->params.num_channels; + chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL); cparam = kzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL); + if (!chs->c || !cparam) + goto err_free; - if (!priv->channel || !priv->txq_to_sq_map || !cparam) - goto err_free_txq_to_sq_map; - - mlx5e_build_channel_param(priv, cparam); - - for (i = 0; i < nch; i++) { - err = mlx5e_open_channel(priv, i, cparam, &priv->channel[i]); - if (err) - goto err_close_channels; - } - - for (j = 0; j < nch; j++) { - err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq); + mlx5e_build_channel_param(priv, &chs->params, cparam); + for (i = 0; i < chs->num; i++) { + err = mlx5e_open_channel(priv, i, &chs->params, cparam, &chs->c[i]); if (err) goto err_close_channels; } - /* FIXME: This is a W/A for tx timeout watch dog false alarm when - * polling for inactive tx queues. 
- */ - netif_tx_start_all_queues(priv->netdev); - kfree(cparam); return 0; err_close_channels: for (i--; i >= 0; i--) - mlx5e_close_channel(priv->channel[i]); + mlx5e_close_channel(chs->c[i]); -err_free_txq_to_sq_map: - kfree(priv->txq_to_sq_map); - kfree(priv->channel); +err_free: + kfree(chs->c); kfree(cparam); - + chs->num = 0; return err; } -static void mlx5e_close_channels(struct mlx5e_priv *priv) +static void mlx5e_activate_channels(struct mlx5e_channels *chs) { int i; - /* FIXME: This is a W/A only for tx timeout watch dog false alarm when - * polling for inactive tx queues. - */ - netif_tx_stop_all_queues(priv->netdev); - netif_tx_disable(priv->netdev); - - for (i = 0; i < priv->params.num_channels; i++) - mlx5e_close_channel(priv->channel[i]); - - kfree(priv->txq_to_sq_map); - kfree(priv->channel); + for (i = 0; i < chs->num; i++) + mlx5e_activate_channel(chs->c[i]); } -static int mlx5e_rx_hash_fn(int hfunc) +static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs) { - return (hfunc == ETH_RSS_HASH_TOP) ? - MLX5_RX_HASH_FN_TOEPLITZ : - MLX5_RX_HASH_FN_INVERTED_XOR8; -} - -static int mlx5e_bits_invert(unsigned long a, int size) -{ - int inv = 0; + int err = 0; int i; - for (i = 0; i < size; i++) - inv |= (test_bit(size - i - 1, &a) ? 1 : 0) << i; + for (i = 0; i < chs->num; i++) { + err = mlx5e_wait_for_min_rx_wqes(&chs->c[i]->rq); + if (err) + break; + } - return inv; + return err; } -static void mlx5e_fill_indir_rqt_rqns(struct mlx5e_priv *priv, void *rqtc) +static void mlx5e_deactivate_channels(struct mlx5e_channels *chs) { int i; - for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) { - int ix = i; - u32 rqn; - - if (priv->params.rss_hfunc == ETH_RSS_HASH_XOR) - ix = mlx5e_bits_invert(i, MLX5E_LOG_INDIR_RQT_SIZE); - - ix = priv->params.indirection_rqt[ix]; - rqn = test_bit(MLX5E_STATE_OPENED, &priv->state) ? 
- priv->channel[ix]->rq.rqn : - priv->drop_rq.rqn; - MLX5_SET(rqtc, rqtc, rq_num[i], rqn); - } + for (i = 0; i < chs->num; i++) + mlx5e_deactivate_channel(chs->c[i]); } -static void mlx5e_fill_direct_rqt_rqn(struct mlx5e_priv *priv, void *rqtc, - int ix) +void mlx5e_close_channels(struct mlx5e_channels *chs) { - u32 rqn = test_bit(MLX5E_STATE_OPENED, &priv->state) ? - priv->channel[ix]->rq.rqn : - priv->drop_rq.rqn; + int i; - MLX5_SET(rqtc, rqtc, rq_num[0], rqn); + for (i = 0; i < chs->num; i++) + mlx5e_close_channel(chs->c[i]); + + kfree(chs->c); + chs->num = 0; } -static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, - int ix, struct mlx5e_rqt *rqt) +static int +mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, struct mlx5e_rqt *rqt) { struct mlx5_core_dev *mdev = priv->mdev; void *rqtc; int inlen; int err; u32 *in; + int i; inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; in = mlx5_vzalloc(inlen); @@ -1965,10 +2100,8 @@ static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); MLX5_SET(rqtc, rqtc, rqt_max_size, sz); - if (sz > 1) /* RSS */ - mlx5e_fill_indir_rqt_rqns(priv, rqtc); - else - mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix); + for (i = 0; i < sz; i++) + MLX5_SET(rqtc, rqtc, rq_num[i], priv->drop_rq.rqn); err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn); if (!err) @@ -1984,11 +2117,15 @@ void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt) mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn); } -static int mlx5e_create_indirect_rqts(struct mlx5e_priv *priv) +int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv) { struct mlx5e_rqt *rqt = &priv->indir_rqt; + int err; - return mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt); + err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, rqt); + if (err) + mlx5_core_warn(priv->mdev, "create indirect rqts failed, %d\n", err); + return err; } int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) @@ -1999,7 +2136,7 @@ int 
mlx5e_create_direct_rqts(struct mlx5e_priv *priv) for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { rqt = &priv->direct_tir[ix].rqt; - err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqt); + err = mlx5e_create_rqt(priv, 1 /*size */, rqt); if (err) goto err_destroy_rqts; } @@ -2007,13 +2144,64 @@ int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) return 0; err_destroy_rqts: + mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err); for (ix--; ix >= 0; ix--) mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt); return err; } -int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix) +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv) +{ + int i; + + for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); +} + +static int mlx5e_rx_hash_fn(int hfunc) +{ + return (hfunc == ETH_RSS_HASH_TOP) ? + MLX5_RX_HASH_FN_TOEPLITZ : + MLX5_RX_HASH_FN_INVERTED_XOR8; +} + +static int mlx5e_bits_invert(unsigned long a, int size) +{ + int inv = 0; + int i; + + for (i = 0; i < size; i++) + inv |= (test_bit(size - i - 1, &a) ? 
1 : 0) << i; + + return inv; +} + +static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, int sz, + struct mlx5e_redirect_rqt_param rrp, void *rqtc) +{ + int i; + + for (i = 0; i < sz; i++) { + u32 rqn; + + if (rrp.is_rss) { + int ix = i; + + if (rrp.rss.hfunc == ETH_RSS_HASH_XOR) + ix = mlx5e_bits_invert(i, ilog2(sz)); + + ix = priv->channels.params.indirection_rqt[ix]; + rqn = rrp.rss.channels->c[ix]->rq.rqn; + } else { + rqn = rrp.rqn; + } + MLX5_SET(rqtc, rqtc, rq_num[i], rqn); + } +} + +int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, + struct mlx5e_redirect_rqt_param rrp) { struct mlx5_core_dev *mdev = priv->mdev; void *rqtc; @@ -2029,41 +2217,86 @@ int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix) rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); - if (sz > 1) /* RSS */ - mlx5e_fill_indir_rqt_rqns(priv, rqtc); - else - mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix); - MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1); - + mlx5e_fill_rqt_rqns(priv, sz, rrp, rqtc); err = mlx5_core_modify_rqt(mdev, rqtn, in, inlen); kvfree(in); - return err; } -static void mlx5e_redirect_rqts(struct mlx5e_priv *priv) +static u32 mlx5e_get_direct_rqn(struct mlx5e_priv *priv, int ix, + struct mlx5e_redirect_rqt_param rrp) +{ + if (!rrp.is_rss) + return rrp.rqn; + + if (ix >= rrp.rss.channels->num) + return priv->drop_rq.rqn; + + return rrp.rss.channels->c[ix]->rq.rqn; +} + +static void mlx5e_redirect_rqts(struct mlx5e_priv *priv, + struct mlx5e_redirect_rqt_param rrp) { u32 rqtn; int ix; if (priv->indir_rqt.enabled) { + /* RSS RQ table */ rqtn = priv->indir_rqt.rqtn; - mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, rrp); } - for (ix = 0; ix < priv->params.num_channels; ix++) { + for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { + struct mlx5e_redirect_rqt_param direct_rrp = { + .is_rss = false, + { + .rqn = 
mlx5e_get_direct_rqn(priv, ix, rrp) + }, + }; + + /* Direct RQ Tables */ if (!priv->direct_tir[ix].rqt.enabled) continue; + rqtn = priv->direct_tir[ix].rqt.rqtn; - mlx5e_redirect_rqt(priv, rqtn, 1, ix); + mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp); } } -static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) +static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *chs) +{ + struct mlx5e_redirect_rqt_param rrp = { + .is_rss = true, + { + .rss = { + .channels = chs, + .hfunc = chs->params.rss_hfunc, + } + }, + }; + + mlx5e_redirect_rqts(priv, rrp); +} + +static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv) +{ + struct mlx5e_redirect_rqt_param drop_rrp = { + .is_rss = false, + { + .rqn = priv->drop_rq.rqn, + }, + }; + + mlx5e_redirect_rqts(priv, drop_rrp); +} + +static void mlx5e_build_tir_ctx_lro(struct mlx5e_params *params, void *tirc) { - if (!priv->params.lro_en) + if (!params->lro_en) return; #define ROUGH_MAX_L2_L3_HDR_SZ 256 @@ -2072,13 +2305,13 @@ static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); MLX5_SET(tirc, tirc, lro_max_ip_payload_size, - (priv->params.lro_wqe_sz - - ROUGH_MAX_L2_L3_HDR_SZ) >> 8); - MLX5_SET(tirc, tirc, lro_timeout_period_usecs, priv->params.lro_timeout); + (params->lro_wqe_sz - ROUGH_MAX_L2_L3_HDR_SZ) >> 8); + MLX5_SET(tirc, tirc, lro_timeout_period_usecs, params->lro_timeout); } -void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc, - enum mlx5e_traffic_types tt) +void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_params *params, + enum mlx5e_traffic_types tt, + void *tirc) { void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); @@ -2094,16 +2327,15 @@ void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_priv *priv, void *tirc, MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_IPSEC_SPI) - MLX5_SET(tirc, tirc, rx_hash_fn, - 
mlx5e_rx_hash_fn(priv->params.rss_hfunc)); - if (priv->params.rss_hfunc == ETH_RSS_HASH_TOP) { + MLX5_SET(tirc, tirc, rx_hash_fn, mlx5e_rx_hash_fn(params->rss_hfunc)); + if (params->rss_hfunc == ETH_RSS_HASH_TOP) { void *rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); size_t len = MLX5_FLD_SZ_BYTES(tirc, rx_hash_toeplitz_key); MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); - memcpy(rss_key, priv->params.toeplitz_hash_key, len); + memcpy(rss_key, params->toeplitz_hash_key, len); } switch (tt) { @@ -2208,7 +2440,7 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) MLX5_SET(modify_tir_in, in, bitmask.lro, 1); tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx); - mlx5e_build_tir_ctx_lro(tirc, priv); + mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, @@ -2258,9 +2490,9 @@ static void mlx5e_query_mtu(struct mlx5e_priv *priv, u16 *mtu) *mtu = MLX5E_HW2SW_MTU(hw_mtu); } -static int mlx5e_set_dev_port_mtu(struct net_device *netdev) +static int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv) { - struct mlx5e_priv *priv = netdev_priv(netdev); + struct net_device *netdev = priv->netdev; u16 mtu; int err; @@ -2280,8 +2512,8 @@ static int mlx5e_set_dev_port_mtu(struct net_device *netdev) static void mlx5e_netdev_set_tcs(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - int nch = priv->params.num_channels; - int ntc = priv->params.num_tc; + int nch = priv->channels.params.num_channels; + int ntc = priv->channels.params.num_tc; int tc; netdev_reset_tc(netdev); @@ -2298,53 +2530,116 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev) netdev_set_tc_queue(netdev, tc, nch, 0); } +static void mlx5e_build_channels_tx_maps(struct mlx5e_priv *priv) +{ + struct mlx5e_channel *c; + struct mlx5e_txqsq *sq; + int i, tc; + + for (i = 0; i < priv->channels.num; i++) + for (tc = 0; tc < priv->profile->max_tc; tc++) + 
priv->channel_tc2txq[i][tc] = i + tc * priv->channels.num; + + for (i = 0; i < priv->channels.num; i++) { + c = priv->channels.c[i]; + for (tc = 0; tc < c->num_tc; tc++) { + sq = &c->sq[tc]; + priv->txq2sq[sq->txq_ix] = sq; + } + } +} + +static bool mlx5e_is_eswitch_vport_mngr(struct mlx5_core_dev *mdev) +{ + return (MLX5_CAP_GEN(mdev, vport_group_manager) && + MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH); +} + +void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) +{ + int num_txqs = priv->channels.num * priv->channels.params.num_tc; + struct net_device *netdev = priv->netdev; + + mlx5e_netdev_set_tcs(netdev); + netif_set_real_num_tx_queues(netdev, num_txqs); + netif_set_real_num_rx_queues(netdev, priv->channels.num); + + mlx5e_build_channels_tx_maps(priv); + mlx5e_activate_channels(&priv->channels); + netif_tx_start_all_queues(priv->netdev); + + if (mlx5e_is_eswitch_vport_mngr(priv->mdev)) + mlx5e_add_sqs_fwd_rules(priv); + + mlx5e_wait_channels_min_rx_wqes(&priv->channels); + mlx5e_redirect_rqts_to_channels(priv, &priv->channels); +} + +void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) +{ + mlx5e_redirect_rqts_to_drop(priv); + + if (mlx5e_is_eswitch_vport_mngr(priv->mdev)) + mlx5e_remove_sqs_fwd_rules(priv); + + /* FIXME: This is a W/A only for tx timeout watch dog false alarm when + * polling for inactive tx queues. 
+ */ + netif_tx_stop_all_queues(priv->netdev); + netif_tx_disable(priv->netdev); + mlx5e_deactivate_channels(&priv->channels); +} + +void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, + struct mlx5e_channels *new_chs, + mlx5e_fp_hw_modify hw_modify) +{ + struct net_device *netdev = priv->netdev; + int new_num_txqs; + + new_num_txqs = new_chs->num * new_chs->params.num_tc; + + netif_carrier_off(netdev); + + if (new_num_txqs < netdev->real_num_tx_queues) + netif_set_real_num_tx_queues(netdev, new_num_txqs); + + mlx5e_deactivate_priv_channels(priv); + mlx5e_close_channels(&priv->channels); + + priv->channels = *new_chs; + + /* New channels are ready to roll, modify HW settings if needed */ + if (hw_modify) + hw_modify(priv); + + mlx5e_refresh_tirs(priv, false); + mlx5e_activate_priv_channels(priv); + + mlx5e_update_carrier(priv); +} + int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_core_dev *mdev = priv->mdev; - int num_txqs; int err; set_bit(MLX5E_STATE_OPENED, &priv->state); - mlx5e_netdev_set_tcs(netdev); - - num_txqs = priv->params.num_channels * priv->params.num_tc; - netif_set_real_num_tx_queues(netdev, num_txqs); - netif_set_real_num_rx_queues(netdev, priv->params.num_channels); - - err = mlx5e_open_channels(priv); - if (err) { - netdev_err(netdev, "%s: mlx5e_open_channels failed, %d\n", - __func__, err); + err = mlx5e_open_channels(priv, &priv->channels); + if (err) goto err_clear_state_opened_flag; - } - - err = mlx5e_refresh_tirs_self_loopback(priv->mdev, false); - if (err) { - netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n", - __func__, err); - goto err_close_channels; - } - mlx5e_redirect_rqts(priv); + mlx5e_refresh_tirs(priv, false); + mlx5e_activate_priv_channels(priv); mlx5e_update_carrier(priv); mlx5e_timestamp_init(priv); -#ifdef CONFIG_RFS_ACCEL - priv->netdev->rx_cpu_rmap = priv->mdev->rmap; -#endif + if (priv->profile->update_stats) 
queue_delayed_work(priv->wq, &priv->update_stats_work, 0); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) { - err = mlx5e_add_sqs_fwd_rules(priv); - if (err) - goto err_close_channels; - } return 0; -err_close_channels: - mlx5e_close_channels(priv); err_clear_state_opened_flag: clear_bit(MLX5E_STATE_OPENED, &priv->state); return err; @@ -2365,7 +2660,6 @@ int mlx5e_open(struct net_device *netdev) int mlx5e_close_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_core_dev *mdev = priv->mdev; /* May already be CLOSED in case a previous configuration operation * (e.g RX/TX queue size change) that involves close&open failed. @@ -2375,13 +2669,10 @@ int mlx5e_close_locked(struct net_device *netdev) clear_bit(MLX5E_STATE_OPENED, &priv->state); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) - mlx5e_remove_sqs_fwd_rules(priv); - mlx5e_timestamp_cleanup(priv); netif_carrier_off(priv->netdev); - mlx5e_redirect_rqts(priv); - mlx5e_close_channels(priv); + mlx5e_deactivate_priv_channels(priv); + mlx5e_close_channels(&priv->channels); return 0; } @@ -2401,11 +2692,10 @@ int mlx5e_close(struct net_device *netdev) return err; } -static int mlx5e_create_drop_rq(struct mlx5e_priv *priv, - struct mlx5e_rq *rq, - struct mlx5e_rq_param *param) +static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev, + struct mlx5e_rq *rq, + struct mlx5e_rq_param *param) { - struct mlx5_core_dev *mdev = priv->mdev; void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); int err; @@ -2417,111 +2707,85 @@ static int mlx5e_create_drop_rq(struct mlx5e_priv *priv, if (err) return err; - rq->priv = priv; + rq->mdev = mdev; return 0; } -static int mlx5e_create_drop_cq(struct mlx5e_priv *priv, - struct mlx5e_cq *cq, - struct mlx5e_cq_param *param) +static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev, + struct mlx5e_cq *cq, + struct mlx5e_cq_param *param) { - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_core_cq *mcq = &cq->mcq; - int 
eqn_not_used; - unsigned int irqn; - int err; - - err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq, - &cq->wq_ctrl); - if (err) - return err; - - mlx5_vector2eqn(mdev, param->eq_ix, &eqn_not_used, &irqn); - - mcq->cqe_sz = 64; - mcq->set_ci_db = cq->wq_ctrl.db.db; - mcq->arm_db = cq->wq_ctrl.db.db + 1; - *mcq->set_ci_db = 0; - *mcq->arm_db = 0; - mcq->vector = param->eq_ix; - mcq->comp = mlx5e_completion_event; - mcq->event = mlx5e_cq_error_event; - mcq->irqn = irqn; - - cq->priv = priv; - - return 0; + return mlx5e_alloc_cq_common(mdev, param, cq); } -static int mlx5e_open_drop_rq(struct mlx5e_priv *priv) +static int mlx5e_open_drop_rq(struct mlx5_core_dev *mdev, + struct mlx5e_rq *drop_rq) { - struct mlx5e_cq_param cq_param; - struct mlx5e_rq_param rq_param; - struct mlx5e_rq *rq = &priv->drop_rq; - struct mlx5e_cq *cq = &priv->drop_rq.cq; + struct mlx5e_cq_param cq_param = {}; + struct mlx5e_rq_param rq_param = {}; + struct mlx5e_cq *cq = &drop_rq->cq; int err; - memset(&cq_param, 0, sizeof(cq_param)); - memset(&rq_param, 0, sizeof(rq_param)); mlx5e_build_drop_rq_param(&rq_param); - err = mlx5e_create_drop_cq(priv, cq, &cq_param); + err = mlx5e_alloc_drop_cq(mdev, cq, &cq_param); if (err) return err; - err = mlx5e_enable_cq(cq, &cq_param); + err = mlx5e_create_cq(cq, &cq_param); if (err) - goto err_destroy_cq; + goto err_free_cq; - err = mlx5e_create_drop_rq(priv, rq, &rq_param); + err = mlx5e_alloc_drop_rq(mdev, drop_rq, &rq_param); if (err) - goto err_disable_cq; + goto err_destroy_cq; - err = mlx5e_enable_rq(rq, &rq_param); + err = mlx5e_create_rq(drop_rq, &rq_param); if (err) - goto err_destroy_rq; + goto err_free_rq; return 0; -err_destroy_rq: - mlx5e_destroy_rq(&priv->drop_rq); - -err_disable_cq: - mlx5e_disable_cq(&priv->drop_rq.cq); +err_free_rq: + mlx5e_free_rq(drop_rq); err_destroy_cq: - mlx5e_destroy_cq(&priv->drop_rq.cq); + mlx5e_destroy_cq(cq); + +err_free_cq: + mlx5e_free_cq(cq); return err; } -static void mlx5e_close_drop_rq(struct 
mlx5e_priv *priv) +static void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq) { - mlx5e_disable_rq(&priv->drop_rq); - mlx5e_destroy_rq(&priv->drop_rq); - mlx5e_disable_cq(&priv->drop_rq.cq); - mlx5e_destroy_cq(&priv->drop_rq.cq); + mlx5e_destroy_rq(drop_rq); + mlx5e_free_rq(drop_rq); + mlx5e_destroy_cq(&drop_rq->cq); + mlx5e_free_cq(&drop_rq->cq); } -static int mlx5e_create_tis(struct mlx5e_priv *priv, int tc) +int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, + u32 underlay_qpn, u32 *tisn) { - struct mlx5_core_dev *mdev = priv->mdev; u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); MLX5_SET(tisc, tisc, prio, tc << 1); + MLX5_SET(tisc, tisc, underlay_qpn, underlay_qpn); MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); if (mlx5_lag_is_lacp_owner(mdev)) MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); - return mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]); + return mlx5_core_create_tis(mdev, in, sizeof(in), tisn); } -static void mlx5e_destroy_tis(struct mlx5e_priv *priv, int tc) +void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn) { - mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]); + mlx5_core_destroy_tis(mdev, tisn); } int mlx5e_create_tises(struct mlx5e_priv *priv) @@ -2530,7 +2794,7 @@ int mlx5e_create_tises(struct mlx5e_priv *priv) int tc; for (tc = 0; tc < priv->profile->max_tc; tc++) { - err = mlx5e_create_tis(priv, tc); + err = mlx5e_create_tis(priv->mdev, tc, 0, &priv->tisn[tc]); if (err) goto err_close_tises; } @@ -2539,7 +2803,7 @@ int mlx5e_create_tises(struct mlx5e_priv *priv) err_close_tises: for (tc--; tc >= 0; tc--) - mlx5e_destroy_tis(priv, tc); + mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); return err; } @@ -2549,34 +2813,34 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) int tc; for (tc = 0; tc < priv->profile->max_tc; tc++) - mlx5e_destroy_tis(priv, tc); + mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]); } -static void 
mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, - enum mlx5e_traffic_types tt) +static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, + enum mlx5e_traffic_types tt, + u32 *tirc) { MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); - mlx5e_build_tir_ctx_lro(tirc, priv); + mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); - mlx5e_build_indir_tir_ctx_hash(priv, tirc, tt); + mlx5e_build_indir_tir_ctx_hash(&priv->channels.params, tt, tirc); } -static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, - u32 rqtn) +static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) { MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); - mlx5e_build_tir_ctx_lro(tirc, priv); + mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, rqtn); MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); } -static int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) +int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) { struct mlx5e_tir *tir; void *tirc; @@ -2594,7 +2858,7 @@ static int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) memset(in, 0, inlen); tir = &priv->indir_tir[tt]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - mlx5e_build_indir_tir_ctx(priv, tirc, tt); + mlx5e_build_indir_tir_ctx(priv, tt, tirc); err = mlx5e_create_tir(priv->mdev, tir, in, inlen); if (err) goto err_destroy_tirs; @@ -2605,6 +2869,7 @@ static int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) return 0; err_destroy_tirs: + mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err); for (tt--; tt >= 0; tt--) mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]); @@ -2632,8 +2897,7 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) memset(in, 0, 
inlen); tir = &priv->direct_tir[ix]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); - mlx5e_build_direct_tir_ctx(priv, tirc, - priv->direct_tir[ix].rqt.rqtn); + mlx5e_build_direct_tir_ctx(priv, priv->direct_tir[ix].rqt.rqtn, tirc); err = mlx5e_create_tir(priv->mdev, tir, in, inlen); if (err) goto err_destroy_ch_tirs; @@ -2644,6 +2908,7 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) return 0; err_destroy_ch_tirs: + mlx5_core_warn(priv->mdev, "create direct tirs failed, %d\n", err); for (ix--; ix >= 0; ix--) mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]); @@ -2652,7 +2917,7 @@ err_destroy_ch_tirs: return err; } -static void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) +void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) { int i; @@ -2669,16 +2934,27 @@ void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]); } -int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd) +static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable) { int err = 0; int i; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - return 0; + for (i = 0; i < chs->num; i++) { + err = mlx5e_modify_rq_scatter_fcs(&chs->c[i]->rq, enable); + if (err) + return err; + } - for (i = 0; i < priv->params.num_channels; i++) { - err = mlx5e_modify_rq_vsd(&priv->channel[i]->rq, vsd); + return 0; +} + +static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) +{ + int err = 0; + int i; + + for (i = 0; i < chs->num; i++) { + err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd); if (err) return err; } @@ -2689,7 +2965,7 @@ int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd) static int mlx5e_setup_tc(struct net_device *netdev, u8 tc) { struct mlx5e_priv *priv = netdev_priv(netdev); - bool was_opened; + struct mlx5e_channels new_channels = {}; int err = 0; if (tc && tc != MLX5E_MAX_NUM_TC) @@ -2697,17 +2973,21 @@ static int mlx5e_setup_tc(struct net_device *netdev, u8 tc) 
mutex_lock(&priv->state_lock); - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) - mlx5e_close_locked(priv->netdev); + new_channels.params = priv->channels.params; + new_channels.params.num_tc = tc ? tc : 1; - priv->params.num_tc = tc ? tc : 1; + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } - if (was_opened) - err = mlx5e_open_locked(priv->netdev); + err = mlx5e_open_channels(priv, &new_channels); + if (err) + goto out; + mlx5e_switch_priv_channels(priv, &new_channels, NULL); +out: mutex_unlock(&priv->state_lock); - return err; } @@ -2737,7 +3017,9 @@ mqprio: if (tc->type != TC_SETUP_MQPRIO) return -EINVAL; - return mlx5e_setup_tc(dev, tc->tc); + tc->mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS; + + return mlx5e_setup_tc(dev, tc->mqprio->num_tc); } static void @@ -2822,26 +3104,31 @@ typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable); static int set_feature_lro(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); - bool was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - int err; + struct mlx5e_channels new_channels = {}; + int err = 0; + bool reset; mutex_lock(&priv->state_lock); - if (was_opened && (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST)) - mlx5e_close_locked(priv->netdev); + reset = (priv->channels.params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST); + reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state); - priv->params.lro_en = enable; - err = mlx5e_modify_tirs_lro(priv); - if (err) { - netdev_err(netdev, "lro modify failed, %d\n", err); - priv->params.lro_en = !enable; + new_channels.params = priv->channels.params; + new_channels.params.lro_en = enable; + + if (!reset) { + priv->channels.params = new_channels.params; + err = mlx5e_modify_tirs_lro(priv); + goto out; } - if (was_opened && (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST)) - mlx5e_open_locked(priv->netdev); + err = 
mlx5e_open_channels(priv, &new_channels); + if (err) + goto out; + mlx5e_switch_priv_channels(priv, &new_channels, mlx5e_modify_tirs_lro); +out: mutex_unlock(&priv->state_lock); - return err; } @@ -2878,23 +3165,44 @@ static int set_feature_rx_all(struct net_device *netdev, bool enable) return mlx5_set_port_fcs(mdev, !enable); } -static int set_feature_rx_vlan(struct net_device *netdev, bool enable) +static int set_feature_rx_fcs(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); int err; mutex_lock(&priv->state_lock); - priv->params.vlan_strip_disable = !enable; - err = mlx5e_modify_rqs_vsd(priv, !enable); + priv->channels.params.scatter_fcs_en = enable; + err = mlx5e_modify_channels_scatter_fcs(&priv->channels, enable); if (err) - priv->params.vlan_strip_disable = enable; + priv->channels.params.scatter_fcs_en = !enable; mutex_unlock(&priv->state_lock); return err; } +static int set_feature_rx_vlan(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err = 0; + + mutex_lock(&priv->state_lock); + + priv->channels.params.vlan_strip_disable = !enable; + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + err = mlx5e_modify_channels_vsd(&priv->channels, !enable); + if (err) + priv->channels.params.vlan_strip_disable = enable; + +unlock: + mutex_unlock(&priv->state_lock); + + return err; +} + #ifdef CONFIG_RFS_ACCEL static int set_feature_arfs(struct net_device *netdev, bool enable) { @@ -2947,6 +3255,8 @@ static int mlx5e_set_features(struct net_device *netdev, set_feature_tc_num_filters); err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXALL, set_feature_rx_all); + err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXFCS, + set_feature_rx_fcs); err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan); #ifdef CONFIG_RFS_ACCEL @@ -2960,28 +3270,38 @@ static int mlx5e_set_features(struct net_device *netdev, static int 
mlx5e_change_mtu(struct net_device *netdev, int new_mtu) { struct mlx5e_priv *priv = netdev_priv(netdev); - bool was_opened; + struct mlx5e_channels new_channels = {}; + int curr_mtu; int err = 0; bool reset; mutex_lock(&priv->state_lock); - reset = !priv->params.lro_en && - (priv->params.rq_wq_type != + reset = !priv->channels.params.lro_en && + (priv->channels.params.rq_wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ); - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened && reset) - mlx5e_close_locked(netdev); + reset = reset && test_bit(MLX5E_STATE_OPENED, &priv->state); + curr_mtu = netdev->mtu; netdev->mtu = new_mtu; - mlx5e_set_dev_port_mtu(netdev); - if (was_opened && reset) - err = mlx5e_open_locked(netdev); + if (!reset) { + mlx5e_set_dev_port_mtu(priv); + goto out; + } - mutex_unlock(&priv->state_lock); + new_channels.params = priv->channels.params; + err = mlx5e_open_channels(priv, &new_channels); + if (err) { + netdev->mtu = curr_mtu; + goto out; + } + + mlx5e_switch_priv_channels(priv, &new_channels, mlx5e_set_dev_port_mtu); +out: + mutex_unlock(&priv->state_lock); return err; } @@ -3186,8 +3506,8 @@ static void mlx5e_tx_timeout(struct net_device *dev) netdev_err(dev, "TX timeout detected\n"); - for (i = 0; i < priv->params.num_channels * priv->params.num_tc; i++) { - struct mlx5e_sq *sq = priv->txq_to_sq_map[i]; + for (i = 0; i < priv->channels.num * priv->channels.params.num_tc; i++) { + struct mlx5e_txqsq *sq = priv->txq2sq[i]; if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i))) continue; @@ -3219,7 +3539,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); /* no need for full reset when exchanging programs */ - reset = (!priv->xdp_prog || !prog); + reset = (!priv->channels.params.xdp_prog || !prog); if (was_opened && reset) mlx5e_close_locked(netdev); @@ -3227,7 +3547,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog 
*prog) /* num_channels is invariant here, so we can take the * batched reference right upfront. */ - prog = bpf_prog_add(prog, priv->params.num_channels); + prog = bpf_prog_add(prog, priv->channels.num); if (IS_ERR(prog)) { err = PTR_ERR(prog); goto unlock; @@ -3237,12 +3557,12 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) /* exchange programs, extra prog reference we got from caller * as long as we don't fail from this point onwards. */ - old_prog = xchg(&priv->xdp_prog, prog); + old_prog = xchg(&priv->channels.params.xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); if (reset) /* change RQ type according to priv->xdp_prog */ - mlx5e_set_rq_priv_params(priv); + mlx5e_set_rq_params(priv->mdev, &priv->channels.params); if (was_opened && reset) mlx5e_open_locked(netdev); @@ -3253,8 +3573,8 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) /* exchanging programs w/o reset, we update ref counts on behalf * of the channels RQs here. 
*/ - for (i = 0; i < priv->params.num_channels; i++) { - struct mlx5e_channel *c = priv->channel[i]; + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); napi_synchronize(&c->napi); @@ -3280,7 +3600,7 @@ static bool mlx5e_xdp_attached(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - return !!priv->xdp_prog; + return !!priv->channels.params.xdp_prog; } static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) @@ -3303,10 +3623,12 @@ static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) static void mlx5e_netpoll(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_channels *chs = &priv->channels; + int i; - for (i = 0; i < priv->params.num_channels; i++) - napi_schedule(&priv->channel[i]->napi); + for (i = 0; i < chs->num; i++) + napi_schedule(&chs->c[i]->napi); } #endif @@ -3463,6 +3785,12 @@ static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw) (pci_bw < 40000) && (pci_bw < link_speed)); } +static bool hw_lro_heuristic(u32 link_speed, u32 pci_bw) +{ + return !(link_speed && pci_bw && + (pci_bw <= 16000) && (pci_bw < link_speed)); +} + void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) { params->rx_cq_period_mode = cq_period_mode; @@ -3475,6 +3803,13 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) params->rx_cq_moderation.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; + + if (params->rx_am_enabled) + params->rx_cq_moderation = + mlx5e_am_get_def_profile(params->rx_cq_period_mode); + + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER, + params->rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); } u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) @@ -3489,75 +3824,81 @@ u32 mlx5e_choose_lro_timeout(struct 
mlx5_core_dev *mdev, u32 wanted_timeout) return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); } -static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv) +void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params, + u16 max_channels) { - struct mlx5e_priv *priv = netdev_priv(netdev); + u8 cq_period_mode = 0; u32 link_speed = 0; u32 pci_bw = 0; - u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? - MLX5_CQ_PERIOD_MODE_START_FROM_CQE : - MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - priv->mdev = mdev; - priv->netdev = netdev; - priv->params.num_channels = profile->max_nch(mdev); - priv->profile = profile; - priv->ppriv = ppriv; + params->num_channels = max_channels; + params->num_tc = 1; - priv->params.lro_timeout = - mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); + mlx5e_get_max_linkspeed(mdev, &link_speed); + mlx5e_get_pci_bw(mdev, &pci_bw); + mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n", + link_speed, pci_bw); - priv->params.log_sq_size = is_kdump_kernel() ? + /* SQ */ + params->log_sq_size = is_kdump_kernel() ? 
MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; /* set CQE compression */ - priv->params.rx_cqe_compress_def = false; + params->rx_cqe_compress_def = false; if (MLX5_CAP_GEN(mdev, cqe_compression) && - MLX5_CAP_GEN(mdev, vport_group_manager)) { - mlx5e_get_max_linkspeed(mdev, &link_speed); - mlx5e_get_pci_bw(mdev, &pci_bw); - mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n", - link_speed, pci_bw); - priv->params.rx_cqe_compress_def = - cqe_compress_heuristic(link_speed, pci_bw); - } - - MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, - priv->params.rx_cqe_compress_def); - - mlx5e_set_rq_priv_params(priv); - if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) - priv->params.lro_en = true; - - priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); - mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); - - priv->params.tx_cq_moderation.usec = - MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; - priv->params.tx_cq_moderation.pkts = - MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; - priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); - mlx5_query_min_inline(mdev, &priv->params.tx_min_inline_mode); - if (priv->params.tx_min_inline_mode == MLX5_INLINE_MODE_NONE && + MLX5_CAP_GEN(mdev, vport_group_manager)) + params->rx_cqe_compress_def = cqe_compress_heuristic(link_speed, pci_bw); + + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); + + /* RQ */ + mlx5e_set_rq_params(mdev, params); + + /* HW LRO */ + /* TODO: && MLX5_CAP_ETH(mdev, lro_cap) */ + if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) + params->lro_en = hw_lro_heuristic(link_speed, pci_bw); + params->lro_timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); + + /* CQ moderation params */ + cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + params->rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(params, cq_period_mode); + + params->tx_cq_moderation.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; + params->tx_cq_moderation.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; + + /* TX inline */ + params->tx_max_inline = mlx5e_get_max_inline_cap(mdev); + mlx5_query_min_inline(mdev, &params->tx_min_inline_mode); + if (params->tx_min_inline_mode == MLX5_INLINE_MODE_NONE && !MLX5_CAP_ETH(mdev, wqe_vlan_insert)) - priv->params.tx_min_inline_mode = MLX5_INLINE_MODE_L2; + params->tx_min_inline_mode = MLX5_INLINE_MODE_L2; - priv->params.num_tc = 1; - priv->params.rss_hfunc = ETH_RSS_HASH_XOR; + /* RSS */ + params->rss_hfunc = ETH_RSS_HASH_XOR; + netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key)); + mlx5e_build_default_indir_rqt(mdev, params->indirection_rqt, + MLX5E_INDIR_RQT_SIZE, max_channels); +} - netdev_rss_key_fill(priv->params.toeplitz_hash_key, - sizeof(priv->params.toeplitz_hash_key)); +static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev)); + priv->mdev = mdev; + priv->netdev = netdev; + priv->profile = profile; + priv->ppriv = ppriv; - /* Initialize pflags */ - MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, - priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); + mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev)); mutex_init(&priv->state_lock); @@ -3642,13 +3983,19 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) if (fcs_supported) netdev->hw_features |= NETIF_F_RXALL; + if (MLX5_CAP_ETH(mdev, scatter_fcs)) + netdev->hw_features |= 
NETIF_F_RXFCS; + netdev->features = netdev->hw_features; - if (!priv->params.lro_en) + if (!priv->channels.params.lro_en) netdev->features &= ~NETIF_F_LRO; if (fcs_enabled) netdev->features &= ~NETIF_F_RXALL; + if (!priv->channels.params.scatter_fcs_en) + netdev->features &= ~NETIF_F_RXFCS; + #define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f) if (FT_CAP(flow_modify_en) && FT_CAP(modify_root) && @@ -3708,39 +4055,30 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { mlx5e_vxlan_cleanup(priv); - if (priv->xdp_prog) - bpf_prog_put(priv->xdp_prog); + if (priv->channels.params.xdp_prog) + bpf_prog_put(priv->channels.params.xdp_prog); } static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; int err; - int i; - err = mlx5e_create_indirect_rqts(priv); - if (err) { - mlx5_core_warn(mdev, "create indirect rqts failed, %d\n", err); + err = mlx5e_create_indirect_rqt(priv); + if (err) return err; - } err = mlx5e_create_direct_rqts(priv); - if (err) { - mlx5_core_warn(mdev, "create direct rqts failed, %d\n", err); + if (err) goto err_destroy_indirect_rqts; - } err = mlx5e_create_indirect_tirs(priv); - if (err) { - mlx5_core_warn(mdev, "create indirect tirs failed, %d\n", err); + if (err) goto err_destroy_direct_rqts; - } err = mlx5e_create_direct_tirs(priv); - if (err) { - mlx5_core_warn(mdev, "create direct tirs failed, %d\n", err); + if (err) goto err_destroy_indirect_tirs; - } err = mlx5e_create_flow_steering(priv); if (err) { @@ -3761,8 +4099,7 @@ err_destroy_direct_tirs: err_destroy_indirect_tirs: mlx5e_destroy_indirect_tirs(priv); err_destroy_direct_rqts: - for (i = 0; i < priv->profile->max_nch(mdev); i++) - mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + mlx5e_destroy_direct_rqts(priv); err_destroy_indirect_rqts: mlx5e_destroy_rqt(priv, &priv->indir_rqt); return err; @@ -3770,14 +4107,11 @@ err_destroy_indirect_rqts: static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) { - int i; - 
mlx5e_tc_cleanup(priv); mlx5e_destroy_flow_steering(priv); mlx5e_destroy_direct_tirs(priv); mlx5e_destroy_indirect_tirs(priv); - for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) - mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + mlx5e_destroy_direct_rqts(priv); mlx5e_destroy_rqt(priv, &priv->indir_rqt); } @@ -3801,21 +4135,22 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_eswitch_rep rep; + u16 max_mtu; + + mlx5e_init_l2_addr(priv); + + /* MTU range: 68 - hw-specific max */ + netdev->min_mtu = ETH_MIN_MTU; + mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); + netdev->max_mtu = MLX5E_HW2SW_MTU(max_mtu); + mlx5e_set_dev_port_mtu(priv); mlx5_lag_add(mdev, netdev); mlx5e_enable_async_events(priv); - if (MLX5_CAP_GEN(mdev, vport_group_manager)) { - mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); - rep.load = mlx5e_nic_rep_load; - rep.unload = mlx5e_nic_rep_unload; - rep.vport = FDB_UPLINK_VPORT; - rep.netdev = netdev; - mlx5_eswitch_register_vport_rep(esw, 0, &rep); - } + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + mlx5e_register_vport_reps(priv); if (netdev->reg_state != NETREG_REGISTERED) return; @@ -3828,16 +4163,29 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) } queue_work(priv->wq, &priv->set_rx_mode_work); + + rtnl_lock(); + if (netif_running(netdev)) + mlx5e_open(netdev); + netif_device_attach(netdev); + rtnl_unlock(); } static void mlx5e_nic_disable(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; + + rtnl_lock(); + if (netif_running(priv->netdev)) + mlx5e_close(priv->netdev); + netif_device_detach(priv->netdev); + rtnl_unlock(); queue_work(priv->wq, &priv->set_rx_mode_work); + if (MLX5_CAP_GEN(mdev, vport_group_manager)) - mlx5_eswitch_unregister_vport_rep(esw, 0); + mlx5e_unregister_vport_reps(priv); + 
mlx5e_disable_async_events(priv); mlx5_lag_remove(mdev); } @@ -3853,9 +4201,13 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .disable = mlx5e_nic_disable, .update_stats = mlx5e_update_stats, .max_nch = mlx5e_get_max_num_channels, + .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe, + .rx_handlers.handle_rx_cqe_mpwqe = mlx5e_handle_rx_cqe_mpwrq, .max_tc = MLX5E_MAX_NUM_TC, }; +/* mlx5e generic netdev management API (move to en_common.c) */ + struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, void *ppriv) @@ -3872,6 +4224,10 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, return NULL; } +#ifdef CONFIG_RFS_ACCEL + netdev->rx_cpu_rmap = mdev->rmap; +#endif + profile->init(mdev, netdev, profile, ppriv); netif_carrier_off(netdev); @@ -3891,14 +4247,12 @@ err_cleanup_nic: return NULL; } -int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) +int mlx5e_attach_netdev(struct mlx5e_priv *priv) { + struct mlx5_core_dev *mdev = priv->mdev; const struct mlx5e_profile *profile; - struct mlx5e_priv *priv; - u16 max_mtu; int err; - priv = netdev_priv(netdev); profile = priv->profile; clear_bit(MLX5E_STATE_DESTROYING, &priv->state); @@ -3906,7 +4260,7 @@ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) if (err) goto out; - err = mlx5e_open_drop_rq(priv); + err = mlx5e_open_drop_rq(mdev, &priv->drop_rq); if (err) { mlx5_core_err(mdev, "open drop rq failed, %d\n", err); goto err_cleanup_tx; @@ -3918,28 +4272,13 @@ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) mlx5e_create_q_counter(priv); - mlx5e_init_l2_addr(priv); - - /* MTU range: 68 - hw-specific max */ - netdev->min_mtu = ETH_MIN_MTU; - mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1); - netdev->max_mtu = MLX5E_HW2SW_MTU(max_mtu); - - mlx5e_set_dev_port_mtu(netdev); - if (profile->enable) profile->enable(priv); - rtnl_lock(); - if (netif_running(netdev)) - 
mlx5e_open(netdev); - netif_device_attach(netdev); - rtnl_unlock(); - return 0; err_close_drop_rq: - mlx5e_close_drop_rq(priv); + mlx5e_close_drop_rq(&priv->drop_rq); err_cleanup_tx: profile->cleanup_tx(priv); @@ -3948,66 +4287,34 @@ out: return err; } -static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) -{ - struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; - u8 mac[ETH_ALEN]; - - if (!MLX5_CAP_GEN(mdev, vport_group_manager)) - return; - - mlx5_query_nic_vport_mac_address(mdev, 0, mac); - - for (vport = 1; vport < total_vfs; vport++) { - struct mlx5_eswitch_rep rep; - - rep.load = mlx5e_vport_rep_load; - rep.unload = mlx5e_vport_rep_unload; - rep.vport = vport; - ether_addr_copy(rep.hw_id, mac); - mlx5_eswitch_register_vport_rep(esw, vport, &rep); - } -} - -static void mlx5e_unregister_vport_rep(struct mlx5_core_dev *mdev) -{ - struct mlx5_eswitch *esw = mdev->priv.eswitch; - int total_vfs = MLX5_TOTAL_VPORTS(mdev); - int vport; - - if (!MLX5_CAP_GEN(mdev, vport_group_manager)) - return; - - for (vport = 1; vport < total_vfs; vport++) - mlx5_eswitch_unregister_vport_rep(esw, vport); -} - -void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) +void mlx5e_detach_netdev(struct mlx5e_priv *priv) { - struct mlx5e_priv *priv = netdev_priv(netdev); const struct mlx5e_profile *profile = priv->profile; set_bit(MLX5E_STATE_DESTROYING, &priv->state); - rtnl_lock(); - if (netif_running(netdev)) - mlx5e_close(netdev); - netif_device_detach(netdev); - rtnl_unlock(); - if (profile->disable) profile->disable(priv); flush_workqueue(priv->wq); mlx5e_destroy_q_counter(priv); profile->cleanup_rx(priv); - mlx5e_close_drop_rq(priv); + mlx5e_close_drop_rq(&priv->drop_rq); profile->cleanup_tx(priv); cancel_delayed_work_sync(&priv->update_stats_work); } +void mlx5e_destroy_netdev(struct mlx5e_priv *priv) +{ + const struct mlx5e_profile *profile = priv->profile; + struct net_device *netdev = 
priv->netdev; + + destroy_workqueue(priv->wq); + if (profile->cleanup) + profile->cleanup(priv); + free_netdev(netdev); +} + /* mlx5e_attach and mlx5e_detach scope should be only creating/destroying * hardware contexts and to connect it to the current netdev. */ @@ -4024,13 +4331,12 @@ static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv) if (err) return err; - err = mlx5e_attach_netdev(mdev, netdev); + err = mlx5e_attach_netdev(priv); if (err) { mlx5e_destroy_mdev_resources(mdev); return err; } - mlx5e_register_vport_rep(mdev); return 0; } @@ -4042,8 +4348,7 @@ static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) if (!netif_device_present(netdev)) return; - mlx5e_unregister_vport_rep(mdev); - mlx5e_detach_netdev(mdev, netdev); + mlx5e_detach_netdev(priv); mlx5e_destroy_mdev_resources(mdev); } @@ -4051,7 +4356,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) { struct mlx5_eswitch *esw = mdev->priv.eswitch; int total_vfs = MLX5_TOTAL_VPORTS(mdev); - void *ppriv = NULL; + struct mlx5e_rep_priv *rpriv = NULL; void *priv; int vport; int err; @@ -4061,10 +4366,17 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) if (err) return NULL; - if (MLX5_CAP_GEN(mdev, vport_group_manager)) - ppriv = &esw->offloads.vport_reps[0]; + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); + if (!rpriv) { + mlx5_core_warn(mdev, + "Not creating net device, Failed to alloc rep priv data\n"); + return NULL; + } + rpriv->rep = &esw->offloads.vport_reps[0]; + } - netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv); + netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, rpriv); if (!netdev) { mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); goto err_unregister_reps; @@ -4090,33 +4402,25 @@ err_detach: mlx5e_detach(mdev, priv); err_destroy_netdev: - mlx5e_destroy_netdev(mdev, priv); + mlx5e_destroy_netdev(priv); err_unregister_reps: for (vport = 1; vport < total_vfs; vport++) 
mlx5_eswitch_unregister_vport_rep(esw, vport); + kfree(rpriv); return NULL; } -void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) -{ - const struct mlx5e_profile *profile = priv->profile; - struct net_device *netdev = priv->netdev; - - destroy_workqueue(priv->wq); - if (profile->cleanup) - profile->cleanup(priv); - free_netdev(netdev); -} - static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) { struct mlx5e_priv *priv = vpriv; + void *ppriv = priv->ppriv; unregister_netdev(priv->netdev); mlx5e_detach(mdev, vpriv); - mlx5e_destroy_netdev(mdev, priv); + mlx5e_destroy_netdev(priv); + kfree(ppriv); } static void *mlx5e_get_netdev(void *vpriv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index f621373bd7a5..79462c0368a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -34,10 +34,14 @@ #include <linux/mlx5/fs.h> #include <net/switchdev.h> #include <net/pkt_cls.h> +#include <net/netevent.h> +#include <net/arp.h> #include "eswitch.h" #include "en.h" +#include "en_rep.h" #include "en_tc.h" +#include "fs_core.h" static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; @@ -75,7 +79,8 @@ static void mlx5e_rep_get_strings(struct net_device *dev, static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct rtnl_link_stats64 *vport_stats; struct ifla_vf_stats vf_stats; int err; @@ -102,14 +107,16 @@ static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) int i, j; memset(s, 0, sizeof(*s)); - for (i = 0; i < priv->params.num_channels; i++) { - rq_stats = &priv->channel[i]->rq.stats; + for (i = 0; i < priv->channels.num; i++) { + struct mlx5e_channel *c = priv->channels.c[i]; + + rq_stats = 
&c->rq.stats; s->rx_packets += rq_stats->packets; s->rx_bytes += rq_stats->bytes; - for (j = 0; j < priv->params.num_tc; j++) { - sq_stats = &priv->channel[i]->sq[j].stats; + for (j = 0; j < priv->channels.params.num_tc; j++) { + sq_stats = &c->sq[j].stats; s->tx_packets += sq_stats->packets; s->tx_bytes += sq_stats->bytes; @@ -163,7 +170,8 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = { int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; if (esw->mode == SRIOV_NONE) @@ -182,66 +190,426 @@ int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) } int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) - { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5e_channel *c; - int n, tc, err, num_sqs = 0; + int n, tc, num_sqs = 0; + int err = -ENOMEM; u16 *sqs; - sqs = kcalloc(priv->params.num_channels * priv->params.num_tc, sizeof(u16), GFP_KERNEL); + sqs = kcalloc(priv->channels.num * priv->channels.params.num_tc, sizeof(u16), GFP_KERNEL); if (!sqs) - return -ENOMEM; + goto out; - for (n = 0; n < priv->params.num_channels; n++) { - c = priv->channel[n]; + for (n = 0; n < priv->channels.num; n++) { + c = priv->channels.c[n]; for (tc = 0; tc < c->num_tc; tc++) sqs[num_sqs++] = c->sq[tc].sqn; } err = mlx5_eswitch_sqs2vport_start(esw, rep, sqs, num_sqs); - kfree(sqs); + +out: + if (err) + netdev_warn(priv->netdev, "Failed to add SQs FWD rules %d\n", err); return err; } -int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) { - struct net_device *netdev = rep->netdev; 
+ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; + + mlx5_eswitch_sqs2vport_stop(esw, rep); +} + +static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv) +{ +#if IS_ENABLED(CONFIG_IPV6) + unsigned long ipv6_interval = NEIGH_VAR(&ipv6_stub->nd_tbl->parms, + DELAY_PROBE_TIME); +#else + unsigned long ipv6_interval = ~0UL; +#endif + unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, + DELAY_PROBE_TIME); + struct net_device *netdev = rpriv->rep->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - return mlx5e_add_sqs_fwd_rules(priv); - return 0; + rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval); + mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval); } -void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; - mlx5_eswitch_sqs2vport_stop(esw, rep); + mlx5_fc_queue_stats_work(priv->mdev, + &neigh_update->neigh_stats_work, + neigh_update->min_interval); } -void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep) +static void mlx5e_rep_neigh_stats_work(struct work_struct *work) { - struct net_device *netdev = rep->netdev; + struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv, + neigh_update.neigh_stats_work.work); + struct net_device *netdev = rpriv->rep->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_neigh_hash_entry *nhe; - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_remove_sqs_fwd_rules(priv); + rtnl_lock(); + if (!list_empty(&rpriv->neigh_update.neigh_list)) + 
mlx5e_rep_queue_neigh_stats_work(priv); - /* clean (and re-init) existing uplink offloaded TC rules */ - mlx5e_tc_cleanup(priv); - mlx5e_tc_init(priv); + list_for_each_entry(nhe, &rpriv->neigh_update.neigh_list, neigh_list) + mlx5e_tc_update_neigh_used_value(nhe); + + rtnl_unlock(); +} + +static void mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe) +{ + refcount_inc(&nhe->refcnt); +} + +static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe) +{ + if (refcount_dec_and_test(&nhe->refcnt)) + kfree(nhe); +} + +static void mlx5e_rep_update_flows(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + bool neigh_connected, + unsigned char ha[ETH_ALEN]) +{ + struct ethhdr *eth = (struct ethhdr *)e->encap_header; + + ASSERT_RTNL(); + + if ((!neigh_connected && (e->flags & MLX5_ENCAP_ENTRY_VALID)) || + !ether_addr_equal(e->h_dest, ha)) + mlx5e_tc_encap_flows_del(priv, e); + + if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { + ether_addr_copy(e->h_dest, ha); + ether_addr_copy(eth->h_dest, ha); + + mlx5e_tc_encap_flows_add(priv, e); + } +} + +static void mlx5e_rep_neigh_update(struct work_struct *work) +{ + struct mlx5e_neigh_hash_entry *nhe = + container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work); + struct neighbour *n = nhe->n; + struct mlx5e_encap_entry *e; + unsigned char ha[ETH_ALEN]; + struct mlx5e_priv *priv; + bool neigh_connected; + bool encap_connected; + u8 nud_state, dead; + + rtnl_lock(); + + /* If these parameters are changed after we release the lock, + * we'll receive another event letting us know about it. + * We use this lock to avoid inconsistency between the neigh validity + * and it's hw address. 
+ */ + read_lock_bh(&n->lock); + memcpy(ha, n->ha, ETH_ALEN); + nud_state = n->nud_state; + dead = n->dead; + read_unlock_bh(&n->lock); + + neigh_connected = (nud_state & NUD_VALID) && !dead; + + list_for_each_entry(e, &nhe->encap_list, encap_list) { + encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); + priv = netdev_priv(e->out_dev); + + if (encap_connected != neigh_connected || + !ether_addr_equal(e->h_dest, ha)) + mlx5e_rep_update_flows(priv, e, neigh_connected, ha); + } + mlx5e_rep_neigh_entry_release(nhe); + rtnl_unlock(); + neigh_release(n); +} + +static struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh); + +static int mlx5e_rep_netevent_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv, + neigh_update.netevent_nb); + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct net_device *netdev = rpriv->rep->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_neigh_hash_entry *nhe = NULL; + struct mlx5e_neigh m_neigh = {}; + struct neigh_parms *p; + struct neighbour *n; + bool found = false; + + switch (event) { + case NETEVENT_NEIGH_UPDATE: + n = ptr; +#if IS_ENABLED(CONFIG_IPV6) + if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl) +#else + if (n->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + m_neigh.dev = n->dev; + m_neigh.family = n->ops->family; + memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + + /* We are in atomic context and can't take RTNL mutex, so use + * spin_lock_bh to lookup the neigh table. bh is used since + * netevent can be called from a softirq context. 
+ */ + spin_lock_bh(&neigh_update->encap_lock); + nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh); + if (!nhe) { + spin_unlock_bh(&neigh_update->encap_lock); + return NOTIFY_DONE; + } + + /* This assignment is valid as long as the neigh reference + * is taken + */ + nhe->n = n; + + /* Take a reference to ensure the neighbour and mlx5 encap + * entry won't be destructed until we drop the reference in + * delayed work. + */ + neigh_hold(n); + mlx5e_rep_neigh_entry_hold(nhe); + + if (!queue_work(priv->wq, &nhe->neigh_update_work)) { + mlx5e_rep_neigh_entry_release(nhe); + neigh_release(n); + } + spin_unlock_bh(&neigh_update->encap_lock); + break; + + case NETEVENT_DELAY_PROBE_TIME_UPDATE: + p = ptr; + + /* We check the device is present since we don't care about + * changes in the default table, we only care about changes + * done per device delay probe time parameter. + */ +#if IS_ENABLED(CONFIG_IPV6) + if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl)) +#else + if (!p->dev || p->tbl != &arp_tbl) +#endif + return NOTIFY_DONE; + + /* We are in atomic context and can't take RTNL mutex, + * so use spin_lock_bh to walk the neigh list and look for + * the relevant device. bh is used since netevent can be + * called from a softirq context. 
+ */ + spin_lock_bh(&neigh_update->encap_lock); + list_for_each_entry(nhe, &neigh_update->neigh_list, neigh_list) { + if (p->dev == nhe->m_neigh.dev) { + found = true; + break; + } + } + spin_unlock_bh(&neigh_update->encap_lock); + if (!found) + return NOTIFY_DONE; + + neigh_update->min_interval = min_t(unsigned long, + NEIGH_VAR(p, DELAY_PROBE_TIME), + neigh_update->min_interval); + mlx5_fc_update_sampling_interval(priv->mdev, + neigh_update->min_interval); + break; + } + return NOTIFY_DONE; +} + +static const struct rhashtable_params mlx5e_neigh_ht_params = { + .head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node), + .key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh), + .key_len = sizeof(struct mlx5e_neigh), + .automatic_shrinking = true, +}; + +static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + int err; + + err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params); + if (err) + return err; + + INIT_LIST_HEAD(&neigh_update->neigh_list); + spin_lock_init(&neigh_update->encap_lock); + INIT_DELAYED_WORK(&neigh_update->neigh_stats_work, + mlx5e_rep_neigh_stats_work); + mlx5e_rep_neigh_update_init_interval(rpriv); + + rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event; + err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb); + if (err) + goto out_err; + return 0; + +out_err: + rhashtable_destroy(&neigh_update->neigh_ht); + return err; +} + +static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) +{ + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + struct mlx5e_priv *priv = netdev_priv(rpriv->rep->netdev); + + unregister_netevent_notifier(&neigh_update->netevent_nb); + + flush_workqueue(priv->wq); /* flush neigh update works */ + + cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work); + + rhashtable_destroy(&neigh_update->neigh_ht); +} + +static int 
mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + int err; + + err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht, + &nhe->rhash_node, + mlx5e_neigh_ht_params); + if (err) + return err; + + list_add(&nhe->neigh_list, &rpriv->neigh_update.neigh_list); + + return err; +} + +static void mlx5e_rep_neigh_entry_remove(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + spin_lock_bh(&rpriv->neigh_update.encap_lock); + + list_del(&nhe->neigh_list); + + rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht, + &nhe->rhash_node, + mlx5e_neigh_ht_params); + spin_unlock_bh(&rpriv->neigh_update.encap_lock); +} + +/* This function must only be called under RTNL lock or under the + * representor's encap_lock in case RTNL mutex can't be held. + */ +static struct mlx5e_neigh_hash_entry * +mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv, + struct mlx5e_neigh *m_neigh) +{ + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update; + + return rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh, + mlx5e_neigh_ht_params); +} + +static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e, + struct mlx5e_neigh_hash_entry **nhe) +{ + int err; + + *nhe = kzalloc(sizeof(**nhe), GFP_KERNEL); + if (!*nhe) + return -ENOMEM; + + memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh)); + INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update); + INIT_LIST_HEAD(&(*nhe)->encap_list); + refcount_set(&(*nhe)->refcnt, 1); + + err = mlx5e_rep_neigh_entry_insert(priv, *nhe); + if (err) + goto out_free; + return 0; + +out_free: + kfree(*nhe); + return err; +} + +static void mlx5e_rep_neigh_entry_destroy(struct mlx5e_priv *priv, + struct mlx5e_neigh_hash_entry *nhe) +{ + /* The neigh hash entry must be removed from the hash table 
regardless + * of the reference count value, so it won't be found by the next + * neigh notification call. The neigh hash entry reference count is + * incremented only during creation and neigh notification calls and + * protects from freeing the nhe struct. + */ + mlx5e_rep_neigh_entry_remove(priv, nhe); + mlx5e_rep_neigh_entry_release(nhe); +} + +int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_neigh_hash_entry *nhe; + int err; + + nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); + if (!nhe) { + err = mlx5e_rep_neigh_entry_create(priv, e, &nhe); + if (err) + return err; + } + list_add(&e->encap_list, &nhe->encap_list); + return 0; +} + +void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_neigh_hash_entry *nhe; + + list_del(&e->encap_list); + nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh); + + if (list_empty(&nhe->encap_list)) + mlx5e_rep_neigh_entry_destroy(priv, nhe); } static int mlx5e_rep_open(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int err; @@ -259,7 +627,8 @@ static int mlx5e_rep_open(struct net_device *dev) static int mlx5e_rep_close(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; (void)mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN); @@ -271,7 +640,8 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, char *buf, size_t len) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + 
struct mlx5_eswitch_rep *rep = rpriv->rep; int ret; ret = snprintf(buf, len, "%d", rep->vport - 1); @@ -314,18 +684,25 @@ static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle, bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { - struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep; - if (rep && rep->vport == FDB_UPLINK_VPORT && esw->mode == SRIOV_OFFLOADS) + if (!MLX5_CAP_GEN(priv->mdev, vport_group_manager)) + return false; + + rep = rpriv->rep; + if (esw->mode == SRIOV_OFFLOADS && + rep && rep->vport == FDB_UPLINK_VPORT) return true; return false; } -bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) +static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) { - struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; if (rep && rep->vport != FDB_UPLINK_VPORT) return true; @@ -397,42 +774,23 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { .ndo_get_offload_stats = mlx5e_get_offload_stats, }; -static void mlx5e_build_rep_netdev_priv(struct mlx5_core_dev *mdev, - struct net_device *netdev, - const struct mlx5e_profile *profile, - void *ppriv) +static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, + struct mlx5e_params *params) { - struct mlx5e_priv *priv = netdev_priv(netdev); u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 
MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - priv->params.log_sq_size = - MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; - priv->params.rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; - priv->params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; - - priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, - BIT(priv->params.log_rq_size)); - - priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); - mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); - - priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); - priv->params.num_tc = 1; - - priv->params.lro_wqe_sz = - MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; - - priv->mdev = mdev; - priv->netdev = netdev; - priv->params.num_channels = profile->max_nch(mdev); - priv->profile = profile; - priv->ppriv = ppriv; + params->log_sq_size = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + params->rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; + params->log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; - mutex_init(&priv->state_lock); + params->rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(params, cq_period_mode); - INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); + params->tx_max_inline = mlx5e_get_max_inline_cap(mdev); + params->num_tc = 1; + params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; } static void mlx5e_build_rep_netdev(struct net_device *netdev) @@ -458,30 +816,39 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile, void *ppriv) { - mlx5e_build_rep_netdev_priv(mdev, netdev, profile, ppriv); + struct mlx5e_priv *priv = netdev_priv(netdev); + + priv->mdev = mdev; + priv->netdev = netdev; + priv->profile = profile; + priv->ppriv = ppriv; + + mutex_init(&priv->state_lock); + + INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); + + priv->channels.params.num_channels = profile->max_nch(mdev); + mlx5e_build_rep_params(mdev, &priv->channels.params); mlx5e_build_rep_netdev(netdev); } static int 
mlx5e_init_rep_rx(struct mlx5e_priv *priv) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; - struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5_flow_handle *flow_rule; int err; - int i; + + mlx5e_init_l2_addr(priv); err = mlx5e_create_direct_rqts(priv); - if (err) { - mlx5_core_warn(mdev, "create direct rqts failed, %d\n", err); + if (err) return err; - } err = mlx5e_create_direct_tirs(priv); - if (err) { - mlx5_core_warn(mdev, "create direct tirs failed, %d\n", err); + if (err) goto err_destroy_direct_rqts; - } flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, rep->vport, @@ -503,21 +870,19 @@ err_del_flow_rule: err_destroy_direct_tirs: mlx5e_destroy_direct_tirs(priv); err_destroy_direct_rqts: - for (i = 0; i < priv->params.num_channels; i++) - mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + mlx5e_destroy_direct_rqts(priv); return err; } static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) { - struct mlx5_eswitch_rep *rep = priv->ppriv; - int i; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; mlx5e_tc_cleanup(priv); mlx5_del_flow_rules(rep->vport_rx_rule); mlx5e_destroy_direct_tirs(priv); - for (i = 0; i < priv->params.num_channels; i++) - mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + mlx5e_destroy_direct_rqts(priv); } static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) @@ -546,56 +911,181 @@ static struct mlx5e_profile mlx5e_rep_profile = { .cleanup_tx = mlx5e_cleanup_nic_tx, .update_stats = mlx5e_rep_update_stats, .max_nch = mlx5e_get_rep_max_num_channels, + .rx_handlers.handle_rx_cqe = mlx5e_handle_rx_cqe_rep, + .rx_handlers.handle_rx_cqe_mpwqe = NULL /* Not supported */, .max_tc = 1, }; -int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep) +/* e-Switch vport representors */ + +static int +mlx5e_nic_rep_load(struct 
mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = netdev_priv(rep->netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + int err; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { + err = mlx5e_add_sqs_fwd_rules(priv); + if (err) + return err; + } + + err = mlx5e_rep_neigh_init(rpriv); + if (err) + goto err_remove_sqs; + + return 0; + +err_remove_sqs: + mlx5e_remove_sqs_fwd_rules(priv); + return err; +} + +static void +mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { + struct mlx5e_priv *priv = netdev_priv(rep->netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_remove_sqs_fwd_rules(priv); + + /* clean (and re-init) existing uplink offloaded TC rules */ + mlx5e_tc_cleanup(priv); + mlx5e_tc_init(priv); + + mlx5e_rep_neigh_cleanup(rpriv); +} + +static int +mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv; struct net_device *netdev; int err; - netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep); + rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL); + if (!rpriv) + return -ENOMEM; + + netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rpriv); if (!netdev) { pr_warn("Failed to create representor netdev for vport %d\n", rep->vport); + kfree(rpriv); return -EINVAL; } rep->netdev = netdev; + rpriv->rep = rep; - err = mlx5e_attach_netdev(esw->dev, netdev); + err = mlx5e_attach_netdev(netdev_priv(netdev)); if (err) { pr_warn("Failed to attach representor netdev for vport %d\n", rep->vport); goto err_destroy_netdev; } + err = mlx5e_rep_neigh_init(rpriv); + if (err) { + pr_warn("Failed to initialized neighbours handling for vport %d\n", + rep->vport); + goto err_detach_netdev; + } + err = register_netdev(netdev); if (err) { pr_warn("Failed to register representor netdev for vport %d\n", rep->vport); - goto err_detach_netdev; + goto err_neigh_cleanup; } return 0; 
+err_neigh_cleanup: + mlx5e_rep_neigh_cleanup(rpriv); + err_detach_netdev: - mlx5e_detach_netdev(esw->dev, netdev); + mlx5e_detach_netdev(netdev_priv(netdev)); err_destroy_netdev: - mlx5e_destroy_netdev(esw->dev, netdev_priv(netdev)); - + mlx5e_destroy_netdev(netdev_priv(netdev)); + kfree(rpriv); return err; } -void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep) +static void +mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { struct net_device *netdev = rep->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_rep_priv *rpriv = priv->ppriv; + void *ppriv = priv->ppriv; + + unregister_netdev(rep->netdev); + + mlx5e_rep_neigh_cleanup(rpriv); + mlx5e_detach_netdev(priv); + mlx5e_destroy_netdev(priv); + kfree(ppriv); /* mlx5e_rep_priv */ +} + +static void mlx5e_rep_register_vf_vports(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + int vport; + u8 mac[ETH_ALEN]; + + mlx5_query_nic_vport_mac_address(mdev, 0, mac); + + for (vport = 1; vport < total_vfs; vport++) { + struct mlx5_eswitch_rep rep; + + rep.load = mlx5e_vport_rep_load; + rep.unload = mlx5e_vport_rep_unload; + rep.vport = vport; + ether_addr_copy(rep.hw_id, mac); + mlx5_eswitch_register_vport_rep(esw, vport, &rep); + } +} + +static void mlx5e_rep_unregister_vf_vports(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + int vport; + + for (vport = 1; vport < total_vfs; vport++) + mlx5_eswitch_unregister_vport_rep(esw, vport); +} + +void mlx5e_register_vport_reps(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5_eswitch_rep rep; + + mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); + rep.load = mlx5e_nic_rep_load; + 
rep.unload = mlx5e_nic_rep_unload; + rep.vport = FDB_UPLINK_VPORT; + rep.netdev = priv->netdev; + mlx5_eswitch_register_vport_rep(esw, 0, &rep); /* UPLINK PF vport*/ + + mlx5e_rep_register_vf_vports(priv); /* VFs vports */ +} + +void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; - unregister_netdev(netdev); - mlx5e_detach_netdev(esw->dev, netdev); - mlx5e_destroy_netdev(esw->dev, netdev_priv(netdev)); + mlx5e_rep_unregister_vf_vports(priv); /* VFs vports */ + mlx5_eswitch_unregister_vport_rep(esw, 0); /* UPLINK PF*/ } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h new file mode 100644 index 000000000000..a0a1a7a1d6c0 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5E_REP_H__ +#define __MLX5E_REP_H__ + +#include <net/ip_tunnels.h> +#include <linux/rhashtable.h> +#include "eswitch.h" +#include "en.h" + +struct mlx5e_neigh_update_table { + struct rhashtable neigh_ht; + /* Save the neigh hash entries in a list in addition to the hash table + * (neigh_ht). In order to iterate easily over the neigh entries. + * Used for stats query. + */ + struct list_head neigh_list; + /* protect lookup/remove operations */ + spinlock_t encap_lock; + struct notifier_block netevent_nb; + struct delayed_work neigh_stats_work; + unsigned long min_interval; /* jiffies */ +}; + +struct mlx5e_rep_priv { + struct mlx5_eswitch_rep *rep; + struct mlx5e_neigh_update_table neigh_update; +}; + +struct mlx5e_neigh { + struct net_device *dev; + union { + __be32 v4; + struct in6_addr v6; + } dst_ip; + int family; +}; + +struct mlx5e_neigh_hash_entry { + struct rhash_head rhash_node; + struct mlx5e_neigh m_neigh; + + /* Save the neigh hash entry in a list on the representor in + * addition to the hash table. In order to iterate easily over the + * neighbour entries. Used for stats query. + */ + struct list_head neigh_list; + + /* encap list sharing the same neigh */ + struct list_head encap_list; + + /* valid only when the neigh reference is taken during + * neigh_update_work workqueue callback. + */ + struct neighbour *n; + struct work_struct neigh_update_work; + + /* neigh hash entry can be deleted only when the refcount is zero. 
+ * refcount is needed to avoid neigh hash entry removal by TC, while + * it's used by the neigh notification call. + */ + refcount_t refcnt; + + /* Save the last reported time offloaded traffic pass over one of the + * neigh hash entry flows. Use it to periodically update the neigh + * 'used' value and avoid neigh deleting by the kernel. + */ + unsigned long reported_lastuse; +}; + +enum { + /* set when the encap entry is successfully offloaded into HW */ + MLX5_ENCAP_ENTRY_VALID = BIT(0), +}; + +struct mlx5e_encap_entry { + /* neigh hash entry list of encaps sharing the same neigh */ + struct list_head encap_list; + struct mlx5e_neigh m_neigh; + /* a node of the eswitch encap hash table which keeps all the encap + * entries + */ + struct hlist_node encap_hlist; + struct list_head flows; + u32 encap_id; + struct ip_tunnel_info tun_info; + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + + struct net_device *out_dev; + int tunnel_type; + u8 flags; + char *encap_header; + int encap_size; +}; + +void mlx5e_register_vport_reps(struct mlx5e_priv *priv); +void mlx5e_unregister_vport_reps(struct mlx5e_priv *priv); +bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv); +int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); + +int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, void *sp); +bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id); + +int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); +void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); + +int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); +void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); + +void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv); + +#endif /* __MLX5E_REP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index bafcb349a50c..7b1566f0ae58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -39,6 +39,8 @@ #include "en.h" #include "en_tc.h" #include "eswitch.h" +#include "en_rep.h" +#include "ipoib.h" static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) { @@ -156,28 +158,6 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1; } -void mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool val) -{ - bool was_opened; - - if (!MLX5_CAP_GEN(priv->mdev, cqe_compression)) - return; - - if (MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) == val) - return; - - was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - if (was_opened) - mlx5e_close_locked(priv->netdev); - - MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, val); - mlx5e_set_rq_type_params(priv, priv->params.rq_wq_type); - - if (was_opened) - mlx5e_open_locked(priv->netdev); - -} - #define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT) static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, @@ -331,7 +311,7 @@ mlx5e_copy_skb_header_mpwqe(struct device *pdev, static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) { struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; - struct mlx5e_sq *sq = &rq->channel->icosq; + struct mlx5e_icosq *sq = &rq->channel->icosq; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *wqe; u8 num_wqebbs = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_BB); @@ -341,7 +321,7 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; sq->db.ico_wqe[pi].num_wqebbs = 1; - mlx5e_send_nop(sq, false); + mlx5e_post_nop(wq, sq->sqn, &sq->pc); } wqe = mlx5_wq_cyc_get_wqe(wq, pi); @@ -353,7 +333,7 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq 
*rq, u16 ix) sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs; sq->pc += num_wqebbs; - mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); + mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl); } static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, @@ -637,37 +617,36 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); } -static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq) +static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) { struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_tx_wqe *wqe; - u16 pi = (sq->pc - MLX5E_XDP_TX_WQEBBS) & wq->sz_m1; /* last pi */ + u16 pi = (sq->pc - 1) & wq->sz_m1; /* last pi */ wqe = mlx5_wq_cyc_get_wqe(wq, pi); - wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); + mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &wqe->ctrl); } static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, struct mlx5e_dma_info *di, const struct xdp_buff *xdp) { - struct mlx5e_sq *sq = &rq->channel->xdp_sq; + struct mlx5e_xdpsq *sq = &rq->xdpsq; struct mlx5_wq_cyc *wq = &sq->wq; - u16 pi = sq->pc & wq->sz_m1; + u16 pi = sq->pc & wq->sz_m1; struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - struct mlx5e_sq_wqe_info *wi = &sq->db.xdp.wqe_info[pi]; struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_eth_seg *eseg = &wqe->eth; struct mlx5_wqe_data_seg *dseg; - u8 ds_cnt = MLX5E_XDP_TX_DS_COUNT; ptrdiff_t data_offset = xdp->data - xdp->data_hard_start; dma_addr_t dma_addr = di->addr + data_offset; unsigned int dma_len = xdp->data_end - xdp->data; + prefetchw(wqe); + if (unlikely(dma_len < MLX5E_XDP_MIN_INLINE || MLX5E_SW2HW_MTU(rq->netdev->mtu) < dma_len)) { rq->stats.xdp_drop++; @@ -675,48 +654,42 @@ static inline bool mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, return false; } - if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) { - if (sq->db.xdp.doorbell) { + if 
(unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1))) { + if (sq->db.doorbell) { /* SQ is full, ring doorbell */ mlx5e_xmit_xdp_doorbell(sq); - sq->db.xdp.doorbell = false; + sq->db.doorbell = false; } rq->stats.xdp_tx_full++; mlx5e_page_release(rq, di, true); return false; } - dma_sync_single_for_device(sq->pdev, dma_addr, dma_len, - PCI_DMA_TODEVICE); + dma_sync_single_for_device(sq->pdev, dma_addr, dma_len, PCI_DMA_TODEVICE); - memset(wqe, 0, sizeof(*wqe)); + cseg->fm_ce_se = 0; dseg = (struct mlx5_wqe_data_seg *)eseg + 1; + /* copy the inline part if required */ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { memcpy(eseg->inline_hdr.start, xdp->data, MLX5E_XDP_MIN_INLINE); eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); dma_len -= MLX5E_XDP_MIN_INLINE; dma_addr += MLX5E_XDP_MIN_INLINE; - - ds_cnt += MLX5E_XDP_IHS_DS_COUNT; dseg++; } /* write the dma part */ dseg->addr = cpu_to_be64(dma_addr); dseg->byte_count = cpu_to_be32(dma_len); - dseg->lkey = sq->mkey_be; cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - sq->db.xdp.di[pi] = *di; - wi->opcode = MLX5_OPCODE_SEND; - wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS; - sq->pc += MLX5E_XDP_TX_WQEBBS; + sq->db.di[pi] = *di; + sq->pc++; - sq->db.xdp.doorbell = true; + sq->db.doorbell = true; rq->stats.xdp_tx++; return true; } @@ -837,7 +810,8 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { struct net_device *netdev = rq->netdev; struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep; struct mlx5e_rx_wqe *wqe; struct sk_buff *skb; __be16 wqe_counter_be; @@ -932,7 +906,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto mpwrq_cqe_out; } - prefetch(skb->data); + prefetchw(skb->data); cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); 
mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb); @@ -950,7 +924,7 @@ mpwrq_cqe_out: int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); - struct mlx5e_sq *xdp_sq = &rq->channel->xdp_sq; + struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; int work_done = 0; if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) @@ -977,9 +951,9 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) rq->handle_rx_cqe(rq, cqe); } - if (xdp_sq->db.xdp.doorbell) { - mlx5e_xmit_xdp_doorbell(xdp_sq); - xdp_sq->db.xdp.doorbell = false; + if (xdpsq->db.doorbell) { + mlx5e_xmit_xdp_doorbell(xdpsq); + xdpsq->db.doorbell = false; } mlx5_cqwq_update_db_record(&cq->wq); @@ -989,3 +963,152 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) return work_done; } + +bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) +{ + struct mlx5e_xdpsq *sq; + struct mlx5e_rq *rq; + u16 sqcc; + int i; + + sq = container_of(cq, struct mlx5e_xdpsq, cq); + + if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + return false; + + rq = container_of(sq, struct mlx5e_rq, xdpsq); + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { + struct mlx5_cqe64 *cqe; + u16 wqe_counter; + bool last_wqe; + + cqe = mlx5e_get_cqe(cq); + if (!cqe) + break; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + do { + struct mlx5e_dma_info *di; + u16 ci; + + last_wqe = (sqcc == wqe_counter); + + ci = sqcc & sq->wq.sz_m1; + di = &sq->db.di[ci]; + + sqcc++; + /* Recycle RX page */ + mlx5e_page_release(rq, di, true); + } while (!last_wqe); + } + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc = sqcc; + return (i == MLX5E_TX_CQ_POLL_BUDGET); +} + +void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq) +{ + struct mlx5e_rq *rq = container_of(sq, 
struct mlx5e_rq, xdpsq); + struct mlx5e_dma_info *di; + u16 ci; + + while (sq->cc != sq->pc) { + ci = sq->cc & sq->wq.sz_m1; + di = &sq->db.di[ci]; + sq->cc++; + + mlx5e_page_release(rq, di, false); + } +} + +#ifdef CONFIG_MLX5_CORE_IPOIB + +#define MLX5_IB_GRH_DGID_OFFSET 24 +#define MLX5_IB_GRH_BYTES 40 +#define MLX5_IPOIB_ENCAP_LEN 4 +#define MLX5_GID_SIZE 16 + +static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + struct net_device *netdev = rq->netdev; + u8 *dgid; + u8 g; + + g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3; + dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET; + if ((!g) || dgid[0] != 0xff) + skb->pkt_type = PACKET_HOST; + else if (memcmp(dgid, netdev->broadcast + 4, MLX5_GID_SIZE) == 0) + skb->pkt_type = PACKET_BROADCAST; + else + skb->pkt_type = PACKET_MULTICAST; + + /* TODO: IB/ipoib: Allow mcast packets from other VFs + * 68996a6e760e5c74654723eeb57bf65628ae87f4 + */ + + skb_pull(skb, MLX5_IB_GRH_BYTES); + + skb->protocol = *((__be16 *)(skb->data)); + + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + + skb_record_rx_queue(skb, rq->ix); + + if (likely(netdev->features & NETIF_F_RXHASH)) + mlx5e_skb_set_hash(cqe, skb); + + skb_reset_mac_header(skb); + skb_pull(skb, MLX5_IPOIB_ENCAP_LEN); + + skb->dev = netdev; + + rq->stats.csum_complete++; + rq->stats.packets++; + rq->stats.bytes += cqe_bcnt; +} + +void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5e_rx_wqe *wqe; + __be16 wqe_counter_be; + struct sk_buff *skb; + u16 wqe_counter; + u32 cqe_bcnt; + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); + if (!skb) + goto wq_ll_pop; + + mlx5i_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + napi_gro_receive(rq->cq.napi, skb); + 
+wq_ll_pop: + mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, + &wqe->next.next_wqe_index); +} + +#endif /* CONFIG_MLX5_CORE_IPOIB */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c index cbfac06b7ffd..02dd3a95ed8f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c @@ -293,7 +293,7 @@ void mlx5e_rx_am_work(struct work_struct *work) struct mlx5e_rq *rq = container_of(am, struct mlx5e_rq, am); struct mlx5e_cq_moder cur_profile = profile[am->mode][am->profile_ix]; - mlx5_core_modify_cq_moderation(rq->priv->mdev, &rq->cq.mcq, + mlx5_core_modify_cq_moderation(rq->mdev, &rq->cq.mcq, cur_profile.usec, cur_profile.pkts); am->state = MLX5E_AM_START_MEASURE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 5621dcfda4f1..5225f2226a67 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -236,12 +236,9 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv, { int err = 0; - err = mlx5e_refresh_tirs_self_loopback(priv->mdev, true); - if (err) { - netdev_err(priv->netdev, - "\tFailed to enable UC loopback err(%d)\n", err); + err = mlx5e_refresh_tirs(priv, true); + if (err) return err; - } lbtp->loopback_ok = false; init_completion(&lbtp->comp); @@ -258,7 +255,7 @@ static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv, struct mlx5e_lbt_priv *lbtp) { dev_remove_pack(&lbtp->pt); - mlx5e_refresh_tirs_self_loopback(priv->mdev, false); + mlx5e_refresh_tirs(priv, false); } #define MLX5E_LB_VERIFY_TIMEOUT (msecs_to_jiffies(200)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 5436866798f4..11c27e4fadf6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ 
-42,14 +42,25 @@ #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_vlan.h> #include <net/tc_act/tc_tunnel_key.h> +#include <net/tc_act/tc_pedit.h> #include <net/vxlan.h> +#include <net/arp.h> #include "en.h" +#include "en_rep.h" #include "en_tc.h" #include "eswitch.h" #include "vxlan.h" +struct mlx5_nic_flow_attr { + u32 action; + u32 flow_tag; + u32 mod_hdr_id; +}; + enum { MLX5E_TC_FLOW_ESWITCH = BIT(0), + MLX5E_TC_FLOW_NIC = BIT(1), + MLX5E_TC_FLOW_OFFLOADED = BIT(2), }; struct mlx5e_tc_flow { @@ -58,7 +69,16 @@ struct mlx5e_tc_flow { u8 flags; struct mlx5_flow_handle *rule; struct list_head encap; /* flows sharing the same encap */ - struct mlx5_esw_flow_attr *attr; + union { + struct mlx5_esw_flow_attr esw_attr[0]; + struct mlx5_nic_flow_attr nic_attr[0]; + }; +}; + +struct mlx5e_tc_flow_parse_attr { + struct mlx5_flow_spec spec; + int num_mod_hdr_actions; + void *mod_hdr_actions; }; enum { @@ -71,24 +91,26 @@ enum { static struct mlx5_flow_handle * mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - u32 action, u32 flow_tag) + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) { + struct mlx5_nic_flow_attr *attr = flow->nic_attr; struct mlx5_core_dev *dev = priv->mdev; - struct mlx5_flow_destination dest = { 0 }; + struct mlx5_flow_destination dest = {}; struct mlx5_flow_act flow_act = { - .action = action, - .flow_tag = flow_tag, + .action = attr->action, + .flow_tag = attr->flow_tag, .encap_id = 0, }; struct mlx5_fc *counter = NULL; struct mlx5_flow_handle *rule; bool table_created = false; + int err; - if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = priv->fs.vlan.ft.t; - } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(dev, true); if (IS_ERR(counter)) return ERR_CAST(counter); @@ -97,6 
+119,19 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, dest.counter = counter; } + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5_modify_header_alloc(dev, MLX5_FLOW_NAMESPACE_KERNEL, + parse_attr->num_mod_hdr_actions, + parse_attr->mod_hdr_actions, + &attr->mod_hdr_id); + flow_act.modify_id = attr->mod_hdr_id; + kfree(parse_attr->mod_hdr_actions); + if (err) { + rule = ERR_PTR(err); + goto err_create_mod_hdr_id; + } + } + if (IS_ERR_OR_NULL(priv->fs.tc.t)) { priv->fs.tc.t = mlx5_create_auto_grouped_flow_table(priv->fs.ns, @@ -114,8 +149,9 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, table_created = true; } - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - rule = mlx5_add_flow_rules(priv->fs.tc.t, spec, &flow_act, &dest, 1); + parse_attr->spec.match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + rule = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, + &flow_act, &dest, 1); if (IS_ERR(rule)) goto err_add_rule; @@ -128,6 +164,10 @@ err_add_rule: priv->fs.tc.t = NULL; } err_create_ft: + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5_modify_header_dealloc(priv->mdev, + attr->mod_hdr_id); +err_create_mod_hdr_id: mlx5_fc_destroy(dev, counter); return rule; @@ -138,47 +178,195 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, { struct mlx5_fc *counter = NULL; - if (!IS_ERR(flow->rule)) { - counter = mlx5_flow_rule_counter(flow->rule); - mlx5_del_flow_rules(flow->rule); - mlx5_fc_destroy(priv->mdev, counter); - } + counter = mlx5_flow_rule_counter(flow->rule); + mlx5_del_flow_rules(flow->rule); + mlx5_fc_destroy(priv->mdev, counter); if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) { mlx5_destroy_flow_table(priv->fs.tc.t); priv->fs.tc.t = NULL; } + + if (flow->nic_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5_modify_header_dealloc(priv->mdev, + flow->nic_attr->mod_hdr_id); } +static void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); + static struct mlx5_flow_handle 
* mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, - struct mlx5_flow_spec *spec, - struct mlx5_esw_flow_attr *attr) + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5_flow_handle *rule; int err; err = mlx5_eswitch_add_vlan_action(esw, attr); - if (err) - return ERR_PTR(err); + if (err) { + rule = ERR_PTR(err); + goto err_add_vlan; + } - return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); -} + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5_modify_header_alloc(priv->mdev, MLX5_FLOW_NAMESPACE_FDB, + parse_attr->num_mod_hdr_actions, + parse_attr->mod_hdr_actions, + &attr->mod_hdr_id); + kfree(parse_attr->mod_hdr_actions); + if (err) { + rule = ERR_PTR(err); + goto err_mod_hdr; + } + } -static void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow); + rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr); + if (IS_ERR(rule)) + goto err_add_rule; + + return rule; + +err_add_rule: + if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5_modify_header_dealloc(priv->mdev, + attr->mod_hdr_id); +err_mod_hdr: + mlx5_eswitch_del_vlan_action(esw, attr); +err_add_vlan: + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + mlx5e_detach_encap(priv, flow); + return rule; +} static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; - mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->attr); + if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr); + } - mlx5_eswitch_del_vlan_action(esw, flow->attr); + mlx5_eswitch_del_vlan_action(esw, flow->esw_attr); - if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + if 
(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { mlx5e_detach_encap(priv, flow); + kvfree(flow->esw_attr->parse_attr); + } + + if (flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + mlx5_modify_header_dealloc(priv->mdev, + attr->mod_hdr_id); +} + +void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_tc_flow *flow; + int err; + + err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, + e->encap_size, e->encap_header, + &e->encap_id); + if (err) { + mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %d\n", + err); + return; + } + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(priv); + + list_for_each_entry(flow, &e->flows, encap) { + flow->esw_attr->encap_id = e->encap_id; + flow->rule = mlx5e_tc_add_fdb_flow(priv, + flow->esw_attr->parse_attr, + flow); + if (IS_ERR(flow->rule)) { + err = PTR_ERR(flow->rule); + mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", + err); + continue; + } + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + } +} + +void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) +{ + struct mlx5e_tc_flow *flow; + struct mlx5_fc *counter; + + list_for_each_entry(flow, &e->flows, encap) { + if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + counter = mlx5_flow_rule_counter(flow->rule); + mlx5_del_flow_rules(flow->rule); + mlx5_fc_destroy(priv->mdev, counter); + } + } + + if (e->flags & MLX5_ENCAP_ENTRY_VALID) { + e->flags &= ~MLX5_ENCAP_ENTRY_VALID; + mlx5_encap_dealloc(priv->mdev, e->encap_id); + } +} + +void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) +{ + struct mlx5e_neigh *m_neigh = &nhe->m_neigh; + u64 bytes, packets, lastuse = 0; + struct mlx5e_tc_flow *flow; + struct mlx5e_encap_entry *e; + struct mlx5_fc *counter; + struct neigh_table *tbl; + bool neigh_used = false; + struct neighbour *n; + + if (m_neigh->family 
== AF_INET) + tbl = &arp_tbl; +#if IS_ENABLED(CONFIG_IPV6) + else if (m_neigh->family == AF_INET6) + tbl = ipv6_stub->nd_tbl; +#endif + else + return; + + list_for_each_entry(e, &nhe->encap_list, encap_list) { + if (!(e->flags & MLX5_ENCAP_ENTRY_VALID)) + continue; + list_for_each_entry(flow, &e->flows, encap) { + if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + counter = mlx5_flow_rule_counter(flow->rule); + mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { + neigh_used = true; + break; + } + } + } + } + + if (neigh_used) { + nhe->reported_lastuse = jiffies; + + /* find the relevant neigh according to the cached device and + * dst ip pair + */ + n = neigh_lookup(tbl, &m_neigh->dst_ip, m_neigh->dev); + if (!n) { + WARN(1, "The neighbour already freed\n"); + return; + } + + neigh_event_send(n, NULL); + neigh_release(n); + } } static void mlx5e_detach_encap(struct mlx5e_priv *priv, @@ -188,22 +376,20 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, list_del(&flow->encap); if (list_empty(next)) { - struct mlx5_encap_entry *e; + struct mlx5e_encap_entry *e; + + e = list_entry(next, struct mlx5e_encap_entry, flows); + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); - e = list_entry(next, struct mlx5_encap_entry, flows); - if (e->n) { + if (e->flags & MLX5_ENCAP_ENTRY_VALID) mlx5_encap_dealloc(priv->mdev, e->encap_id); - neigh_release(e->n); - } + hlist_del_rcu(&e->encap_hlist); + kfree(e->encap_header); kfree(e); } } -/* we get here also when setting rule to the FW failed, etc. It means that the - * flow rule itself might not exist, but some offloading related to the actions - * should be cleaned. 
- */ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { @@ -631,16 +817,18 @@ static int parse_cls_flower(struct mlx5e_priv *priv, { struct mlx5_core_dev *dev = priv->mdev; struct mlx5_eswitch *esw = dev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep; u8 min_inline; int err; err = __parse_cls_flower(priv, spec, f, &min_inline); - if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH) && - rep->vport != FDB_UPLINK_VPORT) { - if (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && - esw->offloads.inline_mode < min_inline) { + if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { + rep = rpriv->rep; + if (rep->vport != FDB_UPLINK_VPORT && + (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && + esw->offloads.inline_mode < min_inline)) { netdev_warn(priv->netdev, "Flow is not offloaded due to min inline setting, required %d actual %d\n", min_inline, esw->offloads.inline_mode); @@ -651,29 +839,313 @@ static int parse_cls_flower(struct mlx5e_priv *priv, return err; } +struct pedit_headers { + struct ethhdr eth; + struct iphdr ip4; + struct ipv6hdr ip6; + struct tcphdr tcp; + struct udphdr udp; +}; + +static int pedit_header_offsets[] = { + [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), + [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), + [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), + [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), + [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), +}; + +#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype]) + +static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, + struct pedit_headers *masks, + struct pedit_headers *vals) +{ + u32 *curr_pmask, *curr_pval; + + if (hdr_type >= __PEDIT_HDR_TYPE_MAX) + goto out_err; + + curr_pmask = (u32 *)(pedit_header(masks, 
hdr_type) + offset); + curr_pval = (u32 *)(pedit_header(vals, hdr_type) + offset); + + if (*curr_pmask & mask) /* disallow acting twice on the same location */ + goto out_err; + + *curr_pmask |= mask; + *curr_pval |= (val & mask); + + return 0; + +out_err: + return -EOPNOTSUPP; +} + +struct mlx5_fields { + u8 field; + u8 size; + u32 offset; +}; + +static struct mlx5_fields fields[] = { + {MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16, 4, offsetof(struct pedit_headers, eth.h_dest[0])}, + {MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0, 2, offsetof(struct pedit_headers, eth.h_dest[4])}, + {MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16, 4, offsetof(struct pedit_headers, eth.h_source[0])}, + {MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0, 2, offsetof(struct pedit_headers, eth.h_source[4])}, + {MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE, 2, offsetof(struct pedit_headers, eth.h_proto)}, + + {MLX5_ACTION_IN_FIELD_OUT_IP_DSCP, 1, offsetof(struct pedit_headers, ip4.tos)}, + {MLX5_ACTION_IN_FIELD_OUT_IP_TTL, 1, offsetof(struct pedit_headers, ip4.ttl)}, + {MLX5_ACTION_IN_FIELD_OUT_SIPV4, 4, offsetof(struct pedit_headers, ip4.saddr)}, + {MLX5_ACTION_IN_FIELD_OUT_DIPV4, 4, offsetof(struct pedit_headers, ip4.daddr)}, + + {MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[0])}, + {MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[1])}, + {MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[2])}, + {MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0, 4, offsetof(struct pedit_headers, ip6.saddr.s6_addr32[3])}, + {MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[0])}, + {MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[1])}, + {MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[2])}, + {MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0, 4, offsetof(struct pedit_headers, ip6.daddr.s6_addr32[3])}, + + 
{MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT, 2, offsetof(struct pedit_headers, tcp.source)}, + {MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT, 2, offsetof(struct pedit_headers, tcp.dest)}, + {MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS, 1, offsetof(struct pedit_headers, tcp.ack_seq) + 5}, + + {MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT, 2, offsetof(struct pedit_headers, udp.source)}, + {MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT, 2, offsetof(struct pedit_headers, udp.dest)}, +}; + +/* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at + * max from the SW pedit action. On success, it says how many HW actions were + * actually parsed. + */ +static int offload_pedit_fields(struct pedit_headers *masks, + struct pedit_headers *vals, + struct mlx5e_tc_flow_parse_attr *parse_attr) +{ + struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals; + int i, action_size, nactions, max_actions, first, last; + void *s_masks_p, *a_masks_p, *vals_p; + u32 s_mask, a_mask, val; + struct mlx5_fields *f; + u8 cmd, field_bsize; + unsigned long mask; + void *action; + + set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET]; + add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD]; + set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET]; + add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD]; + + action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + action = parse_attr->mod_hdr_actions; + max_actions = parse_attr->num_mod_hdr_actions; + nactions = 0; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + f = &fields[i]; + /* avoid seeing bits set from previous iterations */ + s_mask = a_mask = mask = val = 0; + + s_masks_p = (void *)set_masks + f->offset; + a_masks_p = (void *)add_masks + f->offset; + + memcpy(&s_mask, s_masks_p, f->size); + memcpy(&a_mask, a_masks_p, f->size); + + if (!s_mask && !a_mask) /* nothing to offload here */ + continue; + + if (s_mask && a_mask) { + printk(KERN_WARNING "mlx5: can't set and add to the same HW field (%x)\n", f->field); + return -EOPNOTSUPP; + } + + if (nactions == max_actions) { + 
printk(KERN_WARNING "mlx5: parsed %d pedit actions, can't do more\n", nactions); + return -EOPNOTSUPP; + } + + if (s_mask) { + cmd = MLX5_ACTION_TYPE_SET; + mask = s_mask; + vals_p = (void *)set_vals + f->offset; + /* clear to denote we consumed this field */ + memset(s_masks_p, 0, f->size); + } else { + cmd = MLX5_ACTION_TYPE_ADD; + mask = a_mask; + vals_p = (void *)add_vals + f->offset; + /* clear to denote we consumed this field */ + memset(a_masks_p, 0, f->size); + } + + memcpy(&val, vals_p, f->size); + + field_bsize = f->size * BITS_PER_BYTE; + first = find_first_bit(&mask, field_bsize); + last = find_last_bit(&mask, field_bsize); + if (first > 0 || last != (field_bsize - 1)) { + printk(KERN_WARNING "mlx5: partial rewrite (mask %lx) is currently not offloaded\n", + mask); + return -EOPNOTSUPP; + } + + MLX5_SET(set_action_in, action, action_type, cmd); + MLX5_SET(set_action_in, action, field, f->field); + + if (cmd == MLX5_ACTION_TYPE_SET) { + MLX5_SET(set_action_in, action, offset, 0); + /* length is num of bits to be written, zero means length of 32 */ + MLX5_SET(set_action_in, action, length, field_bsize); + } + + if (field_bsize == 32) + MLX5_SET(set_action_in, action, data, ntohl(val)); + else if (field_bsize == 16) + MLX5_SET(set_action_in, action, data, ntohs(val)); + else if (field_bsize == 8) + MLX5_SET(set_action_in, action, data, val); + + action += action_size; + nactions++; + } + + parse_attr->num_mod_hdr_actions = nactions; + return 0; +} + +static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, + const struct tc_action *a, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr) +{ + int nkeys, action_size, max_actions; + + nkeys = tcf_pedit_nkeys(a); + action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); + + if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */ + max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, max_modify_header_actions); + else /* namespace is MLX5_FLOW_NAMESPACE_KERNEL - NIC offloading */ + 
max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, max_modify_header_actions); + + /* can get up to crazingly 16 HW actions in 32 bits pedit SW key */ + max_actions = min(max_actions, nkeys * 16); + + parse_attr->mod_hdr_actions = kcalloc(max_actions, action_size, GFP_KERNEL); + if (!parse_attr->mod_hdr_actions) + return -ENOMEM; + + parse_attr->num_mod_hdr_actions = max_actions; + return 0; +} + +static const struct pedit_headers zero_masks = {}; + +static int parse_tc_pedit_action(struct mlx5e_priv *priv, + const struct tc_action *a, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr) +{ + struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks; + int nkeys, i, err = -EOPNOTSUPP; + u32 mask, val, offset; + u8 cmd, htype; + + nkeys = tcf_pedit_nkeys(a); + + memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX); + memset(vals, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX); + + for (i = 0; i < nkeys; i++) { + htype = tcf_pedit_htype(a, i); + cmd = tcf_pedit_cmd(a, i); + err = -EOPNOTSUPP; /* can't be all optimistic */ + + if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) { + printk(KERN_WARNING "mlx5: legacy pedit isn't offloaded\n"); + goto out_err; + } + + if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) { + printk(KERN_WARNING "mlx5: pedit cmd %d isn't offloaded\n", cmd); + goto out_err; + } + + mask = tcf_pedit_mask(a, i); + val = tcf_pedit_val(a, i); + offset = tcf_pedit_offset(a, i); + + err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]); + if (err) + goto out_err; + } + + err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr); + if (err) + goto out_err; + + err = offload_pedit_fields(masks, vals, parse_attr); + if (err < 0) + goto out_dealloc_parsed_actions; + + for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { + cmd_masks = &masks[cmd]; + if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { + printk(KERN_WARNING "mlx5: attempt to offload an unsupported field 
(cmd %d)\n", + cmd); + print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS, + 16, 1, cmd_masks, sizeof(zero_masks), true); + err = -EOPNOTSUPP; + goto out_dealloc_parsed_actions; + } + } + + return 0; + +out_dealloc_parsed_actions: + kfree(parse_attr->mod_hdr_actions); +out_err: + return err; +} + static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, - u32 *action, u32 *flow_tag) + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct mlx5e_tc_flow *flow) { + struct mlx5_nic_flow_attr *attr = flow->nic_attr; const struct tc_action *a; LIST_HEAD(actions); + int err; if (tc_no_actions(exts)) return -EINVAL; - *flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; - *action = 0; + attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + attr->action = 0; tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { /* Only support a single action per rule */ - if (*action) + if (attr->action) return -EINVAL; if (is_tcf_gact_shot(a)) { - *action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; if (MLX5_CAP_FLOWTABLE(priv->mdev, flow_table_properties_nic_receive.flow_counter)) - *action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + continue; + } + + if (is_tcf_pedit(a)) { + err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL, + parse_attr); + if (err) + return err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; continue; } @@ -686,8 +1158,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } - *flow_tag = mark; - *action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->flow_tag = mark; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; continue; } @@ -853,16 +1325,17 @@ static void gen_vxlan_header_ipv6(struct net_device *out_dev, static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, struct net_device *mirred_dev, - struct mlx5_encap_entry *e, - struct net_device 
**out_dev) + struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); int ipv4_encap_size = ETH_HLEN + sizeof(struct iphdr) + VXLAN_HLEN; struct ip_tunnel_key *tun_key = &e->tun_info.key; + struct net_device *out_dev; struct neighbour *n = NULL; struct flowi4 fl4 = {}; char *encap_header; int ttl, err; + u8 nud_state; if (max_encap_size < ipv4_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", @@ -887,25 +1360,36 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, fl4.daddr = tun_key->u.ipv4.dst; fl4.saddr = tun_key->u.ipv4.src; - err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev, + err = mlx5e_route_lookup_ipv4(priv, mirred_dev, &out_dev, &fl4, &n, &ttl); if (err) goto out; - if (!(n->nud_state & NUD_VALID)) { - pr_warn("%s: can't offload, neighbour to %pI4 invalid\n", __func__, &fl4.daddr); - err = -EOPNOTSUPP; + /* used by mlx5e_detach_encap to lookup a neigh hash table + * entry in the neigh hash table when a user deletes a rule + */ + e->m_neigh.dev = n->dev; + e->m_neigh.family = n->ops->family; + memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + e->out_dev = out_dev; + + /* It's importent to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes it's validity state, we would find the relevant neigh + * in the hash. 
+ */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + if (err) goto out; - } - e->n = n; - e->out_dev = *out_dev; - - neigh_ha_snapshot(e->h_dest, n, *out_dev); + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(e->h_dest, n->ha); + read_unlock_bh(&n->lock); switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: - gen_vxlan_header_ipv4(*out_dev, encap_header, + gen_vxlan_header_ipv4(out_dev, encap_header, ipv4_encap_size, e->h_dest, ttl, fl4.daddr, fl4.saddr, tun_key->tp_dst, @@ -913,31 +1397,49 @@ static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv, break; default: err = -EOPNOTSUPP; - goto out; + goto destroy_neigh_entry; + } + e->encap_size = ipv4_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + neigh_release(n); + return -EAGAIN; } err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, ipv4_encap_size, encap_header, &e->encap_id); + if (err) + goto destroy_neigh_entry; + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); + neigh_release(n); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); out: - if (err && n) - neigh_release(n); kfree(encap_header); + if (n) + neigh_release(n); return err; } static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, struct net_device *mirred_dev, - struct mlx5_encap_entry *e, - struct net_device **out_dev) - + struct mlx5e_encap_entry *e) { int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size); int ipv6_encap_size = ETH_HLEN + sizeof(struct ipv6hdr) + VXLAN_HLEN; struct ip_tunnel_key *tun_key = &e->tun_info.key; + struct net_device *out_dev; struct neighbour *n = NULL; struct flowi6 fl6 = {}; char *encap_header; int err, ttl = 0; + u8 nud_state; if (max_encap_size < ipv6_encap_size) { mlx5_core_warn(priv->mdev, "encap size %d too big, max supported is %d\n", @@ -963,25 +1465,36 @@ static int 
mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, fl6.daddr = tun_key->u.ipv6.dst; fl6.saddr = tun_key->u.ipv6.src; - err = mlx5e_route_lookup_ipv6(priv, mirred_dev, out_dev, + err = mlx5e_route_lookup_ipv6(priv, mirred_dev, &out_dev, &fl6, &n, &ttl); if (err) goto out; - if (!(n->nud_state & NUD_VALID)) { - pr_warn("%s: can't offload, neighbour to %pI6 invalid\n", __func__, &fl6.daddr); - err = -EOPNOTSUPP; + /* used by mlx5e_detach_encap to lookup a neigh hash table + * entry in the neigh hash table when a user deletes a rule + */ + e->m_neigh.dev = n->dev; + e->m_neigh.family = n->ops->family; + memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); + e->out_dev = out_dev; + + /* It's importent to add the neigh to the hash table before checking + * the neigh validity state. So if we'll get a notification, in case the + * neigh changes it's validity state, we would find the relevant neigh + * in the hash. + */ + err = mlx5e_rep_encap_entry_attach(netdev_priv(out_dev), e); + if (err) goto out; - } - - e->n = n; - e->out_dev = *out_dev; - neigh_ha_snapshot(e->h_dest, n, *out_dev); + read_lock_bh(&n->lock); + nud_state = n->nud_state; + ether_addr_copy(e->h_dest, n->ha); + read_unlock_bh(&n->lock); switch (e->tunnel_type) { case MLX5_HEADER_TYPE_VXLAN: - gen_vxlan_header_ipv6(*out_dev, encap_header, + gen_vxlan_header_ipv6(out_dev, encap_header, ipv6_encap_size, e->h_dest, ttl, &fl6.daddr, &fl6.saddr, tun_key->tp_dst, @@ -989,31 +1502,51 @@ static int mlx5e_create_encap_header_ipv6(struct mlx5e_priv *priv, break; default: err = -EOPNOTSUPP; - goto out; + goto destroy_neigh_entry; + } + + e->encap_size = ipv6_encap_size; + e->encap_header = encap_header; + + if (!(nud_state & NUD_VALID)) { + neigh_event_send(n, NULL); + neigh_release(n); + return -EAGAIN; } err = mlx5_encap_alloc(priv->mdev, e->tunnel_type, ipv6_encap_size, encap_header, &e->encap_id); + if (err) + goto destroy_neigh_entry; + + e->flags |= MLX5_ENCAP_ENTRY_VALID; + 
mlx5e_rep_queue_neigh_stats_work(netdev_priv(out_dev)); + neigh_release(n); + return err; + +destroy_neigh_entry: + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); out: - if (err && n) - neigh_release(n); kfree(encap_header); + if (n) + neigh_release(n); return err; } static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct ip_tunnel_info *tun_info, struct net_device *mirred_dev, - struct mlx5_esw_flow_attr *attr) + struct net_device **encap_dev, + struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *up_dev = mlx5_eswitch_get_uplink_netdev(esw); - struct mlx5e_priv *up_priv = netdev_priv(up_dev); unsigned short family = ip_tunnel_info_af(tun_info); + struct mlx5e_priv *up_priv = netdev_priv(up_dev); + struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct ip_tunnel_key *key = &tun_info->key; - struct mlx5_encap_entry *e; - struct net_device *out_dev; - int tunnel_type, err = -EOPNOTSUPP; + struct mlx5e_encap_entry *e; + int tunnel_type, err = 0; uintptr_t hash_key; bool found = false; @@ -1048,10 +1581,8 @@ vxlan_encap_offload_err: } } - if (found) { - attr->encap = e; - return 0; - } + if (found) + goto attach_flow; e = kzalloc(sizeof(*e), GFP_KERNEL); if (!e) @@ -1062,16 +1593,21 @@ vxlan_encap_offload_err: INIT_LIST_HEAD(&e->flows); if (family == AF_INET) - err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev); + err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e); else if (family == AF_INET6) - err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e, &out_dev); + err = mlx5e_create_encap_header_ipv6(priv, mirred_dev, e); - if (err) + if (err && err != -EAGAIN) goto out_err; - attr->encap = e; hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); +attach_flow: + list_add(&flow->encap, &e->flows); + *encap_dev = e->out_dev; + if (e->flags & MLX5_ENCAP_ENTRY_VALID) + attr->encap_id = e->encap_id; + return err; out_err: @@ -1080,20 +1616,22 @@ out_err: } static int 
parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, + struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow) { - struct mlx5_esw_flow_attr *attr = flow->attr; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_rep_priv *rpriv = priv->ppriv; struct ip_tunnel_info *info = NULL; const struct tc_action *a; LIST_HEAD(actions); bool encap = false; - int err; + int err = 0; if (tc_no_actions(exts)) return -EINVAL; memset(attr, 0, sizeof(*attr)); - attr->in_rep = priv->ppriv; + attr->in_rep = rpriv->rep; tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { @@ -1103,9 +1641,19 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, continue; } + if (is_tcf_pedit(a)) { + err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB, + parse_attr); + if (err) + return err; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + continue; + } + if (is_tcf_mirred_egress_redirect(a)) { int ifindex = tcf_mirred_ifindex(a); - struct net_device *out_dev; + struct net_device *out_dev, *encap_dev = NULL; struct mlx5e_priv *out_priv; out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); @@ -1115,18 +1663,20 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; out_priv = netdev_priv(out_dev); - attr->out_rep = out_priv->ppriv; + rpriv = out_priv->ppriv; + attr->out_rep = rpriv->rep; } else if (encap) { err = mlx5e_attach_encap(priv, info, - out_dev, attr); - if (err) + out_dev, &encap_dev, flow); + if (err && err != -EAGAIN) return err; - list_add(&flow->encap, &attr->encap->flows); attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - out_priv = netdev_priv(attr->encap->out_dev); - attr->out_rep = out_priv->ppriv; + out_priv = netdev_priv(encap_dev); + rpriv = out_priv->ppriv; + attr->out_rep = 
rpriv->rep; + attr->parse_attr = parse_attr; } else { pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", priv->netdev->name, out_dev->name); @@ -1166,28 +1716,30 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } - return 0; + return err; } int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, struct tc_cls_flower_offload *f) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_table *tc = &priv->fs.tc; - int err, attr_size = 0; - u32 flow_tag, action; struct mlx5e_tc_flow *flow; - struct mlx5_flow_spec *spec; - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + int attr_size, err = 0; u8 flow_flags = 0; if (esw && esw->mode == SRIOV_OFFLOADS) { flow_flags = MLX5E_TC_FLOW_ESWITCH; attr_size = sizeof(struct mlx5_esw_flow_attr); + } else { + flow_flags = MLX5E_TC_FLOW_NIC; + attr_size = sizeof(struct mlx5_nic_flow_attr); } flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); - spec = mlx5_vzalloc(sizeof(*spec)); - if (!spec || !flow) { + parse_attr = mlx5_vzalloc(sizeof(*parse_attr)); + if (!parse_attr || !flow) { err = -ENOMEM; goto err_free; } @@ -1195,42 +1747,54 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, flow->cookie = f->cookie; flow->flags = flow_flags; - err = parse_cls_flower(priv, flow, spec, f); + err = parse_cls_flower(priv, flow, &parse_attr->spec, f); if (err < 0) goto err_free; if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { - flow->attr = (struct mlx5_esw_flow_attr *)(flow + 1); - err = parse_tc_fdb_actions(priv, f->exts, flow); + err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow); if (err < 0) - goto err_free; - flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr); + goto err_handle_encap_flow; + flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow); } else { - err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag); + err = 
parse_tc_nic_actions(priv, f->exts, parse_attr, flow); if (err < 0) goto err_free; - flow->rule = mlx5e_tc_add_nic_flow(priv, spec, action, flow_tag); + flow->rule = mlx5e_tc_add_nic_flow(priv, parse_attr, flow); } if (IS_ERR(flow->rule)) { err = PTR_ERR(flow->rule); - goto err_del_rule; + goto err_free; } + flow->flags |= MLX5E_TC_FLOW_OFFLOADED; err = rhashtable_insert_fast(&tc->ht, &flow->node, tc->ht_params); if (err) goto err_del_rule; - goto out; + if (flow->flags & MLX5E_TC_FLOW_ESWITCH && + !(flow->esw_attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)) + kvfree(parse_attr); + return err; err_del_rule: mlx5e_tc_del_flow(priv, flow); +err_handle_encap_flow: + if (err == -EAGAIN) { + err = rhashtable_insert_fast(&tc->ht, &flow->node, + tc->ht_params); + if (err) + mlx5e_tc_del_flow(priv, flow); + else + return 0; + } + err_free: + kvfree(parse_attr); kfree(flow); -out: - kvfree(spec); return err; } @@ -1249,7 +1813,6 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, mlx5e_tc_del_flow(priv, flow); - kfree(flow); return 0; @@ -1272,6 +1835,9 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, if (!flow) return -EINVAL; + if (!(flow->flags & MLX5E_TC_FLOW_OFFLOADED)) + return 0; + counter = mlx5_flow_rule_counter(flow->rule); if (!counter) return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 34bf903fc886..ecbe30d808ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -46,6 +46,15 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, int mlx5e_stats_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f); +struct mlx5e_encap_entry; +void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); +void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e); + +struct mlx5e_neigh_hash_entry; +void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe); + 
static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { return atomic_read(&priv->fs.tc.ht.nelems); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 57f5e2d7ebd1..ab3bb026ff9e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -33,34 +33,12 @@ #include <linux/tcp.h> #include <linux/if_vlan.h> #include "en.h" +#include "ipoib.h" #define MLX5E_SQ_NOPS_ROOM MLX5_SEND_WQE_MAX_WQEBBS #define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\ MLX5E_SQ_NOPS_ROOM) -void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw) -{ - struct mlx5_wq_cyc *wq = &sq->wq; - - u16 pi = sq->pc & wq->sz_m1; - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - - memset(cseg, 0, sizeof(*cseg)); - - cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | 0x01); - - sq->pc++; - sq->stats.nop++; - - if (notify_hw) { - cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); - } -} - static inline void mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma) { @@ -76,25 +54,25 @@ static inline void mlx5e_tx_dma_unmap(struct device *pdev, } } -static inline void mlx5e_dma_push(struct mlx5e_sq *sq, +static inline void mlx5e_dma_push(struct mlx5e_txqsq *sq, dma_addr_t addr, u32 size, enum mlx5e_dma_map_type map_type) { u32 i = sq->dma_fifo_pc & sq->dma_fifo_mask; - sq->db.txq.dma_fifo[i].addr = addr; - sq->db.txq.dma_fifo[i].size = size; - sq->db.txq.dma_fifo[i].type = map_type; + sq->db.dma_fifo[i].addr = addr; + sq->db.dma_fifo[i].size = size; + sq->db.dma_fifo[i].type = map_type; sq->dma_fifo_pc++; } -static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i) +static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_txqsq *sq, u32 i) { - return &sq->db.txq.dma_fifo[i & 
sq->dma_fifo_mask]; + return &sq->db.dma_fifo[i & sq->dma_fifo_mask]; } -static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, u8 num_dma) +static void mlx5e_dma_unmap_wqe_err(struct mlx5e_txqsq *sq, u8 num_dma) { int i; @@ -111,6 +89,7 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, { struct mlx5e_priv *priv = netdev_priv(dev); int channel_ix = fallback(dev, skb); + u16 num_channels; int up = 0; if (!netdev_get_num_tc(dev)) @@ -122,11 +101,11 @@ u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, /* channel_ix can be larger than num_channels since * dev->num_real_tx_queues = num_channels * num_tc */ - if (channel_ix >= priv->params.num_channels) - channel_ix = reciprocal_scale(channel_ix, - priv->params.num_channels); + num_channels = priv->channels.params.num_channels; + if (channel_ix >= num_channels) + channel_ix = reciprocal_scale(channel_ix, num_channels); - return priv->channeltc_to_txq_map[channel_ix][up]; + return priv->channel_tc2txq[channel_ix][up]; } static inline int mlx5e_skb_l2_header_offset(struct sk_buff *skb) @@ -175,25 +154,6 @@ static inline unsigned int mlx5e_calc_min_inline(enum mlx5_inline_modes mode, } } -static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq, - struct sk_buff *skb, bool bf) -{ - /* Some NIC TX decisions, e.g loopback, are based on the packet - * headers and occur before the data gather. 
- * Therefore these headers must be copied into the WQE - */ - if (bf) { - u16 ihs = skb_headlen(skb); - - if (skb_vlan_tag_present(skb)) - ihs += VLAN_HLEN; - - if (ihs <= sq->max_inline) - return skb_headlen(skb); - } - return mlx5e_calc_min_inline(sq->min_inline_mode, skb); -} - static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data, unsigned int *skb_len, unsigned int len) @@ -218,31 +178,9 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs, mlx5e_tx_skb_pull_inline(skb_data, skb_len, cpy2_sz); } -static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) +static inline void +mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) { - struct mlx5_wq_cyc *wq = &sq->wq; - - u16 pi = sq->pc & wq->sz_m1; - struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - struct mlx5e_tx_wqe_info *wi = &sq->db.txq.wqe_info[pi]; - - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - struct mlx5_wqe_eth_seg *eseg = &wqe->eth; - struct mlx5_wqe_data_seg *dseg; - - unsigned char *skb_data = skb->data; - unsigned int skb_len = skb->len; - u8 opcode = MLX5_OPCODE_SEND; - dma_addr_t dma_addr = 0; - unsigned int num_bytes; - bool bf = false; - u16 headlen; - u16 ds_cnt; - u16 ihs; - int i; - - memset(wqe, 0, sizeof(*wqe)); - if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; if (skb->encapsulation) { @@ -254,74 +192,51 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) } } else sq->stats.csum_none++; +} - if (sq->cc != sq->prev_cc) { - sq->prev_cc = sq->cc; - sq->bf_budget = (sq->cc == sq->pc) ? 
MLX5E_SQ_BF_BUDGET : 0; - } - - if (skb_is_gso(skb)) { - eseg->mss = cpu_to_be16(skb_shinfo(skb)->gso_size); - opcode = MLX5_OPCODE_LSO; +static inline u16 +mlx5e_txwqe_build_eseg_gso(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_wqe_eth_seg *eseg, unsigned int *num_bytes) +{ + u16 ihs; - if (skb->encapsulation) { - ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb); - sq->stats.tso_inner_packets++; - sq->stats.tso_inner_bytes += skb->len - ihs; - } else { - ihs = skb_transport_offset(skb) + tcp_hdrlen(skb); - sq->stats.tso_packets++; - sq->stats.tso_bytes += skb->len - ihs; - } + eseg->mss = cpu_to_be16(skb_shinfo(skb)->gso_size); - sq->stats.packets += skb_shinfo(skb)->gso_segs; - num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; + if (skb->encapsulation) { + ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb); + sq->stats.tso_inner_packets++; + sq->stats.tso_inner_bytes += skb->len - ihs; } else { - bf = sq->bf_budget && - !skb->xmit_more && - !skb_shinfo(skb)->nr_frags; - ihs = mlx5e_get_inline_hdr_size(sq, skb, bf); - sq->stats.packets++; - num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); - } - - sq->stats.bytes += num_bytes; - wi->num_bytes = num_bytes; - - ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; - if (ihs) { - if (skb_vlan_tag_present(skb)) { - mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs, &skb_data, &skb_len); - ihs += VLAN_HLEN; - } else { - memcpy(eseg->inline_hdr.start, skb_data, ihs); - mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); - } - eseg->inline_hdr.sz = cpu_to_be16(ihs); - ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start), MLX5_SEND_WQE_DS); - } else if (skb_vlan_tag_present(skb)) { - eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); - eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); + ihs = skb_transport_offset(skb) + tcp_hdrlen(skb); + sq->stats.tso_packets++; + sq->stats.tso_bytes += skb->len - ihs; } - dseg = (struct mlx5_wqe_data_seg *)cseg + ds_cnt; 
+ *num_bytes = skb->len + (skb_shinfo(skb)->gso_segs - 1) * ihs; + return ihs; +} - wi->num_dma = 0; +static inline int +mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, + unsigned char *skb_data, u16 headlen, + struct mlx5_wqe_data_seg *dseg) +{ + dma_addr_t dma_addr = 0; + u8 num_dma = 0; + int i; - headlen = skb_len - skb->data_len; if (headlen) { dma_addr = dma_map_single(sq->pdev, skb_data, headlen, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) - goto dma_unmap_wqe_err; + return -ENOMEM; dseg->addr = cpu_to_be64(dma_addr); dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32(headlen); mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE); - wi->num_dma++; - + num_dma++; dseg++; } @@ -330,59 +245,120 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) int fsz = skb_frag_size(frag); dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, - DMA_TO_DEVICE); + DMA_TO_DEVICE); if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) - goto dma_unmap_wqe_err; + return -ENOMEM; dseg->addr = cpu_to_be64(dma_addr); dseg->lkey = sq->mkey_be; dseg->byte_count = cpu_to_be32(fsz); mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE); - wi->num_dma++; - + num_dma++; dseg++; } - ds_cnt += wi->num_dma; - - cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); - cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); + return num_dma; +} - sq->db.txq.skb[pi] = skb; +static inline void +mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb, + u8 opcode, u16 ds_cnt, u32 num_bytes, u8 num_dma, + struct mlx5e_tx_wqe_info *wi, struct mlx5_wqe_ctrl_seg *cseg) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi; + wi->num_bytes = num_bytes; + wi->num_dma = num_dma; wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); - sq->pc += wi->num_wqebbs; + wi->skb = skb; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - 
netdev_tx_sent_queue(sq->txq, wi->num_bytes); + netdev_tx_sent_queue(sq->txq, num_bytes); if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM))) { + sq->pc += wi->num_wqebbs; + if (unlikely(!mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM))) { netif_tx_stop_queue(sq->txq); sq->stats.stopped++; } - sq->stats.xmit_more += skb->xmit_more; - if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) { - int bf_sz = 0; + if (!skb->xmit_more || netif_xmit_stopped(sq->txq)) + mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg); - if (bf && test_bit(MLX5E_SQ_STATE_BF_ENABLE, &sq->state)) - bf_sz = wi->num_wqebbs << 3; + /* fill sq edge with nops to avoid wqe wrap around */ + while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { + sq->db.wqe_info[pi].skb = NULL; + mlx5e_post_nop(wq, sq->sqn, &sq->pc); + sq->stats.nop++; + } +} - cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, &wqe->ctrl, bf_sz); +static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + + u16 pi = sq->pc & wq->sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; + + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + + unsigned char *skb_data = skb->data; + unsigned int skb_len = skb->len; + u8 opcode = MLX5_OPCODE_SEND; + unsigned int num_bytes; + int num_dma; + u16 headlen; + u16 ds_cnt; + u16 ihs; + + memset(wqe, 0, sizeof(*wqe)); + + mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); + + if (skb_is_gso(skb)) { + opcode = MLX5_OPCODE_LSO; + ihs = mlx5e_txwqe_build_eseg_gso(sq, skb, eseg, &num_bytes); + sq->stats.packets += skb_shinfo(skb)->gso_segs; + } else { + ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); + num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); + sq->stats.packets++; } + sq->stats.bytes += 
num_bytes; + sq->stats.xmit_more += skb->xmit_more; - /* fill sq edge with nops to avoid wqe wrap around */ - while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { - sq->db.txq.skb[pi] = NULL; - mlx5e_send_nop(sq, false); + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + if (ihs) { + if (skb_vlan_tag_present(skb)) { + mlx5e_insert_vlan(eseg->inline_hdr.start, skb, ihs, &skb_data, &skb_len); + ihs += VLAN_HLEN; + } else { + memcpy(eseg->inline_hdr.start, skb_data, ihs); + mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); + } + eseg->inline_hdr.sz = cpu_to_be16(ihs); + ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start), MLX5_SEND_WQE_DS); + } else if (skb_vlan_tag_present(skb)) { + eseg->insert.type = cpu_to_be16(MLX5_ETH_WQE_INSERT_VLAN); + eseg->insert.vlan_tci = cpu_to_be16(skb_vlan_tag_get(skb)); } - if (bf) - sq->bf_budget--; + headlen = skb_len - skb->data_len; + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen, + (struct mlx5_wqe_data_seg *)cseg + ds_cnt); + if (unlikely(num_dma < 0)) + goto dma_unmap_wqe_err; + + mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma, + num_bytes, num_dma, wi, cseg); return NETDEV_TX_OK; @@ -398,21 +374,21 @@ dma_unmap_wqe_err: netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_sq *sq = priv->txq_to_sq_map[skb_get_queue_mapping(skb)]; + struct mlx5e_txqsq *sq = priv->txq2sq[skb_get_queue_mapping(skb)]; return mlx5e_sq_xmit(sq, skb); } bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) { - struct mlx5e_sq *sq; + struct mlx5e_txqsq *sq; u32 dma_fifo_cc; u32 nbytes; u16 npkts; u16 sqcc; int i; - sq = container_of(cq, struct mlx5e_sq, cq); + sq = container_of(cq, struct mlx5e_txqsq, cq); if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) return false; @@ -450,8 +426,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) last_wqe = (sqcc == wqe_counter); ci = sqcc & sq->wq.sz_m1; - skb = sq->db.txq.skb[ci]; - wi 
= &sq->db.txq.wqe_info[ci]; + wi = &sq->db.wqe_info[ci]; + skb = wi->skb; if (unlikely(!skb)) { /* nop */ sqcc++; @@ -492,7 +468,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) netdev_tx_completed_queue(sq->txq, npkts, nbytes); if (netif_tx_queue_stopped(sq->txq) && - mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM)) { + mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, MLX5E_SQ_STOP_ROOM)) { netif_tx_wake_queue(sq->txq); sq->stats.wake++; } @@ -500,7 +476,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) return (i == MLX5E_TX_CQ_POLL_BUDGET); } -static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq) +void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq) { struct mlx5e_tx_wqe_info *wi; struct sk_buff *skb; @@ -509,8 +485,8 @@ static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq) while (sq->cc != sq->pc) { ci = sq->cc & sq->wq.sz_m1; - skb = sq->db.txq.skb[ci]; - wi = &sq->db.txq.wqe_info[ci]; + wi = &sq->db.wqe_info[ci]; + skb = wi->skb; if (!skb) { /* nop */ sq->cc++; @@ -529,36 +505,89 @@ static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq) } } -static void mlx5e_free_xdp_sq_descs(struct mlx5e_sq *sq) +#ifdef CONFIG_MLX5_CORE_IPOIB + +struct mlx5_wqe_eth_pad { + u8 rsvd0[16]; +}; + +struct mlx5i_tx_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_datagram_seg datagram; + struct mlx5_wqe_eth_pad pad; + struct mlx5_wqe_eth_seg eth; +}; + +static inline void +mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey, + struct mlx5_wqe_datagram_seg *dseg) { - struct mlx5e_sq_wqe_info *wi; - struct mlx5e_dma_info *di; - u16 ci; + memcpy(&dseg->av, av, sizeof(struct mlx5_av)); + dseg->av.dqp_dct = cpu_to_be32(dqpn | MLX5_EXTENDED_UD_AV); + dseg->av.key.qkey.qkey = cpu_to_be32(dqkey); +} - while (sq->cc != sq->pc) { - ci = sq->cc & sq->wq.sz_m1; - di = &sq->db.xdp.di[ci]; - wi = &sq->db.xdp.wqe_info[ci]; +netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_av *av, u32 dqpn, u32 dqkey) +{ 
+ struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = sq->pc & wq->sz_m1; + struct mlx5i_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi]; - if (wi->opcode == MLX5_OPCODE_NOP) { - sq->cc++; - continue; - } + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_datagram_seg *datagram = &wqe->datagram; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; - sq->cc += wi->num_wqebbs; + unsigned char *skb_data = skb->data; + unsigned int skb_len = skb->len; + u8 opcode = MLX5_OPCODE_SEND; + unsigned int num_bytes; + int num_dma; + u16 headlen; + u16 ds_cnt; + u16 ihs; - mlx5e_page_release(&sq->channel->rq, di, false); + memset(wqe, 0, sizeof(*wqe)); + + mlx5i_txwqe_build_datagram(av, dqpn, dqkey, datagram); + + mlx5e_txwqe_build_eseg_csum(sq, skb, eseg); + + if (skb_is_gso(skb)) { + opcode = MLX5_OPCODE_LSO; + ihs = mlx5e_txwqe_build_eseg_gso(sq, skb, eseg, &num_bytes); + } else { + ihs = mlx5e_calc_min_inline(sq->min_inline_mode, skb); + num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN); } -} -void mlx5e_free_sq_descs(struct mlx5e_sq *sq) -{ - switch (sq->type) { - case MLX5E_SQ_TXQ: - mlx5e_free_txq_sq_descs(sq); - break; - case MLX5E_SQ_XDP: - mlx5e_free_xdp_sq_descs(sq); - break; + ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS; + if (ihs) { + memcpy(eseg->inline_hdr.start, skb_data, ihs); + mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs); + eseg->inline_hdr.sz = cpu_to_be16(ihs); + ds_cnt += DIV_ROUND_UP(ihs - sizeof(eseg->inline_hdr.start), MLX5_SEND_WQE_DS); } + + headlen = skb_len - skb->data_len; + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen, + (struct mlx5_wqe_data_seg *)cseg + ds_cnt); + if (unlikely(num_dma < 0)) + goto dma_unmap_wqe_err; + + mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma, + num_bytes, num_dma, wi, cseg); + + return NETDEV_TX_OK; + +dma_unmap_wqe_err: + sq->stats.dropped++; + mlx5e_dma_unmap_wqe_err(sq, wi->num_dma); + + dev_kfree_skb_any(skb); + + return NETDEV_TX_OK; } + 
+#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index e5c12a732aa1..5ca6714e3e02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -37,124 +37,69 @@ struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq) struct mlx5_cqwq *wq = &cq->wq; u32 ci = mlx5_cqwq_get_ci(wq); struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(wq, ci); - int cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK; - int sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1; + u8 cqe_ownership_bit = cqe->op_own & MLX5_CQE_OWNER_MASK; + u8 sw_ownership_val = mlx5_cqwq_get_wrap_cnt(wq) & 1; if (cqe_ownership_bit != sw_ownership_val) return NULL; /* ensure cqe content is read after cqe ownership bit */ - rmb(); + dma_rmb(); return cqe; } -static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) +static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, + struct mlx5e_icosq *sq, + struct mlx5_cqe64 *cqe, + u16 *sqcc) { - struct mlx5e_sq *sq = container_of(cq, struct mlx5e_sq, cq); - struct mlx5_wq_cyc *wq; - struct mlx5_cqe64 *cqe; - u16 sqcc; - - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + struct mlx5_wq_cyc *wq = &sq->wq; + u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; + struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; + struct mlx5e_rq *rq = &sq->channel->rq; + + prefetch(rq); + mlx5_cqwq_pop(&cq->wq); + *sqcc += icowi->num_wqebbs; + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { + WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", + cqe->op_own); return; + } - cqe = mlx5e_get_cqe(cq); - if (likely(!cqe)) + if (likely(icowi->opcode == MLX5_OPCODE_UMR)) { + mlx5e_post_rx_mpwqe(rq); return; + } - wq = &sq->wq; - - /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), - * otherwise a cq overrun may occur - */ - sqcc = sq->cc; - - do { - u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; - struct 
mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; - - mlx5_cqwq_pop(&cq->wq); - sqcc += icowi->num_wqebbs; - - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { - WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", - cqe->op_own); - break; - } - - switch (icowi->opcode) { - case MLX5_OPCODE_NOP: - break; - case MLX5_OPCODE_UMR: - mlx5e_post_rx_mpwqe(&sq->channel->rq); - break; - default: - WARN_ONCE(true, - "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", - icowi->opcode); - } - - } while ((cqe = mlx5e_get_cqe(cq))); - - mlx5_cqwq_update_db_record(&cq->wq); - - /* ensure cq space is freed before enabling more cqes */ - wmb(); - - sq->cc = sqcc; + if (unlikely(icowi->opcode != MLX5_OPCODE_NOP)) + WARN_ONCE(true, + "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", + icowi->opcode); } -static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq) +static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) { - struct mlx5e_sq *sq; + struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); + struct mlx5_cqe64 *cqe; u16 sqcc; - int i; - - sq = container_of(cq, struct mlx5e_sq, cq); if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) - return false; + return; + + cqe = mlx5e_get_cqe(cq); + if (likely(!cqe)) + return; /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), * otherwise a cq overrun may occur */ sqcc = sq->cc; - for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { - struct mlx5_cqe64 *cqe; - u16 wqe_counter; - bool last_wqe; - - cqe = mlx5e_get_cqe(cq); - if (!cqe) - break; - - mlx5_cqwq_pop(&cq->wq); - - wqe_counter = be16_to_cpu(cqe->wqe_counter); - - do { - struct mlx5e_sq_wqe_info *wi; - struct mlx5e_dma_info *di; - u16 ci; - - last_wqe = (sqcc == wqe_counter); - - ci = sqcc & sq->wq.sz_m1; - di = &sq->db.xdp.di[ci]; - wi = &sq->db.xdp.wqe_info[ci]; - - if (unlikely(wi->opcode == MLX5_OPCODE_NOP)) { - sqcc++; - continue; - } - - sqcc += wi->num_wqebbs; - /* Recycle RX page */ - mlx5e_page_release(&sq->channel->rq, di, true); - } while 
(!last_wqe); - } + /* by design, there's only a single cqe */ + mlx5e_poll_ico_single_cqe(cq, sq, cqe, &sqcc); mlx5_cqwq_update_db_record(&cq->wq); @@ -162,7 +107,6 @@ static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq) wmb(); sq->cc = sqcc; - return (i == MLX5E_TX_CQ_POLL_BUDGET); } int mlx5e_napi_poll(struct napi_struct *napi, int budget) @@ -178,12 +122,12 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); + if (c->xdp) + busy |= mlx5e_poll_xdpsq_cq(&c->rq.xdpsq.cq); + work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); busy |= work_done == budget; - if (c->xdp) - busy |= mlx5e_poll_xdp_tx_cq(&c->xdp_sq.cq); - mlx5e_poll_ico_cq(&c->icosq.cq); busy |= mlx5e_post_rx_wqes(&c->rq); @@ -224,8 +168,7 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event) { struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); struct mlx5e_channel *c = cq->channel; - struct mlx5e_priv *priv = c->priv; - struct net_device *netdev = priv->netdev; + struct net_device *netdev = c->netdev; netdev_err(netdev, "%s: cqn=0x%.6x event=0x%.2x\n", __func__, mcq->cqn, event); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index fcd5bc7e31db..2e34d95ea776 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -53,13 +53,6 @@ struct esw_uc_addr { u32 vport; }; -/* E-Switch MC FDB table hash node */ -struct esw_mc_addr { /* SRIOV only */ - struct l2addr_node node; - struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */ - u32 refcnt; -}; - /* Vport UC/MC hash node */ struct vport_addr { struct l2addr_node node; @@ -337,6 +330,7 @@ esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + 
struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb; @@ -362,7 +356,9 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports) memset(flow_group_in, 0, inlen); table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0, 0); + + ft_attr.max_fte = table_size; + fdb = mlx5_create_flow_table(root_ns, &ft_attr); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create FDB Table err %d\n", err); @@ -814,7 +810,7 @@ static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num, bool promisc, bool mc_promisc) { - struct esw_mc_addr *allmulti_addr = esw->mc_promisc; + struct esw_mc_addr *allmulti_addr = &esw->mc_promisc; struct mlx5_vport *vport = &esw->vports[vport_num]; if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc) @@ -1685,7 +1681,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n", esw->enabled_vports, esw->mode); - mc_promisc = esw->mc_promisc; + mc_promisc = &esw->mc_promisc; nvports = esw->enabled_vports; for (i = 0; i < esw->total_vports; i++) @@ -1729,7 +1725,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) { int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); int total_vports = MLX5_TOTAL_VPORTS(dev); - struct esw_mc_addr *mc_promisc; struct mlx5_eswitch *esw; int vport_num; int err; @@ -1758,13 +1753,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) } esw->l2_table.size = l2_table_size; - mc_promisc = kzalloc(sizeof(*mc_promisc), GFP_KERNEL); - if (!mc_promisc) { - err = -ENOMEM; - goto abort; - } - esw->mc_promisc = mc_promisc; - esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { err = -ENOMEM; @@ -1803,6 +1791,11 @@ int 
mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->enabled_vports = 0; esw->mode = SRIOV_NONE; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; + if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) && + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC; + else + esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE; dev->priv.eswitch = esw; return 0; @@ -1827,7 +1820,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); kfree(esw->l2_table.bitmap); - kfree(esw->mc_promisc); kfree(esw->offloads.vport_reps); kfree(esw->vports); kfree(esw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ad329b1680b4..b746f62c8c79 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -36,7 +36,6 @@ #include <linux/if_ether.h> #include <linux/if_link.h> #include <net/devlink.h> -#include <net/ip_tunnels.h> #include <linux/mlx5/device.h> #define MLX5_MAX_UC_PER_VPORT(dev) \ @@ -210,6 +209,14 @@ struct mlx5_esw_offload { DECLARE_HASHTABLE(encap_tbl, 8); u8 inline_mode; u64 num_flows; + u8 encap; +}; + +/* E-Switch MC FDB table hash node */ +struct esw_mc_addr { /* SRIOV only */ + struct l2addr_node node; + struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */ + u32 refcnt; }; struct mlx5_eswitch { @@ -225,7 +232,7 @@ struct mlx5_eswitch { * and async SRIOV admin state changes */ struct mutex state_lock; - struct esw_mc_addr *mc_promisc; + struct esw_mc_addr mc_promisc; struct { bool enabled; @@ -285,20 +292,8 @@ enum { SET_VLAN_INSERT = BIT(1) }; -#define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x40 -#define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x80 - -struct mlx5_encap_entry { - struct hlist_node encap_hlist; - struct list_head flows; - u32 encap_id; - struct neighbour *n; - struct ip_tunnel_info tun_info; - unsigned char 
h_dest[ETH_ALEN]; /* destination eth addr */ - - struct net_device *out_dev; - int tunnel_type; -}; +#define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x4000 +#define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x8000 struct mlx5_esw_flow_attr { struct mlx5_eswitch_rep *in_rep; @@ -307,7 +302,9 @@ struct mlx5_esw_flow_attr { int action; u16 vlan; bool vlan_handled; - struct mlx5_encap_entry *encap; + u32 encap_id; + u32 mod_hdr_id; + struct mlx5e_tc_flow_parse_attr *parse_attr; }; int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, @@ -321,6 +318,8 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode); int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode); int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode); +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap); +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap); void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, int vport_index, struct mlx5_eswitch_rep *rep); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index d111cebca9f1..f991f669047e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -68,8 +68,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(esw->dev, true); - if (IS_ERR(counter)) - return ERR_CAST(counter); + if (IS_ERR(counter)) { + rule = ERR_CAST(counter); + goto err_counter_alloc; + } dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[i].counter = counter; i++; @@ -86,17 +88,25 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) spec->match_criteria_enable |= 
MLX5_MATCH_INNER_HEADERS; - if (attr->encap) - flow_act.encap_id = attr->encap->encap_id; + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) + flow_act.modify_id = attr->mod_hdr_id; + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + flow_act.encap_id = attr->encap_id; rule = mlx5_add_flow_rules((struct mlx5_flow_table *)esw->fdb_table.fdb, spec, &flow_act, dest, i); if (IS_ERR(rule)) - mlx5_fc_destroy(esw->dev, counter); + goto err_add_rule; else esw->offloads.num_flows++; return rule; + +err_add_rule: + mlx5_fc_destroy(esw->dev, counter); +err_counter_alloc: + return rule; } void @@ -106,12 +116,10 @@ mlx5_eswitch_del_offloaded_rule(struct mlx5_eswitch *esw, { struct mlx5_fc *counter = NULL; - if (!IS_ERR(rule)) { - counter = mlx5_flow_rule_counter(rule); - mlx5_del_flow_rules(rule); - mlx5_fc_destroy(esw->dev, counter); - esw->offloads.num_flows--; - } + counter = mlx5_flow_rule_counter(rule); + mlx5_del_flow_rules(rule); + mlx5_fc_destroy(esw->dev, counter); + esw->offloads.num_flows--; } static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) @@ -418,30 +426,21 @@ out: return err; } -#define MAX_PF_SQ 256 #define ESW_OFFLOADS_NUM_GROUPS 4 -static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) +static int esw_create_offloads_fast_fdb_table(struct mlx5_eswitch *esw) { - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - int table_size, ix, esw_size, err = 0; struct mlx5_core_dev *dev = esw->dev; struct mlx5_flow_namespace *root_ns; struct mlx5_flow_table *fdb = NULL; - struct mlx5_flow_group *g; - u32 *flow_group_in; - void *match_criteria; + int esw_size, err = 0; u32 flags = 0; - flow_group_in = mlx5_vzalloc(inlen); - if (!flow_group_in) - return -ENOMEM; - root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); if (!root_ns) { esw_warn(dev, "Failed to get FDB flow namespace\n"); err = -EOPNOTSUPP; - goto ns_err; + goto out; } esw_debug(dev, "Create offloads FDB table, min (max esw size(2^%d), max 
counters(%d)*groups(%d))\n", @@ -451,8 +450,7 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_size = min_t(int, MLX5_CAP_GEN(dev, max_flow_counter) * ESW_OFFLOADS_NUM_GROUPS, 1 << MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) && - MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) + if (esw->offloads.encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE) flags |= MLX5_FLOW_TABLE_TUNNEL_EN; fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH, @@ -462,12 +460,55 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err); - goto fast_fdb_err; + goto out; } esw->fdb_table.fdb = fdb; +out: + return err; +} + +static void esw_destroy_offloads_fast_fdb_table(struct mlx5_eswitch *esw) +{ + mlx5_destroy_flow_table(esw->fdb_table.fdb); +} + +#define MAX_PF_SQ 256 + +static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb = NULL; + int table_size, ix, err = 0; + struct mlx5_flow_group *g; + void *match_criteria; + u32 *flow_group_in; + + esw_debug(esw->dev, "Create offloads FDB Tables\n"); + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return -ENOMEM; + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + err = -EOPNOTSUPP; + goto ns_err; + } + + err = esw_create_offloads_fast_fdb_table(esw); + if (err) + goto fast_fdb_err; + table_size = nvports + MAX_PF_SQ + 1; - fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0, 0); + + ft_attr.max_fte = table_size; + ft_attr.prio = FDB_SLOW_PATH; + + fdb = 
mlx5_create_flow_table(root_ns, &ft_attr); if (IS_ERR(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err); @@ -532,25 +573,26 @@ ns_err: return err; } -static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) +static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) { if (!esw->fdb_table.fdb) return; - esw_debug(esw->dev, "Destroy offloads FDB Table\n"); + esw_debug(esw->dev, "Destroy offloads FDB Tables\n"); mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); mlx5_destroy_flow_table(esw->fdb_table.offloads.fdb); - mlx5_destroy_flow_table(esw->fdb_table.fdb); + esw_destroy_offloads_fast_fdb_table(esw); } static int esw_create_offloads_table(struct mlx5_eswitch *esw) { - struct mlx5_flow_namespace *ns; - struct mlx5_flow_table *ft_offloads; + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_table *ft_offloads; + struct mlx5_flow_namespace *ns; int err = 0; ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); @@ -559,7 +601,9 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw) return -EOPNOTSUPP; } - ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0, 0); + ft_attr.max_fte = dev->priv.sriov.num_vfs + 2; + + ft_offloads = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft_offloads)) { err = PTR_ERR(ft_offloads); esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err); @@ -700,7 +744,7 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) mlx5_remove_dev_by_protocol(esw->dev, MLX5_INTERFACE_PROTOCOL_IB); mlx5_dev_list_unlock(); - err = esw_create_offloads_fdb_table(esw, nvports); + err = esw_create_offloads_fdb_tables(esw, nvports); if (err) goto create_fdb_err; @@ -737,7 +781,7 @@ create_fg_err: esw_destroy_offloads_table(esw); create_ft_err: - 
esw_destroy_offloads_fdb_table(esw); + esw_destroy_offloads_fdb_tables(esw); create_fdb_err: /* enable back PF RoCE */ @@ -783,7 +827,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) esw_destroy_vport_rx_group(esw); esw_destroy_offloads_table(esw); - esw_destroy_offloads_fdb_table(esw); + esw_destroy_offloads_fdb_tables(esw); } static int esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) @@ -1012,6 +1056,66 @@ out: return 0; } +int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, u8 encap) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + int err; + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + if (esw->mode == SRIOV_NONE) + return -EOPNOTSUPP; + + if (encap != DEVLINK_ESWITCH_ENCAP_MODE_NONE && + (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) || + !MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))) + return -EOPNOTSUPP; + + if (encap && encap != DEVLINK_ESWITCH_ENCAP_MODE_BASIC) + return -EOPNOTSUPP; + + if (esw->mode == SRIOV_LEGACY) { + esw->offloads.encap = encap; + return 0; + } + + if (esw->offloads.encap == encap) + return 0; + + if (esw->offloads.num_flows > 0) { + esw_warn(dev, "Can't set encapsulation when flows are configured\n"); + return -EOPNOTSUPP; + } + + esw_destroy_offloads_fast_fdb_table(esw); + + esw->offloads.encap = encap; + err = esw_create_offloads_fast_fdb_table(esw); + if (err) { + esw_warn(esw->dev, "Failed re-creating fast FDB table, err %d\n", err); + esw->offloads.encap = !encap; + (void) esw_create_offloads_fast_fdb_table(esw); + } + return err; +} + +int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, u8 *encap) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + if (esw->mode == SRIOV_NONE) + return -EOPNOTSUPP; + + *encap = esw->offloads.encap; + return 0; +} + void 
mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, int vport_index, struct mlx5_eswitch_rep *__rep) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index b64a781c7e85..19e3d2fc2099 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -45,6 +45,10 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0}; u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0}; + if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && + ft->underlay_qpn == 0) + return 0; + MLX5_SET(set_flow_table_root_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); @@ -54,6 +58,10 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, MLX5_SET(set_flow_table_root_in, in, other_vport, 1); } + if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && + ft->underlay_qpn != 0) + MLX5_SET(set_flow_table_root_in, in, underlay_qpn, ft->underlay_qpn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } @@ -249,6 +257,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag); MLX5_SET(flow_context, in_flow_context, action, fte->action); MLX5_SET(flow_context, in_flow_context, encap_id, fte->encap_id); + MLX5_SET(flow_context, in_flow_context, modify_header_id, fte->modify_id); in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, match_value); memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param)); @@ -515,3 +524,69 @@ void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id) mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } + +int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, + u8 namespace, u8 num_actions, + void *modify_actions, u32 *modify_header_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)]; + int 
max_actions, actions_size, inlen, err; + void *actions_in; + u8 table_type; + u32 *in; + + switch (namespace) { + case MLX5_FLOW_NAMESPACE_FDB: + max_actions = MLX5_CAP_ESW_FLOWTABLE_FDB(dev, max_modify_header_actions); + table_type = FS_FT_FDB; + break; + case MLX5_FLOW_NAMESPACE_KERNEL: + max_actions = MLX5_CAP_FLOWTABLE_NIC_RX(dev, max_modify_header_actions); + table_type = FS_FT_NIC_RX; + break; + default: + return -EOPNOTSUPP; + } + + if (num_actions > max_actions) { + mlx5_core_warn(dev, "too many modify header actions %d, max supported %d\n", + num_actions, max_actions); + return -EOPNOTSUPP; + } + + actions_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) * num_actions; + inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) + actions_size; + + in = kzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(alloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(alloc_modify_header_context_in, in, table_type, table_type); + MLX5_SET(alloc_modify_header_context_in, in, num_of_actions, num_actions); + + actions_in = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions); + memcpy(actions_in, modify_actions, actions_size); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + + *modify_header_id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id); + kfree(in); + return err; +} + +void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(dealloc_modify_header_context_in, in, opcode, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT); + MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id, + modify_header_id); + + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c 
b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index ded27bb9a3b6..b8a176503d38 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -476,6 +476,7 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act, fte->index = index; fte->action = flow_act->action; fte->encap_id = flow_act->encap_id; + fte->modify_id = flow_act->modify_id; return fte; } @@ -777,18 +778,16 @@ static void list_add_flow_table(struct mlx5_flow_table *ft, } static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + struct mlx5_flow_table_attr *ft_attr, enum fs_flow_table_op_mod op_mod, - u16 vport, int prio, - int max_fte, u32 level, - u32 flags) + u16 vport) { + struct mlx5_flow_root_namespace *root = find_root(&ns->node); struct mlx5_flow_table *next_ft = NULL; + struct fs_prio *fs_prio = NULL; struct mlx5_flow_table *ft; - int err; int log_table_sz; - struct mlx5_flow_root_namespace *root = - find_root(&ns->node); - struct fs_prio *fs_prio = NULL; + int err; if (!root) { pr_err("mlx5: flow steering failed to find root of namespace\n"); @@ -796,29 +795,31 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa } mutex_lock(&root->chain_lock); - fs_prio = find_prio(ns, prio); + fs_prio = find_prio(ns, ft_attr->prio); if (!fs_prio) { err = -EINVAL; goto unlock_root; } - if (level >= fs_prio->num_levels) { + if (ft_attr->level >= fs_prio->num_levels) { err = -ENOSPC; goto unlock_root; } /* The level is related to the * priority level range. */ - level += fs_prio->start_level; - ft = alloc_flow_table(level, + ft_attr->level += fs_prio->start_level; + ft = alloc_flow_table(ft_attr->level, vport, - max_fte ? roundup_pow_of_two(max_fte) : 0, + ft_attr->max_fte ? 
roundup_pow_of_two(ft_attr->max_fte) : 0, root->table_type, - op_mod, flags); + op_mod, ft_attr->flags); if (!ft) { err = -ENOMEM; goto unlock_root; } + ft->underlay_qpn = ft_attr->underlay_qpn; + tree_init_node(&ft->node, 1, del_flow_table); log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); @@ -848,44 +849,56 @@ unlock_root: } struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, - int prio, int max_fte, - u32 level, - u32 flags) + struct mlx5_flow_table_attr *ft_attr) { - return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, 0, prio, - max_fte, level, flags); + return __mlx5_create_flow_table(ns, ft_attr, FS_FT_OP_MOD_NORMAL, 0); } struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, int prio, int max_fte, u32 level, u16 vport) { - return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, vport, prio, - max_fte, level, 0); + struct mlx5_flow_table_attr ft_attr = {}; + + ft_attr.max_fte = max_fte; + ft_attr.level = level; + ft_attr.prio = prio; + + return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_NORMAL, 0); } -struct mlx5_flow_table *mlx5_create_lag_demux_flow_table( - struct mlx5_flow_namespace *ns, - int prio, u32 level) +struct mlx5_flow_table* +mlx5_create_lag_demux_flow_table(struct mlx5_flow_namespace *ns, + int prio, u32 level) { - return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_LAG_DEMUX, 0, prio, 0, - level, 0); + struct mlx5_flow_table_attr ft_attr = {}; + + ft_attr.level = level; + ft_attr.prio = prio; + return __mlx5_create_flow_table(ns, &ft_attr, FS_FT_OP_MOD_LAG_DEMUX, 0); } EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table); -struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, - int prio, - int num_flow_table_entries, - int max_num_groups, - u32 level, - u32 flags) +struct mlx5_flow_table* +mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, + int prio, + int num_flow_table_entries, 
+ int max_num_groups, + u32 level, + u32 flags) { + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_table *ft; if (max_num_groups > num_flow_table_entries) return ERR_PTR(-EINVAL); - ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries, level, flags); + ft_attr.max_fte = num_flow_table_entries; + ft_attr.prio = prio; + ft_attr.level = level; + ft_attr.flags = flags; + + ft = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft)) return ft; @@ -1827,12 +1840,18 @@ static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) static int create_anchor_flow_table(struct mlx5_flow_steering *steering) { struct mlx5_flow_namespace *ns = NULL; + struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_table *ft; ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR); if (WARN_ON(!ns)) return -EINVAL; - ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL, 0); + + ft_attr.max_fte = ANCHOR_SIZE; + ft_attr.level = ANCHOR_LEVEL; + ft_attr.prio = ANCHOR_PRIO; + + ft = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(ft)) { mlx5_core_err(steering->dev, "Failed to create last anchor flow table"); return PTR_ERR(ft); @@ -1886,9 +1905,6 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering = dev->priv.steering; - if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) - return; - cleanup_root_ns(steering->root_ns); cleanup_root_ns(steering->esw_egress_root_ns); cleanup_root_ns(steering->esw_ingress_root_ns); @@ -1991,9 +2007,6 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) struct mlx5_flow_steering *steering; int err = 0; - if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) - return 0; - err = mlx5_init_fc_stats(dev); if (err) return err; @@ -2004,7 +2017,10 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) steering->dev = dev; dev->priv.steering = steering; - if (MLX5_CAP_GEN(dev, nic_flow_table) && + if ((((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && + 
(MLX5_CAP_GEN(dev, nic_flow_table))) || + ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && + MLX5_CAP_GEN(dev, ipoib_enhanced_offloads))) && MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) { err = init_root_ns(steering); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 8e668c63f69e..81eafc7b9dd9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -118,6 +118,7 @@ struct mlx5_flow_table { /* FWD rules that point on this flow table */ struct list_head fwd_rules; u32 flags; + u32 underlay_qpn; }; struct mlx5_fc_cache { @@ -152,6 +153,7 @@ struct fs_fte { u32 index; u32 action; u32 encap_id; + u32 modify_id; enum fs_fte_status status; struct mlx5_fc *counter; }; @@ -197,6 +199,11 @@ struct mlx5_flow_root_namespace { int mlx5_init_fc_stats(struct mlx5_core_dev *dev); void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev); +void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, + struct delayed_work *dwork, + unsigned long delay); +void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, + unsigned long interval); int mlx5_init_fs(struct mlx5_core_dev *dev); void mlx5_cleanup_fs(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 7431f633de31..6507d8acc54d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -165,7 +165,8 @@ static void mlx5_fc_stats_work(struct work_struct *work) list_splice_tail_init(&fc_stats->addlist, &tmplist); if (!list_empty(&tmplist) || !RB_EMPTY_ROOT(&fc_stats->counters)) - queue_delayed_work(fc_stats->wq, &fc_stats->work, MLX5_FC_STATS_PERIOD); + queue_delayed_work(fc_stats->wq, &fc_stats->work, + fc_stats->sampling_interval); spin_unlock(&fc_stats->addlist_lock); @@ -200,7 +201,7 @@ static void 
mlx5_fc_stats_work(struct work_struct *work) node = mlx5_fc_stats_query(dev, counter, last->id); } - fc_stats->next_query = now + MLX5_FC_STATS_PERIOD; + fc_stats->next_query = now + fc_stats->sampling_interval; } struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) @@ -265,6 +266,7 @@ int mlx5_init_fc_stats(struct mlx5_core_dev *dev) if (!fc_stats->wq) return -ENOMEM; + fc_stats->sampling_interval = MLX5_FC_STATS_PERIOD; INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work); return 0; @@ -317,3 +319,21 @@ void mlx5_fc_query_cached(struct mlx5_fc *counter, counter->lastbytes = c.bytes; counter->lastpackets = c.packets; } + +void mlx5_fc_queue_stats_work(struct mlx5_core_dev *dev, + struct delayed_work *dwork, + unsigned long delay) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + queue_delayed_work(fc_stats->wq, dwork, delay); +} + +void mlx5_fc_update_sampling_interval(struct mlx5_core_dev *dev, + unsigned long interval) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + fc_stats->sampling_interval = min_t(unsigned long, interval, + fc_stats->sampling_interval); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index d0bbefa08af7..1bc14d0fded8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -137,7 +137,8 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } - if (MLX5_CAP_GEN(dev, nic_flow_table)) { + if (MLX5_CAP_GEN(dev, nic_flow_table) || + MLX5_CAP_GEN(dev, ipoib_enhanced_offloads)) { err = mlx5_core_get_caps(dev, MLX5_CAP_FLOW_TABLE); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c new file mode 100644 index 000000000000..3c84e36af018 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c @@ -0,0 +1,498 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include <linux/mlx5/fs.h> +#include "en.h" +#include "ipoib.h" + +#define IB_DEFAULT_Q_KEY 0xb1b + +static int mlx5i_open(struct net_device *netdev); +static int mlx5i_close(struct net_device *netdev); +static int mlx5i_dev_init(struct net_device *dev); +static void mlx5i_dev_cleanup(struct net_device *dev); + +static const struct net_device_ops mlx5i_netdev_ops = { + .ndo_open = mlx5i_open, + .ndo_stop = mlx5i_close, + .ndo_init = mlx5i_dev_init, + .ndo_uninit = mlx5i_dev_cleanup, +}; + +/* IPoIB mlx5 netdev profile */ + +/* Called directly after IPoIB netdevice was created to initialize SW structs */ +static void mlx5i_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + priv->mdev = mdev; + priv->netdev = netdev; + priv->profile = profile; + priv->ppriv = ppriv; + + mlx5e_build_nic_params(mdev, &priv->channels.params, profile->max_nch(mdev)); + + mutex_init(&priv->state_lock); + + netdev->hw_features |= NETIF_F_SG; + netdev->hw_features |= NETIF_F_IP_CSUM; + netdev->hw_features |= NETIF_F_IPV6_CSUM; + netdev->hw_features |= NETIF_F_GRO; + netdev->hw_features |= NETIF_F_TSO; + netdev->hw_features |= NETIF_F_TSO6; + netdev->hw_features |= NETIF_F_RXCSUM; + netdev->hw_features |= NETIF_F_RXHASH; + + netdev->netdev_ops = &mlx5i_netdev_ops; +} + +/* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */ +static void mlx5i_cleanup(struct mlx5e_priv *priv) +{ + /* Do nothing .. 
*/ +} + +#define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2 + +static int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) +{ + struct mlx5_qp_context *context = NULL; + u32 *in = NULL; + void *addr_path; + int ret = 0; + int inlen; + void *qpc; + + inlen = MLX5_ST_SZ_BYTES(create_qp_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); + MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, + MLX5_QP_ENHANCED_ULP_STATELESS_MODE); + + addr_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path); + MLX5_SET(ads, addr_path, port, 1); + MLX5_SET(ads, addr_path, grh, 1); + + ret = mlx5_core_create_qp(mdev, qp, in, inlen); + if (ret) { + mlx5_core_err(mdev, "Failed creating IPoIB QP err : %d\n", ret); + goto out; + } + + /* QP states */ + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) { + ret = -ENOMEM; + goto out; + } + + context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11); + context->pri_path.port = 1; + context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY); + + ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp); + if (ret) { + mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret); + goto out; + } + memset(context, 0, sizeof(*context)); + + ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp); + if (ret) { + mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret); + goto out; + } + + ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp); + if (ret) { + mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret); + goto out; + } + +out: + kfree(context); + kvfree(in); + return ret; +} + +static void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) +{ + mlx5_core_destroy_qp(mdev, qp); +} + +static int mlx5i_init_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = 
priv->ppriv; + int err; + + err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp); + if (err) { + mlx5_core_warn(priv->mdev, "create underlay QP failed, %d\n", err); + return err; + } + + err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]); + if (err) { + mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); + return err; + } + + return 0; +} + +static void mlx5i_cleanup_tx(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + + mlx5e_destroy_tis(priv->mdev, priv->tisn[0]); + mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp); +} + +static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) +{ + struct mlx5i_priv *ipriv = priv->ppriv; + int err; + + priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + + if (!priv->fs.ns) + return -EINVAL; + + err = mlx5e_arfs_create_tables(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n", + err); + priv->netdev->hw_features &= ~NETIF_F_NTUPLE; + } + + err = mlx5e_create_ttc_table(priv, ipriv->qp.qpn); + if (err) { + netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", + err); + goto err_destroy_arfs_tables; + } + + return 0; + +err_destroy_arfs_tables: + mlx5e_arfs_destroy_tables(priv); + + return err; +} + +static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) +{ + mlx5e_destroy_ttc_table(priv); + mlx5e_arfs_destroy_tables(priv); +} + +static int mlx5i_init_rx(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_create_indirect_rqt(priv); + if (err) + return err; + + err = mlx5e_create_direct_rqts(priv); + if (err) + goto err_destroy_indirect_rqts; + + err = mlx5e_create_indirect_tirs(priv); + if (err) + goto err_destroy_direct_rqts; + + err = mlx5e_create_direct_tirs(priv); + if (err) + goto err_destroy_indirect_tirs; + + err = mlx5i_create_flow_steering(priv); + if (err) + goto err_destroy_direct_tirs; + + return 0; + +err_destroy_direct_tirs: + mlx5e_destroy_direct_tirs(priv); 
+err_destroy_indirect_tirs: + mlx5e_destroy_indirect_tirs(priv); +err_destroy_direct_rqts: + mlx5e_destroy_direct_rqts(priv); +err_destroy_indirect_rqts: + mlx5e_destroy_rqt(priv, &priv->indir_rqt); + return err; +} + +static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) +{ + mlx5i_destroy_flow_steering(priv); + mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_indirect_tirs(priv); + mlx5e_destroy_direct_rqts(priv); + mlx5e_destroy_rqt(priv, &priv->indir_rqt); +} + +static const struct mlx5e_profile mlx5i_nic_profile = { + .init = mlx5i_init, + .cleanup = mlx5i_cleanup, + .init_tx = mlx5i_init_tx, + .cleanup_tx = mlx5i_cleanup_tx, + .init_rx = mlx5i_init_rx, + .cleanup_rx = mlx5i_cleanup_rx, + .enable = NULL, /* mlx5i_enable */ + .disable = NULL, /* mlx5i_disable */ + .update_stats = NULL, /* mlx5i_update_stats */ + .max_nch = mlx5e_get_max_num_channels, + .rx_handlers.handle_rx_cqe = mlx5i_handle_rx_cqe, + .rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */ + .max_tc = MLX5I_MAX_NUM_TC, +}; + +/* mlx5i netdev NDos */ + +static int mlx5i_dev_init(struct net_device *dev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5i_priv *ipriv = priv->ppriv; + + /* Set dev address using underlay QP */ + dev->dev_addr[1] = (ipriv->qp.qpn >> 16) & 0xff; + dev->dev_addr[2] = (ipriv->qp.qpn >> 8) & 0xff; + dev->dev_addr[3] = (ipriv->qp.qpn) & 0xff; + + return 0; +} + +static void mlx5i_dev_cleanup(struct net_device *dev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5i_priv *ipriv = priv->ppriv; + struct mlx5_qp_context context; + + /* detach qp from flow-steering by reset it */ + mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context, &ipriv->qp); +} + +static int mlx5i_open(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + int err; + + mutex_lock(&priv->state_lock); + + set_bit(MLX5E_STATE_OPENED, &priv->state); + + err = mlx5e_open_channels(priv, &priv->channels); 
+ if (err) + goto err_clear_state_opened_flag; + + mlx5e_refresh_tirs(priv, false); + mlx5e_activate_priv_channels(priv); + mutex_unlock(&priv->state_lock); + return 0; + +err_clear_state_opened_flag: + clear_bit(MLX5E_STATE_OPENED, &priv->state); + mutex_unlock(&priv->state_lock); + return err; +} + +static int mlx5i_close(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + + /* May already be CLOSED in case a previous configuration operation + * (e.g RX/TX queue size change) that involves close&open failed. + */ + mutex_lock(&priv->state_lock); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + goto unlock; + + clear_bit(MLX5E_STATE_OPENED, &priv->state); + + netif_carrier_off(priv->netdev); + mlx5e_deactivate_priv_channels(priv); + mlx5e_close_channels(&priv->channels); +unlock: + mutex_unlock(&priv->state_lock); + return 0; +} + +#ifdef notusedyet +/* IPoIB RDMA netdev callbacks */ +static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca, + union ib_gid *gid, u16 lid, int set_qkey) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = epriv->mdev; + struct mlx5i_priv *ipriv = epriv->ppriv; + int err; + + mlx5_core_dbg(mdev, "attaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw); + err = mlx5_core_attach_mcg(mdev, gid, ipriv->qp.qpn); + if (err) + mlx5_core_warn(mdev, "failed attaching QPN 0x%x, MGID %pI6\n", + ipriv->qp.qpn, gid->raw); + + return err; +} + +static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca, + union ib_gid *gid, u16 lid) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = epriv->mdev; + struct mlx5i_priv *ipriv = epriv->ppriv; + int err; + + mlx5_core_dbg(mdev, "detaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw); + + err = mlx5_core_detach_mcg(mdev, gid, ipriv->qp.qpn); + if (err) + mlx5_core_dbg(mdev, "failed dettaching QPN 0x%x, MGID %pI6\n", + ipriv->qp.qpn, gid->raw); + + return err; 
+} + +static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb, + struct ib_ah *address, u32 dqpn, u32 dqkey) +{ + struct mlx5e_priv *epriv = mlx5i_epriv(dev); + struct mlx5e_txqsq *sq = epriv->txq2sq[skb_get_queue_mapping(skb)]; + struct mlx5_ib_ah *mah = to_mah(address); + + return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, dqkey); +} +#endif + +static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev) +{ + if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_IB) + return -EOPNOTSUPP; + + if (!MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads)) { + mlx5_core_warn(mdev, "IPoIB enhanced offloads are not supported\n"); + return -ENOTSUPP; + } + + return 0; +} + +static struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, + struct ib_device *ibdev, + const char *name, + void (*setup)(struct net_device *)) +{ + const struct mlx5e_profile *profile = &mlx5i_nic_profile; + int nch = profile->max_nch(mdev); + struct net_device *netdev; + struct mlx5i_priv *ipriv; + struct mlx5e_priv *epriv; + int err; + + if (mlx5i_check_required_hca_cap(mdev)) { + mlx5_core_warn(mdev, "Accelerated mode is not supported\n"); + return ERR_PTR(-EOPNOTSUPP); + } + + /* This function should only be called once per mdev */ + err = mlx5e_create_mdev_resources(mdev); + if (err) + return NULL; + + netdev = alloc_netdev_mqs(sizeof(struct mlx5i_priv) + sizeof(struct mlx5e_priv), + name, NET_NAME_UNKNOWN, + setup, + nch * MLX5E_MAX_NUM_TC, + nch); + if (!netdev) { + mlx5_core_warn(mdev, "alloc_netdev_mqs failed\n"); + goto free_mdev_resources; + } + + ipriv = netdev_priv(netdev); + epriv = mlx5i_epriv(netdev); + + epriv->wq = create_singlethread_workqueue("mlx5i"); + if (!epriv->wq) + goto err_free_netdev; + + profile->init(mdev, netdev, profile, ipriv); + + mlx5e_attach_netdev(epriv); + netif_carrier_off(netdev); + + /* TODO: set rdma_netdev func pointers + * rn = &ipriv->rn; + * rn->hca = ibdev; + * rn->send = mlx5i_xmit; + * rn->attach_mcast = mlx5i_attach_mcast; + * 
rn->detach_mcast = mlx5i_detach_mcast; + */ + return netdev; + +err_free_netdev: + free_netdev(netdev); +free_mdev_resources: + mlx5e_destroy_mdev_resources(mdev); + + return NULL; +} +EXPORT_SYMBOL(mlx5_rdma_netdev_alloc); + +static void mlx5_rdma_netdev_free(struct net_device *netdev) +{ + struct mlx5e_priv *priv = mlx5i_epriv(netdev); + const struct mlx5e_profile *profile = priv->profile; + + mlx5e_detach_netdev(priv); + profile->cleanup(priv); + destroy_workqueue(priv->wq); + free_netdev(netdev); + + mlx5e_destroy_mdev_resources(priv->mdev); +} +EXPORT_SYMBOL(mlx5_rdma_netdev_free); + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h new file mode 100644 index 000000000000..bae0a5cbc8ad --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2017, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __MLX5E_IPOB_H__ +#define __MLX5E_IPOB_H__ + +#include <linux/mlx5/fs.h> +#include "en.h" + +#define MLX5I_MAX_NUM_TC 1 + +/* ipoib rdma netdev's private data structure */ +struct mlx5i_priv { + struct mlx5_core_qp qp; + char *mlx5e_priv[0]; +}; + +/* Extract mlx5e_priv from IPoIB netdev */ +#define mlx5i_epriv(netdev) ((void *)(((struct mlx5i_priv *)netdev_priv(netdev))->mlx5e_priv)) + +netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5_av *av, u32 dqpn, u32 dqkey); +void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); + +#endif /* __MLX5E_IPOB_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 0ad66324247f..0c123d571b4c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1280,6 +1280,8 @@ static const struct devlink_ops mlx5_devlink_ops = { .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, .eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set, .eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get, + .eswitch_encap_mode_set = mlx5_devlink_eswitch_encap_mode_set, + .eswitch_encap_mode_get = mlx5_devlink_eswitch_encap_mode_get, #endif }; @@ -1514,8 +1516,10 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5, PCIe 3.0 */ { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 VF */ - { PCI_VDEVICE(MELLANOX, 0x1019) }, /* ConnectX-5, PCIe 4.0 */ - { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5, PCIe 4.0 VF */ + 
{ PCI_VDEVICE(MELLANOX, 0x1019) }, /* ConnectX-5 Ex */ + { PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 Ex VF */ + { PCI_VDEVICE(MELLANOX, 0x101b) }, /* ConnectX-6 */ + { PCI_VDEVICE(MELLANOX, 0x101c), MLX5_PCI_DEV_IS_VF}, /* ConnectX-6 VF */ { 0, } }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index b3dabe6e8836..fbc6e9e9e305 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -141,6 +141,11 @@ int mlx5_encap_alloc(struct mlx5_core_dev *dev, u32 *encap_id); void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id); +int mlx5_modify_header_alloc(struct mlx5_core_dev *dev, + u8 namespace, u8 num_actions, + void *modify_actions, u32 *modify_header_id); +void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev, u32 modify_header_id); + bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv); int mlx5_query_mtpps(struct mlx5_core_dev *dev, u32 *mtpps, u32 mtpps_size); diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 6b6c30deee83..2fb8c6585ac7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -15,7 +15,8 @@ obj-$(CONFIG_MLXSW_SPECTRUM) += mlxsw_spectrum.o mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_switchdev.o spectrum_router.o \ spectrum_kvdl.o spectrum_acl_tcam.o \ - spectrum_acl.o spectrum_flower.o + spectrum_acl.o spectrum_flower.o \ + spectrum_cnt.o spectrum_dpipe.o mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o obj-$(CONFIG_MLXSW_MINIMAL) += mlxsw_minimal.o mlxsw_minimal-objs := minimal.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h index a1b48421648a..479511cf79bc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h +++ 
b/drivers/net/ethernet/mellanox/mlxsw/cmd.h @@ -1043,13 +1043,6 @@ MLXSW_ITEM32(cmd_mbox, sw2hw_cq, cv, 0x00, 28, 4); */ MLXSW_ITEM32(cmd_mbox, sw2hw_cq, c_eqn, 0x00, 24, 1); -/* cmd_mbox_sw2hw_cq_oi - * When set, overrun ignore is enabled. When set, updates of - * CQ consumer counter (poll for completion) or Request completion - * notifications (Arm CQ) DoorBells should not be rung on that CQ. - */ -MLXSW_ITEM32(cmd_mbox, sw2hw_cq, oi, 0x00, 12, 1); - /* cmd_mbox_sw2hw_cq_st * Event delivery state machine * 0x0 - FIRED @@ -1132,11 +1125,6 @@ static inline int mlxsw_cmd_sw2hw_eq(struct mlxsw_core *mlxsw_core, */ MLXSW_ITEM32(cmd_mbox, sw2hw_eq, int_msix, 0x00, 24, 1); -/* cmd_mbox_sw2hw_eq_oi - * When set, overrun ignore is enabled. - */ -MLXSW_ITEM32(cmd_mbox, sw2hw_eq, oi, 0x00, 12, 1); - /* cmd_mbox_sw2hw_eq_st * Event delivery state machine * 0x0 - FIRED diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index a4c07841aaf6..affe84eb4bff 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -40,9 +40,6 @@ #include <linux/export.h> #include <linux/err.h> #include <linux/if_link.h> -#include <linux/debugfs.h> -#include <linux/seq_file.h> -#include <linux/u64_stats_sync.h> #include <linux/netdevice.h> #include <linux/completion.h> #include <linux/skbuff.h> @@ -74,23 +71,9 @@ static DEFINE_SPINLOCK(mlxsw_core_driver_list_lock); static const char mlxsw_core_driver_name[] = "mlxsw_core"; -static struct dentry *mlxsw_core_dbg_root; - static struct workqueue_struct *mlxsw_wq; static struct workqueue_struct *mlxsw_owq; -struct mlxsw_core_pcpu_stats { - u64 trap_rx_packets[MLXSW_TRAP_ID_MAX]; - u64 trap_rx_bytes[MLXSW_TRAP_ID_MAX]; - u64 port_rx_packets[MLXSW_PORT_MAX_PORTS]; - u64 port_rx_bytes[MLXSW_PORT_MAX_PORTS]; - struct u64_stats_sync syncp; - u32 trap_rx_dropped[MLXSW_TRAP_ID_MAX]; - u32 port_rx_dropped[MLXSW_PORT_MAX_PORTS]; - u32 trap_rx_invalid; - u32 
port_rx_invalid; -}; - struct mlxsw_core_port { struct devlink_port devlink_port; void *port_driver_priv; @@ -121,23 +104,48 @@ struct mlxsw_core { spinlock_t trans_list_lock; /* protects trans_list writes */ bool use_emad; } emad; - struct mlxsw_core_pcpu_stats __percpu *pcpu_stats; - struct dentry *dbg_dir; - struct { - struct debugfs_blob_wrapper vsd_blob; - struct debugfs_blob_wrapper psid_blob; - } dbg; struct { u8 *mapping; /* lag_id+port_index to local_port mapping */ } lag; struct mlxsw_res res; struct mlxsw_hwmon *hwmon; struct mlxsw_thermal *thermal; - struct mlxsw_core_port ports[MLXSW_PORT_MAX_PORTS]; + struct mlxsw_core_port *ports; + unsigned int max_ports; unsigned long driver_priv[0]; /* driver_priv has to be always the last item */ }; +#define MLXSW_PORT_MAX_PORTS_DEFAULT 0x40 + +static int mlxsw_ports_init(struct mlxsw_core *mlxsw_core) +{ + /* Switch ports are numbered from 1 to queried value */ + if (MLXSW_CORE_RES_VALID(mlxsw_core, MAX_SYSTEM_PORT)) + mlxsw_core->max_ports = MLXSW_CORE_RES_GET(mlxsw_core, + MAX_SYSTEM_PORT) + 1; + else + mlxsw_core->max_ports = MLXSW_PORT_MAX_PORTS_DEFAULT + 1; + + mlxsw_core->ports = kcalloc(mlxsw_core->max_ports, + sizeof(struct mlxsw_core_port), GFP_KERNEL); + if (!mlxsw_core->ports) + return -ENOMEM; + + return 0; +} + +static void mlxsw_ports_fini(struct mlxsw_core *mlxsw_core) +{ + kfree(mlxsw_core->ports); +} + +unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->max_ports; +} +EXPORT_SYMBOL(mlxsw_core_max_ports); + void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core) { return mlxsw_core->driver_priv; @@ -703,91 +711,6 @@ err_out: * Core functions *****************/ -static int mlxsw_core_rx_stats_dbg_read(struct seq_file *file, void *data) -{ - struct mlxsw_core *mlxsw_core = file->private; - struct mlxsw_core_pcpu_stats *p; - u64 rx_packets, rx_bytes; - u64 tmp_rx_packets, tmp_rx_bytes; - u32 rx_dropped, rx_invalid; - unsigned int start; - int i; - int 
j; - static const char hdr[] = - " NUM RX_PACKETS RX_BYTES RX_DROPPED\n"; - - seq_printf(file, hdr); - for (i = 0; i < MLXSW_TRAP_ID_MAX; i++) { - rx_packets = 0; - rx_bytes = 0; - rx_dropped = 0; - for_each_possible_cpu(j) { - p = per_cpu_ptr(mlxsw_core->pcpu_stats, j); - do { - start = u64_stats_fetch_begin(&p->syncp); - tmp_rx_packets = p->trap_rx_packets[i]; - tmp_rx_bytes = p->trap_rx_bytes[i]; - } while (u64_stats_fetch_retry(&p->syncp, start)); - - rx_packets += tmp_rx_packets; - rx_bytes += tmp_rx_bytes; - rx_dropped += p->trap_rx_dropped[i]; - } - seq_printf(file, "trap %3d %12llu %12llu %10u\n", - i, rx_packets, rx_bytes, rx_dropped); - } - rx_invalid = 0; - for_each_possible_cpu(j) { - p = per_cpu_ptr(mlxsw_core->pcpu_stats, j); - rx_invalid += p->trap_rx_invalid; - } - seq_printf(file, "trap INV %10u\n", - rx_invalid); - - for (i = 0; i < MLXSW_PORT_MAX_PORTS; i++) { - rx_packets = 0; - rx_bytes = 0; - rx_dropped = 0; - for_each_possible_cpu(j) { - p = per_cpu_ptr(mlxsw_core->pcpu_stats, j); - do { - start = u64_stats_fetch_begin(&p->syncp); - tmp_rx_packets = p->port_rx_packets[i]; - tmp_rx_bytes = p->port_rx_bytes[i]; - } while (u64_stats_fetch_retry(&p->syncp, start)); - - rx_packets += tmp_rx_packets; - rx_bytes += tmp_rx_bytes; - rx_dropped += p->port_rx_dropped[i]; - } - seq_printf(file, "port %3d %12llu %12llu %10u\n", - i, rx_packets, rx_bytes, rx_dropped); - } - rx_invalid = 0; - for_each_possible_cpu(j) { - p = per_cpu_ptr(mlxsw_core->pcpu_stats, j); - rx_invalid += p->port_rx_invalid; - } - seq_printf(file, "port INV %10u\n", - rx_invalid); - return 0; -} - -static int mlxsw_core_rx_stats_dbg_open(struct inode *inode, struct file *f) -{ - struct mlxsw_core *mlxsw_core = inode->i_private; - - return single_open(f, mlxsw_core_rx_stats_dbg_read, mlxsw_core); -} - -static const struct file_operations mlxsw_core_rx_stats_dbg_ops = { - .owner = THIS_MODULE, - .open = mlxsw_core_rx_stats_dbg_open, - .release = single_release, - .read = seq_read, - 
.llseek = seq_lseek -}; - int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver) { spin_lock(&mlxsw_core_driver_list_lock); @@ -835,39 +758,13 @@ static void mlxsw_core_driver_put(const char *kind) spin_unlock(&mlxsw_core_driver_list_lock); } -static int mlxsw_core_debugfs_init(struct mlxsw_core *mlxsw_core) -{ - const struct mlxsw_bus_info *bus_info = mlxsw_core->bus_info; - - mlxsw_core->dbg_dir = debugfs_create_dir(bus_info->device_name, - mlxsw_core_dbg_root); - if (!mlxsw_core->dbg_dir) - return -ENOMEM; - debugfs_create_file("rx_stats", S_IRUGO, mlxsw_core->dbg_dir, - mlxsw_core, &mlxsw_core_rx_stats_dbg_ops); - mlxsw_core->dbg.vsd_blob.data = (void *) &bus_info->vsd; - mlxsw_core->dbg.vsd_blob.size = sizeof(bus_info->vsd); - debugfs_create_blob("vsd", S_IRUGO, mlxsw_core->dbg_dir, - &mlxsw_core->dbg.vsd_blob); - mlxsw_core->dbg.psid_blob.data = (void *) &bus_info->psid; - mlxsw_core->dbg.psid_blob.size = sizeof(bus_info->psid); - debugfs_create_blob("psid", S_IRUGO, mlxsw_core->dbg_dir, - &mlxsw_core->dbg.psid_blob); - return 0; -} - -static void mlxsw_core_debugfs_fini(struct mlxsw_core *mlxsw_core) -{ - debugfs_remove_recursive(mlxsw_core->dbg_dir); -} - static int mlxsw_devlink_port_split(struct devlink *devlink, unsigned int port_index, unsigned int count) { struct mlxsw_core *mlxsw_core = devlink_priv(devlink); - if (port_index >= MLXSW_PORT_MAX_PORTS) + if (port_index >= mlxsw_core->max_ports) return -EINVAL; if (!mlxsw_core->driver->port_split) return -EOPNOTSUPP; @@ -879,7 +776,7 @@ static int mlxsw_devlink_port_unsplit(struct devlink *devlink, { struct mlxsw_core *mlxsw_core = devlink_priv(devlink); - if (port_index >= MLXSW_PORT_MAX_PORTS) + if (port_index >= mlxsw_core->max_ports) return -EINVAL; if (!mlxsw_core->driver->port_unsplit) return -EOPNOTSUPP; @@ -1101,18 +998,15 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, mlxsw_core->bus_priv = bus_priv; mlxsw_core->bus_info = mlxsw_bus_info; - 
mlxsw_core->pcpu_stats = - netdev_alloc_pcpu_stats(struct mlxsw_core_pcpu_stats); - if (!mlxsw_core->pcpu_stats) { - err = -ENOMEM; - goto err_alloc_stats; - } - err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, &mlxsw_core->res); if (err) goto err_bus_init; + err = mlxsw_ports_init(mlxsw_core); + if (err) + goto err_ports_init; + if (MLXSW_CORE_RES_VALID(mlxsw_core, MAX_LAG) && MLXSW_CORE_RES_VALID(mlxsw_core, MAX_LAG_MEMBERS)) { alloc_size = sizeof(u8) * @@ -1148,15 +1042,8 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_driver_init; } - err = mlxsw_core_debugfs_init(mlxsw_core); - if (err) - goto err_debugfs_init; - return 0; -err_debugfs_init: - if (mlxsw_core->driver->fini) - mlxsw_core->driver->fini(mlxsw_core); err_driver_init: mlxsw_thermal_fini(mlxsw_core->thermal); err_thermal_init: @@ -1167,10 +1054,10 @@ err_devlink_register: err_emad_init: kfree(mlxsw_core->lag.mapping); err_alloc_lag_mapping: + mlxsw_ports_fini(mlxsw_core); +err_ports_init: mlxsw_bus->fini(bus_priv); err_bus_init: - free_percpu(mlxsw_core->pcpu_stats); -err_alloc_stats: devlink_free(devlink); err_devlink_alloc: mlxsw_core_driver_put(device_kind); @@ -1183,15 +1070,14 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core) const char *device_kind = mlxsw_core->bus_info->device_kind; struct devlink *devlink = priv_to_devlink(mlxsw_core); - mlxsw_core_debugfs_fini(mlxsw_core); if (mlxsw_core->driver->fini) mlxsw_core->driver->fini(mlxsw_core); mlxsw_thermal_fini(mlxsw_core->thermal); devlink_unregister(devlink); mlxsw_emad_fini(mlxsw_core); kfree(mlxsw_core->lag.mapping); + mlxsw_ports_fini(mlxsw_core); mlxsw_core->bus->fini(mlxsw_core->bus_priv); - free_percpu(mlxsw_core->pcpu_stats); devlink_free(devlink); mlxsw_core_driver_put(device_kind); } @@ -1639,7 +1525,6 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, { struct mlxsw_rx_listener_item *rxl_item; const struct 
mlxsw_rx_listener *rxl; - struct mlxsw_core_pcpu_stats *pcpu_stats; u8 local_port; bool found = false; @@ -1661,7 +1546,7 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, __func__, local_port, rx_info->trap_id); if ((rx_info->trap_id >= MLXSW_TRAP_ID_MAX) || - (local_port >= MLXSW_PORT_MAX_PORTS)) + (local_port >= mlxsw_core->max_ports)) goto drop; rcu_read_lock(); @@ -1678,26 +1563,10 @@ void mlxsw_core_skb_receive(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, if (!found) goto drop; - pcpu_stats = this_cpu_ptr(mlxsw_core->pcpu_stats); - u64_stats_update_begin(&pcpu_stats->syncp); - pcpu_stats->port_rx_packets[local_port]++; - pcpu_stats->port_rx_bytes[local_port] += skb->len; - pcpu_stats->trap_rx_packets[rx_info->trap_id]++; - pcpu_stats->trap_rx_bytes[rx_info->trap_id] += skb->len; - u64_stats_update_end(&pcpu_stats->syncp); - rxl->func(skb, local_port, rxl_item->priv); return; drop: - if (rx_info->trap_id >= MLXSW_TRAP_ID_MAX) - this_cpu_inc(mlxsw_core->pcpu_stats->trap_rx_invalid); - else - this_cpu_inc(mlxsw_core->pcpu_stats->trap_rx_dropped[rx_info->trap_id]); - if (local_port >= MLXSW_PORT_MAX_PORTS) - this_cpu_inc(mlxsw_core->pcpu_stats->port_rx_invalid); - else - this_cpu_inc(mlxsw_core->pcpu_stats->port_rx_dropped[local_port]); dev_kfree_skb(skb); } EXPORT_SYMBOL(mlxsw_core_skb_receive); @@ -1926,15 +1795,8 @@ static int __init mlxsw_core_module_init(void) err = -ENOMEM; goto err_alloc_ordered_workqueue; } - mlxsw_core_dbg_root = debugfs_create_dir(mlxsw_core_driver_name, NULL); - if (!mlxsw_core_dbg_root) { - err = -ENOMEM; - goto err_debugfs_create_dir; - } return 0; -err_debugfs_create_dir: - destroy_workqueue(mlxsw_owq); err_alloc_ordered_workqueue: destroy_workqueue(mlxsw_wq); return err; @@ -1942,7 +1804,6 @@ err_alloc_ordered_workqueue: static void __exit mlxsw_core_module_exit(void) { - debugfs_remove_recursive(mlxsw_core_dbg_root); destroy_workqueue(mlxsw_owq); destroy_workqueue(mlxsw_wq); } diff --git 
a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index cf38cf9027f8..7fb35395adf5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -57,6 +57,8 @@ struct mlxsw_driver; struct mlxsw_bus; struct mlxsw_bus_info; +unsigned int mlxsw_core_max_ports(const struct mlxsw_core *mlxsw_core); + void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core); int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c index 5f337715a4da..46304ffb9449 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.c @@ -567,6 +567,89 @@ static char *mlxsw_afa_block_append_action(struct mlxsw_afa_block *block, return oneact + MLXSW_AFA_PAYLOAD_OFFSET; } +/* VLAN Action + * ----------- + * VLAN action is used for manipulating VLANs. It can be used to implement QinQ, + * VLAN translation, change of PCP bits of the VLAN tag, push, pop as swap VLANs + * and more. + */ + +#define MLXSW_AFA_VLAN_CODE 0x02 +#define MLXSW_AFA_VLAN_SIZE 1 + +enum mlxsw_afa_vlan_vlan_tag_cmd { + MLXSW_AFA_VLAN_VLAN_TAG_CMD_NOP, + MLXSW_AFA_VLAN_VLAN_TAG_CMD_PUSH_TAG, + MLXSW_AFA_VLAN_VLAN_TAG_CMD_POP_TAG, +}; + +enum mlxsw_afa_vlan_cmd { + MLXSW_AFA_VLAN_CMD_NOP, + MLXSW_AFA_VLAN_CMD_SET_OUTER, + MLXSW_AFA_VLAN_CMD_SET_INNER, + MLXSW_AFA_VLAN_CMD_COPY_OUTER_TO_INNER, + MLXSW_AFA_VLAN_CMD_COPY_INNER_TO_OUTER, + MLXSW_AFA_VLAN_CMD_SWAP, +}; + +/* afa_vlan_vlan_tag_cmd + * Tag command: push, pop, nop VLAN header. 
+ */ +MLXSW_ITEM32(afa, vlan, vlan_tag_cmd, 0x00, 29, 3); + +/* afa_vlan_vid_cmd */ +MLXSW_ITEM32(afa, vlan, vid_cmd, 0x04, 29, 3); + +/* afa_vlan_vid */ +MLXSW_ITEM32(afa, vlan, vid, 0x04, 0, 12); + +/* afa_vlan_ethertype_cmd */ +MLXSW_ITEM32(afa, vlan, ethertype_cmd, 0x08, 29, 3); + +/* afa_vlan_ethertype + * Index to EtherTypes in Switch VLAN EtherType Register (SVER). + */ +MLXSW_ITEM32(afa, vlan, ethertype, 0x08, 24, 3); + +/* afa_vlan_pcp_cmd */ +MLXSW_ITEM32(afa, vlan, pcp_cmd, 0x08, 13, 3); + +/* afa_vlan_pcp */ +MLXSW_ITEM32(afa, vlan, pcp, 0x08, 8, 3); + +static inline void +mlxsw_afa_vlan_pack(char *payload, + enum mlxsw_afa_vlan_vlan_tag_cmd vlan_tag_cmd, + enum mlxsw_afa_vlan_cmd vid_cmd, u16 vid, + enum mlxsw_afa_vlan_cmd pcp_cmd, u8 pcp, + enum mlxsw_afa_vlan_cmd ethertype_cmd, u8 ethertype) +{ + mlxsw_afa_vlan_vlan_tag_cmd_set(payload, vlan_tag_cmd); + mlxsw_afa_vlan_vid_cmd_set(payload, vid_cmd); + mlxsw_afa_vlan_vid_set(payload, vid); + mlxsw_afa_vlan_pcp_cmd_set(payload, pcp_cmd); + mlxsw_afa_vlan_pcp_set(payload, pcp); + mlxsw_afa_vlan_ethertype_cmd_set(payload, ethertype_cmd); + mlxsw_afa_vlan_ethertype_set(payload, ethertype); +} + +int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, + u16 vid, u8 pcp, u8 et) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_VLAN_CODE, + MLXSW_AFA_VLAN_SIZE); + + if (!act) + return -ENOBUFS; + mlxsw_afa_vlan_pack(act, MLXSW_AFA_VLAN_VLAN_TAG_CMD_NOP, + MLXSW_AFA_VLAN_CMD_SET_OUTER, vid, + MLXSW_AFA_VLAN_CMD_SET_OUTER, pcp, + MLXSW_AFA_VLAN_CMD_SET_OUTER, et); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_vlan_modify); + /* Trap / Discard Action * --------------------- * The Trap / Discard action enables trapping / mirroring packets to the CPU @@ -677,3 +760,98 @@ err_append_action: return err; } EXPORT_SYMBOL(mlxsw_afa_block_append_fwd); + +/* Policing and Counting Action + * ---------------------------- + * Policing and Counting action is used for binding policer and 
counter + * to ACL rules. + */ + +#define MLXSW_AFA_POLCNT_CODE 0x08 +#define MLXSW_AFA_POLCNT_SIZE 1 + +enum mlxsw_afa_polcnt_counter_set_type { + /* No count */ + MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_NO_COUNT = 0x00, + /* Count packets and bytes */ + MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, + /* Count only packets */ + MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS = 0x05, +}; + +/* afa_polcnt_counter_set_type + * Counter set type for flow counters. + */ +MLXSW_ITEM32(afa, polcnt, counter_set_type, 0x04, 24, 8); + +/* afa_polcnt_counter_index + * Counter index for flow counters. + */ +MLXSW_ITEM32(afa, polcnt, counter_index, 0x04, 0, 24); + +static inline void +mlxsw_afa_polcnt_pack(char *payload, + enum mlxsw_afa_polcnt_counter_set_type set_type, + u32 counter_index) +{ + mlxsw_afa_polcnt_counter_set_type_set(payload, set_type); + mlxsw_afa_polcnt_counter_index_set(payload, counter_index); +} + +int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block, + u32 counter_index) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_POLCNT_CODE, + MLXSW_AFA_POLCNT_SIZE); + if (!act) + return -ENOBUFS; + mlxsw_afa_polcnt_pack(act, MLXSW_AFA_POLCNT_COUNTER_SET_TYPE_PACKETS_BYTES, + counter_index); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_counter); + +/* Virtual Router and Forwarding Domain Action + * ------------------------------------------- + * Virtual Switch action is used for manipulate the Virtual Router (VR), + * MPLS label space and the Forwarding Identifier (FID). + */ + +#define MLXSW_AFA_VIRFWD_CODE 0x0E +#define MLXSW_AFA_VIRFWD_SIZE 1 + +enum mlxsw_afa_virfwd_fid_cmd { + /* Do nothing */ + MLXSW_AFA_VIRFWD_FID_CMD_NOOP, + /* Set the Forwarding Identifier (FID) to fid */ + MLXSW_AFA_VIRFWD_FID_CMD_SET, +}; + +/* afa_virfwd_fid_cmd */ +MLXSW_ITEM32(afa, virfwd, fid_cmd, 0x08, 29, 3); + +/* afa_virfwd_fid + * The FID value. 
+ */ +MLXSW_ITEM32(afa, virfwd, fid, 0x08, 0, 16); + +static inline void mlxsw_afa_virfwd_pack(char *payload, + enum mlxsw_afa_virfwd_fid_cmd fid_cmd, + u16 fid) +{ + mlxsw_afa_virfwd_fid_cmd_set(payload, fid_cmd); + mlxsw_afa_virfwd_fid_set(payload, fid); +} + +int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid) +{ + char *act = mlxsw_afa_block_append_action(block, + MLXSW_AFA_VIRFWD_CODE, + MLXSW_AFA_VIRFWD_SIZE); + if (!act) + return -ENOBUFS; + mlxsw_afa_virfwd_pack(act, MLXSW_AFA_VIRFWD_FID_CMD_SET, fid); + return 0; +} +EXPORT_SYMBOL(mlxsw_afa_block_append_fid_set); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h index 43f78dcfe394..bd8b91d02880 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_actions.h @@ -62,5 +62,10 @@ void mlxsw_afa_block_jump(struct mlxsw_afa_block *block, u16 group_id); int mlxsw_afa_block_append_drop(struct mlxsw_afa_block *block); int mlxsw_afa_block_append_fwd(struct mlxsw_afa_block *block, u8 local_port, bool in_port); +int mlxsw_afa_block_append_vlan_modify(struct mlxsw_afa_block *block, + u16 vid, u8 pcp, u8 et); +int mlxsw_afa_block_append_counter(struct mlxsw_afa_block *block, + u32 counter_index); +int mlxsw_afa_block_append_fid_set(struct mlxsw_afa_block *block, u16 fid); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h index e4fcba7c2af2..c75e9141e3ec 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.h @@ -54,6 +54,8 @@ enum mlxsw_afk_element { MLXSW_AFK_ELEMENT_DST_IP6_LO, MLXSW_AFK_ELEMENT_DST_L4_PORT, MLXSW_AFK_ELEMENT_SRC_L4_PORT, + MLXSW_AFK_ELEMENT_VID, + MLXSW_AFK_ELEMENT_PCP, MLXSW_AFK_ELEMENT_MAX, }; @@ -88,7 +90,7 @@ struct mlxsw_afk_element_info { 
MLXSW_AFK_ELEMENT_INFO(MLXSW_AFK_ELEMENT_TYPE_BUF, \ _element, _offset, 0, _size) -/* For the purpose of the driver, define a internal storage scratchpad +/* For the purpose of the driver, define an internal storage scratchpad * that will be used to store key/mask values. For each defined element type * define an internal storage geometry. */ @@ -98,6 +100,8 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = { MLXSW_AFK_ELEMENT_INFO_BUF(SMAC, 0x0A, 6), MLXSW_AFK_ELEMENT_INFO_U32(ETHERTYPE, 0x00, 0, 16), MLXSW_AFK_ELEMENT_INFO_U32(IP_PROTO, 0x10, 0, 8), + MLXSW_AFK_ELEMENT_INFO_U32(VID, 0x10, 8, 12), + MLXSW_AFK_ELEMENT_INFO_U32(PCP, 0x10, 20, 3), MLXSW_AFK_ELEMENT_INFO_U32(SRC_IP4, 0x18, 0, 32), MLXSW_AFK_ELEMENT_INFO_U32(DST_IP4, 0x1C, 0, 32), MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP6_HI, 0x18, 8), diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index a223c85dfde0..23f7d828cf67 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -44,8 +44,6 @@ #include <linux/skbuff.h> #include <linux/if_vlan.h> #include <linux/log2.h> -#include <linux/debugfs.h> -#include <linux/seq_file.h> #include <linux/string.h> #include "pci_hw.h" @@ -57,8 +55,6 @@ static const char mlxsw_pci_driver_name[] = "mlxsw_pci"; -static struct dentry *mlxsw_pci_dbg_root; - #define mlxsw_pci_write32(mlxsw_pci, reg, val) \ iowrite32be(val, (mlxsw_pci)->hw_addr + (MLXSW_PCI_ ## reg)) #define mlxsw_pci_read32(mlxsw_pci, reg) \ @@ -71,21 +67,6 @@ enum mlxsw_pci_queue_type { MLXSW_PCI_QUEUE_TYPE_EQ, }; -static const char *mlxsw_pci_queue_type_str(enum mlxsw_pci_queue_type q_type) -{ - switch (q_type) { - case MLXSW_PCI_QUEUE_TYPE_SDQ: - return "sdq"; - case MLXSW_PCI_QUEUE_TYPE_RDQ: - return "rdq"; - case MLXSW_PCI_QUEUE_TYPE_CQ: - return "cq"; - case MLXSW_PCI_QUEUE_TYPE_EQ: - return "eq"; - } - BUG(); -} - #define MLXSW_PCI_QUEUE_TYPE_COUNT 4 static const u16 
mlxsw_pci_doorbell_type_offset[] = { @@ -155,7 +136,6 @@ struct mlxsw_pci { u8 __iomem *hw_addr; struct mlxsw_pci_queue_type_group queues[MLXSW_PCI_QUEUE_TYPE_COUNT]; u32 doorbell_offset; - struct msix_entry msix_entry; struct mlxsw_core *core; struct { struct mlxsw_pci_mem_item *items; @@ -174,7 +154,6 @@ struct mlxsw_pci { } comp; } cmd; struct mlxsw_bus_info bus_info; - struct dentry *dbg_dir; }; static void mlxsw_pci_queue_tasklet_schedule(struct mlxsw_pci_queue *q) @@ -261,21 +240,11 @@ static u8 mlxsw_pci_sdq_count(struct mlxsw_pci *mlxsw_pci) return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_SDQ); } -static u8 mlxsw_pci_rdq_count(struct mlxsw_pci *mlxsw_pci) -{ - return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_RDQ); -} - static u8 mlxsw_pci_cq_count(struct mlxsw_pci *mlxsw_pci) { return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_CQ); } -static u8 mlxsw_pci_eq_count(struct mlxsw_pci *mlxsw_pci) -{ - return __mlxsw_pci_queue_count(mlxsw_pci, MLXSW_PCI_QUEUE_TYPE_EQ); -} - static struct mlxsw_pci_queue * __mlxsw_pci_queue_get(struct mlxsw_pci *mlxsw_pci, enum mlxsw_pci_queue_type q_type, u8 q_num) @@ -390,26 +359,6 @@ static void mlxsw_pci_sdq_fini(struct mlxsw_pci *mlxsw_pci, mlxsw_cmd_hw2sw_sdq(mlxsw_pci->core, q->num); } -static int mlxsw_pci_sdq_dbg_read(struct seq_file *file, void *data) -{ - struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private); - struct mlxsw_pci_queue *q; - int i; - static const char hdr[] = - "NUM PROD_COUNT CONS_COUNT COUNT\n"; - - seq_printf(file, hdr); - for (i = 0; i < mlxsw_pci_sdq_count(mlxsw_pci); i++) { - q = mlxsw_pci_sdq_get(mlxsw_pci, i); - spin_lock_bh(&q->lock); - seq_printf(file, "%3d %10d %10d %5d\n", - i, q->producer_counter, q->consumer_counter, - q->count); - spin_unlock_bh(&q->lock); - } - return 0; -} - static int mlxsw_pci_wqe_frag_map(struct mlxsw_pci *mlxsw_pci, char *wqe, int index, char *frag_data, size_t frag_len, int direction) @@ -544,26 +493,6 @@ static void 
mlxsw_pci_rdq_fini(struct mlxsw_pci *mlxsw_pci, } } -static int mlxsw_pci_rdq_dbg_read(struct seq_file *file, void *data) -{ - struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private); - struct mlxsw_pci_queue *q; - int i; - static const char hdr[] = - "NUM PROD_COUNT CONS_COUNT COUNT\n"; - - seq_printf(file, hdr); - for (i = 0; i < mlxsw_pci_rdq_count(mlxsw_pci); i++) { - q = mlxsw_pci_rdq_get(mlxsw_pci, i); - spin_lock_bh(&q->lock); - seq_printf(file, "%3d %10d %10d %5d\n", - i, q->producer_counter, q->consumer_counter, - q->count); - spin_unlock_bh(&q->lock); - } - return 0; -} - static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, struct mlxsw_pci_queue *q) { @@ -580,7 +509,6 @@ static int mlxsw_pci_cq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_sw2hw_cq_cv_set(mbox, 0); /* CQE ver 0 */ mlxsw_cmd_mbox_sw2hw_cq_c_eqn_set(mbox, MLXSW_PCI_EQ_COMP_NUM); - mlxsw_cmd_mbox_sw2hw_cq_oi_set(mbox, 0); mlxsw_cmd_mbox_sw2hw_cq_st_set(mbox, 0); mlxsw_cmd_mbox_sw2hw_cq_log_cq_size_set(mbox, ilog2(q->count)); for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { @@ -602,27 +530,6 @@ static void mlxsw_pci_cq_fini(struct mlxsw_pci *mlxsw_pci, mlxsw_cmd_hw2sw_cq(mlxsw_pci->core, q->num); } -static int mlxsw_pci_cq_dbg_read(struct seq_file *file, void *data) -{ - struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private); - - struct mlxsw_pci_queue *q; - int i; - static const char hdr[] = - "NUM CONS_INDEX SDQ_COUNT RDQ_COUNT COUNT\n"; - - seq_printf(file, hdr); - for (i = 0; i < mlxsw_pci_cq_count(mlxsw_pci); i++) { - q = mlxsw_pci_cq_get(mlxsw_pci, i); - spin_lock_bh(&q->lock); - seq_printf(file, "%3d %10d %10d %10d %5d\n", - i, q->consumer_counter, q->u.cq.comp_sdq_count, - q->u.cq.comp_rdq_count, q->count); - spin_unlock_bh(&q->lock); - } - return 0; -} - static void mlxsw_pci_cqe_sdq_handle(struct mlxsw_pci *mlxsw_pci, struct mlxsw_pci_queue *q, u16 consumer_counter_limit, @@ -755,7 +662,6 @@ static int mlxsw_pci_eq_init(struct mlxsw_pci 
*mlxsw_pci, char *mbox, } mlxsw_cmd_mbox_sw2hw_eq_int_msix_set(mbox, 1); /* MSI-X used */ - mlxsw_cmd_mbox_sw2hw_eq_oi_set(mbox, 0); mlxsw_cmd_mbox_sw2hw_eq_st_set(mbox, 1); /* armed */ mlxsw_cmd_mbox_sw2hw_eq_log_eq_size_set(mbox, ilog2(q->count)); for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { @@ -777,27 +683,6 @@ static void mlxsw_pci_eq_fini(struct mlxsw_pci *mlxsw_pci, mlxsw_cmd_hw2sw_eq(mlxsw_pci->core, q->num); } -static int mlxsw_pci_eq_dbg_read(struct seq_file *file, void *data) -{ - struct mlxsw_pci *mlxsw_pci = dev_get_drvdata(file->private); - struct mlxsw_pci_queue *q; - int i; - static const char hdr[] = - "NUM CONS_COUNT EV_CMD EV_COMP EV_OTHER COUNT\n"; - - seq_printf(file, hdr); - for (i = 0; i < mlxsw_pci_eq_count(mlxsw_pci); i++) { - q = mlxsw_pci_eq_get(mlxsw_pci, i); - spin_lock_bh(&q->lock); - seq_printf(file, "%3d %10d %10d %10d %10d %5d\n", - i, q->consumer_counter, q->u.eq.ev_cmd_count, - q->u.eq.ev_comp_count, q->u.eq.ev_other_count, - q->count); - spin_unlock_bh(&q->lock); - } - return 0; -} - static void mlxsw_pci_eq_cmd_event(struct mlxsw_pci *mlxsw_pci, char *eqe) { mlxsw_pci->cmd.comp.status = mlxsw_pci_eqe_cmd_status_get(eqe); @@ -868,7 +753,6 @@ struct mlxsw_pci_queue_ops { void (*fini)(struct mlxsw_pci *mlxsw_pci, struct mlxsw_pci_queue *q); void (*tasklet)(unsigned long data); - int (*dbg_read)(struct seq_file *s, void *data); u16 elem_count; u8 elem_size; }; @@ -877,7 +761,6 @@ static const struct mlxsw_pci_queue_ops mlxsw_pci_sdq_ops = { .type = MLXSW_PCI_QUEUE_TYPE_SDQ, .init = mlxsw_pci_sdq_init, .fini = mlxsw_pci_sdq_fini, - .dbg_read = mlxsw_pci_sdq_dbg_read, .elem_count = MLXSW_PCI_WQE_COUNT, .elem_size = MLXSW_PCI_WQE_SIZE, }; @@ -886,7 +769,6 @@ static const struct mlxsw_pci_queue_ops mlxsw_pci_rdq_ops = { .type = MLXSW_PCI_QUEUE_TYPE_RDQ, .init = mlxsw_pci_rdq_init, .fini = mlxsw_pci_rdq_fini, - .dbg_read = mlxsw_pci_rdq_dbg_read, .elem_count = MLXSW_PCI_WQE_COUNT, .elem_size = MLXSW_PCI_WQE_SIZE }; @@ -896,7 +778,6 @@ 
static const struct mlxsw_pci_queue_ops mlxsw_pci_cq_ops = { .init = mlxsw_pci_cq_init, .fini = mlxsw_pci_cq_fini, .tasklet = mlxsw_pci_cq_tasklet, - .dbg_read = mlxsw_pci_cq_dbg_read, .elem_count = MLXSW_PCI_CQE_COUNT, .elem_size = MLXSW_PCI_CQE_SIZE }; @@ -906,7 +787,6 @@ static const struct mlxsw_pci_queue_ops mlxsw_pci_eq_ops = { .init = mlxsw_pci_eq_init, .fini = mlxsw_pci_eq_fini, .tasklet = mlxsw_pci_eq_tasklet, - .dbg_read = mlxsw_pci_eq_dbg_read, .elem_count = MLXSW_PCI_EQE_COUNT, .elem_size = MLXSW_PCI_EQE_SIZE }; @@ -984,9 +864,7 @@ static int mlxsw_pci_queue_group_init(struct mlxsw_pci *mlxsw_pci, char *mbox, const struct mlxsw_pci_queue_ops *q_ops, u8 num_qs) { - struct pci_dev *pdev = mlxsw_pci->pdev; struct mlxsw_pci_queue_type_group *queue_group; - char tmp[16]; int i; int err; @@ -1003,10 +881,6 @@ static int mlxsw_pci_queue_group_init(struct mlxsw_pci *mlxsw_pci, char *mbox, } queue_group->count = num_qs; - sprintf(tmp, "%s_stats", mlxsw_pci_queue_type_str(q_ops->type)); - debugfs_create_devm_seqfile(&pdev->dev, tmp, mlxsw_pci->dbg_dir, - q_ops->dbg_read); - return 0; err_queue_init: @@ -1534,7 +1408,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, if (err) goto err_aqs_init; - err = request_irq(mlxsw_pci->msix_entry.vector, + err = request_irq(pci_irq_vector(pdev, 0), mlxsw_pci_eq_irq_handler, 0, mlxsw_pci->bus_info.device_kind, mlxsw_pci); if (err) { @@ -1567,7 +1441,7 @@ static void mlxsw_pci_fini(void *bus_priv) { struct mlxsw_pci *mlxsw_pci = bus_priv; - free_irq(mlxsw_pci->msix_entry.vector, mlxsw_pci); + free_irq(pci_irq_vector(mlxsw_pci->pdev, 0), mlxsw_pci); mlxsw_pci_aqs_fini(mlxsw_pci); mlxsw_pci_fw_area_fini(mlxsw_pci); mlxsw_pci_mbox_free(mlxsw_pci, &mlxsw_pci->cmd.out_mbox); @@ -1842,8 +1716,8 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_sw_reset; } - err = pci_enable_msix_exact(pdev, &mlxsw_pci->msix_entry, 1); - if (err) { + err = 
pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSIX); + if (err < 0) { dev_err(&pdev->dev, "MSI-X init failed\n"); goto err_msix_init; } @@ -1852,14 +1726,6 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev); mlxsw_pci->bus_info.dev = &pdev->dev; - mlxsw_pci->dbg_dir = debugfs_create_dir(mlxsw_pci->bus_info.device_name, - mlxsw_pci_dbg_root); - if (!mlxsw_pci->dbg_dir) { - dev_err(&pdev->dev, "Failed to create debugfs dir\n"); - err = -ENOMEM; - goto err_dbg_create_dir; - } - err = mlxsw_core_bus_device_register(&mlxsw_pci->bus_info, &mlxsw_pci_bus, mlxsw_pci); if (err) { @@ -1870,9 +1736,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; err_bus_device_register: - debugfs_remove_recursive(mlxsw_pci->dbg_dir); -err_dbg_create_dir: - pci_disable_msix(mlxsw_pci->pdev); + pci_free_irq_vectors(mlxsw_pci->pdev); err_msix_init: err_sw_reset: iounmap(mlxsw_pci->hw_addr); @@ -1892,8 +1756,7 @@ static void mlxsw_pci_remove(struct pci_dev *pdev) struct mlxsw_pci *mlxsw_pci = pci_get_drvdata(pdev); mlxsw_core_bus_device_unregister(mlxsw_pci->core); - debugfs_remove_recursive(mlxsw_pci->dbg_dir); - pci_disable_msix(mlxsw_pci->pdev); + pci_free_irq_vectors(mlxsw_pci->pdev); iounmap(mlxsw_pci->hw_addr); pci_release_regions(mlxsw_pci->pdev); pci_disable_device(mlxsw_pci->pdev); @@ -1916,15 +1779,11 @@ EXPORT_SYMBOL(mlxsw_pci_driver_unregister); static int __init mlxsw_pci_module_init(void) { - mlxsw_pci_dbg_root = debugfs_create_dir(mlxsw_pci_driver_name, NULL); - if (!mlxsw_pci_dbg_root) - return -ENOMEM; return 0; } static void __exit mlxsw_pci_module_exit(void) { - debugfs_remove_recursive(mlxsw_pci_dbg_root); } module_init(mlxsw_pci_module_init); diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h index 3d42146473b3..c580abba8d34 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/port.h +++ 
b/drivers/net/ethernet/mellanox/mlxsw/port.h @@ -49,20 +49,12 @@ #define MLXSW_PORT_MID 0xd000 -#define MLXSW_PORT_MAX_PHY_PORTS 0x40 -#define MLXSW_PORT_MAX_PORTS (MLXSW_PORT_MAX_PHY_PORTS + 1) - #define MLXSW_PORT_MAX_IB_PHY_PORTS 36 #define MLXSW_PORT_MAX_IB_PORTS (MLXSW_PORT_MAX_IB_PHY_PORTS + 1) -#define MLXSW_PORT_DEVID_BITS_OFFSET 10 -#define MLXSW_PORT_PHY_BITS_OFFSET 4 -#define MLXSW_PORT_PHY_BITS_MASK (MLXSW_PORT_MAX_PHY_PORTS - 1) - #define MLXSW_PORT_CPU_PORT 0x0 -#define MLXSW_PORT_ROUTER_PORT (MLXSW_PORT_MAX_PHY_PORTS + 2) -#define MLXSW_PORT_DONT_CARE (MLXSW_PORT_MAX_PORTS) +#define MLXSW_PORT_DONT_CARE 0xFF #define MLXSW_PORT_MODULE_MAX_WIDTH 4 diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index d9616daf8a70..83b277c8090e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4125,6 +4125,60 @@ MLXSW_ITEM32(reg, ritr, sp_if_system_port, 0x08, 0, 16); */ MLXSW_ITEM32(reg, ritr, sp_if_vid, 0x18, 0, 12); +/* Shared between ingress/egress */ +enum mlxsw_reg_ritr_counter_set_type { + /* No Count. */ + MLXSW_REG_RITR_COUNTER_SET_TYPE_NO_COUNT = 0x0, + /* Basic. Used for router interfaces, counting the following: + * - Error and Discard counters. + * - Unicast, Multicast and Broadcast counters. Sharing the + * same set of counters for the different type of traffic + * (IPv4, IPv6 and mpls). + */ + MLXSW_REG_RITR_COUNTER_SET_TYPE_BASIC = 0x9, +}; + +/* reg_ritr_ingress_counter_index + * Counter Index for flow counter. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ingress_counter_index, 0x38, 0, 24); + +/* reg_ritr_ingress_counter_set_type + * Igress Counter Set Type for router interface counter. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, ingress_counter_set_type, 0x38, 24, 8); + +/* reg_ritr_egress_counter_index + * Counter Index for flow counter. 
+ * Access: RW + */ +MLXSW_ITEM32(reg, ritr, egress_counter_index, 0x3C, 0, 24); + +/* reg_ritr_egress_counter_set_type + * Egress Counter Set Type for router interface counter. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, egress_counter_set_type, 0x3C, 24, 8); + +static inline void mlxsw_reg_ritr_counter_pack(char *payload, u32 index, + bool enable, bool egress) +{ + enum mlxsw_reg_ritr_counter_set_type set_type; + + if (enable) + set_type = MLXSW_REG_RITR_COUNTER_SET_TYPE_BASIC; + else + set_type = MLXSW_REG_RITR_COUNTER_SET_TYPE_NO_COUNT; + mlxsw_reg_ritr_egress_counter_set_type_set(payload, set_type); + + if (egress) + mlxsw_reg_ritr_egress_counter_index_set(payload, index); + else + mlxsw_reg_ritr_ingress_counter_index_set(payload, index); +} + static inline void mlxsw_reg_ritr_rif_pack(char *payload, u16 rif) { MLXSW_REG_ZERO(ritr, payload); @@ -4141,7 +4195,8 @@ static inline void mlxsw_reg_ritr_sp_if_pack(char *payload, bool lag, static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, enum mlxsw_reg_ritr_if_type type, - u16 rif, u16 mtu, const char *mac) + u16 rif, u16 vr_id, u16 mtu, + const char *mac) { bool op = enable ? 
MLXSW_REG_RITR_RIF_CREATE : MLXSW_REG_RITR_RIF_DEL; @@ -4153,6 +4208,7 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, mlxsw_reg_ritr_rif_set(payload, rif); mlxsw_reg_ritr_ipv4_fe_set(payload, 1); mlxsw_reg_ritr_lb_en_set(payload, 1); + mlxsw_reg_ritr_virtual_router_set(payload, vr_id); mlxsw_reg_ritr_mtu_set(payload, mtu); mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac); } @@ -4285,6 +4341,129 @@ static inline void mlxsw_reg_ratr_eth_entry_pack(char *payload, mlxsw_reg_ratr_eth_destination_mac_memcpy_to(payload, dest_mac); } +/* RICNT - Router Interface Counter Register + * ----------------------------------------- + * The RICNT register retrieves per port performance counters + */ +#define MLXSW_REG_RICNT_ID 0x800B +#define MLXSW_REG_RICNT_LEN 0x100 + +MLXSW_REG_DEFINE(ricnt, MLXSW_REG_RICNT_ID, MLXSW_REG_RICNT_LEN); + +/* reg_ricnt_counter_index + * Counter index + * Access: RW + */ +MLXSW_ITEM32(reg, ricnt, counter_index, 0x04, 0, 24); + +enum mlxsw_reg_ricnt_counter_set_type { + /* No Count. */ + MLXSW_REG_RICNT_COUNTER_SET_TYPE_NO_COUNT = 0x00, + /* Basic. Used for router interfaces, counting the following: + * - Error and Discard counters. + * - Unicast, Multicast and Broadcast counters. Sharing the + * same set of counters for the different type of traffic + * (IPv4, IPv6 and mpls). + */ + MLXSW_REG_RICNT_COUNTER_SET_TYPE_BASIC = 0x09, +}; + +/* reg_ricnt_counter_set_type + * Counter Set Type for router interface counter + * Access: RW + */ +MLXSW_ITEM32(reg, ricnt, counter_set_type, 0x04, 24, 8); + +enum mlxsw_reg_ricnt_opcode { + /* Nop. Supported only for read access*/ + MLXSW_REG_RICNT_OPCODE_NOP = 0x00, + /* Clear. Setting the clr bit will reset the counter value for + * all counters of the specified Router Interface. + */ + MLXSW_REG_RICNT_OPCODE_CLEAR = 0x08, +}; + +/* reg_ricnt_opcode + * Opcode + * Access: RW + */ +MLXSW_ITEM32(reg, ricnt, op, 0x00, 28, 4); + +/* reg_ricnt_good_unicast_packets + * good unicast packets. 
+ * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, good_unicast_packets, 0x08, 0, 64); + +/* reg_ricnt_good_multicast_packets + * good multicast packets. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, good_multicast_packets, 0x10, 0, 64); + +/* reg_ricnt_good_broadcast_packets + * good broadcast packets + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, good_broadcast_packets, 0x18, 0, 64); + +/* reg_ricnt_good_unicast_bytes + * A count of L3 data and padding octets not including L2 headers + * for good unicast frames. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, good_unicast_bytes, 0x20, 0, 64); + +/* reg_ricnt_good_multicast_bytes + * A count of L3 data and padding octets not including L2 headers + * for good multicast frames. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, good_multicast_bytes, 0x28, 0, 64); + +/* reg_ritr_good_broadcast_bytes + * A count of L3 data and padding octets not including L2 headers + * for good broadcast frames. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, good_broadcast_bytes, 0x30, 0, 64); + +/* reg_ricnt_error_packets + * A count of errored frames that do not pass the router checks. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, error_packets, 0x38, 0, 64); + +/* reg_ricnt_discrad_packets + * A count of non-errored frames that do not pass the router checks. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, discard_packets, 0x40, 0, 64); + +/* reg_ricnt_error_bytes + * A count of L3 data and padding octets not including L2 headers + * for errored frames. + * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, error_bytes, 0x48, 0, 64); + +/* reg_ricnt_discard_bytes + * A count of L3 data and padding octets not including L2 headers + * for non-errored frames that do not pass the router checks. 
+ * Access: RW + */ +MLXSW_ITEM64(reg, ricnt, discard_bytes, 0x50, 0, 64); + +static inline void mlxsw_reg_ricnt_pack(char *payload, u32 index, + enum mlxsw_reg_ricnt_opcode op) +{ + MLXSW_REG_ZERO(ricnt, payload); + mlxsw_reg_ricnt_op_set(payload, op); + mlxsw_reg_ricnt_counter_index_set(payload, index); + mlxsw_reg_ricnt_counter_set_type_set(payload, + MLXSW_REG_RICNT_COUNTER_SET_TYPE_BASIC); +} + /* RALTA - Router Algorithmic LPM Tree Allocation Register * ------------------------------------------------------- * RALTA is used to allocate the LPM trees of the SHSPM method. @@ -5504,6 +5683,70 @@ static inline void mlxsw_reg_mpsc_pack(char *payload, u8 local_port, bool e, mlxsw_reg_mpsc_rate_set(payload, rate); } +/* MGPC - Monitoring General Purpose Counter Set Register + * The MGPC register retrieves and sets the General Purpose Counter Set. + */ +#define MLXSW_REG_MGPC_ID 0x9081 +#define MLXSW_REG_MGPC_LEN 0x18 + +MLXSW_REG_DEFINE(mgpc, MLXSW_REG_MGPC_ID, MLXSW_REG_MGPC_LEN); + +enum mlxsw_reg_mgpc_counter_set_type { + /* No count */ + MLXSW_REG_MGPC_COUNTER_SET_TYPE_NO_COUT = 0x00, + /* Count packets and bytes */ + MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES = 0x03, + /* Count only packets */ + MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS = 0x05, +}; + +/* reg_mgpc_counter_set_type + * Counter set type. + * Access: OP + */ +MLXSW_ITEM32(reg, mgpc, counter_set_type, 0x00, 24, 8); + +/* reg_mgpc_counter_index + * Counter index. + * Access: Index + */ +MLXSW_ITEM32(reg, mgpc, counter_index, 0x00, 0, 24); + +enum mlxsw_reg_mgpc_opcode { + /* Nop */ + MLXSW_REG_MGPC_OPCODE_NOP = 0x00, + /* Clear counters */ + MLXSW_REG_MGPC_OPCODE_CLEAR = 0x08, +}; + +/* reg_mgpc_opcode + * Opcode. + * Access: OP + */ +MLXSW_ITEM32(reg, mgpc, opcode, 0x04, 28, 4); + +/* reg_mgpc_byte_counter + * Byte counter value. + * Access: RW + */ +MLXSW_ITEM64(reg, mgpc, byte_counter, 0x08, 0, 64); + +/* reg_mgpc_packet_counter + * Packet counter value. 
+ * Access: RW + */ +MLXSW_ITEM64(reg, mgpc, packet_counter, 0x10, 0, 64); + +static inline void mlxsw_reg_mgpc_pack(char *payload, u32 counter_index, + enum mlxsw_reg_mgpc_opcode opcode, + enum mlxsw_reg_mgpc_counter_set_type set_type) +{ + MLXSW_REG_ZERO(mgpc, payload); + mlxsw_reg_mgpc_counter_index_set(payload, counter_index); + mlxsw_reg_mgpc_counter_set_type_set(payload, set_type); + mlxsw_reg_mgpc_opcode_set(payload, opcode); +} + /* SBPR - Shared Buffer Pools Register * ----------------------------------- * The SBPR configures and retrieves the shared buffer pools and configuration. @@ -5960,6 +6203,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(rgcr), MLXSW_REG(ritr), MLXSW_REG(ratr), + MLXSW_REG(ricnt), MLXSW_REG(ralta), MLXSW_REG(ralst), MLXSW_REG(raltb), @@ -5977,6 +6221,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(mpar), MLXSW_REG(mlcr), MLXSW_REG(mpsc), + MLXSW_REG(mgpc), MLXSW_REG(sbpr), MLXSW_REG(sbcm), MLXSW_REG(sbpm), diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h index bce8c2e00630..9556d934714b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/resources.h +++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h @@ -43,11 +43,15 @@ enum mlxsw_res_id { MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE, MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE, MLXSW_RES_ID_MAX_TRAP_GROUPS, + MLXSW_RES_ID_COUNTER_POOL_SIZE, MLXSW_RES_ID_MAX_SPAN, + MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES, + MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC, MLXSW_RES_ID_MAX_SYSTEM_PORT, MLXSW_RES_ID_MAX_LAG, MLXSW_RES_ID_MAX_LAG_MEMBERS, MLXSW_RES_ID_MAX_BUFFER_SIZE, + MLXSW_RES_ID_CELL_SIZE, MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS, MLXSW_RES_ID_ACL_MAX_TCAM_RULES, MLXSW_RES_ID_ACL_MAX_REGIONS, @@ -59,6 +63,7 @@ enum mlxsw_res_id { MLXSW_RES_ID_MAX_CPU_POLICERS, MLXSW_RES_ID_MAX_VRS, MLXSW_RES_ID_MAX_RIFS, + MLXSW_RES_ID_MAX_LPM_TREES, /* Internal resources. * Determined by the SW, not queried from the HW. 
@@ -75,11 +80,15 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE] = 0x1002, [MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE] = 0x1003, [MLXSW_RES_ID_MAX_TRAP_GROUPS] = 0x2201, + [MLXSW_RES_ID_COUNTER_POOL_SIZE] = 0x2410, [MLXSW_RES_ID_MAX_SPAN] = 0x2420, + [MLXSW_RES_ID_COUNTER_SIZE_PACKETS_BYTES] = 0x2443, + [MLXSW_RES_ID_COUNTER_SIZE_ROUTER_BASIC] = 0x2449, [MLXSW_RES_ID_MAX_SYSTEM_PORT] = 0x2502, [MLXSW_RES_ID_MAX_LAG] = 0x2520, [MLXSW_RES_ID_MAX_LAG_MEMBERS] = 0x2521, [MLXSW_RES_ID_MAX_BUFFER_SIZE] = 0x2802, /* Bytes */ + [MLXSW_RES_ID_CELL_SIZE] = 0x2803, /* Bytes */ [MLXSW_RES_ID_ACL_MAX_TCAM_REGIONS] = 0x2901, [MLXSW_RES_ID_ACL_MAX_TCAM_RULES] = 0x2902, [MLXSW_RES_ID_ACL_MAX_REGIONS] = 0x2903, @@ -91,6 +100,7 @@ static u16 mlxsw_res_ids[] = { [MLXSW_RES_ID_MAX_CPU_POLICERS] = 0x2A13, [MLXSW_RES_ID_MAX_VRS] = 0x2C01, [MLXSW_RES_ID_MAX_RIFS] = 0x2C02, + [MLXSW_RES_ID_MAX_LPM_TREES] = 0x2C30, }; struct mlxsw_res { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 16484f24b7db..88357cee7679 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -66,6 +66,8 @@ #include "port.h" #include "trap.h" #include "txheader.h" +#include "spectrum_cnt.h" +#include "spectrum_dpipe.h" static const char mlxsw_sp_driver_name[] = "mlxsw_spectrum"; static const char mlxsw_sp_driver_version[] = "1.0"; @@ -138,6 +140,60 @@ MLXSW_ITEM32(tx, hdr, fid, 0x08, 0, 16); */ MLXSW_ITEM32(tx, hdr, type, 0x0C, 0, 4); +int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index, u64 *packets, + u64 *bytes) +{ + char mgpc_pl[MLXSW_REG_MGPC_LEN]; + int err; + + mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_NOP, + MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl); + if (err) + return err; + *packets = mlxsw_reg_mgpc_packet_counter_get(mgpc_pl); + *bytes = 
mlxsw_reg_mgpc_byte_counter_get(mgpc_pl); + return 0; +} + +static int mlxsw_sp_flow_counter_clear(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index) +{ + char mgpc_pl[MLXSW_REG_MGPC_LEN]; + + mlxsw_reg_mgpc_pack(mgpc_pl, counter_index, MLXSW_REG_MGPC_OPCODE_CLEAR, + MLXSW_REG_MGPC_COUNTER_SET_TYPE_PACKETS_BYTES); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mgpc), mgpc_pl); +} + +int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp, + unsigned int *p_counter_index) +{ + int err; + + err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_FLOW, + p_counter_index); + if (err) + return err; + err = mlxsw_sp_flow_counter_clear(mlxsw_sp, *p_counter_index); + if (err) + goto err_counter_clear; + return 0; + +err_counter_clear: + mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_FLOW, + *p_counter_index); + return err; +} + +void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index) +{ + mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_FLOW, + counter_index); +} + static void mlxsw_sp_txhdr_construct(struct sk_buff *skb, const struct mlxsw_tx_info *tx_info) { @@ -304,9 +360,10 @@ static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port) return false; } -static int mlxsw_sp_span_mtu_to_buffsize(int mtu) +static int mlxsw_sp_span_mtu_to_buffsize(const struct mlxsw_sp *mlxsw_sp, + int mtu) { - return MLXSW_SP_BYTES_TO_CELLS(mtu * 5 / 2) + 1; + return mlxsw_sp_bytes_cells(mlxsw_sp, mtu * 5 / 2) + 1; } static int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu) @@ -319,8 +376,9 @@ static int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu) * updated according to the mtu value */ if (mlxsw_sp_span_is_egress_mirror(port)) { - mlxsw_reg_sbib_pack(sbib_pl, port->local_port, - mlxsw_sp_span_mtu_to_buffsize(mtu)); + u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, mtu); + + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize); err = 
mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); if (err) { netdev_err(port->dev, "Could not update shared buffer for mirroring\n"); @@ -357,8 +415,10 @@ mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port, /* if it is an egress SPAN, bind a shared buffer to it */ if (type == MLXSW_SP_SPAN_EGRESS) { - mlxsw_reg_sbib_pack(sbib_pl, port->local_port, - mlxsw_sp_span_mtu_to_buffsize(port->dev->mtu)); + u32 buffsize = mlxsw_sp_span_mtu_to_buffsize(mlxsw_sp, + port->dev->mtu); + + mlxsw_reg_sbib_pack(sbib_pl, port->local_port, buffsize); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl); if (err) { netdev_err(port->dev, "Could not create shared buffer for mirroring\n"); @@ -745,19 +805,47 @@ static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p) return 0; } -static void mlxsw_sp_pg_buf_pack(char *pbmc_pl, int pg_index, int mtu, - bool pause_en, bool pfc_en, u16 delay) +static u16 mlxsw_sp_pg_buf_threshold_get(const struct mlxsw_sp *mlxsw_sp, + int mtu) { - u16 pg_size = 2 * MLXSW_SP_BYTES_TO_CELLS(mtu); + return 2 * mlxsw_sp_bytes_cells(mlxsw_sp, mtu); +} - delay = pfc_en ? mlxsw_sp_pfc_delay_get(mtu, delay) : - MLXSW_SP_PAUSE_DELAY; +#define MLXSW_SP_CELL_FACTOR 2 /* 2 * cell_size / (IPG + cell_size + 1) */ + +static u16 mlxsw_sp_pfc_delay_get(const struct mlxsw_sp *mlxsw_sp, int mtu, + u16 delay) +{ + delay = mlxsw_sp_bytes_cells(mlxsw_sp, DIV_ROUND_UP(delay, + BITS_PER_BYTE)); + return MLXSW_SP_CELL_FACTOR * delay + mlxsw_sp_bytes_cells(mlxsw_sp, + mtu); +} + +/* Maximum delay buffer needed in case of PAUSE frames, in bytes. + * Assumes 100m cable and maximum MTU. 
+ */ +#define MLXSW_SP_PAUSE_DELAY 58752 - if (pause_en || pfc_en) - mlxsw_reg_pbmc_lossless_buffer_pack(pbmc_pl, pg_index, - pg_size + delay, pg_size); +static u16 mlxsw_sp_pg_buf_delay_get(const struct mlxsw_sp *mlxsw_sp, int mtu, + u16 delay, bool pfc, bool pause) +{ + if (pfc) + return mlxsw_sp_pfc_delay_get(mlxsw_sp, mtu, delay); + else if (pause) + return mlxsw_sp_bytes_cells(mlxsw_sp, MLXSW_SP_PAUSE_DELAY); + else + return 0; +} + +static void mlxsw_sp_pg_buf_pack(char *pbmc_pl, int index, u16 size, u16 thres, + bool lossy) +{ + if (lossy) + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, index, size); else - mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pg_index, pg_size); + mlxsw_reg_pbmc_lossless_buffer_pack(pbmc_pl, index, size, + thres); } int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, @@ -778,6 +866,8 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { bool configure = false; bool pfc = false; + bool lossy; + u16 thres; for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) { if (prio_tc[j] == i) { @@ -789,7 +879,12 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, if (!configure) continue; - mlxsw_sp_pg_buf_pack(pbmc_pl, i, mtu, pause_en, pfc, delay); + + lossy = !(pfc || pause_en); + thres = mlxsw_sp_pg_buf_threshold_get(mlxsw_sp, mtu); + delay = mlxsw_sp_pg_buf_delay_get(mlxsw_sp, mtu, delay, pfc, + pause_en); + mlxsw_sp_pg_buf_pack(pbmc_pl, i, thres + delay, thres, lossy); } return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); @@ -966,8 +1061,9 @@ mlxsw_sp_port_get_stats64(struct net_device *dev, memcpy(stats, mlxsw_sp_port->hw_stats.cache, sizeof(*stats)); } -int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, - u16 vid_end, bool is_member, bool untagged) +static int __mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid_begin, u16 vid_end, + bool is_member, bool untagged) { 
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char *spvm_pl; @@ -984,6 +1080,26 @@ int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, return err; } +int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, + u16 vid_end, bool is_member, bool untagged) +{ + u16 vid, vid_e; + int err; + + for (vid = vid_begin; vid <= vid_end; + vid += MLXSW_REG_SPVM_REC_MAX_COUNT) { + vid_e = min((u16) (vid + MLXSW_REG_SPVM_REC_MAX_COUNT - 1), + vid_end); + + err = __mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid_e, + is_member, untagged); + if (err) + return err; + } + + return 0; +} + static int mlxsw_sp_port_vp_mode_trans(struct mlxsw_sp_port *mlxsw_sp_port) { enum mlxsw_reg_svfa_mt mt = MLXSW_REG_SVFA_MT_PORT_VID_TO_FID; @@ -1368,7 +1484,7 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, u32 handle, tc->cls_mall); return 0; default: - return -EINVAL; + return -EOPNOTSUPP; } case TC_SETUP_CLSFLOWER: switch (tc->cls_flower->command) { @@ -1379,6 +1495,9 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, u32 handle, mlxsw_sp_flower_destroy(mlxsw_sp_port, ingress, tc->cls_flower); return 0; + case TC_CLSFLOWER_STATS: + return mlxsw_sp_flower_stats(mlxsw_sp_port, ingress, + tc->cls_flower); default: return -EOPNOTSUPP; } @@ -1492,6 +1611,7 @@ err_port_pause_configure: struct mlxsw_sp_port_hw_stats { char str[ETH_GSTRING_LEN]; u64 (*getter)(const char *payload); + bool cells_bytes; }; static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_stats[] = { @@ -1612,17 +1732,11 @@ static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_prio_stats[] = { #define MLXSW_SP_PORT_HW_PRIO_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_prio_stats) -static u64 mlxsw_reg_ppcnt_tc_transmit_queue_bytes_get(const char *ppcnt_pl) -{ - u64 transmit_queue = mlxsw_reg_ppcnt_tc_transmit_queue_get(ppcnt_pl); - - return MLXSW_SP_CELLS_TO_BYTES(transmit_queue); -} - static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_tc_stats[] = { { .str = 
"tc_transmit_queue_tc", - .getter = mlxsw_reg_ppcnt_tc_transmit_queue_bytes_get, + .getter = mlxsw_reg_ppcnt_tc_transmit_queue_get, + .cells_bytes = true, }, { .str = "tc_no_buffer_discard_uc_tc", @@ -1734,6 +1848,8 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, enum mlxsw_reg_ppcnt_grp grp, int prio, u64 *data, int data_index) { + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_port_hw_stats *hw_stats; char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; int i, len; @@ -1743,8 +1859,13 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, if (err) return; mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl); - for (i = 0; i < len; i++) + for (i = 0; i < len; i++) { data[data_index + i] = hw_stats[i].getter(ppcnt_pl); + if (!hw_stats[i].cells_bytes) + continue; + data[data_index + i] = mlxsw_sp_cells_bytes(mlxsw_sp, + data[data_index + i]); + } } static void mlxsw_sp_port_get_stats(struct net_device *dev, @@ -2537,25 +2658,33 @@ static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp) { int i; - for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) + for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) if (mlxsw_sp_port_created(mlxsw_sp, i)) mlxsw_sp_port_remove(mlxsw_sp, i); + kfree(mlxsw_sp->port_to_module); kfree(mlxsw_sp->ports); } static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp) { + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); u8 module, width, lane; size_t alloc_size; int i; int err; - alloc_size = sizeof(struct mlxsw_sp_port *) * MLXSW_PORT_MAX_PORTS; + alloc_size = sizeof(struct mlxsw_sp_port *) * max_ports; mlxsw_sp->ports = kzalloc(alloc_size, GFP_KERNEL); if (!mlxsw_sp->ports) return -ENOMEM; - for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) { + mlxsw_sp->port_to_module = kcalloc(max_ports, sizeof(u8), GFP_KERNEL); + if (!mlxsw_sp->port_to_module) { + err = -ENOMEM; + goto err_port_to_module_alloc; + } + + for (i = 1; i < max_ports; i++) { 
err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, &module, &width, &lane); if (err) @@ -2575,6 +2704,8 @@ err_port_module_info_get: for (i--; i >= 1; i--) if (mlxsw_sp_port_created(mlxsw_sp, i)) mlxsw_sp_port_remove(mlxsw_sp, i); + kfree(mlxsw_sp->port_to_module); +err_port_to_module_alloc: kfree(mlxsw_sp->ports); return err; } @@ -2877,6 +3008,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_NO_MARK(IGMP_V3_REPORT, TRAP_TO_CPU, IGMP, false), MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, ARP, false), MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, ARP, false), + MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, IP2ME, false), /* L3 traps */ MLXSW_SP_RXL_NO_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false), MLXSW_SP_RXL_NO_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false), @@ -3158,6 +3290,18 @@ static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core) return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl); } +static int mlxsw_sp_vfid_op(struct mlxsw_sp *mlxsw_sp, u16 fid, bool create); + +static int mlxsw_sp_dummy_fid_init(struct mlxsw_sp *mlxsw_sp) +{ + return mlxsw_sp_vfid_op(mlxsw_sp, MLXSW_SP_DUMMY_FID, true); +} + +static void mlxsw_sp_dummy_fid_fini(struct mlxsw_sp *mlxsw_sp) +{ + mlxsw_sp_vfid_op(mlxsw_sp, MLXSW_SP_DUMMY_FID, false); +} + static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info) { @@ -3224,6 +3368,24 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, goto err_acl_init; } + err = mlxsw_sp_counter_pool_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init counter pool\n"); + goto err_counter_pool_init; + } + + err = mlxsw_sp_dpipe_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init pipeline debug\n"); + goto err_dpipe_init; + } + + err = mlxsw_sp_dummy_fid_init(mlxsw_sp); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Failed to init dummy FID\n"); + goto err_dummy_fid_init; + } + err = 
mlxsw_sp_ports_create(mlxsw_sp); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Failed to create ports\n"); @@ -3233,6 +3395,12 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, return 0; err_ports_create: + mlxsw_sp_dummy_fid_fini(mlxsw_sp); +err_dummy_fid_init: + mlxsw_sp_dpipe_fini(mlxsw_sp); +err_dpipe_init: + mlxsw_sp_counter_pool_fini(mlxsw_sp); +err_counter_pool_init: mlxsw_sp_acl_fini(mlxsw_sp); err_acl_init: mlxsw_sp_span_fini(mlxsw_sp); @@ -3255,6 +3423,9 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); mlxsw_sp_ports_remove(mlxsw_sp); + mlxsw_sp_dummy_fid_fini(mlxsw_sp); + mlxsw_sp_dpipe_fini(mlxsw_sp); + mlxsw_sp_counter_pool_fini(mlxsw_sp); mlxsw_sp_acl_fini(mlxsw_sp); mlxsw_sp_span_fini(mlxsw_sp); mlxsw_sp_router_fini(mlxsw_sp); @@ -3326,13 +3497,13 @@ bool mlxsw_sp_port_dev_check(const struct net_device *dev) return dev->netdev_ops == &mlxsw_sp_port_netdev_ops; } -static int mlxsw_lower_dev_walk(struct net_device *lower_dev, void *data) +static int mlxsw_sp_lower_dev_walk(struct net_device *lower_dev, void *data) { - struct mlxsw_sp_port **port = data; + struct mlxsw_sp_port **p_mlxsw_sp_port = data; int ret = 0; if (mlxsw_sp_port_dev_check(lower_dev)) { - *port = netdev_priv(lower_dev); + *p_mlxsw_sp_port = netdev_priv(lower_dev); ret = 1; } @@ -3341,18 +3512,18 @@ static int mlxsw_lower_dev_walk(struct net_device *lower_dev, void *data) static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev) { - struct mlxsw_sp_port *port; + struct mlxsw_sp_port *mlxsw_sp_port; if (mlxsw_sp_port_dev_check(dev)) return netdev_priv(dev); - port = NULL; - netdev_walk_all_lower_dev(dev, mlxsw_lower_dev_walk, &port); + mlxsw_sp_port = NULL; + netdev_walk_all_lower_dev(dev, mlxsw_sp_lower_dev_walk, &mlxsw_sp_port); - return port; + return mlxsw_sp_port; } -static struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev) +struct mlxsw_sp *mlxsw_sp_lower_get(struct 
net_device *dev) { struct mlxsw_sp_port *mlxsw_sp_port; @@ -3362,15 +3533,16 @@ static struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev) static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev) { - struct mlxsw_sp_port *port; + struct mlxsw_sp_port *mlxsw_sp_port; if (mlxsw_sp_port_dev_check(dev)) return netdev_priv(dev); - port = NULL; - netdev_walk_all_lower_dev_rcu(dev, mlxsw_lower_dev_walk, &port); + mlxsw_sp_port = NULL; + netdev_walk_all_lower_dev_rcu(dev, mlxsw_sp_lower_dev_walk, + &mlxsw_sp_port); - return port; + return mlxsw_sp_port; } struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev) @@ -3390,546 +3562,6 @@ void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port) dev_put(mlxsw_sp_port->dev); } -static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *r, - unsigned long event) -{ - switch (event) { - case NETDEV_UP: - if (!r) - return true; - r->ref_count++; - return false; - case NETDEV_DOWN: - if (r && --r->ref_count == 0) - return true; - /* It is possible we already removed the RIF ourselves - * if it was assigned to a netdev that is now a bridge - * or LAG slave. - */ - return false; - } - - return false; -} - -static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp) -{ - int i; - - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) - if (!mlxsw_sp->rifs[i]) - return i; - - return MLXSW_SP_INVALID_RIF; -} - -static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport, - bool *p_lagged, u16 *p_system_port) -{ - u8 local_port = mlxsw_sp_vport->local_port; - - *p_lagged = mlxsw_sp_vport->lagged; - *p_system_port = *p_lagged ? 
mlxsw_sp_vport->lag_id : local_port; -} - -static int mlxsw_sp_vport_rif_sp_op(struct mlxsw_sp_port *mlxsw_sp_vport, - struct net_device *l3_dev, u16 rif, - bool create) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; - bool lagged = mlxsw_sp_vport->lagged; - char ritr_pl[MLXSW_REG_RITR_LEN]; - u16 system_port; - - mlxsw_reg_ritr_pack(ritr_pl, create, MLXSW_REG_RITR_SP_IF, rif, - l3_dev->mtu, l3_dev->dev_addr); - - mlxsw_sp_vport_rif_sp_attr_get(mlxsw_sp_vport, &lagged, &system_port); - mlxsw_reg_ritr_sp_if_pack(ritr_pl, lagged, system_port, - mlxsw_sp_vport_vid_get(mlxsw_sp_vport)); - - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); -} - -static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport); - -static struct mlxsw_sp_fid * -mlxsw_sp_rfid_alloc(u16 fid, struct net_device *l3_dev) -{ - struct mlxsw_sp_fid *f; - - f = kzalloc(sizeof(*f), GFP_KERNEL); - if (!f) - return NULL; - - f->leave = mlxsw_sp_vport_rif_sp_leave; - f->ref_count = 0; - f->dev = l3_dev; - f->fid = fid; - - return f; -} - -static struct mlxsw_sp_rif * -mlxsw_sp_rif_alloc(u16 rif, struct net_device *l3_dev, struct mlxsw_sp_fid *f) -{ - struct mlxsw_sp_rif *r; - - r = kzalloc(sizeof(*r), GFP_KERNEL); - if (!r) - return NULL; - - INIT_LIST_HEAD(&r->nexthop_list); - INIT_LIST_HEAD(&r->neigh_list); - ether_addr_copy(r->addr, l3_dev->dev_addr); - r->mtu = l3_dev->mtu; - r->ref_count = 1; - r->dev = l3_dev; - r->rif = rif; - r->f = f; - - return r; -} - -static struct mlxsw_sp_rif * -mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport, - struct net_device *l3_dev) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; - struct mlxsw_sp_fid *f; - struct mlxsw_sp_rif *r; - u16 fid, rif; - int err; - - rif = mlxsw_sp_avail_rif_get(mlxsw_sp); - if (rif == MLXSW_SP_INVALID_RIF) - return ERR_PTR(-ERANGE); - - err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, true); - if (err) - return ERR_PTR(err); - - fid = 
mlxsw_sp_rif_sp_to_fid(rif); - err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, true); - if (err) - goto err_rif_fdb_op; - - f = mlxsw_sp_rfid_alloc(fid, l3_dev); - if (!f) { - err = -ENOMEM; - goto err_rfid_alloc; - } - - r = mlxsw_sp_rif_alloc(rif, l3_dev, f); - if (!r) { - err = -ENOMEM; - goto err_rif_alloc; - } - - f->r = r; - mlxsw_sp->rifs[rif] = r; - - return r; - -err_rif_alloc: - kfree(f); -err_rfid_alloc: - mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false); -err_rif_fdb_op: - mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, false); - return ERR_PTR(err); -} - -static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport, - struct mlxsw_sp_rif *r) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; - struct net_device *l3_dev = r->dev; - struct mlxsw_sp_fid *f = r->f; - u16 fid = f->fid; - u16 rif = r->rif; - - mlxsw_sp_router_rif_gone_sync(mlxsw_sp, r); - - mlxsw_sp->rifs[rif] = NULL; - f->r = NULL; - - kfree(r); - - kfree(f); - - mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false); - - mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, false); -} - -static int mlxsw_sp_vport_rif_sp_join(struct mlxsw_sp_port *mlxsw_sp_vport, - struct net_device *l3_dev) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; - struct mlxsw_sp_rif *r; - - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); - if (!r) { - r = mlxsw_sp_vport_rif_sp_create(mlxsw_sp_vport, l3_dev); - if (IS_ERR(r)) - return PTR_ERR(r); - } - - mlxsw_sp_vport_fid_set(mlxsw_sp_vport, r->f); - r->f->ref_count++; - - netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", r->f->fid); - - return 0; -} - -static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport) -{ - struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); - - netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid); - - mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL); - if (--f->ref_count == 0) - mlxsw_sp_vport_rif_sp_destroy(mlxsw_sp_vport, f->r); -} - 
-static int mlxsw_sp_inetaddr_vport_event(struct net_device *l3_dev, - struct net_device *port_dev, - unsigned long event, u16 vid) -{ - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev); - struct mlxsw_sp_port *mlxsw_sp_vport; - - mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); - if (WARN_ON(!mlxsw_sp_vport)) - return -EINVAL; - - switch (event) { - case NETDEV_UP: - return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, l3_dev); - case NETDEV_DOWN: - mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport); - break; - } - - return 0; -} - -static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev, - unsigned long event) -{ - if (netif_is_bridge_port(port_dev) || netif_is_lag_port(port_dev)) - return 0; - - return mlxsw_sp_inetaddr_vport_event(port_dev, port_dev, event, 1); -} - -static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, - struct net_device *lag_dev, - unsigned long event, u16 vid) -{ - struct net_device *port_dev; - struct list_head *iter; - int err; - - netdev_for_each_lower_dev(lag_dev, port_dev, iter) { - if (mlxsw_sp_port_dev_check(port_dev)) { - err = mlxsw_sp_inetaddr_vport_event(l3_dev, port_dev, - event, vid); - if (err) - return err; - } - } - - return 0; -} - -static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev, - unsigned long event) -{ - if (netif_is_bridge_port(lag_dev)) - return 0; - - return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1); -} - -static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, - struct net_device *l3_dev) -{ - u16 fid; - - if (is_vlan_dev(l3_dev)) - fid = vlan_dev_vlan_id(l3_dev); - else if (mlxsw_sp->master_bridge.dev == l3_dev) - fid = 1; - else - return mlxsw_sp_vfid_find(mlxsw_sp, l3_dev); - - return mlxsw_sp_fid_find(mlxsw_sp, fid); -} - -static enum mlxsw_flood_table_type mlxsw_sp_flood_table_type_get(u16 fid) -{ - return mlxsw_sp_fid_is_vfid(fid) ? 
MLXSW_REG_SFGC_TABLE_TYPE_FID : - MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; -} - -static u16 mlxsw_sp_flood_table_index_get(u16 fid) -{ - return mlxsw_sp_fid_is_vfid(fid) ? mlxsw_sp_fid_to_vfid(fid) : fid; -} - -static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid, - bool set) -{ - enum mlxsw_flood_table_type table_type; - char *sftr_pl; - u16 index; - int err; - - sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); - if (!sftr_pl) - return -ENOMEM; - - table_type = mlxsw_sp_flood_table_type_get(fid); - index = mlxsw_sp_flood_table_index_get(fid); - mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BC, index, table_type, - 1, MLXSW_PORT_ROUTER_PORT, set); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); - - kfree(sftr_pl); - return err; -} - -static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid) -{ - if (mlxsw_sp_fid_is_vfid(fid)) - return MLXSW_REG_RITR_FID_IF; - else - return MLXSW_REG_RITR_VLAN_IF; -} - -static int mlxsw_sp_rif_bridge_op(struct mlxsw_sp *mlxsw_sp, - struct net_device *l3_dev, - u16 fid, u16 rif, - bool create) -{ - enum mlxsw_reg_ritr_if_type rif_type; - char ritr_pl[MLXSW_REG_RITR_LEN]; - - rif_type = mlxsw_sp_rif_type_get(fid); - mlxsw_reg_ritr_pack(ritr_pl, create, rif_type, rif, l3_dev->mtu, - l3_dev->dev_addr); - mlxsw_reg_ritr_fid_set(ritr_pl, rif_type, fid); - - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); -} - -static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp, - struct net_device *l3_dev, - struct mlxsw_sp_fid *f) -{ - struct mlxsw_sp_rif *r; - u16 rif; - int err; - - rif = mlxsw_sp_avail_rif_get(mlxsw_sp); - if (rif == MLXSW_SP_INVALID_RIF) - return -ERANGE; - - err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true); - if (err) - return err; - - err = mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, true); - if (err) - goto err_rif_bridge_op; - - err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true); - if (err) - goto 
err_rif_fdb_op; - - r = mlxsw_sp_rif_alloc(rif, l3_dev, f); - if (!r) { - err = -ENOMEM; - goto err_rif_alloc; - } - - f->r = r; - mlxsw_sp->rifs[rif] = r; - - netdev_dbg(l3_dev, "RIF=%d created\n", rif); - - return 0; - -err_rif_alloc: - mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false); -err_rif_fdb_op: - mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false); -err_rif_bridge_op: - mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false); - return err; -} - -void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *r) -{ - struct net_device *l3_dev = r->dev; - struct mlxsw_sp_fid *f = r->f; - u16 rif = r->rif; - - mlxsw_sp_router_rif_gone_sync(mlxsw_sp, r); - - mlxsw_sp->rifs[rif] = NULL; - f->r = NULL; - - kfree(r); - - mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false); - - mlxsw_sp_rif_bridge_op(mlxsw_sp, l3_dev, f->fid, rif, false); - - mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false); - - netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif); -} - -static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev, - struct net_device *br_dev, - unsigned long event) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev); - struct mlxsw_sp_fid *f; - - /* FID can either be an actual FID if the L3 device is the - * VLAN-aware bridge or a VLAN device on top. Otherwise, the - * L3 device is a VLAN-unaware bridge and we get a vFID. 
- */ - f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev); - if (WARN_ON(!f)) - return -EINVAL; - - switch (event) { - case NETDEV_UP: - return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f); - case NETDEV_DOWN: - mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); - break; - } - - return 0; -} - -static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev, - unsigned long event) -{ - struct net_device *real_dev = vlan_dev_real_dev(vlan_dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev); - u16 vid = vlan_dev_vlan_id(vlan_dev); - - if (mlxsw_sp_port_dev_check(real_dev)) - return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event, - vid); - else if (netif_is_lag_master(real_dev)) - return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event, - vid); - else if (netif_is_bridge_master(real_dev) && - mlxsw_sp->master_bridge.dev == real_dev) - return mlxsw_sp_inetaddr_bridge_event(vlan_dev, real_dev, - event); - - return 0; -} - -static int mlxsw_sp_inetaddr_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct in_ifaddr *ifa = (struct in_ifaddr *) ptr; - struct net_device *dev = ifa->ifa_dev->dev; - struct mlxsw_sp *mlxsw_sp; - struct mlxsw_sp_rif *r; - int err = 0; - - mlxsw_sp = mlxsw_sp_lower_get(dev); - if (!mlxsw_sp) - goto out; - - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!mlxsw_sp_rif_should_config(r, event)) - goto out; - - if (mlxsw_sp_port_dev_check(dev)) - err = mlxsw_sp_inetaddr_port_event(dev, event); - else if (netif_is_lag_master(dev)) - err = mlxsw_sp_inetaddr_lag_event(dev, event); - else if (netif_is_bridge_master(dev)) - err = mlxsw_sp_inetaddr_bridge_event(dev, dev, event); - else if (is_vlan_dev(dev)) - err = mlxsw_sp_inetaddr_vlan_event(dev, event); - -out: - return notifier_from_errno(err); -} - -static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif, - const char *mac, int mtu) -{ - char ritr_pl[MLXSW_REG_RITR_LEN]; - int err; - - mlxsw_reg_ritr_rif_pack(ritr_pl, rif); - err = 
mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); - if (err) - return err; - - mlxsw_reg_ritr_mtu_set(ritr_pl, mtu); - mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac); - mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); -} - -static int mlxsw_sp_netdevice_router_port_event(struct net_device *dev) -{ - struct mlxsw_sp *mlxsw_sp; - struct mlxsw_sp_rif *r; - int err; - - mlxsw_sp = mlxsw_sp_lower_get(dev); - if (!mlxsw_sp) - return 0; - - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!r) - return 0; - - err = mlxsw_sp_rif_fdb_op(mlxsw_sp, r->addr, r->f->fid, false); - if (err) - return err; - - err = mlxsw_sp_rif_edit(mlxsw_sp, r->rif, dev->dev_addr, dev->mtu); - if (err) - goto err_rif_edit; - - err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, r->f->fid, true); - if (err) - goto err_rif_fdb_op; - - ether_addr_copy(r->addr, dev->dev_addr); - r->mtu = dev->mtu; - - netdev_dbg(dev, "Updated RIF=%d\n", r->rif); - - return 0; - -err_rif_fdb_op: - mlxsw_sp_rif_edit(mlxsw_sp, r->rif, r->addr, r->mtu); -err_rif_edit: - mlxsw_sp_rif_fdb_op(mlxsw_sp, r->addr, r->f->fid, true); - return err; -} - static bool mlxsw_sp_lag_port_fid_member(struct mlxsw_sp_port *lag_port, u16 fid) { @@ -4220,7 +3852,7 @@ static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_port_pvid_vport_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, - u16 lag_id) + struct net_device *lag_dev, u16 lag_id) { struct mlxsw_sp_port *mlxsw_sp_vport; struct mlxsw_sp_fid *f; @@ -4238,6 +3870,7 @@ mlxsw_sp_port_pvid_vport_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_vport->lag_id = lag_id; mlxsw_sp_vport->lagged = 1; + mlxsw_sp_vport->dev = lag_dev; } static void @@ -4254,6 +3887,7 @@ mlxsw_sp_port_pvid_vport_lag_leave(struct mlxsw_sp_port *mlxsw_sp_port) if (f) f->leave(mlxsw_sp_vport); + mlxsw_sp_vport->dev = mlxsw_sp_port->dev; mlxsw_sp_vport->lagged = 0; } @@ -4293,7 +3927,7 @@ static 
int mlxsw_sp_port_lag_join(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port->lagged = 1; lag->ref_count++; - mlxsw_sp_port_pvid_vport_lag_join(mlxsw_sp_port, lag_id); + mlxsw_sp_port_pvid_vport_lag_join(mlxsw_sp_port, lag_dev, lag_id); return 0; @@ -4403,6 +4037,56 @@ static void mlxsw_sp_port_vlan_unlink(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_vport->dev = mlxsw_sp_port->dev; } +static int mlxsw_sp_port_stp_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool enable) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + enum mlxsw_reg_spms_state spms_state; + char *spms_pl; + u16 vid; + int err; + + spms_state = enable ? MLXSW_REG_SPMS_STATE_FORWARDING : + MLXSW_REG_SPMS_STATE_DISCARDING; + + spms_pl = kmalloc(MLXSW_REG_SPMS_LEN, GFP_KERNEL); + if (!spms_pl) + return -ENOMEM; + mlxsw_reg_spms_pack(spms_pl, mlxsw_sp_port->local_port); + + for (vid = 0; vid < VLAN_N_VID; vid++) + mlxsw_reg_spms_vid_pack(spms_pl, vid, spms_state); + + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spms), spms_pl); + kfree(spms_pl); + return err; +} + +static int mlxsw_sp_port_ovs_join(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = mlxsw_sp_port_stp_set(mlxsw_sp_port, true); + if (err) + return err; + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, 2, VLAN_N_VID - 1, + true, false); + if (err) + goto err_port_vlan_set; + return 0; + +err_port_vlan_set: + mlxsw_sp_port_stp_set(mlxsw_sp_port, false); + return err; +} + +static void mlxsw_sp_port_ovs_leave(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port_vlan_set(mlxsw_sp_port, 2, VLAN_N_VID - 1, + false, false); + mlxsw_sp_port_stp_set(mlxsw_sp_port, false); +} + static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, unsigned long event, void *ptr) { @@ -4421,7 +4105,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, upper_dev = info->upper_dev; if (!is_vlan_dev(upper_dev) && !netif_is_lag_master(upper_dev) && - !netif_is_bridge_master(upper_dev)) + 
!netif_is_bridge_master(upper_dev) && + !netif_is_ovs_master(upper_dev)) return -EINVAL; if (!info->linking) break; @@ -4438,6 +4123,10 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, if (netif_is_lag_port(dev) && is_vlan_dev(upper_dev) && !netif_is_lag_master(vlan_dev_real_dev(upper_dev))) return -EINVAL; + if (netif_is_ovs_master(upper_dev) && vlan_uses_dev(dev)) + return -EINVAL; + if (netif_is_ovs_port(dev) && is_vlan_dev(upper_dev)) + return -EINVAL; break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; @@ -4446,8 +4135,8 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, err = mlxsw_sp_port_vlan_link(mlxsw_sp_port, upper_dev); else - mlxsw_sp_port_vlan_unlink(mlxsw_sp_port, - upper_dev); + mlxsw_sp_port_vlan_unlink(mlxsw_sp_port, + upper_dev); } else if (netif_is_bridge_master(upper_dev)) { if (info->linking) err = mlxsw_sp_port_bridge_join(mlxsw_sp_port, @@ -4461,6 +4150,11 @@ static int mlxsw_sp_netdevice_port_upper_event(struct net_device *dev, else mlxsw_sp_port_lag_leave(mlxsw_sp_port, upper_dev); + } else if (netif_is_ovs_master(upper_dev)) { + if (info->linking) + err = mlxsw_sp_port_ovs_join(mlxsw_sp_port); + else + mlxsw_sp_port_ovs_leave(mlxsw_sp_port); } else { err = -EINVAL; WARN_ON(1); @@ -4552,8 +4246,8 @@ static void mlxsw_sp_master_bridge_vlan_unlink(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f; f = mlxsw_sp_fid_find(mlxsw_sp, fid); - if (f && f->r) - mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); + if (f && f->rif) + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif); if (f && --f->ref_count == 0) mlxsw_sp_fid_destroy(mlxsw_sp, f); } @@ -4564,33 +4258,40 @@ static int mlxsw_sp_netdevice_bridge_event(struct net_device *br_dev, struct netdev_notifier_changeupper_info *info; struct net_device *upper_dev; struct mlxsw_sp *mlxsw_sp; - int err; + int err = 0; mlxsw_sp = mlxsw_sp_lower_get(br_dev); if (!mlxsw_sp) return 0; - if (br_dev != mlxsw_sp->master_bridge.dev) - return 0; info = ptr; 
switch (event) { - case NETDEV_CHANGEUPPER: + case NETDEV_PRECHANGEUPPER: upper_dev = info->upper_dev; if (!is_vlan_dev(upper_dev)) - break; - if (info->linking) { - err = mlxsw_sp_master_bridge_vlan_link(mlxsw_sp, - upper_dev); - if (err) - return err; + return -EINVAL; + if (is_vlan_dev(upper_dev) && + br_dev != mlxsw_sp->master_bridge.dev) + return -EINVAL; + break; + case NETDEV_CHANGEUPPER: + upper_dev = info->upper_dev; + if (is_vlan_dev(upper_dev)) { + if (info->linking) + err = mlxsw_sp_master_bridge_vlan_link(mlxsw_sp, + upper_dev); + else + mlxsw_sp_master_bridge_vlan_unlink(mlxsw_sp, + upper_dev); } else { - mlxsw_sp_master_bridge_vlan_unlink(mlxsw_sp, upper_dev); + err = -EINVAL; + WARN_ON(1); } break; } - return 0; + return err; } static u16 mlxsw_sp_avail_vfid_get(const struct mlxsw_sp *mlxsw_sp) @@ -4657,8 +4358,8 @@ static void mlxsw_sp_vfid_destroy(struct mlxsw_sp *mlxsw_sp, clear_bit(vfid, mlxsw_sp->vfids.mapped); list_del(&f->list); - if (f->r) - mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); + if (f->rif) + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif); kfree(f); @@ -4810,6 +4511,8 @@ static int mlxsw_sp_netdevice_vport_event(struct net_device *dev, int err = 0; mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); + if (!mlxsw_sp_vport) + return 0; switch (event) { case NETDEV_PRECHANGEUPPER: @@ -4821,22 +4524,24 @@ static int mlxsw_sp_netdevice_vport_event(struct net_device *dev, /* We can't have multiple VLAN interfaces configured on * the same port and being members in the same bridge. 
*/ - if (!mlxsw_sp_port_master_bridge_check(mlxsw_sp_port, + if (netif_is_bridge_master(upper_dev) && + !mlxsw_sp_port_master_bridge_check(mlxsw_sp_port, upper_dev)) return -EINVAL; break; case NETDEV_CHANGEUPPER: upper_dev = info->upper_dev; - if (info->linking) { - if (WARN_ON(!mlxsw_sp_vport)) - return -EINVAL; - err = mlxsw_sp_vport_bridge_join(mlxsw_sp_vport, - upper_dev); + if (netif_is_bridge_master(upper_dev)) { + if (info->linking) + err = mlxsw_sp_vport_bridge_join(mlxsw_sp_vport, + upper_dev); + else + mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport); } else { - if (!mlxsw_sp_vport) - return 0; - mlxsw_sp_vport_bridge_leave(mlxsw_sp_vport); + err = -EINVAL; + WARN_ON(1); } + break; } return err; @@ -4878,6 +4583,15 @@ static int mlxsw_sp_netdevice_vlan_event(struct net_device *vlan_dev, return 0; } +static bool mlxsw_sp_is_vrf_event(unsigned long event, void *ptr) +{ + struct netdev_notifier_changeupper_info *info = ptr; + + if (event != NETDEV_PRECHANGEUPPER && event != NETDEV_CHANGEUPPER) + return false; + return netif_is_l3_master(info->upper_dev); +} + static int mlxsw_sp_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) { @@ -4886,6 +4600,8 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused, if (event == NETDEV_CHANGEADDR || event == NETDEV_CHANGEMTU) err = mlxsw_sp_netdevice_router_port_event(dev); + else if (mlxsw_sp_is_vrf_event(event, ptr)) + err = mlxsw_sp_netdevice_vrf_event(dev, event, ptr); else if (mlxsw_sp_port_dev_check(dev)) err = mlxsw_sp_netdevice_port_event(dev, event, ptr); else if (netif_is_lag_master(dev)) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 13ec85e7c392..0c23bc1e946d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -57,41 +57,21 @@ #define MLXSW_SP_VFID_BASE VLAN_N_VID #define MLXSW_SP_VFID_MAX 1024 /* Bridged VLAN interfaces */ +#define 
MLXSW_SP_DUMMY_FID 15359 + #define MLXSW_SP_RFID_BASE 15360 -#define MLXSW_SP_INVALID_RIF 0xffff #define MLXSW_SP_MID_MAX 7000 #define MLXSW_SP_PORTS_PER_CLUSTER_MAX 4 -#define MLXSW_SP_LPM_TREE_MIN 2 /* trees 0 and 1 are reserved */ -#define MLXSW_SP_LPM_TREE_MAX 22 -#define MLXSW_SP_LPM_TREE_COUNT (MLXSW_SP_LPM_TREE_MAX - MLXSW_SP_LPM_TREE_MIN) - #define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */ -#define MLXSW_SP_BYTES_PER_CELL 96 - -#define MLXSW_SP_BYTES_TO_CELLS(b) DIV_ROUND_UP(b, MLXSW_SP_BYTES_PER_CELL) -#define MLXSW_SP_CELLS_TO_BYTES(c) (c * MLXSW_SP_BYTES_PER_CELL) - #define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */ #define MLXSW_SP_KVD_GRANULARITY 128 -/* Maximum delay buffer needed in case of PAUSE frames, in cells. - * Assumes 100m cable and maximum MTU. - */ -#define MLXSW_SP_PAUSE_DELAY 612 - -#define MLXSW_SP_CELL_FACTOR 2 /* 2 * cell_size / (IPG + cell_size + 1) */ - -static inline u16 mlxsw_sp_pfc_delay_get(int mtu, u16 delay) -{ - delay = MLXSW_SP_BYTES_TO_CELLS(DIV_ROUND_UP(delay, BITS_PER_BYTE)); - return MLXSW_SP_CELL_FACTOR * delay + MLXSW_SP_BYTES_TO_CELLS(mtu); -} - struct mlxsw_sp_port; +struct mlxsw_sp_rif; struct mlxsw_sp_upper { struct net_device *dev; @@ -103,21 +83,10 @@ struct mlxsw_sp_fid { struct list_head list; unsigned int ref_count; struct net_device *dev; - struct mlxsw_sp_rif *r; + struct mlxsw_sp_rif *rif; u16 fid; }; -struct mlxsw_sp_rif { - struct list_head nexthop_list; - struct list_head neigh_list; - struct net_device *dev; - unsigned int ref_count; - struct mlxsw_sp_fid *f; - unsigned char addr[ETH_ALEN]; - int mtu; - u16 rif; -}; - struct mlxsw_sp_mid { struct list_head list; unsigned char addr[ETH_ALEN]; @@ -138,17 +107,7 @@ static inline u16 mlxsw_sp_fid_to_vfid(u16 fid) static inline bool mlxsw_sp_fid_is_vfid(u16 fid) { - return fid >= MLXSW_SP_VFID_BASE && fid < MLXSW_SP_RFID_BASE; -} - -static inline bool mlxsw_sp_fid_is_rfid(u16 fid) -{ - return fid >= MLXSW_SP_RFID_BASE; -} - -static inline u16 
mlxsw_sp_rif_sp_to_fid(u16 rif) -{ - return MLXSW_SP_RFID_BASE + rif; + return fid >= MLXSW_SP_VFID_BASE && fid < MLXSW_SP_DUMMY_FID; } struct mlxsw_sp_sb_pr { @@ -177,12 +136,15 @@ struct mlxsw_sp_sb_pm { #define MLXSW_SP_SB_POOL_COUNT 4 #define MLXSW_SP_SB_TC_COUNT 8 +struct mlxsw_sp_sb_port { + struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT]; + struct mlxsw_sp_sb_pm pms[2][MLXSW_SP_SB_POOL_COUNT]; +}; + struct mlxsw_sp_sb { struct mlxsw_sp_sb_pr prs[2][MLXSW_SP_SB_POOL_COUNT]; - struct { - struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT]; - struct mlxsw_sp_sb_pm pms[2][MLXSW_SP_SB_POOL_COUNT]; - } ports[MLXSW_PORT_MAX_PORTS]; + struct mlxsw_sp_sb_port *ports; + u32 cell_size; }; #define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE) @@ -207,11 +169,9 @@ struct mlxsw_sp_fib; struct mlxsw_sp_vr { u16 id; /* virtual router ID */ - bool used; - enum mlxsw_sp_l3proto proto; u32 tb_id; /* kernel fib table id */ - struct mlxsw_sp_lpm_tree *lpm_tree; - struct mlxsw_sp_fib *fib; + unsigned int rif_count; + struct mlxsw_sp_fib *fib4; }; enum mlxsw_sp_span_type { @@ -253,12 +213,15 @@ struct mlxsw_sp_port_mall_tc_entry { }; struct mlxsw_sp_router { - struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT]; struct mlxsw_sp_vr *vrs; struct rhashtable neigh_ht; struct rhashtable nexthop_group_ht; struct rhashtable nexthop_ht; struct { + struct mlxsw_sp_lpm_tree *trees; + unsigned int tree_count; + } lpm; + struct { struct delayed_work dw; unsigned long interval; /* ms */ } neighs_update; @@ -269,6 +232,7 @@ struct mlxsw_sp_router { }; struct mlxsw_sp_acl; +struct mlxsw_sp_counter_pool; struct mlxsw_sp { struct { @@ -296,7 +260,7 @@ struct mlxsw_sp { u32 ageing_time; struct mlxsw_sp_upper master_bridge; struct mlxsw_sp_upper *lags; - u8 port_to_module[MLXSW_PORT_MAX_PORTS]; + u8 *port_to_module; struct mlxsw_sp_sb sb; struct mlxsw_sp_router router; struct mlxsw_sp_acl *acl; @@ -304,6 +268,7 @@ struct mlxsw_sp { DECLARE_BITMAP(usage, 
MLXSW_SP_KVD_LINEAR_SIZE); } kvdl; + struct mlxsw_sp_counter_pool *counter_pool; struct { struct mlxsw_sp_span_entry *entries; int entries_count; @@ -317,6 +282,18 @@ mlxsw_sp_lag_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id) return &mlxsw_sp->lags[lag_id]; } +static inline u32 mlxsw_sp_cells_bytes(const struct mlxsw_sp *mlxsw_sp, + u32 cells) +{ + return mlxsw_sp->sb.cell_size * cells; +} + +static inline u32 mlxsw_sp_bytes_cells(const struct mlxsw_sp *mlxsw_sp, + u32 bytes) +{ + return DIV_ROUND_UP(bytes, mlxsw_sp->sb.cell_size); +} + struct mlxsw_sp_port_pcpu_stats { u64 rx_packets; u64 rx_bytes; @@ -386,6 +363,7 @@ struct mlxsw_sp_port { }; bool mlxsw_sp_port_dev_check(const struct net_device *dev); +struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev); struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev); void mlxsw_sp_port_dev_put(struct mlxsw_sp_port *mlxsw_sp_port); @@ -497,19 +475,6 @@ mlxsw_sp_vfid_find(const struct mlxsw_sp *mlxsw_sp, return NULL; } -static inline struct mlxsw_sp_rif * -mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev) -{ - int i; - - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) - if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev) - return mlxsw_sp->rifs[i]; - - return NULL; -} - enum mlxsw_sp_flood_table { MLXSW_SP_FLOOD_TABLE_UC, MLXSW_SP_FLOOD_TABLE_BC, @@ -570,8 +535,6 @@ int mlxsw_sp_rif_fdb_op(struct mlxsw_sp *mlxsw_sp, const char *mac, u16 fid, bool adding); struct mlxsw_sp_fid *mlxsw_sp_fid_create(struct mlxsw_sp *mlxsw_sp, u16 fid); void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f); -void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *r); int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, bool dwrr, u8 dwrr_weight); @@ -608,10 +571,16 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); void 
mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_router_netevent_event(struct notifier_block *unused, unsigned long event, void *ptr); -void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *r); +int mlxsw_sp_netdevice_router_port_event(struct net_device *dev); +int mlxsw_sp_inetaddr_event(struct notifier_block *unused, + unsigned long event, void *ptr); +void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif); +int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, + struct netdev_notifier_changeupper_info *info); -int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count); +int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count, + u32 *p_entry_index); void mlxsw_sp_kvdl_free(struct mlxsw_sp *mlxsw_sp, int entry_index); struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl); @@ -620,6 +589,8 @@ struct mlxsw_sp_acl_rule_info { unsigned int priority; struct mlxsw_afk_element_values values; struct mlxsw_afa_block *act_block; + unsigned int counter_index; + bool counter_valid; }; enum mlxsw_sp_acl_profile { @@ -639,6 +610,8 @@ struct mlxsw_sp_acl_profile_ops { void *ruleset_priv, void *rule_priv, struct mlxsw_sp_acl_rule_info *rulei); void (*rule_del)(struct mlxsw_sp *mlxsw_sp, void *rule_priv); + int (*rule_activity_get)(struct mlxsw_sp *mlxsw_sp, void *rule_priv, + bool *activity); }; struct mlxsw_sp_acl_ops { @@ -679,6 +652,14 @@ int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei); int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_rule_info *rulei, struct net_device *out_dev); +int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 action, u16 vid, u16 proto, u8 prio); +int mlxsw_sp_acl_rulei_act_count(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei); +int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp 
*mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u16 fid); struct mlxsw_sp_acl_rule; @@ -698,6 +679,9 @@ mlxsw_sp_acl_rule_lookup(struct mlxsw_sp *mlxsw_sp, unsigned long cookie); struct mlxsw_sp_acl_rule_info * mlxsw_sp_acl_rule_rulei(struct mlxsw_sp_acl_rule *rule); +int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule, + u64 *packets, u64 *bytes, u64 *last_use); int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp); @@ -708,5 +692,14 @@ int mlxsw_sp_flower_replace(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, __be16 protocol, struct tc_cls_flower_offload *f); void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, struct tc_cls_flower_offload *f); +int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, + struct tc_cls_flower_offload *f); +int mlxsw_sp_flow_counter_get(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index, u64 *packets, + u64 *bytes); +int mlxsw_sp_flow_counter_alloc(struct mlxsw_sp *mlxsw_sp, + unsigned int *p_counter_index); +void mlxsw_sp_flow_counter_free(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index); #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 8a18b3aa70dc..317f7b14627f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@ -39,6 +39,7 @@ #include <linux/string.h> #include <linux/rhashtable.h> #include <linux/netdevice.h> +#include <net/tc_act/tc_vlan.h> #include "reg.h" #include "core.h" @@ -49,10 +50,17 @@ #include "spectrum_acl_flex_keys.h" struct mlxsw_sp_acl { + struct mlxsw_sp *mlxsw_sp; struct mlxsw_afk *afk; struct mlxsw_afa *afa; const struct mlxsw_sp_acl_ops *ops; struct rhashtable ruleset_ht; + struct list_head rules; + struct { + struct delayed_work dw; + unsigned long interval; /* ms */ +#define 
MLXSW_SP_ACL_RULE_ACTIVITY_UPDATE_PERIOD_MS 1000 + } rule_activity_update; unsigned long priv[0]; /* priv has to be always the last item */ }; @@ -79,9 +87,13 @@ struct mlxsw_sp_acl_ruleset { struct mlxsw_sp_acl_rule { struct rhash_head ht_node; /* Member of rule HT */ + struct list_head list; unsigned long cookie; /* HT key */ struct mlxsw_sp_acl_ruleset *ruleset; struct mlxsw_sp_acl_rule_info *rulei; + u64 last_used; + u64 last_packets; + u64 last_bytes; unsigned long priv[0]; /* priv has to be always the last item */ }; @@ -237,6 +249,27 @@ void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset); } +static int +mlxsw_sp_acl_rulei_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei) +{ + int err; + + err = mlxsw_sp_flow_counter_alloc(mlxsw_sp, &rulei->counter_index); + if (err) + return err; + rulei->counter_valid = true; + return 0; +} + +static void +mlxsw_sp_acl_rulei_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei) +{ + rulei->counter_valid = false; + mlxsw_sp_flow_counter_free(mlxsw_sp, rulei->counter_index); +} + struct mlxsw_sp_acl_rule_info * mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl) { @@ -335,6 +368,48 @@ int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp, local_port, in_port); } +int mlxsw_sp_acl_rulei_act_vlan(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u32 action, u16 vid, u16 proto, u8 prio) +{ + u8 ethertype; + + if (action == TCA_VLAN_ACT_MODIFY) { + switch (proto) { + case ETH_P_8021Q: + ethertype = 0; + break; + case ETH_P_8021AD: + ethertype = 1; + break; + default: + dev_err(mlxsw_sp->bus_info->dev, "Unsupported VLAN protocol %#04x\n", + proto); + return -EINVAL; + } + + return mlxsw_afa_block_append_vlan_modify(rulei->act_block, + vid, prio, ethertype); + } else { + dev_err(mlxsw_sp->bus_info->dev, "Unsupported VLAN action\n"); + return -EINVAL; + } +} + +int mlxsw_sp_acl_rulei_act_count(struct 
mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei) +{ + return mlxsw_afa_block_append_counter(rulei->act_block, + rulei->counter_index); +} + +int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule_info *rulei, + u16 fid) +{ + return mlxsw_afa_block_append_fid_set(rulei->act_block, fid); +} + struct mlxsw_sp_acl_rule * mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ruleset *ruleset, @@ -358,8 +433,14 @@ mlxsw_sp_acl_rule_create(struct mlxsw_sp *mlxsw_sp, err = PTR_ERR(rule->rulei); goto err_rulei_create; } + + err = mlxsw_sp_acl_rulei_counter_alloc(mlxsw_sp, rule->rulei); + if (err) + goto err_counter_alloc; return rule; +err_counter_alloc: + mlxsw_sp_acl_rulei_destroy(rule->rulei); err_rulei_create: kfree(rule); err_alloc: @@ -372,6 +453,7 @@ void mlxsw_sp_acl_rule_destroy(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; + mlxsw_sp_acl_rulei_counter_free(mlxsw_sp, rule->rulei); mlxsw_sp_acl_rulei_destroy(rule->rulei); kfree(rule); mlxsw_sp_acl_ruleset_ref_dec(mlxsw_sp, ruleset); @@ -393,6 +475,7 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp, if (err) goto err_rhashtable_insert; + list_add_tail(&rule->list, &mlxsw_sp->acl->rules); return 0; err_rhashtable_insert: @@ -406,6 +489,7 @@ void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + list_del(&rule->list); rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node, mlxsw_sp_acl_rule_ht_params); ops->rule_del(mlxsw_sp, rule->priv); @@ -426,6 +510,90 @@ mlxsw_sp_acl_rule_rulei(struct mlxsw_sp_acl_rule *rule) return rule->rulei; } +static int mlxsw_sp_acl_rule_activity_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule) +{ + struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; + const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + bool active; + int err; + + 
err = ops->rule_activity_get(mlxsw_sp, rule->priv, &active); + if (err) + return err; + if (active) + rule->last_used = jiffies; + return 0; +} + +static int mlxsw_sp_acl_rules_activity_update(struct mlxsw_sp_acl *acl) +{ + struct mlxsw_sp_acl_rule *rule; + int err; + + /* Protect internal structures from changes */ + rtnl_lock(); + list_for_each_entry(rule, &acl->rules, list) { + err = mlxsw_sp_acl_rule_activity_update(acl->mlxsw_sp, + rule); + if (err) + goto err_rule_update; + } + rtnl_unlock(); + return 0; + +err_rule_update: + rtnl_unlock(); + return err; +} + +static void mlxsw_sp_acl_rule_activity_work_schedule(struct mlxsw_sp_acl *acl) +{ + unsigned long interval = acl->rule_activity_update.interval; + + mlxsw_core_schedule_dw(&acl->rule_activity_update.dw, + msecs_to_jiffies(interval)); +} + +static void mlxsw_sp_acl_rul_activity_update_work(struct work_struct *work) +{ + struct mlxsw_sp_acl *acl = container_of(work, struct mlxsw_sp_acl, + rule_activity_update.dw.work); + int err; + + err = mlxsw_sp_acl_rules_activity_update(acl); + if (err) + dev_err(acl->mlxsw_sp->bus_info->dev, "Could not update acl activity"); + + mlxsw_sp_acl_rule_activity_work_schedule(acl); +} + +int mlxsw_sp_acl_rule_get_stats(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_rule *rule, + u64 *packets, u64 *bytes, u64 *last_use) + +{ + struct mlxsw_sp_acl_rule_info *rulei; + u64 current_packets; + u64 current_bytes; + int err; + + rulei = mlxsw_sp_acl_rule_rulei(rule); + err = mlxsw_sp_flow_counter_get(mlxsw_sp, rulei->counter_index, + &current_packets, &current_bytes); + if (err) + return err; + + *packets = current_packets - rule->last_packets; + *bytes = current_bytes - rule->last_bytes; + *last_use = rule->last_used; + + rule->last_bytes = current_bytes; + rule->last_packets = current_packets; + + return 0; +} + #define MLXSW_SP_KDVL_ACT_EXT_SIZE 1 static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index, @@ -434,7 +602,6 @@ static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 
*p_kvdl_index, struct mlxsw_sp *mlxsw_sp = priv; char pefa_pl[MLXSW_REG_PEFA_LEN]; u32 kvdl_index; - int ret; int err; /* The first action set of a TCAM entry is stored directly in TCAM, @@ -443,10 +610,10 @@ static int mlxsw_sp_act_kvdl_set_add(void *priv, u32 *p_kvdl_index, if (is_first) return 0; - ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KDVL_ACT_EXT_SIZE); - if (ret < 0) - return ret; - kvdl_index = ret; + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KDVL_ACT_EXT_SIZE, + &kvdl_index); + if (err) + return err; mlxsw_reg_pefa_pack(pefa_pl, kvdl_index, enc_actions); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pefa), pefa_pl); if (err) @@ -475,13 +642,11 @@ static int mlxsw_sp_act_kvdl_fwd_entry_add(void *priv, u32 *p_kvdl_index, struct mlxsw_sp *mlxsw_sp = priv; char ppbs_pl[MLXSW_REG_PPBS_LEN]; u32 kvdl_index; - int ret; int err; - ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1); - if (ret < 0) - return ret; - kvdl_index = ret; + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, 1, &kvdl_index); + if (err) + return err; mlxsw_reg_ppbs_pack(ppbs_pl, kvdl_index, local_port); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ppbs), ppbs_pl); if (err) @@ -518,7 +683,7 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) if (!acl) return -ENOMEM; mlxsw_sp->acl = acl; - + acl->mlxsw_sp = mlxsw_sp; acl->afk = mlxsw_afk_create(MLXSW_CORE_RES_GET(mlxsw_sp->core, ACL_FLEX_KEYS), mlxsw_sp_afk_blocks, @@ -541,11 +706,18 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_rhashtable_init; + INIT_LIST_HEAD(&acl->rules); err = acl_ops->init(mlxsw_sp, acl->priv); if (err) goto err_acl_ops_init; acl->ops = acl_ops; + + /* Create the delayed work for the rule activity_update */ + INIT_DELAYED_WORK(&acl->rule_activity_update.dw, + mlxsw_sp_acl_rul_activity_update_work); + acl->rule_activity_update.interval = MLXSW_SP_ACL_RULE_ACTIVITY_UPDATE_PERIOD_MS; + mlxsw_core_schedule_dw(&acl->rule_activity_update.dw, 0); return 0; err_acl_ops_init: @@ -564,7 +736,9 @@ void 
mlxsw_sp_acl_fini(struct mlxsw_sp *mlxsw_sp) struct mlxsw_sp_acl *acl = mlxsw_sp->acl; const struct mlxsw_sp_acl_ops *acl_ops = acl->ops; + cancel_delayed_work_sync(&mlxsw_sp->acl->rule_activity_update.dw); acl_ops->fini(mlxsw_sp, acl->priv); + WARN_ON(!list_empty(&acl->rules)); rhashtable_destroy(&acl->ruleset_ht); mlxsw_afa_destroy(acl->afa); mlxsw_afk_destroy(acl->afk); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h index 82b81cf7f4a7..af7b7bad48df 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.h @@ -39,11 +39,15 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_dmac[] = { MLXSW_AFK_ELEMENT_INST_BUF(DMAC, 0x00, 6), + MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3), + MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12), MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_l2_smac[] = { MLXSW_AFK_ELEMENT_INST_BUF(SMAC, 0x00, 6), + MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 13, 3), + MLXSW_AFK_ELEMENT_INST_U32(VID, 0x08, 0, 12), MLXSW_AFK_ELEMENT_INST_U32(SRC_SYS_PORT, 0x0C, 0, 16), }; @@ -65,6 +69,8 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_dip[] = { }; static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_ex[] = { + MLXSW_AFK_ELEMENT_INST_U32(VID, 0x00, 0, 12), + MLXSW_AFK_ELEMENT_INST_U32(PCP, 0x08, 29, 3), MLXSW_AFK_ELEMENT_INST_U32(SRC_L4_PORT, 0x08, 0, 16), MLXSW_AFK_ELEMENT_INST_U32(DST_L4_PORT, 0x0C, 0, 16), }; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c index 7382832215fa..3a24289979d9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c @@ -561,6 +561,24 @@ mlxsw_sp_acl_tcam_region_entry_remove(struct 
mlxsw_sp *mlxsw_sp, mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); } +static int +mlxsw_sp_acl_tcam_region_entry_activity_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_region *region, + unsigned int offset, + bool *activity) +{ + char ptce2_pl[MLXSW_REG_PTCE2_LEN]; + int err; + + mlxsw_reg_ptce2_pack(ptce2_pl, true, MLXSW_REG_PTCE2_OP_QUERY_CLEAR_ON_READ, + region->tcam_region_info, offset); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptce2), ptce2_pl); + if (err) + return err; + *activity = mlxsw_reg_ptce2_a_get(ptce2_pl); + return 0; +} + #define MLXSW_SP_ACL_TCAM_CATCHALL_PRIO (~0U) static int @@ -940,6 +958,19 @@ static void mlxsw_sp_acl_tcam_entry_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_acl_tcam_chunk_put(mlxsw_sp, chunk); } +static int +mlxsw_sp_acl_tcam_entry_activity_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_acl_tcam_entry *entry, + bool *activity) +{ + struct mlxsw_sp_acl_tcam_chunk *chunk = entry->chunk; + struct mlxsw_sp_acl_tcam_region *region = chunk->region; + + return mlxsw_sp_acl_tcam_region_entry_activity_get(mlxsw_sp, region, + entry->parman_item.index, + activity); +} + static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = { MLXSW_AFK_ELEMENT_SRC_SYS_PORT, MLXSW_AFK_ELEMENT_DMAC, @@ -950,6 +981,8 @@ static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv4[] = { MLXSW_AFK_ELEMENT_DST_IP4, MLXSW_AFK_ELEMENT_DST_L4_PORT, MLXSW_AFK_ELEMENT_SRC_L4_PORT, + MLXSW_AFK_ELEMENT_VID, + MLXSW_AFK_ELEMENT_PCP, }; static const enum mlxsw_afk_element mlxsw_sp_acl_tcam_pattern_ipv6[] = { @@ -1046,6 +1079,16 @@ mlxsw_sp_acl_tcam_flower_rule_del(struct mlxsw_sp *mlxsw_sp, void *rule_priv) mlxsw_sp_acl_tcam_entry_del(mlxsw_sp, &rule->entry); } +static int +mlxsw_sp_acl_tcam_flower_rule_activity_get(struct mlxsw_sp *mlxsw_sp, + void *rule_priv, bool *activity) +{ + struct mlxsw_sp_acl_tcam_flower_rule *rule = rule_priv; + + return mlxsw_sp_acl_tcam_entry_activity_get(mlxsw_sp, &rule->entry, + 
activity); +} + static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { .ruleset_priv_size = sizeof(struct mlxsw_sp_acl_tcam_flower_ruleset), .ruleset_add = mlxsw_sp_acl_tcam_flower_ruleset_add, @@ -1055,6 +1098,7 @@ static const struct mlxsw_sp_acl_profile_ops mlxsw_sp_acl_tcam_flower_ops = { .rule_priv_size = sizeof(struct mlxsw_sp_acl_tcam_flower_rule), .rule_add = mlxsw_sp_acl_tcam_flower_rule_add, .rule_del = mlxsw_sp_acl_tcam_flower_rule_del, + .rule_activity_get = mlxsw_sp_acl_tcam_flower_rule_activity_get, }; static const struct mlxsw_sp_acl_profile_ops * diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index a7468262f118..997189cfe7fd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -162,8 +162,8 @@ static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port, } static const u16 mlxsw_sp_pbs[] = { - [0] = 2 * MLXSW_SP_BYTES_TO_CELLS(ETH_FRAME_LEN), - [9] = 2 * MLXSW_SP_BYTES_TO_CELLS(MLXSW_PORT_MAX_MTU), + [0] = 2 * ETH_FRAME_LEN, + [9] = 2 * MLXSW_PORT_MAX_MTU, }; #define MLXSW_SP_PBS_LEN ARRAY_SIZE(mlxsw_sp_pbs) @@ -171,20 +171,22 @@ static const u16 mlxsw_sp_pbs[] = { static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port) { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char pbmc_pl[MLXSW_REG_PBMC_LEN]; int i; mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0xffff, 0xffff / 2); for (i = 0; i < MLXSW_SP_PBS_LEN; i++) { + u16 size = mlxsw_sp_bytes_cells(mlxsw_sp, mlxsw_sp_pbs[i]); + if (i == MLXSW_SP_PB_UNUSED) continue; - mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, i, mlxsw_sp_pbs[i]); + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, i, size); } mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX, 0); - return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, - MLXSW_REG(pbmc), pbmc_pl); + return 
mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); } static int mlxsw_sp_port_pb_prio_init(struct mlxsw_sp_port *mlxsw_sp_port) @@ -209,11 +211,25 @@ static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port) return mlxsw_sp_port_pb_prio_init(mlxsw_sp_port); } -#define MLXSW_SP_SB_PR_INGRESS_SIZE \ - (15000000 - (2 * 20000 * MLXSW_PORT_MAX_PORTS)) +static int mlxsw_sp_sb_ports_init(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); + + mlxsw_sp->sb.ports = kcalloc(max_ports, sizeof(struct mlxsw_sp_sb_port), + GFP_KERNEL); + if (!mlxsw_sp->sb.ports) + return -ENOMEM; + return 0; +} + +static void mlxsw_sp_sb_ports_fini(struct mlxsw_sp *mlxsw_sp) +{ + kfree(mlxsw_sp->sb.ports); +} + +#define MLXSW_SP_SB_PR_INGRESS_SIZE 12440000 #define MLXSW_SP_SB_PR_INGRESS_MNG_SIZE (200 * 1000) -#define MLXSW_SP_SB_PR_EGRESS_SIZE \ - (14000000 - (8 * 1500 * MLXSW_PORT_MAX_PORTS)) +#define MLXSW_SP_SB_PR_EGRESS_SIZE 13232000 #define MLXSW_SP_SB_PR(_mode, _size) \ { \ @@ -223,18 +239,17 @@ static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port) static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_ingress[] = { MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, - MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_PR_INGRESS_SIZE)), + MLXSW_SP_SB_PR_INGRESS_SIZE), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, - MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_PR_INGRESS_MNG_SIZE)), + MLXSW_SP_SB_PR_INGRESS_MNG_SIZE), }; #define MLXSW_SP_SB_PRS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_ingress) static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_egress[] = { - MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, - MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_PR_EGRESS_SIZE)), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, MLXSW_SP_SB_PR_EGRESS_SIZE), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), 
MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), @@ -251,11 +266,9 @@ static int __mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, int err; for (i = 0; i < prs_len; i++) { - const struct mlxsw_sp_sb_pr *pr; + u32 size = mlxsw_sp_bytes_cells(mlxsw_sp, prs[i].size); - pr = &prs[i]; - err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, dir, - pr->mode, pr->size); + err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, dir, prs[i].mode, size); if (err) return err; } @@ -284,7 +297,7 @@ static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp) } static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = { - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(10000), 8, 0), + MLXSW_SP_SB_CM(10000, 8, 0), MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), @@ -293,20 +306,20 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = { MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), MLXSW_SP_SB_CM(0, 0, 0), /* dummy, this PG does not exist */ - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(20000), 1, 3), + MLXSW_SP_SB_CM(20000, 1, 3), }; #define MLXSW_SP_SB_CMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_ingress) static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = { - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(1500, 9, 0), + MLXSW_SP_SB_CM(1500, 9, 0), + MLXSW_SP_SB_CM(1500, 9, 0), + MLXSW_SP_SB_CM(1500, 9, 0), + MLXSW_SP_SB_CM(1500, 9, 0), + MLXSW_SP_SB_CM(1500, 9, 0), + MLXSW_SP_SB_CM(1500, 9, 0), + MLXSW_SP_SB_CM(1500, 9, 
0), MLXSW_SP_SB_CM(0, 0, 0), MLXSW_SP_SB_CM(0, 0, 0), MLXSW_SP_SB_CM(0, 0, 0), @@ -330,7 +343,7 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(10000), 0, 0), + MLXSW_SP_SB_CM(10000, 0, 0), MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, @@ -370,13 +383,17 @@ static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, for (i = 0; i < cms_len; i++) { const struct mlxsw_sp_sb_cm *cm; + u32 min_buff; if (i == 8 && dir == MLXSW_REG_SBXX_DIR_INGRESS) continue; /* PG number 8 does not exist, skip it */ cm = &cms[i]; + /* All pools are initialized using dynamic thresholds, + * therefore 'max_buff' isn't specified in cells. + */ + min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, cm->min_buff); err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, dir, - cm->min_buff, cm->max_buff, - cm->pool); + min_buff, cm->max_buff, cm->pool); if (err) return err; } @@ -484,21 +501,21 @@ struct mlxsw_sp_sb_mm { } static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = { - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), - 
MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), + MLXSW_SP_SB_MM(20000, 0xff, 0), }; #define MLXSW_SP_SB_MMS_LEN ARRAY_SIZE(mlxsw_sp_sb_mms) @@ -511,10 +528,15 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) for (i = 0; i < MLXSW_SP_SB_MMS_LEN; i++) { const struct mlxsw_sp_sb_mm *mc; + u32 min_buff; mc = &mlxsw_sp_sb_mms[i]; - mlxsw_reg_sbmm_pack(sbmm_pl, i, mc->min_buff, - mc->max_buff, mc->pool); + /* All pools are initialized using dynamic thresholds, + * therefore 'max_buff' isn't specified in cells. 
+ */ + min_buff = mlxsw_sp_bytes_cells(mlxsw_sp, mc->min_buff); + mlxsw_reg_sbmm_pack(sbmm_pl, i, min_buff, mc->max_buff, + mc->pool); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbmm), sbmm_pl); if (err) return err; @@ -522,32 +544,53 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) return 0; } -#define MLXSW_SP_SB_SIZE (16 * 1024 * 1024) - int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) { + u64 sb_size; int err; - err = mlxsw_sp_sb_prs_init(mlxsw_sp); + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, CELL_SIZE)) + return -EIO; + mlxsw_sp->sb.cell_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, CELL_SIZE); + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_BUFFER_SIZE)) + return -EIO; + sb_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE); + + err = mlxsw_sp_sb_ports_init(mlxsw_sp); if (err) return err; + err = mlxsw_sp_sb_prs_init(mlxsw_sp); + if (err) + goto err_sb_prs_init; err = mlxsw_sp_cpu_port_sb_cms_init(mlxsw_sp); if (err) - return err; + goto err_sb_cpu_port_sb_cms_init; err = mlxsw_sp_sb_mms_init(mlxsw_sp); if (err) - return err; - return devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, - MLXSW_SP_SB_SIZE, - MLXSW_SP_SB_POOL_COUNT, - MLXSW_SP_SB_POOL_COUNT, - MLXSW_SP_SB_TC_COUNT, - MLXSW_SP_SB_TC_COUNT); + goto err_sb_mms_init; + err = devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, sb_size, + MLXSW_SP_SB_POOL_COUNT, + MLXSW_SP_SB_POOL_COUNT, + MLXSW_SP_SB_TC_COUNT, + MLXSW_SP_SB_TC_COUNT); + if (err) + goto err_devlink_sb_register; + + return 0; + +err_devlink_sb_register: +err_sb_mms_init: +err_sb_cpu_port_sb_cms_init: +err_sb_prs_init: + mlxsw_sp_sb_ports_fini(mlxsw_sp); + return err; } void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp) { devlink_sb_unregister(priv_to_devlink(mlxsw_sp->core), 0); + mlxsw_sp_sb_ports_fini(mlxsw_sp); } int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port) @@ -596,7 +639,7 @@ int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, struct mlxsw_sp_sb_pr *pr = 
mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); pool_info->pool_type = (enum devlink_sb_pool_type) dir; - pool_info->size = MLXSW_SP_CELLS_TO_BYTES(pr->size); + pool_info->size = mlxsw_sp_cells_bytes(mlxsw_sp, pr->size); pool_info->threshold_type = (enum devlink_sb_threshold_type) pr->mode; return 0; } @@ -606,9 +649,9 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, enum devlink_sb_threshold_type threshold_type) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + u32 pool_size = mlxsw_sp_bytes_cells(mlxsw_sp, size); u8 pool = pool_get(pool_index); enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); - u32 pool_size = MLXSW_SP_BYTES_TO_CELLS(size); enum mlxsw_reg_sbpr_mode mode; if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE)) @@ -627,7 +670,7 @@ static u32 mlxsw_sp_sb_threshold_out(struct mlxsw_sp *mlxsw_sp, u8 pool, if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC) return max_buff - MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET; - return MLXSW_SP_CELLS_TO_BYTES(max_buff); + return mlxsw_sp_cells_bytes(mlxsw_sp, max_buff); } static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u8 pool, @@ -645,7 +688,7 @@ static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u8 pool, return -EINVAL; *p_max_buff = val; } else { - *p_max_buff = MLXSW_SP_BYTES_TO_CELLS(threshold); + *p_max_buff = mlxsw_sp_bytes_cells(mlxsw_sp, threshold); } return 0; } @@ -761,7 +804,7 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core, masked_count = 0; for (local_port = cb_ctx.local_port_1; - local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { @@ -775,7 +818,7 @@ static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core, } masked_count = 0; for (local_port = cb_ctx.local_port_1; - local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + local_port < 
mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { @@ -817,7 +860,7 @@ next_batch: mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1); mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1); } - for (; local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); @@ -847,7 +890,7 @@ do_query: cb_priv); if (err) goto out; - if (local_port < MLXSW_PORT_MAX_PORTS) + if (local_port < mlxsw_core_max_ports(mlxsw_core)) goto next_batch; out: @@ -882,7 +925,7 @@ next_batch: mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1); mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1); } - for (; local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + for (; local_port < mlxsw_core_max_ports(mlxsw_core); local_port++) { if (!mlxsw_sp->ports[local_port]) continue; mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); @@ -908,7 +951,7 @@ do_query: &bulk_list, NULL, 0); if (err) goto out; - if (local_port < MLXSW_PORT_MAX_PORTS) + if (local_port < mlxsw_core_max_ports(mlxsw_core)) goto next_batch; out: @@ -932,8 +975,8 @@ int mlxsw_sp_sb_occ_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir); - *p_cur = MLXSW_SP_CELLS_TO_BYTES(pm->occ.cur); - *p_max = MLXSW_SP_CELLS_TO_BYTES(pm->occ.max); + *p_cur = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.cur); + *p_max = mlxsw_sp_cells_bytes(mlxsw_sp, pm->occ.max); return 0; } @@ -951,7 +994,7 @@ int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir); - *p_cur = MLXSW_SP_CELLS_TO_BYTES(cm->occ.cur); - *p_max = MLXSW_SP_CELLS_TO_BYTES(cm->occ.max); + *p_cur = mlxsw_sp_cells_bytes(mlxsw_sp, cm->occ.cur); + *p_max = 
mlxsw_sp_cells_bytes(mlxsw_sp, cm->occ.max); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c new file mode 100644 index 000000000000..0f46775e0307 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c @@ -0,0 +1,207 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Arkadi Sharshevsky <arkadis@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/kernel.h> +#include <linux/bitops.h> + +#include "spectrum_cnt.h" + +#define MLXSW_SP_COUNTER_POOL_BANK_SIZE 4096 + +struct mlxsw_sp_counter_sub_pool { + unsigned int base_index; + unsigned int size; + unsigned int entry_size; + unsigned int bank_count; +}; + +struct mlxsw_sp_counter_pool { + unsigned int pool_size; + unsigned long *usage; /* Usage bitmap */ + struct mlxsw_sp_counter_sub_pool *sub_pools; +}; + +static struct mlxsw_sp_counter_sub_pool mlxsw_sp_counter_sub_pools[] = { + [MLXSW_SP_COUNTER_SUB_POOL_FLOW] = { + .bank_count = 6, + }, + [MLXSW_SP_COUNTER_SUB_POOL_RIF] = { + .bank_count = 2, + } +}; + +static int mlxsw_sp_counter_pool_validate(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int total_bank_config = 0; + unsigned int pool_size; + int i; + + pool_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, COUNTER_POOL_SIZE); + /* Check config is valid, no bank over subscription */ + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) + total_bank_config += mlxsw_sp_counter_sub_pools[i].bank_count; + if (total_bank_config > pool_size / MLXSW_SP_COUNTER_POOL_BANK_SIZE + 1) + return -EINVAL; + return 0; +} + +static int mlxsw_sp_counter_sub_pools_prepare(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_counter_sub_pool *sub_pool; + + /* Prepare generic flow pool*/ + sub_pool = &mlxsw_sp_counter_sub_pools[MLXSW_SP_COUNTER_SUB_POOL_FLOW]; + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, 
COUNTER_SIZE_PACKETS_BYTES)) + return -EIO; + sub_pool->entry_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, + COUNTER_SIZE_PACKETS_BYTES); + /* Prepare erif pool*/ + sub_pool = &mlxsw_sp_counter_sub_pools[MLXSW_SP_COUNTER_SUB_POOL_RIF]; + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_SIZE_ROUTER_BASIC)) + return -EIO; + sub_pool->entry_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, + COUNTER_SIZE_ROUTER_BASIC); + return 0; +} + +int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_counter_sub_pool *sub_pool; + struct mlxsw_sp_counter_pool *pool; + unsigned int base_index; + unsigned int map_size; + int i; + int err; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, COUNTER_POOL_SIZE)) + return -EIO; + + err = mlxsw_sp_counter_pool_validate(mlxsw_sp); + if (err) + return err; + + err = mlxsw_sp_counter_sub_pools_prepare(mlxsw_sp); + if (err) + return err; + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return -ENOMEM; + + pool->pool_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, COUNTER_POOL_SIZE); + map_size = BITS_TO_LONGS(pool->pool_size) * sizeof(unsigned long); + + pool->usage = kzalloc(map_size, GFP_KERNEL); + if (!pool->usage) { + err = -ENOMEM; + goto err_usage_alloc; + } + + pool->sub_pools = mlxsw_sp_counter_sub_pools; + /* Allocation is based on bank count which should be + * specified for each sub pool statically. 
+ */ + base_index = 0; + for (i = 0; i < ARRAY_SIZE(mlxsw_sp_counter_sub_pools); i++) { + sub_pool = &pool->sub_pools[i]; + sub_pool->size = sub_pool->bank_count * + MLXSW_SP_COUNTER_POOL_BANK_SIZE; + sub_pool->base_index = base_index; + base_index += sub_pool->size; + /* The last bank can't be fully used */ + if (sub_pool->base_index + sub_pool->size > pool->pool_size) + sub_pool->size = pool->pool_size - sub_pool->base_index; + } + + mlxsw_sp->counter_pool = pool; + return 0; + +err_usage_alloc: + kfree(pool); + return err; +} + +void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + + WARN_ON(find_first_bit(pool->usage, pool->pool_size) != + pool->pool_size); + kfree(pool->usage); + kfree(pool); +} + +int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_counter_sub_pool_id sub_pool_id, + unsigned int *p_counter_index) +{ + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct mlxsw_sp_counter_sub_pool *sub_pool; + unsigned int entry_index; + unsigned int stop_index; + int i; + + sub_pool = &mlxsw_sp_counter_sub_pools[sub_pool_id]; + stop_index = sub_pool->base_index + sub_pool->size; + entry_index = sub_pool->base_index; + + entry_index = find_next_zero_bit(pool->usage, stop_index, entry_index); + if (entry_index == stop_index) + return -ENOBUFS; + /* The sub-pools can contain non-integer number of entries + * so we must check for overflow + */ + if (entry_index + sub_pool->entry_size > stop_index) + return -ENOBUFS; + for (i = 0; i < sub_pool->entry_size; i++) + __set_bit(entry_index + i, pool->usage); + + *p_counter_index = entry_index; + return 0; +} + +void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_counter_sub_pool_id sub_pool_id, + unsigned int counter_index) +{ + struct mlxsw_sp_counter_pool *pool = mlxsw_sp->counter_pool; + struct mlxsw_sp_counter_sub_pool *sub_pool; + int i; + + if (WARN_ON(counter_index >= pool->pool_size)) + 
return; + sub_pool = &mlxsw_sp_counter_sub_pools[sub_pool_id]; + for (i = 0; i < sub_pool->entry_size; i++) + __clear_bit(counter_index + i, pool->usage); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h new file mode 100644 index 000000000000..fd34d0a01073 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h @@ -0,0 +1,54 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Arkadi Sharshevsky <arkadis@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_SPECTRUM_CNT_H +#define _MLXSW_SPECTRUM_CNT_H + +#include "spectrum.h" + +enum mlxsw_sp_counter_sub_pool_id { + MLXSW_SP_COUNTER_SUB_POOL_FLOW, + MLXSW_SP_COUNTER_SUB_POOL_RIF, +}; + +int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_counter_sub_pool_id sub_pool_id, + unsigned int *p_counter_index); +void mlxsw_sp_counter_free(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_sp_counter_sub_pool_id sub_pool_id, + unsigned int counter_index); +int mlxsw_sp_counter_pool_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp); + +#endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c new file mode 100644 index 000000000000..ea56f6ade6b4 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -0,0 +1,351 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Arkadi Sharshevsky <arkadis@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <linux/kernel.h> +#include <net/devlink.h> + +#include "spectrum.h" +#include "spectrum_dpipe.h" +#include "spectrum_router.h" + +enum mlxsw_sp_field_metadata_id { + MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT, + MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, + MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, +}; + +static struct devlink_dpipe_field mlxsw_sp_dpipe_fields_metadata[] = { + { .name = "erif_port", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT, + .bitwidth = 32, + .mapping_type = DEVLINK_DPIPE_FIELD_MAPPING_TYPE_IFINDEX, + }, + { .name = "l3_forward", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD, + .bitwidth = 1, + }, + { .name = "l3_drop", + .id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP, + .bitwidth = 1, + }, +}; + +enum mlxsw_sp_dpipe_header_id { + MLXSW_SP_DPIPE_HEADER_METADATA, +}; + +static struct devlink_dpipe_header mlxsw_sp_dpipe_header_metadata = { + .name = "mlxsw_meta", + .id = MLXSW_SP_DPIPE_HEADER_METADATA, + .fields = mlxsw_sp_dpipe_fields_metadata, + .fields_count = ARRAY_SIZE(mlxsw_sp_dpipe_fields_metadata), +}; + +static struct devlink_dpipe_header *mlxsw_dpipe_headers[] = { + &mlxsw_sp_dpipe_header_metadata, +}; + +static struct devlink_dpipe_headers mlxsw_sp_dpipe_headers = { + .headers = mlxsw_dpipe_headers, + .headers_count = ARRAY_SIZE(mlxsw_dpipe_headers), +}; + +static int mlxsw_sp_dpipe_table_erif_actions_dump(void *priv, + struct sk_buff *skb) +{ + struct devlink_dpipe_action action = {0}; + int err; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &mlxsw_sp_dpipe_header_metadata; + action.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD; + + err = devlink_dpipe_action_put(skb, &action); + if (err) + return err; + + action.type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action.header = &mlxsw_sp_dpipe_header_metadata; + action.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_DROP; + + return devlink_dpipe_action_put(skb, &action); +} + +static int mlxsw_sp_dpipe_table_erif_matches_dump(void *priv, + 
struct sk_buff *skb) +{ + struct devlink_dpipe_match match = {0}; + + match.type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match.header = &mlxsw_sp_dpipe_header_metadata; + match.field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; + + return devlink_dpipe_match_put(skb, &match); +} + +static void mlxsw_sp_erif_entry_clear(struct devlink_dpipe_entry *entry) +{ + unsigned int value_count, value_index; + struct devlink_dpipe_value *value; + + value = entry->action_values; + value_count = entry->action_values_count; + for (value_index = 0; value_index < value_count; value_index++) { + kfree(value[value_index].value); + kfree(value[value_index].mask); + } + + value = entry->match_values; + value_count = entry->match_values_count; + for (value_index = 0; value_index < value_count; value_index++) { + kfree(value[value_index].value); + kfree(value[value_index].mask); + } +} + +static void +mlxsw_sp_erif_match_action_prepare(struct devlink_dpipe_match *match, + struct devlink_dpipe_action *action) +{ + action->type = DEVLINK_DPIPE_ACTION_TYPE_FIELD_MODIFY; + action->header = &mlxsw_sp_dpipe_header_metadata; + action->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_L3_FORWARD; + + match->type = DEVLINK_DPIPE_MATCH_TYPE_FIELD_EXACT; + match->header = &mlxsw_sp_dpipe_header_metadata; + match->field_id = MLXSW_SP_DPIPE_FIELD_METADATA_ERIF_PORT; +} + +static int mlxsw_sp_erif_entry_prepare(struct devlink_dpipe_entry *entry, + struct devlink_dpipe_value *match_value, + struct devlink_dpipe_match *match, + struct devlink_dpipe_value *action_value, + struct devlink_dpipe_action *action) +{ + entry->match_values = match_value; + entry->match_values_count = 1; + + entry->action_values = action_value; + entry->action_values_count = 1; + + match_value->match = match; + match_value->value_size = sizeof(u32); + match_value->value = kmalloc(match_value->value_size, GFP_KERNEL); + if (!match_value->value) + return -ENOMEM; + + action_value->action = action; + action_value->value_size = 
sizeof(u32); + action_value->value = kmalloc(action_value->value_size, GFP_KERNEL); + if (!action_value->value) + goto err_action_alloc; + return 0; + +err_action_alloc: + kfree(match_value->value); + return -ENOMEM; +} + +static int mlxsw_sp_erif_entry_get(struct mlxsw_sp *mlxsw_sp, + struct devlink_dpipe_entry *entry, + struct mlxsw_sp_rif *rif, + bool counters_enabled) +{ + u32 *action_value; + u32 *rif_value; + u64 cnt; + int err; + + /* Set Match RIF index */ + rif_value = entry->match_values->value; + *rif_value = mlxsw_sp_rif_index(rif); + entry->match_values->mapping_value = mlxsw_sp_rif_dev_ifindex(rif); + entry->match_values->mapping_valid = true; + + /* Set Action Forwarding */ + action_value = entry->action_values->value; + *action_value = 1; + + entry->counter_valid = false; + entry->counter = 0; + if (!counters_enabled) + return 0; + + entry->index = mlxsw_sp_rif_index(rif); + err = mlxsw_sp_rif_counter_value_get(mlxsw_sp, rif, + MLXSW_SP_RIF_COUNTER_EGRESS, + &cnt); + if (!err) { + entry->counter = cnt; + entry->counter_valid = true; + } + return 0; +} + +static int +mlxsw_sp_table_erif_entries_dump(void *priv, bool counters_enabled, + struct devlink_dpipe_dump_ctx *dump_ctx) +{ + struct devlink_dpipe_value match_value = {{0}}, action_value = {{0}}; + struct devlink_dpipe_action action = {0}; + struct devlink_dpipe_match match = {0}; + struct devlink_dpipe_entry entry = {0}; + struct mlxsw_sp *mlxsw_sp = priv; + unsigned int rif_count; + int i, j; + int err; + + mlxsw_sp_erif_match_action_prepare(&match, &action); + err = mlxsw_sp_erif_entry_prepare(&entry, &match_value, &match, + &action_value, &action); + if (err) + return err; + + rif_count = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + rtnl_lock(); + i = 0; +start_again: + err = devlink_dpipe_entry_ctx_prepare(dump_ctx); + /* RTNL is held and the entry value buffers are allocated at this + * point, so a failure must go through the common unwind path. + */ + if (err) + goto err_ctx_prepare; + j = 0; + for (; i < rif_count; i++) { + if (!mlxsw_sp->rifs[i]) + continue; + err = mlxsw_sp_erif_entry_get(mlxsw_sp, &entry, + mlxsw_sp->rifs[i], + 
counters_enabled); + if (err) + goto err_entry_get; + err = devlink_dpipe_entry_ctx_append(dump_ctx, &entry); + if (err) { + if (err == -EMSGSIZE) { + if (!j) + goto err_entry_append; + break; + } + goto err_entry_append; + } + j++; + } + + devlink_dpipe_entry_ctx_close(dump_ctx); + if (i != rif_count) + goto start_again; + rtnl_unlock(); + + mlxsw_sp_erif_entry_clear(&entry); + return 0; +err_entry_append: +err_entry_get: +err_ctx_prepare: + rtnl_unlock(); + mlxsw_sp_erif_entry_clear(&entry); + return err; +} + +static int mlxsw_sp_table_erif_counters_update(void *priv, bool enable) +{ + struct mlxsw_sp *mlxsw_sp = priv; + int i; + + rtnl_lock(); + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { + if (!mlxsw_sp->rifs[i]) + continue; + if (enable) + mlxsw_sp_rif_counter_alloc(mlxsw_sp, + mlxsw_sp->rifs[i], + MLXSW_SP_RIF_COUNTER_EGRESS); + else + mlxsw_sp_rif_counter_free(mlxsw_sp, + mlxsw_sp->rifs[i], + MLXSW_SP_RIF_COUNTER_EGRESS); + } + rtnl_unlock(); + return 0; +} + +static struct devlink_dpipe_table_ops mlxsw_sp_erif_ops = { + .matches_dump = mlxsw_sp_dpipe_table_erif_matches_dump, + .actions_dump = mlxsw_sp_dpipe_table_erif_actions_dump, + .entries_dump = mlxsw_sp_table_erif_entries_dump, + .counters_set_update = mlxsw_sp_table_erif_counters_update, +}; + +static int mlxsw_sp_dpipe_erif_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + u64 table_size; + + table_size = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + return devlink_dpipe_table_register(devlink, + MLXSW_SP_DPIPE_TABLE_NAME_ERIF, + &mlxsw_sp_erif_ops, + mlxsw_sp, table_size, + false); +} + +static void mlxsw_sp_dpipe_erif_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + devlink_dpipe_table_unregister(devlink, MLXSW_SP_DPIPE_TABLE_NAME_ERIF); +} + +int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + int err; + + err = 
devlink_dpipe_headers_register(devlink, + &mlxsw_sp_dpipe_headers); + if (err) + return err; + err = mlxsw_sp_dpipe_erif_table_init(mlxsw_sp); + if (err) + goto err_erif_register; + return 0; + +err_erif_register: + devlink_dpipe_headers_unregister(priv_to_devlink(mlxsw_sp->core)); + return err; +} + +void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); + + mlxsw_sp_dpipe_erif_table_fini(mlxsw_sp); + devlink_dpipe_headers_unregister(devlink); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h new file mode 100644 index 000000000000..d2089298cba3 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h @@ -0,0 +1,43 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Arkadi Sharshevsky <arkadis@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _MLXSW_PIPELINE_H_ +#define _MLXSW_PIPELINE_H_ + +int mlxsw_sp_dpipe_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_dpipe_fini(struct mlxsw_sp *mlxsw_sp); + +#define MLXSW_SP_DPIPE_TABLE_NAME_ERIF "mlxsw_erif" + +#endif /* _MLXSW_PIPELINE_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c index ae6cccc666e4..7d87e23578a3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c @@ -39,6 +39,7 @@ #include <net/pkt_cls.h> #include <net/tc_act/tc_gact.h> #include <net/tc_act/tc_mirred.h> +#include <net/tc_act/tc_vlan.h> #include "spectrum.h" #include "core_acl_flex_keys.h" @@ -55,6 +56,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, if (tc_no_actions(exts)) return 0; + /* Count action is inserted first */ + err = mlxsw_sp_acl_rulei_act_count(mlxsw_sp, rulei); + if (err) + return err; + tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { if (is_tcf_gact_shot(a)) { @@ -65,6 +71,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, int ifindex = 
tcf_mirred_ifindex(a); struct net_device *out_dev; + err = mlxsw_sp_acl_rulei_act_fid_set(mlxsw_sp, rulei, + MLXSW_SP_DUMMY_FID); + if (err) + return err; + out_dev = __dev_get_by_index(dev_net(dev), ifindex); if (out_dev == dev) out_dev = NULL; @@ -73,6 +84,15 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp, out_dev); if (err) return err; + } else if (is_tcf_vlan(a)) { + u16 proto = be16_to_cpu(tcf_vlan_push_proto(a)); + u32 action = tcf_vlan_action(a); + u8 prio = tcf_vlan_push_prio(a); + u16 vid = tcf_vlan_push_vid(a); + + return mlxsw_sp_acl_rulei_act_vlan(mlxsw_sp, rulei, + action, vid, + proto, prio); } else { dev_err(mlxsw_sp->bus_info->dev, "Unsupported action\n"); return -EOPNOTSUPP; @@ -173,7 +193,8 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | - BIT(FLOW_DISSECTOR_KEY_PORTS))) { + BIT(FLOW_DISSECTOR_KEY_PORTS) | + BIT(FLOW_DISSECTOR_KEY_VLAN))) { dev_err(mlxsw_sp->bus_info->dev, "Unsupported key\n"); return -EOPNOTSUPP; } @@ -234,6 +255,27 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp, sizeof(key->src)); } + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_dissector_key_vlan *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->key); + struct flow_dissector_key_vlan *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->mask); + if (mask->vlan_id != 0) + mlxsw_sp_acl_rulei_keymask_u32(rulei, + MLXSW_AFK_ELEMENT_VID, + key->vlan_id, + mask->vlan_id); + if (mask->vlan_priority != 0) + mlxsw_sp_acl_rulei_keymask_u32(rulei, + MLXSW_AFK_ELEMENT_PCP, + key->vlan_priority, + mask->vlan_priority); + } + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) mlxsw_sp_flower_parse_ipv4(rulei, f); @@ -314,3 +356,47 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, 
mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); } + +int mlxsw_sp_flower_stats(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress, + struct tc_cls_flower_offload *f) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct mlxsw_sp_acl_ruleset *ruleset; + struct mlxsw_sp_acl_rule *rule; + struct tc_action *a; + LIST_HEAD(actions); + u64 packets; + u64 lastuse; + u64 bytes; + int err; + + ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, mlxsw_sp_port->dev, + ingress, + MLXSW_SP_ACL_PROFILE_FLOWER); + if (WARN_ON(IS_ERR(ruleset))) + return -EINVAL; + + rule = mlxsw_sp_acl_rule_lookup(mlxsw_sp, ruleset, f->cookie); + /* The ruleset obtained above must be put on every exit path, + * including a failed lookup. + */ + if (!rule) { + err = -EINVAL; + goto err_rule_lookup; + } + + err = mlxsw_sp_acl_rule_get_stats(mlxsw_sp, rule, &packets, &bytes, + &lastuse); + if (err) + goto err_rule_get_stats; + + preempt_disable(); + + tcf_exts_to_list(f->exts, &actions); + list_for_each_entry(a, &actions, list) + tcf_action_stats_update(a, bytes, packets, lastuse); + + preempt_enable(); + + mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); + return 0; + +err_rule_get_stats: +err_rule_lookup: + mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c index ac321e8e5c1a..26c26cd30c3d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_kvdl.c @@ -45,7 +45,8 @@ (MLXSW_SP_KVD_LINEAR_SIZE - MLXSW_SP_KVDL_CHUNKS_BASE) #define MLXSW_SP_CHUNK_MAX 32 -int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count) +int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count, + u32 *p_entry_index) { int entry_index; int size; @@ -72,7 +73,8 @@ int mlxsw_sp_kvdl_alloc(struct mlxsw_sp *mlxsw_sp, unsigned int entry_count) for (i = 0; i < type_entries; i++) set_bit(entry_index + i, mlxsw_sp->kvdl.usage); - return entry_index; + *p_entry_index = entry_index; + return 0; } return -ENOBUFS; } diff --git 
a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index bd8de6b9be71..33cec1cc1642 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -41,14 +41,184 @@ #include <linux/in6.h> #include <linux/notifier.h> #include <linux/inetdevice.h> +#include <linux/netdevice.h> #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> #include <net/ip_fib.h> +#include <net/fib_rules.h> +#include <net/l3mdev.h> #include "spectrum.h" #include "core.h" #include "reg.h" +#include "spectrum_cnt.h" +#include "spectrum_dpipe.h" +#include "spectrum_router.h" + +struct mlxsw_sp_rif { + struct list_head nexthop_list; + struct list_head neigh_list; + struct net_device *dev; + struct mlxsw_sp_fid *f; + unsigned char addr[ETH_ALEN]; + int mtu; + u16 rif_index; + u16 vr_id; + unsigned int counter_ingress; + bool counter_ingress_valid; + unsigned int counter_egress; + bool counter_egress_valid; +}; + +static unsigned int * +mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir) +{ + switch (dir) { + case MLXSW_SP_RIF_COUNTER_EGRESS: + return &rif->counter_egress; + case MLXSW_SP_RIF_COUNTER_INGRESS: + return &rif->counter_ingress; + } + return NULL; +} + +static bool +mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir) +{ + switch (dir) { + case MLXSW_SP_RIF_COUNTER_EGRESS: + return rif->counter_egress_valid; + case MLXSW_SP_RIF_COUNTER_INGRESS: + return rif->counter_ingress_valid; + } + return false; +} + +static void +mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir, + bool valid) +{ + switch (dir) { + case MLXSW_SP_RIF_COUNTER_EGRESS: + rif->counter_egress_valid = valid; + break; + case MLXSW_SP_RIF_COUNTER_INGRESS: + rif->counter_ingress_valid = valid; + break; + } +} + +static int mlxsw_sp_rif_counter_edit(struct 
mlxsw_sp *mlxsw_sp, u16 rif_index, + unsigned int counter_index, bool enable, + enum mlxsw_sp_rif_counter_dir dir) +{ + char ritr_pl[MLXSW_REG_RITR_LEN]; + bool is_egress = false; + int err; + + if (dir == MLXSW_SP_RIF_COUNTER_EGRESS) + is_egress = true; + mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); + if (err) + return err; + + mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable, + is_egress); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir, u64 *cnt) +{ + char ricnt_pl[MLXSW_REG_RICNT_LEN]; + unsigned int *p_counter_index; + bool valid; + int err; + + valid = mlxsw_sp_rif_counter_valid_get(rif, dir); + if (!valid) + return -EINVAL; + + p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir); + if (!p_counter_index) + return -EINVAL; + mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index, + MLXSW_REG_RICNT_OPCODE_NOP); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl); + if (err) + return err; + *cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl); + return 0; +} + +static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp, + unsigned int counter_index) +{ + char ricnt_pl[MLXSW_REG_RICNT_LEN]; + + mlxsw_reg_ricnt_pack(ricnt_pl, counter_index, + MLXSW_REG_RICNT_OPCODE_CLEAR); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl); +} + +int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir) +{ + unsigned int *p_counter_index; + int err; + + p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir); + if (!p_counter_index) + return -EINVAL; + err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF, + p_counter_index); + if (err) + return err; + + err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index); + if (err) + goto 
err_counter_clear; + + err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index, + *p_counter_index, true, dir); + if (err) + goto err_counter_edit; + mlxsw_sp_rif_counter_valid_set(rif, dir, true); + return 0; + +err_counter_edit: +err_counter_clear: + mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF, + *p_counter_index); + return err; +} + +void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir) +{ + unsigned int *p_counter_index; + + p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir); + if (WARN_ON(!p_counter_index)) + return; + mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index, + *p_counter_index, false, dir); + mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF, + *p_counter_index); + mlxsw_sp_rif_counter_valid_set(rif, dir, false); +} + +static struct mlxsw_sp_rif * +mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev); #define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \ for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT) @@ -89,12 +259,6 @@ mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1, } static void -mlxsw_sp_prefix_usage_zero(struct mlxsw_sp_prefix_usage *prefix_usage) -{ - memset(prefix_usage, 0, sizeof(*prefix_usage)); -} - -static void mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage, unsigned char prefix_len) { @@ -125,7 +289,7 @@ struct mlxsw_sp_fib_node { struct list_head entry_list; struct list_head list; struct rhash_head ht_node; - struct mlxsw_sp_vr *vr; + struct mlxsw_sp_fib *fib; struct mlxsw_sp_fib_key key; }; @@ -149,13 +313,17 @@ struct mlxsw_sp_fib_entry { struct mlxsw_sp_fib { struct rhashtable ht; struct list_head node_list; + struct mlxsw_sp_vr *vr; + struct mlxsw_sp_lpm_tree *lpm_tree; unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT]; struct mlxsw_sp_prefix_usage prefix_usage; + enum mlxsw_sp_l3proto proto; }; static const struct 
rhashtable_params mlxsw_sp_fib_ht_params; -static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void) +static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp_vr *vr, + enum mlxsw_sp_l3proto proto) { struct mlxsw_sp_fib *fib; int err; @@ -167,6 +335,8 @@ static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void) if (err) goto err_rhashtable_init; INIT_LIST_HEAD(&fib->node_list); + fib->proto = proto; + fib->vr = vr; return fib; err_rhashtable_init: @@ -177,24 +347,21 @@ err_rhashtable_init: static void mlxsw_sp_fib_destroy(struct mlxsw_sp_fib *fib) { WARN_ON(!list_empty(&fib->node_list)); + WARN_ON(fib->lpm_tree); rhashtable_destroy(&fib->ht); kfree(fib); } static struct mlxsw_sp_lpm_tree * -mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp, bool one_reserved) +mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp) { static struct mlxsw_sp_lpm_tree *lpm_tree; int i; - for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) { - lpm_tree = &mlxsw_sp->router.lpm_trees[i]; - if (lpm_tree->ref_count == 0) { - if (one_reserved) - one_reserved = false; - else - return lpm_tree; - } + for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) { + lpm_tree = &mlxsw_sp->router.lpm.trees[i]; + if (lpm_tree->ref_count == 0) + return lpm_tree; } return NULL; } @@ -248,12 +415,12 @@ mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_lpm_tree * mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_prefix_usage *prefix_usage, - enum mlxsw_sp_l3proto proto, bool one_reserved) + enum mlxsw_sp_l3proto proto) { struct mlxsw_sp_lpm_tree *lpm_tree; int err; - lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp, one_reserved); + lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp); if (!lpm_tree) return ERR_PTR(-EBUSY); lpm_tree->proto = proto; @@ -283,13 +450,13 @@ static int mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp, static struct mlxsw_sp_lpm_tree * mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_prefix_usage *prefix_usage, 
- enum mlxsw_sp_l3proto proto, bool one_reserved) + enum mlxsw_sp_l3proto proto) { struct mlxsw_sp_lpm_tree *lpm_tree; int i; - for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) { - lpm_tree = &mlxsw_sp->router.lpm_trees[i]; + for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) { + lpm_tree = &mlxsw_sp->router.lpm.trees[i]; if (lpm_tree->ref_count != 0 && lpm_tree->proto == proto && mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage, @@ -297,7 +464,7 @@ mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp, goto inc_ref_count; } lpm_tree = mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, - proto, one_reserved); + proto); if (IS_ERR(lpm_tree)) return lpm_tree; @@ -314,15 +481,41 @@ static int mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp, return 0; } -static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp) +#define MLXSW_SP_LPM_TREE_MIN 2 /* trees 0 and 1 are reserved */ + +static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp) { struct mlxsw_sp_lpm_tree *lpm_tree; + u64 max_trees; int i; - for (i = 0; i < MLXSW_SP_LPM_TREE_COUNT; i++) { - lpm_tree = &mlxsw_sp->router.lpm_trees[i]; + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES)) + return -EIO; + + max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES); + mlxsw_sp->router.lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN; + mlxsw_sp->router.lpm.trees = kcalloc(mlxsw_sp->router.lpm.tree_count, + sizeof(struct mlxsw_sp_lpm_tree), + GFP_KERNEL); + if (!mlxsw_sp->router.lpm.trees) + return -ENOMEM; + + for (i = 0; i < mlxsw_sp->router.lpm.tree_count; i++) { + lpm_tree = &mlxsw_sp->router.lpm.trees[i]; lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN; } + + return 0; +} + +static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp) +{ + kfree(mlxsw_sp->router.lpm.trees); +} + +static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr) +{ + return !!vr->fib4; } static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) @@ -332,31 +525,31 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct 
mlxsw_sp *mlxsw_sp) for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { vr = &mlxsw_sp->router.vrs[i]; - if (!vr->used) + if (!mlxsw_sp_vr_is_used(vr)) return vr; } return NULL; } static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_vr *vr) + const struct mlxsw_sp_fib *fib) { char raltb_pl[MLXSW_REG_RALTB_LEN]; - mlxsw_reg_raltb_pack(raltb_pl, vr->id, - (enum mlxsw_reg_ralxx_protocol) vr->proto, - vr->lpm_tree->id); + mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id, + (enum mlxsw_reg_ralxx_protocol) fib->proto, + fib->lpm_tree->id); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); } static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_vr *vr) + const struct mlxsw_sp_fib *fib) { char raltb_pl[MLXSW_REG_RALTB_LEN]; /* Bind to tree 0 which is default */ - mlxsw_reg_raltb_pack(raltb_pl, vr->id, - (enum mlxsw_reg_ralxx_protocol) vr->proto, 0); + mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id, + (enum mlxsw_reg_ralxx_protocol) fib->proto, 0); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); } @@ -369,8 +562,7 @@ static u32 mlxsw_sp_fix_tb_id(u32 tb_id) } static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, - u32 tb_id, - enum mlxsw_sp_l3proto proto) + u32 tb_id) { struct mlxsw_sp_vr *vr; int i; @@ -379,69 +571,50 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { vr = &mlxsw_sp->router.vrs[i]; - if (vr->used && vr->proto == proto && vr->tb_id == tb_id) + if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id) return vr; } return NULL; } +static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr, + enum mlxsw_sp_l3proto proto) +{ + switch (proto) { + case MLXSW_SP_L3_PROTO_IPV4: + return vr->fib4; + case MLXSW_SP_L3_PROTO_IPV6: + BUG_ON(1); + } + return NULL; +} + static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp, - unsigned char 
prefix_len, - u32 tb_id, - enum mlxsw_sp_l3proto proto) + u32 tb_id) { - struct mlxsw_sp_prefix_usage req_prefix_usage; - struct mlxsw_sp_lpm_tree *lpm_tree; struct mlxsw_sp_vr *vr; - int err; vr = mlxsw_sp_vr_find_unused(mlxsw_sp); if (!vr) return ERR_PTR(-EBUSY); - vr->fib = mlxsw_sp_fib_create(); - if (IS_ERR(vr->fib)) - return ERR_CAST(vr->fib); - - vr->proto = proto; + vr->fib4 = mlxsw_sp_fib_create(vr, MLXSW_SP_L3_PROTO_IPV4); + if (IS_ERR(vr->fib4)) + return ERR_CAST(vr->fib4); vr->tb_id = tb_id; - mlxsw_sp_prefix_usage_zero(&req_prefix_usage); - mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len); - lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, - proto, true); - if (IS_ERR(lpm_tree)) { - err = PTR_ERR(lpm_tree); - goto err_tree_get; - } - vr->lpm_tree = lpm_tree; - err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr); - if (err) - goto err_tree_bind; - - vr->used = true; return vr; - -err_tree_bind: - mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree); -err_tree_get: - mlxsw_sp_fib_destroy(vr->fib); - - return ERR_PTR(err); } -static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_vr *vr) +static void mlxsw_sp_vr_destroy(struct mlxsw_sp_vr *vr) { - mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, vr); - mlxsw_sp_lpm_tree_put(mlxsw_sp, vr->lpm_tree); - mlxsw_sp_fib_destroy(vr->fib); - vr->used = false; + mlxsw_sp_fib_destroy(vr->fib4); + vr->fib4 = NULL; } static int -mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, +mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib *fib, struct mlxsw_sp_prefix_usage *req_prefix_usage) { - struct mlxsw_sp_lpm_tree *lpm_tree = vr->lpm_tree; + struct mlxsw_sp_lpm_tree *lpm_tree = fib->lpm_tree; struct mlxsw_sp_lpm_tree *new_tree; int err; @@ -449,7 +622,7 @@ mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, return 0; new_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, req_prefix_usage, - vr->proto, false); + fib->proto); if 
(IS_ERR(new_tree)) { /* We failed to get a tree according to the required * prefix usage. However, the current tree might be still good @@ -463,8 +636,8 @@ mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, } /* Prevent packet loss by overwriting existing binding */ - vr->lpm_tree = new_tree; - err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, vr); + fib->lpm_tree = new_tree; + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib); if (err) goto err_tree_bind; mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); @@ -472,53 +645,26 @@ mlxsw_sp_vr_lpm_tree_check(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, return 0; err_tree_bind: - vr->lpm_tree = lpm_tree; + fib->lpm_tree = lpm_tree; mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree); return err; } -static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, - unsigned char prefix_len, - u32 tb_id, - enum mlxsw_sp_l3proto proto) +static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id) { struct mlxsw_sp_vr *vr; - int err; tb_id = mlxsw_sp_fix_tb_id(tb_id); - vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id, proto); - if (!vr) { - vr = mlxsw_sp_vr_create(mlxsw_sp, prefix_len, tb_id, proto); - if (IS_ERR(vr)) - return vr; - } else { - struct mlxsw_sp_prefix_usage req_prefix_usage; - - mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, - &vr->fib->prefix_usage); - mlxsw_sp_prefix_usage_set(&req_prefix_usage, prefix_len); - /* Need to replace LPM tree in case new prefix is required. */ - err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr, - &req_prefix_usage); - if (err) - return ERR_PTR(err); - } + vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id); + if (!vr) + vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id); return vr; } -static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr) +static void mlxsw_sp_vr_put(struct mlxsw_sp_vr *vr) { - /* Destroy virtual router entity in case the associated FIB is empty - * and allow it to be used for other tables in future. 
Otherwise, - * check if some prefix usage did not disappear and change tree if - * that is the case. Note that in case new, smaller tree cannot be - * allocated, the original one will be kept being used. - */ - if (mlxsw_sp_prefix_usage_none(&vr->fib->prefix_usage)) - mlxsw_sp_vr_destroy(mlxsw_sp, vr); - else - mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, vr, - &vr->fib->prefix_usage); + if (!vr->rif_count && list_empty(&vr->fib4->node_list)) + mlxsw_sp_vr_destroy(vr); } static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) @@ -627,14 +773,14 @@ static struct mlxsw_sp_neigh_entry * mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) { struct mlxsw_sp_neigh_entry *neigh_entry; - struct mlxsw_sp_rif *r; + struct mlxsw_sp_rif *rif; int err; - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev); - if (!r) + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev); + if (!rif) return ERR_PTR(-EINVAL); - neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, r->rif); + neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index); if (!neigh_entry) return ERR_PTR(-ENOMEM); @@ -642,7 +788,7 @@ mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n) if (err) goto err_neigh_entry_insert; - list_add(&neigh_entry->rif_list_node, &r->neigh_list); + list_add(&neigh_entry->rif_list_node, &rif->neigh_list); return neigh_entry; @@ -1050,22 +1196,22 @@ static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp) } static int mlxsw_sp_neigh_rif_flush(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_rif *r) + const struct mlxsw_sp_rif *rif) { char rauht_pl[MLXSW_REG_RAUHT_LEN]; mlxsw_reg_rauht_pack(rauht_pl, MLXSW_REG_RAUHT_OP_WRITE_DELETE_ALL, - r->rif, r->addr); + rif->rif_index, rif->addr); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl); } static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *r) + struct mlxsw_sp_rif *rif) { struct mlxsw_sp_neigh_entry *neigh_entry, *tmp; - 
mlxsw_sp_neigh_rif_flush(mlxsw_sp, r); - list_for_each_entry_safe(neigh_entry, tmp, &r->neigh_list, + mlxsw_sp_neigh_rif_flush(mlxsw_sp, rif); + list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list, rif_list_node) mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry); } @@ -1082,7 +1228,7 @@ struct mlxsw_sp_nexthop { */ struct rhash_head ht_node; struct mlxsw_sp_nexthop_key key; - struct mlxsw_sp_rif *r; + struct mlxsw_sp_rif *rif; u8 should_offload:1, /* set indicates this neigh is connected and * should be put to KVD linear area of this group. */ @@ -1109,7 +1255,7 @@ struct mlxsw_sp_nexthop_group { u16 ecmp_size; u16 count; struct mlxsw_sp_nexthop nexthops[0]; -#define nh_rif nexthops[0].r +#define nh_rif nexthops[0].rif }; static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = { @@ -1171,7 +1317,7 @@ mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_vr *vr, + const struct mlxsw_sp_fib *fib, u32 adj_index, u16 ecmp_size, u32 new_adj_index, u16 new_ecmp_size) @@ -1179,8 +1325,8 @@ static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp, char raleu_pl[MLXSW_REG_RALEU_LEN]; mlxsw_reg_raleu_pack(raleu_pl, - (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id, - adj_index, ecmp_size, new_adj_index, + (enum mlxsw_reg_ralxx_protocol) fib->proto, + fib->vr->id, adj_index, ecmp_size, new_adj_index, new_ecmp_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl); } @@ -1190,14 +1336,14 @@ static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp, u32 old_adj_index, u16 old_ecmp_size) { struct mlxsw_sp_fib_entry *fib_entry; - struct mlxsw_sp_vr *vr = NULL; + struct mlxsw_sp_fib *fib = NULL; int err; list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) { - if (vr == fib_entry->fib_node->vr) + if (fib == fib_entry->fib_node->fib) continue; - vr = fib_entry->fib_node->vr; - err = 
mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, vr, + fib = fib_entry->fib_node->fib; + err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib, old_adj_index, old_ecmp_size, nh_grp->adj_index, @@ -1280,7 +1426,6 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, bool old_adj_index_valid; u32 old_adj_index; u16 old_ecmp_size; - int ret; int i; int err; @@ -1318,15 +1463,14 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, */ goto set_trap; - ret = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size); - if (ret < 0) { + err = mlxsw_sp_kvdl_alloc(mlxsw_sp, ecmp_size, &adj_index); + if (err) { /* We ran out of KVD linear space, just set the * trap and let everything flow through kernel. */ dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n"); goto set_trap; } - adj_index = ret; old_adj_index_valid = nh_grp->adj_index_valid; old_adj_index = nh_grp->adj_index; old_ecmp_size = nh_grp->ecmp_size; @@ -1399,22 +1543,22 @@ mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp, } static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh, - struct mlxsw_sp_rif *r) + struct mlxsw_sp_rif *rif) { - if (nh->r) + if (nh->rif) return; - nh->r = r; - list_add(&nh->rif_list_node, &r->nexthop_list); + nh->rif = rif; + list_add(&nh->rif_list_node, &rif->nexthop_list); } static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh) { - if (!nh->r) + if (!nh->rif) return; list_del(&nh->rif_list_node); - nh->r = NULL; + nh->rif = NULL; } static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp, @@ -1505,7 +1649,7 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, { struct net_device *dev = fib_nh->nh_dev; struct in_device *in_dev; - struct mlxsw_sp_rif *r; + struct mlxsw_sp_rif *rif; int err; nh->nh_grp = nh_grp; @@ -1514,15 +1658,18 @@ static int mlxsw_sp_nexthop_init(struct mlxsw_sp *mlxsw_sp, if (err) return err; + if (!dev) + return 0; + in_dev = __in_dev_get_rtnl(dev); if (in_dev && 
IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && fib_nh->nh_flags & RTNH_F_LINKDOWN) return 0; - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!r) + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) return 0; - mlxsw_sp_nexthop_rif_init(nh, r); + mlxsw_sp_nexthop_rif_init(nh, rif); err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); if (err) @@ -1548,7 +1695,7 @@ static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_nexthop_key key; struct mlxsw_sp_nexthop *nh; - struct mlxsw_sp_rif *r; + struct mlxsw_sp_rif *rif; if (mlxsw_sp->router.aborted) return; @@ -1558,13 +1705,13 @@ static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp, if (WARN_ON_ONCE(!nh)) return; - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev); - if (!r) + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fib_nh->nh_dev); + if (!rif) return; switch (event) { case FIB_EVENT_NH_ADD: - mlxsw_sp_nexthop_rif_init(nh, r); + mlxsw_sp_nexthop_rif_init(nh, rif); mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh); break; case FIB_EVENT_NH_DEL: @@ -1577,11 +1724,11 @@ static void mlxsw_sp_nexthop_event(struct mlxsw_sp *mlxsw_sp, } static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *r) + struct mlxsw_sp_rif *rif) { struct mlxsw_sp_nexthop *nh, *tmp; - list_for_each_entry_safe(nh, tmp, &r->nexthop_list, rif_list_node) { + list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) { mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh); mlxsw_sp_nexthop_rif_fini(nh); mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp); @@ -1699,7 +1846,7 @@ static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) { fib_entry->offloaded = true; - switch (fib_entry->fib_node->vr->proto) { + switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: fib_info_offload_inc(fib_entry->nh_group->key.fi); break; @@ -1711,7 +1858,7 @@ static void mlxsw_sp_fib_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) static void 
mlxsw_sp_fib_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) { - switch (fib_entry->fib_node->vr->proto) { + switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: fib_info_offload_dec(fib_entry->nh_group->key.fi); break; @@ -1751,8 +1898,8 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, enum mlxsw_reg_ralue_op op) { char ralue_pl[MLXSW_REG_RALUE_LEN]; + struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; - struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr; enum mlxsw_reg_ralue_trap_action trap_action; u16 trap_id = 0; u32 adjacency_index = 0; @@ -1772,8 +1919,8 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, } mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) vr->proto, op, - vr->id, fib_entry->fib_node->key.prefix_len, + (enum mlxsw_reg_ralxx_protocol) fib->proto, op, + fib->vr->id, fib_entry->fib_node->key.prefix_len, *p_dip); mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, adjacency_index, ecmp_size); @@ -1784,27 +1931,28 @@ static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) { - struct mlxsw_sp_rif *r = fib_entry->nh_group->nh_rif; + struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif; + struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; enum mlxsw_reg_ralue_trap_action trap_action; char ralue_pl[MLXSW_REG_RALUE_LEN]; u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; - struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr; u16 trap_id = 0; - u16 rif = 0; + u16 rif_index = 0; if (mlxsw_sp_fib_entry_should_offload(fib_entry)) { trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP; - rif = r->rif; + rif_index = rif->rif_index; } else { trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP; trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) vr->proto, op, - vr->id, 
fib_entry->fib_node->key.prefix_len, + (enum mlxsw_reg_ralxx_protocol) fib->proto, op, + fib->vr->id, fib_entry->fib_node->key.prefix_len, *p_dip); - mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, rif); + mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, + rif_index); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } @@ -1812,13 +1960,13 @@ static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) { + struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib; char ralue_pl[MLXSW_REG_RALUE_LEN]; u32 *p_dip = (u32 *) fib_entry->fib_node->key.addr; - struct mlxsw_sp_vr *vr = fib_entry->fib_node->vr; mlxsw_reg_ralue_pack4(ralue_pl, - (enum mlxsw_reg_ralxx_protocol) vr->proto, op, - vr->id, fib_entry->fib_node->key.prefix_len, + (enum mlxsw_reg_ralxx_protocol) fib->proto, op, + fib->vr->id, fib_entry->fib_node->key.prefix_len, *p_dip); mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); @@ -1845,7 +1993,7 @@ static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp, { int err = -EINVAL; - switch (fib_entry->fib_node->vr->proto) { + switch (fib_entry->fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: err = mlxsw_sp_fib_entry_op4(mlxsw_sp, fib_entry, op); break; @@ -1877,17 +2025,29 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp, { struct fib_info *fi = fen_info->fi; - if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) { + switch (fen_info->type) { + case RTN_BROADCAST: /* fall through */ + case RTN_LOCAL: fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; return 0; - } - if (fen_info->type != RTN_UNICAST) - return -EINVAL; - if (fi->fib_nh->nh_scope != RT_SCOPE_LINK) + case RTN_UNREACHABLE: /* fall through */ + case RTN_BLACKHOLE: /* fall through */ + case RTN_PROHIBIT: + /* Packets hitting these routes need to be trapped, but + * can do so with a lower priority than 
packets directed + * at the host, so use action type local instead of trap. + */ fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; - else - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; - return 0; + return 0; + case RTN_UNICAST: + if (fi->fib_nh->nh_scope != RT_SCOPE_LINK) + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + else + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + return 0; + default: + return -EINVAL; + } } static struct mlxsw_sp_fib_entry * @@ -1996,7 +2156,7 @@ mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr, } static struct mlxsw_sp_fib_node * -mlxsw_sp_fib_node_create(struct mlxsw_sp_vr *vr, const void *addr, +mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr, size_t addr_len, unsigned char prefix_len) { struct mlxsw_sp_fib_node *fib_node; @@ -2006,18 +2166,15 @@ mlxsw_sp_fib_node_create(struct mlxsw_sp_vr *vr, const void *addr, return NULL; INIT_LIST_HEAD(&fib_node->entry_list); - list_add(&fib_node->list, &vr->fib->node_list); + list_add(&fib_node->list, &fib->node_list); memcpy(fib_node->key.addr, addr, addr_len); fib_node->key.prefix_len = prefix_len; - mlxsw_sp_fib_node_insert(vr->fib, fib_node); - fib_node->vr = vr; return fib_node; } static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node) { - mlxsw_sp_fib_node_remove(fib_node->vr->fib, fib_node); list_del(&fib_node->list); WARN_ON(!list_empty(&fib_node->entry_list)); kfree(fib_node); @@ -2034,7 +2191,7 @@ mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node, static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node) { unsigned char prefix_len = fib_node->key.prefix_len; - struct mlxsw_sp_fib *fib = fib_node->vr->fib; + struct mlxsw_sp_fib *fib = fib_node->fib; if (fib->prefix_ref_count[prefix_len]++ == 0) mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len); @@ -2043,32 +2200,98 @@ static void mlxsw_sp_fib_node_prefix_inc(struct mlxsw_sp_fib_node *fib_node) static void 
mlxsw_sp_fib_node_prefix_dec(struct mlxsw_sp_fib_node *fib_node) { unsigned char prefix_len = fib_node->key.prefix_len; - struct mlxsw_sp_fib *fib = fib_node->vr->fib; + struct mlxsw_sp_fib *fib = fib_node->fib; if (--fib->prefix_ref_count[prefix_len] == 0) mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len); } +static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node, + struct mlxsw_sp_fib *fib) +{ + struct mlxsw_sp_prefix_usage req_prefix_usage; + struct mlxsw_sp_lpm_tree *lpm_tree; + int err; + + err = mlxsw_sp_fib_node_insert(fib, fib_node); + if (err) + return err; + fib_node->fib = fib; + + mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &fib->prefix_usage); + mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len); + + if (!mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) { + err = mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib, + &req_prefix_usage); + if (err) + goto err_tree_check; + } else { + lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage, + fib->proto); + if (IS_ERR(lpm_tree)) + return PTR_ERR(lpm_tree); + fib->lpm_tree = lpm_tree; + err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib); + if (err) + goto err_tree_bind; + } + + mlxsw_sp_fib_node_prefix_inc(fib_node); + + return 0; + +err_tree_bind: + fib->lpm_tree = NULL; + mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); +err_tree_check: + fib_node->fib = NULL; + mlxsw_sp_fib_node_remove(fib, fib_node); + return err; +} + +static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_node *fib_node) +{ + struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree; + struct mlxsw_sp_fib *fib = fib_node->fib; + + mlxsw_sp_fib_node_prefix_dec(fib_node); + + if (mlxsw_sp_prefix_usage_none(&fib->prefix_usage)) { + mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib); + fib->lpm_tree = NULL; + mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree); + } else { + mlxsw_sp_vr_lpm_tree_check(mlxsw_sp, fib, &fib->prefix_usage); + } + + fib_node->fib = NULL; + 
mlxsw_sp_fib_node_remove(fib, fib_node); +} + static struct mlxsw_sp_fib_node * mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp, const struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_fib_node *fib_node; + struct mlxsw_sp_fib *fib; struct mlxsw_sp_vr *vr; int err; - vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id, - MLXSW_SP_L3_PROTO_IPV4); + vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->tb_id); if (IS_ERR(vr)) return ERR_CAST(vr); + fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4); - fib_node = mlxsw_sp_fib_node_lookup(vr->fib, &fen_info->dst, + fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst, sizeof(fen_info->dst), fen_info->dst_len); if (fib_node) return fib_node; - fib_node = mlxsw_sp_fib_node_create(vr, &fen_info->dst, + fib_node = mlxsw_sp_fib_node_create(fib, &fen_info->dst, sizeof(fen_info->dst), fen_info->dst_len); if (!fib_node) { @@ -2076,22 +2299,29 @@ mlxsw_sp_fib4_node_get(struct mlxsw_sp *mlxsw_sp, goto err_fib_node_create; } + err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib); + if (err) + goto err_fib_node_init; + return fib_node; +err_fib_node_init: + mlxsw_sp_fib_node_destroy(fib_node); err_fib_node_create: - mlxsw_sp_vr_put(mlxsw_sp, vr); + mlxsw_sp_vr_put(vr); return ERR_PTR(err); } static void mlxsw_sp_fib4_node_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node *fib_node) { - struct mlxsw_sp_vr *vr = fib_node->vr; + struct mlxsw_sp_vr *vr = fib_node->fib->vr; if (!list_empty(&fib_node->entry_list)) return; + mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node); mlxsw_sp_fib_node_destroy(fib_node); - mlxsw_sp_vr_put(mlxsw_sp, vr); + mlxsw_sp_vr_put(vr); } static struct mlxsw_sp_fib_entry * @@ -2236,8 +2466,6 @@ static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp, if (err) goto err_fib4_node_entry_add; - mlxsw_sp_fib_node_prefix_inc(fib_node); - return 0; err_fib4_node_entry_add: @@ -2251,7 +2479,6 @@ mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_fib_node *fib_node = 
fib_entry->fib_node; - mlxsw_sp_fib_node_prefix_dec(fib_node); mlxsw_sp_fib4_node_entry_del(mlxsw_sp, fib_node, fib_entry); mlxsw_sp_fib4_node_list_remove(fib_entry); } @@ -2340,9 +2567,7 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) { char ralta_pl[MLXSW_REG_RALTA_LEN]; char ralst_pl[MLXSW_REG_RALST_LEN]; - char raltb_pl[MLXSW_REG_RALTB_LEN]; - char ralue_pl[MLXSW_REG_RALUE_LEN]; - int err; + int i, err; mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4, MLXSW_SP_LPM_TREE_MIN); @@ -2355,16 +2580,33 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) if (err) return err; - mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4, - MLXSW_SP_LPM_TREE_MIN); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); - if (err) - return err; + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { + struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[i]; + char raltb_pl[MLXSW_REG_RALTB_LEN]; + char ralue_pl[MLXSW_REG_RALUE_LEN]; - mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4, - MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0); - mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); + if (!mlxsw_sp_vr_is_used(vr)) + continue; + + mlxsw_reg_raltb_pack(raltb_pl, vr->id, + MLXSW_REG_RALXX_PROTOCOL_IPV4, + MLXSW_SP_LPM_TREE_MIN); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), + raltb_pl); + if (err) + return err; + + mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4, + MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0, + 0); + mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), + ralue_pl); + if (err) + return err; + } + + return 0; } static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, @@ -2390,7 +2632,7 @@ static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_node 
*fib_node) { - switch (fib_node->vr->proto) { + switch (fib_node->fib->proto) { case MLXSW_SP_L3_PROTO_IPV4: mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node); break; @@ -2400,26 +2642,32 @@ static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp, } } -static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) +static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_vr *vr, + enum mlxsw_sp_l3proto proto) { + struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto); struct mlxsw_sp_fib_node *fib_node, *tmp; - struct mlxsw_sp_vr *vr; + + list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) { + bool do_break = &tmp->list == &fib->node_list; + + mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node); + if (do_break) + break; + } +} + +static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp) +{ int i; for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) { - vr = &mlxsw_sp->router.vrs[i]; + struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[i]; - if (!vr->used) + if (!mlxsw_sp_vr_is_used(vr)) continue; - - list_for_each_entry_safe(fib_node, tmp, &vr->fib->node_list, - list) { - bool do_break = &tmp->list == &vr->fib->node_list; - - mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node); - if (do_break) - break; - } + mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4); } } @@ -2437,74 +2685,11 @@ static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n"); } -static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) -{ - char ritr_pl[MLXSW_REG_RITR_LEN]; - int err; - - mlxsw_reg_ritr_rif_pack(ritr_pl, rif); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); - if (WARN_ON_ONCE(err)) - return err; - - mlxsw_reg_ritr_enable_set(ritr_pl, false); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); -} - -void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_rif *r) -{ - 
mlxsw_sp_router_rif_disable(mlxsw_sp, r->rif); - mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, r); - mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, r); -} - -static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) -{ - char rgcr_pl[MLXSW_REG_RGCR_LEN]; - u64 max_rifs; - int err; - - if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS)) - return -EIO; - - max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); - mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *), - GFP_KERNEL); - if (!mlxsw_sp->rifs) - return -ENOMEM; - - mlxsw_reg_rgcr_pack(rgcr_pl, true); - mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); - if (err) - goto err_rgcr_fail; - - return 0; - -err_rgcr_fail: - kfree(mlxsw_sp->rifs); - return err; -} - -static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) -{ - char rgcr_pl[MLXSW_REG_RGCR_LEN]; - int i; - - mlxsw_reg_rgcr_pack(rgcr_pl, false); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); - - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) - WARN_ON_ONCE(mlxsw_sp->rifs[i]); - - kfree(mlxsw_sp->rifs); -} - struct mlxsw_sp_fib_event_work { struct work_struct work; union { struct fib_entry_notifier_info fen_info; + struct fib_rule_notifier_info fr_info; struct fib_nh_notifier_info fnh_info; }; struct mlxsw_sp *mlxsw_sp; @@ -2516,6 +2701,7 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) struct mlxsw_sp_fib_event_work *fib_work = container_of(work, struct mlxsw_sp_fib_event_work, work); struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp; + struct fib_rule *rule; bool replace, append; int err; @@ -2539,7 +2725,10 @@ static void mlxsw_sp_router_fib_event_work(struct work_struct *work) break; case FIB_EVENT_RULE_ADD: /* fall through */ case FIB_EVENT_RULE_DEL: - mlxsw_sp_router_fib4_abort(mlxsw_sp); + rule = fib_work->fr_info.rule; + if (!fib4_rule_default(rule) && !rule->l3mdev) + 
mlxsw_sp_router_fib4_abort(mlxsw_sp); + fib_rule_put(rule); break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: @@ -2582,6 +2771,11 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, */ fib_info_hold(fib_work->fen_info.fi); break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + memcpy(&fib_work->fr_info, ptr, sizeof(fib_work->fr_info)); + fib_rule_get(fib_work->fr_info.rule); + break; case FIB_EVENT_NH_ADD: /* fall through */ case FIB_EVENT_NH_DEL: memcpy(&fib_work->fnh_info, ptr, sizeof(fib_work->fnh_info)); @@ -2594,6 +2788,707 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb, return NOTIFY_DONE; } +static struct mlxsw_sp_rif * +mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *dev) +{ + int i; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) + if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev) + return mlxsw_sp->rifs[i]; + + return NULL; +} + +static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) +{ + char ritr_pl[MLXSW_REG_RITR_LEN]; + int err; + + mlxsw_reg_ritr_rif_pack(ritr_pl, rif); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); + if (WARN_ON_ONCE(err)) + return err; + + mlxsw_reg_ritr_enable_set(ritr_pl, false); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif) +{ + mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index); + mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif); + mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif); +} + +static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, + const struct in_device *in_dev, + unsigned long event) +{ + switch (event) { + case NETDEV_UP: + if (!rif) + return true; + return false; + case NETDEV_DOWN: + if (rif && !in_dev->ifa_list && + !netif_is_l3_slave(rif->dev)) + return true; + /* It is possible we already removed 
the RIF ourselves + * if it was assigned to a netdev that is now a bridge + * or LAG slave. + */ + return false; + } + + return false; +} + +#define MLXSW_SP_INVALID_INDEX_RIF 0xffff +static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp) +{ + int i; + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) + if (!mlxsw_sp->rifs[i]) + return i; + + return MLXSW_SP_INVALID_INDEX_RIF; +} + +static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport, + bool *p_lagged, u16 *p_system_port) +{ + u8 local_port = mlxsw_sp_vport->local_port; + + *p_lagged = mlxsw_sp_vport->lagged; + *p_system_port = *p_lagged ? mlxsw_sp_vport->lag_id : local_port; +} + +static int mlxsw_sp_vport_rif_sp_op(struct mlxsw_sp_port *mlxsw_sp_vport, + u16 vr_id, struct net_device *l3_dev, + u16 rif_index, bool create) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + bool lagged = mlxsw_sp_vport->lagged; + char ritr_pl[MLXSW_REG_RITR_LEN]; + u16 system_port; + + mlxsw_reg_ritr_pack(ritr_pl, create, MLXSW_REG_RITR_SP_IF, rif_index, + vr_id, l3_dev->mtu, l3_dev->dev_addr); + + mlxsw_sp_vport_rif_sp_attr_get(mlxsw_sp_vport, &lagged, &system_port); + mlxsw_reg_ritr_sp_if_pack(ritr_pl, lagged, system_port, + mlxsw_sp_vport_vid_get(mlxsw_sp_vport)); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport); + +static u16 mlxsw_sp_rif_sp_to_fid(u16 rif_index) +{ + return MLXSW_SP_RFID_BASE + rif_index; +} + +static struct mlxsw_sp_fid * +mlxsw_sp_rfid_alloc(u16 fid, struct net_device *l3_dev) +{ + struct mlxsw_sp_fid *f; + + f = kzalloc(sizeof(*f), GFP_KERNEL); + if (!f) + return NULL; + + f->leave = mlxsw_sp_vport_rif_sp_leave; + f->ref_count = 0; + f->dev = l3_dev; + f->fid = fid; + + return f; +} + +static struct mlxsw_sp_rif * +mlxsw_sp_rif_alloc(u16 rif_index, u16 vr_id, struct net_device *l3_dev, + struct mlxsw_sp_fid *f) +{ + struct mlxsw_sp_rif *rif; + 
+ rif = kzalloc(sizeof(*rif), GFP_KERNEL); + if (!rif) + return NULL; + + INIT_LIST_HEAD(&rif->nexthop_list); + INIT_LIST_HEAD(&rif->neigh_list); + ether_addr_copy(rif->addr, l3_dev->dev_addr); + rif->mtu = l3_dev->mtu; + rif->vr_id = vr_id; + rif->dev = l3_dev; + rif->rif_index = rif_index; + rif->f = f; + + return rif; +} + +u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif) +{ + return rif->rif_index; +} + +int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif) +{ + return rif->dev->ifindex; +} + +static struct mlxsw_sp_rif * +mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *l3_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + u32 tb_id = l3mdev_fib_table(l3_dev); + struct mlxsw_sp_vr *vr; + struct mlxsw_sp_fid *f; + struct mlxsw_sp_rif *rif; + u16 fid, rif_index; + int err; + + rif_index = mlxsw_sp_avail_rif_get(mlxsw_sp); + if (rif_index == MLXSW_SP_INVALID_INDEX_RIF) + return ERR_PTR(-ERANGE); + + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? 
: RT_TABLE_MAIN); + if (IS_ERR(vr)) + return ERR_CAST(vr); + + err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, + rif_index, true); + if (err) + goto err_vport_rif_sp_op; + + fid = mlxsw_sp_rif_sp_to_fid(rif_index); + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, true); + if (err) + goto err_rif_fdb_op; + + f = mlxsw_sp_rfid_alloc(fid, l3_dev); + if (!f) { + err = -ENOMEM; + goto err_rfid_alloc; + } + + rif = mlxsw_sp_rif_alloc(rif_index, vr->id, l3_dev, f); + if (!rif) { + err = -ENOMEM; + goto err_rif_alloc; + } + + if (devlink_dpipe_table_counter_enabled(priv_to_devlink(mlxsw_sp->core), + MLXSW_SP_DPIPE_TABLE_NAME_ERIF)) { + err = mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, + MLXSW_SP_RIF_COUNTER_EGRESS); + if (err) + netdev_dbg(mlxsw_sp_vport->dev, + "Counter alloc Failed err=%d\n", err); + } + + f->rif = rif; + mlxsw_sp->rifs[rif_index] = rif; + vr->rif_count++; + + return rif; + +err_rif_alloc: + kfree(f); +err_rfid_alloc: + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false); +err_rif_fdb_op: + mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, rif_index, + false); +err_vport_rif_sp_op: + mlxsw_sp_vr_put(vr); + return ERR_PTR(err); +} + +static void mlxsw_sp_vport_rif_sp_destroy(struct mlxsw_sp_port *mlxsw_sp_vport, + struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[rif->vr_id]; + struct net_device *l3_dev = rif->dev; + struct mlxsw_sp_fid *f = rif->f; + u16 rif_index = rif->rif_index; + u16 fid = f->fid; + + mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); + + mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS); + mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_INGRESS); + + vr->rif_count--; + mlxsw_sp->rifs[rif_index] = NULL; + f->rif = NULL; + + kfree(rif); + + kfree(f); + + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, fid, false); + + mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, vr->id, l3_dev, rif_index, 
+ false); + mlxsw_sp_vr_put(vr); +} + +static int mlxsw_sp_vport_rif_sp_join(struct mlxsw_sp_port *mlxsw_sp_vport, + struct net_device *l3_dev) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_vport->mlxsw_sp; + struct mlxsw_sp_rif *rif; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); + if (!rif) { + rif = mlxsw_sp_vport_rif_sp_create(mlxsw_sp_vport, l3_dev); + if (IS_ERR(rif)) + return PTR_ERR(rif); + } + + mlxsw_sp_vport_fid_set(mlxsw_sp_vport, rif->f); + rif->f->ref_count++; + + netdev_dbg(mlxsw_sp_vport->dev, "Joined FID=%d\n", rif->f->fid); + + return 0; +} + +static void mlxsw_sp_vport_rif_sp_leave(struct mlxsw_sp_port *mlxsw_sp_vport) +{ + struct mlxsw_sp_fid *f = mlxsw_sp_vport_fid_get(mlxsw_sp_vport); + + netdev_dbg(mlxsw_sp_vport->dev, "Left FID=%d\n", f->fid); + + mlxsw_sp_vport_fid_set(mlxsw_sp_vport, NULL); + if (--f->ref_count == 0) + mlxsw_sp_vport_rif_sp_destroy(mlxsw_sp_vport, f->rif); +} + +static int mlxsw_sp_inetaddr_vport_event(struct net_device *l3_dev, + struct net_device *port_dev, + unsigned long event, u16 vid) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev); + struct mlxsw_sp_port *mlxsw_sp_vport; + + mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); + if (WARN_ON(!mlxsw_sp_vport)) + return -EINVAL; + + switch (event) { + case NETDEV_UP: + return mlxsw_sp_vport_rif_sp_join(mlxsw_sp_vport, l3_dev); + case NETDEV_DOWN: + mlxsw_sp_vport_rif_sp_leave(mlxsw_sp_vport); + break; + } + + return 0; +} + +static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev, + unsigned long event) +{ + if (netif_is_bridge_port(port_dev) || + netif_is_lag_port(port_dev) || + netif_is_ovs_port(port_dev)) + return 0; + + return mlxsw_sp_inetaddr_vport_event(port_dev, port_dev, event, 1); +} + +static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev, + struct net_device *lag_dev, + unsigned long event, u16 vid) +{ + struct net_device *port_dev; + struct list_head *iter; + int err; + + 
netdev_for_each_lower_dev(lag_dev, port_dev, iter) { + if (mlxsw_sp_port_dev_check(port_dev)) { + err = mlxsw_sp_inetaddr_vport_event(l3_dev, port_dev, + event, vid); + if (err) + return err; + } + } + + return 0; +} + +static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev, + unsigned long event) +{ + if (netif_is_bridge_port(lag_dev)) + return 0; + + return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event, 1); +} + +static struct mlxsw_sp_fid *mlxsw_sp_bridge_fid_get(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev) +{ + u16 fid; + + if (is_vlan_dev(l3_dev)) + fid = vlan_dev_vlan_id(l3_dev); + else if (mlxsw_sp->master_bridge.dev == l3_dev) + fid = 1; + else + return mlxsw_sp_vfid_find(mlxsw_sp, l3_dev); + + return mlxsw_sp_fid_find(mlxsw_sp, fid); +} + +static u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp) +{ + return mlxsw_core_max_ports(mlxsw_sp->core) + 1; +} + +static enum mlxsw_flood_table_type mlxsw_sp_flood_table_type_get(u16 fid) +{ + return mlxsw_sp_fid_is_vfid(fid) ? MLXSW_REG_SFGC_TABLE_TYPE_FID : + MLXSW_REG_SFGC_TABLE_TYPE_FID_OFFEST; +} + +static u16 mlxsw_sp_flood_table_index_get(u16 fid) +{ + return mlxsw_sp_fid_is_vfid(fid) ? 
mlxsw_sp_fid_to_vfid(fid) : fid; +} + +static int mlxsw_sp_router_port_flood_set(struct mlxsw_sp *mlxsw_sp, u16 fid, + bool set) +{ + u8 router_port = mlxsw_sp_router_port(mlxsw_sp); + enum mlxsw_flood_table_type table_type; + char *sftr_pl; + u16 index; + int err; + + sftr_pl = kmalloc(MLXSW_REG_SFTR_LEN, GFP_KERNEL); + if (!sftr_pl) + return -ENOMEM; + + table_type = mlxsw_sp_flood_table_type_get(fid); + index = mlxsw_sp_flood_table_index_get(fid); + mlxsw_reg_sftr_pack(sftr_pl, MLXSW_SP_FLOOD_TABLE_BC, index, table_type, + 1, router_port, set); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sftr), sftr_pl); + + kfree(sftr_pl); + return err; +} + +static enum mlxsw_reg_ritr_if_type mlxsw_sp_rif_type_get(u16 fid) +{ + if (mlxsw_sp_fid_is_vfid(fid)) + return MLXSW_REG_RITR_FID_IF; + else + return MLXSW_REG_RITR_VLAN_IF; +} + +static int mlxsw_sp_rif_bridge_op(struct mlxsw_sp *mlxsw_sp, u16 vr_id, + struct net_device *l3_dev, + u16 fid, u16 rif, + bool create) +{ + enum mlxsw_reg_ritr_if_type rif_type; + char ritr_pl[MLXSW_REG_RITR_LEN]; + + rif_type = mlxsw_sp_rif_type_get(fid); + mlxsw_reg_ritr_pack(ritr_pl, create, rif_type, rif, vr_id, l3_dev->mtu, + l3_dev->dev_addr); + mlxsw_reg_ritr_fid_set(ritr_pl, rif_type, fid); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev, + struct mlxsw_sp_fid *f) +{ + u32 tb_id = l3mdev_fib_table(l3_dev); + struct mlxsw_sp_rif *rif; + struct mlxsw_sp_vr *vr; + u16 rif_index; + int err; + + rif_index = mlxsw_sp_avail_rif_get(mlxsw_sp); + if (rif_index == MLXSW_SP_INVALID_INDEX_RIF) + return -ERANGE; + + vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? 
: RT_TABLE_MAIN); + if (IS_ERR(vr)) + return PTR_ERR(vr); + + err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true); + if (err) + goto err_port_flood_set; + + err = mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, + rif_index, true); + if (err) + goto err_rif_bridge_op; + + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, true); + if (err) + goto err_rif_fdb_op; + + rif = mlxsw_sp_rif_alloc(rif_index, vr->id, l3_dev, f); + if (!rif) { + err = -ENOMEM; + goto err_rif_alloc; + } + + f->rif = rif; + mlxsw_sp->rifs[rif_index] = rif; + vr->rif_count++; + + netdev_dbg(l3_dev, "RIF=%d created\n", rif_index); + + return 0; + +err_rif_alloc: + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false); +err_rif_fdb_op: + mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, rif_index, + false); +err_rif_bridge_op: + mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false); +err_port_flood_set: + mlxsw_sp_vr_put(vr); + return err; +} + +void mlxsw_sp_rif_bridge_destroy(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif) +{ + struct mlxsw_sp_vr *vr = &mlxsw_sp->router.vrs[rif->vr_id]; + struct net_device *l3_dev = rif->dev; + struct mlxsw_sp_fid *f = rif->f; + u16 rif_index = rif->rif_index; + + mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); + + vr->rif_count--; + mlxsw_sp->rifs[rif_index] = NULL; + f->rif = NULL; + + kfree(rif); + + mlxsw_sp_rif_fdb_op(mlxsw_sp, l3_dev->dev_addr, f->fid, false); + + mlxsw_sp_rif_bridge_op(mlxsw_sp, vr->id, l3_dev, f->fid, rif_index, + false); + + mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, false); + + mlxsw_sp_vr_put(vr); + + netdev_dbg(l3_dev, "RIF=%d destroyed\n", rif_index); +} + +static int mlxsw_sp_inetaddr_bridge_event(struct net_device *l3_dev, + struct net_device *br_dev, + unsigned long event) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev); + struct mlxsw_sp_fid *f; + + /* FID can either be an actual FID if the L3 device is the + * VLAN-aware bridge or a VLAN device on top. 
Otherwise, the + * L3 device is a VLAN-unaware bridge and we get a vFID. + */ + f = mlxsw_sp_bridge_fid_get(mlxsw_sp, l3_dev); + if (WARN_ON(!f)) + return -EINVAL; + + switch (event) { + case NETDEV_UP: + return mlxsw_sp_rif_bridge_create(mlxsw_sp, l3_dev, f); + case NETDEV_DOWN: + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif); + break; + } + + return 0; +} + +static int mlxsw_sp_inetaddr_vlan_event(struct net_device *vlan_dev, + unsigned long event) +{ + struct net_device *real_dev = vlan_dev_real_dev(vlan_dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(vlan_dev); + u16 vid = vlan_dev_vlan_id(vlan_dev); + + if (mlxsw_sp_port_dev_check(real_dev)) + return mlxsw_sp_inetaddr_vport_event(vlan_dev, real_dev, event, + vid); + else if (netif_is_lag_master(real_dev)) + return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event, + vid); + else if (netif_is_bridge_master(real_dev) && + mlxsw_sp->master_bridge.dev == real_dev) + return mlxsw_sp_inetaddr_bridge_event(vlan_dev, real_dev, + event); + + return 0; +} + +static int __mlxsw_sp_inetaddr_event(struct net_device *dev, + unsigned long event) +{ + if (mlxsw_sp_port_dev_check(dev)) + return mlxsw_sp_inetaddr_port_event(dev, event); + else if (netif_is_lag_master(dev)) + return mlxsw_sp_inetaddr_lag_event(dev, event); + else if (netif_is_bridge_master(dev)) + return mlxsw_sp_inetaddr_bridge_event(dev, dev, event); + else if (is_vlan_dev(dev)) + return mlxsw_sp_inetaddr_vlan_event(dev, event); + else + return 0; +} + +int mlxsw_sp_inetaddr_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct in_ifaddr *ifa = (struct in_ifaddr *) ptr; + struct net_device *dev = ifa->ifa_dev->dev; + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + int err = 0; + + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + goto out; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!mlxsw_sp_rif_should_config(rif, ifa->ifa_dev, event)) + goto out; + + err = __mlxsw_sp_inetaddr_event(dev, 
event); +out: + return notifier_from_errno(err); +} + +static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index, + const char *mac, int mtu) +{ + char ritr_pl[MLXSW_REG_RITR_LEN]; + int err; + + mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); + if (err) + return err; + + mlxsw_reg_ritr_mtu_set(ritr_pl, mtu); + mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac); + mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl); +} + +int mlxsw_sp_netdevice_router_port_event(struct net_device *dev) +{ + struct mlxsw_sp *mlxsw_sp; + struct mlxsw_sp_rif *rif; + int err; + + mlxsw_sp = mlxsw_sp_lower_get(dev); + if (!mlxsw_sp) + return 0; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); + if (!rif) + return 0; + + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, rif->f->fid, false); + if (err) + return err; + + err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr, + dev->mtu); + if (err) + goto err_rif_edit; + + err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, rif->f->fid, true); + if (err) + goto err_rif_fdb_op; + + ether_addr_copy(rif->addr, dev->dev_addr); + rif->mtu = dev->mtu; + + netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index); + + return 0; + +err_rif_fdb_op: + mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu); +err_rif_edit: + mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, rif->f->fid, true); + return err; +} + +static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev) +{ + struct mlxsw_sp_rif *rif; + + /* If netdev is already associated with a RIF, then we need to + * destroy it and create a new one with the new virtual router ID. 
+ */ + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); + if (rif) + __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN); + + return __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_UP); +} + +static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp, + struct net_device *l3_dev) +{ + struct mlxsw_sp_rif *rif; + + rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev); + if (!rif) + return; + __mlxsw_sp_inetaddr_event(l3_dev, NETDEV_DOWN); +} + +int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event, + struct netdev_notifier_changeupper_info *info) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev); + int err = 0; + + if (!mlxsw_sp) + return 0; + + switch (event) { + case NETDEV_PRECHANGEUPPER: + return 0; + case NETDEV_CHANGEUPPER: + if (info->linking) + err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev); + else + mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev); + break; + } + + return err; +} + static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb) { struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb); @@ -2606,6 +3501,48 @@ static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb) mlxsw_sp_router_fib_flush(mlxsw_sp); } +static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +{ + char rgcr_pl[MLXSW_REG_RGCR_LEN]; + u64 max_rifs; + int err; + + if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS)) + return -EIO; + + max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); + mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *), + GFP_KERNEL); + if (!mlxsw_sp->rifs) + return -ENOMEM; + + mlxsw_reg_rgcr_pack(rgcr_pl, true); + mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + if (err) + goto err_rgcr_fail; + + return 0; + +err_rgcr_fail: + kfree(mlxsw_sp->rifs); + return err; +} + +static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) +{ + char rgcr_pl[MLXSW_REG_RGCR_LEN]; + int i; + + 
mlxsw_reg_rgcr_pack(rgcr_pl, false); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + + for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) + WARN_ON_ONCE(mlxsw_sp->rifs[i]); + + kfree(mlxsw_sp->rifs); +} + int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { int err; @@ -2625,7 +3562,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_nexthop_group_ht_init; - mlxsw_sp_lpm_init(mlxsw_sp); + err = mlxsw_sp_lpm_init(mlxsw_sp); + if (err) + goto err_lpm_init; + err = mlxsw_sp_vrs_init(mlxsw_sp); if (err) goto err_vrs_init; @@ -2647,6 +3587,8 @@ err_register_fib_notifier: err_neigh_init: mlxsw_sp_vrs_fini(mlxsw_sp); err_vrs_init: + mlxsw_sp_lpm_fini(mlxsw_sp); +err_lpm_init: rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht); err_nexthop_group_ht_init: rhashtable_destroy(&mlxsw_sp->router.nexthop_ht); @@ -2660,6 +3602,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) unregister_fib_notifier(&mlxsw_sp->fib_nb); mlxsw_sp_neigh_fini(mlxsw_sp); mlxsw_sp_vrs_fini(mlxsw_sp); + mlxsw_sp_lpm_fini(mlxsw_sp); rhashtable_destroy(&mlxsw_sp->router.nexthop_group_ht); rhashtable_destroy(&mlxsw_sp->router.nexthop_ht); __mlxsw_sp_router_fini(mlxsw_sp); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h new file mode 100644 index 000000000000..c3095fef6697 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -0,0 +1,58 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h + * Copyright (c) 2017 Mellanox Technologies. All rights reserved. + * Copyright (c) 2017 Arkadi Sharshevsky <arkadis@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _MLXSW_ROUTER_H_ +#define _MLXSW_ROUTER_H_ + +#include "spectrum.h" + +enum mlxsw_sp_rif_counter_dir { + MLXSW_SP_RIF_COUNTER_INGRESS, + MLXSW_SP_RIF_COUNTER_EGRESS, +}; + +u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif); +int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); +int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir, + u64 *cnt); +void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir); +int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_rif *rif, + enum mlxsw_sp_rif_counter_dir dir); + +#endif /* _MLXSW_ROUTER_H_*/ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 598727d578c1..0d8411f1f954 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -568,8 +568,8 @@ void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f) list_del(&f->list); - if (f->r) - mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->r); + if (f->rif) + mlxsw_sp_rif_bridge_destroy(mlxsw_sp, f->rif); kfree(f); @@ -745,27 +745,6 @@ err_port_allow_untagged_set: return err; } -static int __mlxsw_sp_port_vlans_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid_begin, u16 vid_end, bool is_member, - bool untagged) -{ - u16 vid, vid_e; - int err; - - for (vid = vid_begin; vid <= vid_end; - vid += MLXSW_REG_SPVM_REC_MAX_COUNT) { - vid_e = min((u16) (vid + MLXSW_REG_SPVM_REC_MAX_COUNT - 1), - vid_end); - - err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid, vid_e, - is_member, untagged); - if (err) - return err; - } - - return 0; -} - static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool learn_enable) @@ -804,8 +783,8 @@ static int __mlxsw_sp_port_vlans_add(struct 
mlxsw_sp_port *mlxsw_sp_port, return err; } - err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, - true, flag_untagged); + err = mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid_begin, vid_end, + true, flag_untagged); if (err) { netdev_err(dev, "Unable to add VIDs %d-%d\n", vid_begin, vid_end); @@ -863,8 +842,8 @@ err_port_vid_learning_set: if (old_pvid != mlxsw_sp_port->pvid) mlxsw_sp_port_pvid_set(mlxsw_sp_port, old_pvid); err_port_pvid_set: - __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, false, - false); + mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid_begin, vid_end, + false, false); err_port_vlans_set: mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end); return err; @@ -1012,7 +991,7 @@ static int mlxsw_sp_port_smid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mid, mlxsw_reg_smid_pack(smid_pl, mid, mlxsw_sp_port->local_port, add); if (clear_all_ports) { - for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) + for (i = 1; i < mlxsw_core_max_ports(mlxsw_sp->core); i++) if (mlxsw_sp->ports[i]) mlxsw_reg_smid_port_mask_set(smid_pl, i, 1); } @@ -1171,8 +1150,8 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, if (pvid >= vid_begin && pvid <= vid_end) mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0); - __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, false, - false); + mlxsw_sp_port_vlan_set(mlxsw_sp_port, vid_begin, vid_end, + false, false); mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end); diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index ec1e886d4566..3b0f72455681 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -1321,7 +1321,7 @@ static void mlxsw_sx_ports_remove(struct mlxsw_sx *mlxsw_sx) { int i; - for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) + for (i = 1; i < mlxsw_core_max_ports(mlxsw_sx->core); i++) if (mlxsw_sx_port_created(mlxsw_sx, i)) mlxsw_sx_port_remove(mlxsw_sx, i); 
kfree(mlxsw_sx->ports); @@ -1329,17 +1329,18 @@ static void mlxsw_sx_ports_remove(struct mlxsw_sx *mlxsw_sx) static int mlxsw_sx_ports_create(struct mlxsw_sx *mlxsw_sx) { + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sx->core); size_t alloc_size; u8 module, width; int i; int err; - alloc_size = sizeof(struct mlxsw_sx_port *) * MLXSW_PORT_MAX_PORTS; + alloc_size = sizeof(struct mlxsw_sx_port *) * max_ports; mlxsw_sx->ports = kzalloc(alloc_size, GFP_KERNEL); if (!mlxsw_sx->ports) return -ENOMEM; - for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) { + for (i = 1; i < max_ports; i++) { err = mlxsw_sx_port_module_info_get(mlxsw_sx, i, &module, &width); if (err) diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 02ea48b15eb5..e008fdbed20f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -55,6 +55,7 @@ enum { MLXSW_TRAP_ID_IGMP_V2_LEAVE = 0x33, MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34, MLXSW_TRAP_ID_PKT_SAMPLE = 0x38, + MLXSW_TRAP_ID_FID_MISS = 0x3D, MLXSW_TRAP_ID_ARPBC = 0x50, MLXSW_TRAP_ID_ARPUC = 0x51, MLXSW_TRAP_ID_MTUERROR = 0x52, |