diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-18 02:26:30 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-18 02:26:30 +0300 |
commit | a7fd20d1c476af4563e66865213474a2f9f473a4 (patch) | |
tree | fb1399e2f82842450245fb058a8fb23c52865f43 /drivers/net/ethernet/mellanox | |
parent | b80fed9595513384424cd141923c9161c4b5021b (diff) | |
parent | 917fa5353da05e8a0045b8acacba8d50400d5b12 (diff) | |
download | linux-a7fd20d1c476af4563e66865213474a2f9f473a4.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Highlights:
1) Support SPI based w5100 devices, from Akinobu Mita.
2) Partial Segmentation Offload, from Alexander Duyck.
3) Add GMAC4 support to stmmac driver, from Alexandre TORGUE.
4) Allow cls_flower stats offload, from Amir Vadai.
5) Implement bpf blinding, from Daniel Borkmann.
6) Optimize _ASYNC_ bit twiddling on sockets, unless the socket is
actually using FASYNC these atomics are superfluous. From Eric
Dumazet.
7) Run TCP more preemptibly, also from Eric Dumazet.
8) Support LED blinking, EEPROM dumps, and rxvlan offloading in mlx5e
driver, from Gal Pressman.
9) Allow creating ppp devices via rtnetlink, from Guillaume Nault.
10) Improve BPF usage documentation, from Jesper Dangaard Brouer.
11) Support tunneling offloads in qed, from Manish Chopra.
12) aRFS offloading in mlx5e, from Maor Gottlieb.
13) Add RFS and RPS support to SCTP protocol, from Marcelo Ricardo
Leitner.
14) Add MSG_EOR support to TCP, this allows controlling packet
coalescing on application record boundaries for more accurate
socket timestamp sampling. From Martin KaFai Lau.
15) Fix alignment of 64-bit netlink attributes across the board, from
Nicolas Dichtel.
16) Per-vlan stats in bridging, from Nikolay Aleksandrov.
17) Several conversions of drivers to ethtool ksettings, from Philippe
Reynes.
18) Checksum neutral ILA in ipv6, from Tom Herbert.
19) Factorize all of the various marvell dsa drivers into one, from
Vivien Didelot
20) Add VF support to qed driver, from Yuval Mintz"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1649 commits)
Revert "phy dp83867: Fix compilation with CONFIG_OF_MDIO=m"
Revert "phy dp83867: Make rgmii parameters optional"
r8169: default to 64-bit DMA on recent PCIe chips
phy dp83867: Make rgmii parameters optional
phy dp83867: Fix compilation with CONFIG_OF_MDIO=m
bpf: arm64: remove callee-save registers use for tmp registers
asix: Fix offset calculation in asix_rx_fixup() causing slow transmissions
switchdev: pass pointer to fib_info instead of copy
net_sched: close another race condition in tcf_mirred_release()
tipc: fix nametable publication field in nl compat
drivers: net: Don't print unpopulated net_device name
qed: add support for dcbx.
ravb: Add missing free_irq() calls to ravb_close()
qed: Remove a stray tab
net: ethernet: fec-mpc52xx: use phy_ethtool_{get|set}_link_ksettings
net: ethernet: fec-mpc52xx: use phydev from struct net_device
bpf, doc: fix typo on bpf_asm descriptions
stmmac: hardware TX COE doesn't work when force_thresh_dma_mode is set
net: ethernet: fs-enet: use phy_ethtool_{get|set}_link_ksettings
net: ethernet: fs-enet: use phydev from struct net_device
...
Diffstat (limited to 'drivers/net/ethernet/mellanox')
47 files changed, 8967 insertions, 2339 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index 0c51c69f802f..249a4584401a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -576,41 +576,48 @@ out: return res; } -/* - * Handling for queue buffers -- we allocate a bunch of memory and - * register it in a memory region at HCA virtual address 0. If the - * requested size is > max_direct, we split the allocation into - * multiple pages, so we don't require too much contiguous memory. - */ -int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, - struct mlx4_buf *buf, gfp_t gfp) +static int mlx4_buf_direct_alloc(struct mlx4_dev *dev, int size, + struct mlx4_buf *buf, gfp_t gfp) { dma_addr_t t; - if (size <= max_direct) { - buf->nbufs = 1; - buf->npages = 1; - buf->page_shift = get_order(size) + PAGE_SHIFT; - buf->direct.buf = dma_alloc_coherent(&dev->persist->pdev->dev, - size, &t, gfp); - if (!buf->direct.buf) - return -ENOMEM; + buf->nbufs = 1; + buf->npages = 1; + buf->page_shift = get_order(size) + PAGE_SHIFT; + buf->direct.buf = + dma_zalloc_coherent(&dev->persist->pdev->dev, + size, &t, gfp); + if (!buf->direct.buf) + return -ENOMEM; - buf->direct.map = t; + buf->direct.map = t; - while (t & ((1 << buf->page_shift) - 1)) { - --buf->page_shift; - buf->npages *= 2; - } + while (t & ((1 << buf->page_shift) - 1)) { + --buf->page_shift; + buf->npages *= 2; + } - memset(buf->direct.buf, 0, size); + return 0; +} + +/* Handling for queue buffers -- we allocate a bunch of memory and + * register it in a memory region at HCA virtual address 0. If the + * requested size is > max_direct, we split the allocation into + * multiple pages, so we don't require too much contiguous memory. + */ +int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, + struct mlx4_buf *buf, gfp_t gfp) +{ + if (size <= max_direct) { + return mlx4_buf_direct_alloc(dev, size, buf, gfp); } else { + dma_addr_t t; int i; - buf->direct.buf = NULL; - buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE; - buf->npages = buf->nbufs; + buf->direct.buf = NULL; + buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE; + buf->npages = buf->nbufs; buf->page_shift = PAGE_SHIFT; buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), gfp); @@ -619,28 +626,12 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, for (i = 0; i < buf->nbufs; ++i) { buf->page_list[i].buf = - dma_alloc_coherent(&dev->persist->pdev->dev, - PAGE_SIZE, - &t, gfp); + dma_zalloc_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, &t, gfp); if (!buf->page_list[i].buf) goto err_free; buf->page_list[i].map = t; - - memset(buf->page_list[i].buf, 0, PAGE_SIZE); - } - - if (BITS_PER_LONG == 64) { - struct page **pages; - pages = kmalloc(sizeof *pages * buf->nbufs, gfp); - if (!pages) - goto err_free; - for (i = 0; i < buf->nbufs; ++i) - pages[i] = virt_to_page(buf->page_list[i].buf); - buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL); - kfree(pages); - if (!buf->direct.buf) - goto err_free; } } @@ -655,15 +646,11 @@ EXPORT_SYMBOL_GPL(mlx4_buf_alloc); void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) { - int i; - - if (buf->nbufs == 1) + if (buf->nbufs == 1) { dma_free_coherent(&dev->persist->pdev->dev, size, - buf->direct.buf, - buf->direct.map); - else { - if (BITS_PER_LONG == 64) - vunmap(buf->direct.buf); + buf->direct.buf, buf->direct.map); + } else { + int i; for (i = 0; i < buf->nbufs; ++i) if (buf->page_list[i].buf) @@ -789,7 +776,7 @@ void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db) EXPORT_SYMBOL_GPL(mlx4_db_free); int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, - int size, int max_direct) + int size) { int err; @@ -799,7 +786,7 @@ int mlx4_alloc_hwq_res(struct mlx4_dev *dev, struct mlx4_hwq_resources *wqres, *wqres->db.db = 0; - err = mlx4_buf_alloc(dev, size, max_direct, &wqres->buf, GFP_KERNEL); + err = mlx4_buf_direct_alloc(dev, size, &wqres->buf, GFP_KERNEL); if (err) goto err_db; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index af975a2b74c6..132cea655920 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -73,22 +73,16 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, */ set_dev_node(&mdev->dev->persist->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres, - cq->buf_size, 2 * PAGE_SIZE); + cq->buf_size); set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) goto err_cq; - err = mlx4_en_map_buffer(&cq->wqres.buf); - if (err) - goto err_res; - cq->buf = (struct mlx4_cqe *)cq->wqres.buf.direct.buf; *pcq = cq; return 0; -err_res: - mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); err_cq: kfree(cq); *pcq = NULL; @@ -177,7 +171,6 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq) struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_cq *cq = *pcq; - mlx4_en_unmap_buffer(&cq->wqres.buf); mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size); if (mlx4_is_eq_vector_valid(mdev->dev, priv->port, cq->vector) && cq->is_tx == RX) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index b4b258c8ca47..92e0624f4cf0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1856,6 +1856,7 @@ static void mlx4_en_restart(struct work_struct *work) en_dbg(DRV, priv, "Watchdog task called for port %d\n", priv->port); + rtnl_lock(); mutex_lock(&mdev->state_lock); if (priv->port_up) { mlx4_en_stop_port(dev, 1); @@ -1863,6 +1864,7 @@ static void mlx4_en_restart(struct work_struct *work) en_err(priv, "Failed restarting port %d\n", priv->port); } mutex_unlock(&mdev->state_lock); + rtnl_unlock(); } static void mlx4_en_clear_stats(struct net_device *dev) @@ -2355,8 +2357,12 @@ out: } /* set offloads */ - priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM | - NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL; + priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXCSUM | + NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; } static void mlx4_en_del_vxlan_offloads(struct work_struct *work) @@ -2365,8 +2371,12 @@ static void mlx4_en_del_vxlan_offloads(struct work_struct *work) struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, vxlan_del_task); /* unset offloads */ - priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM | - NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL); + priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | + NETIF_F_RXCSUM | + NETIF_F_TSO | NETIF_F_TSO6 | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL); ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC, 0); @@ -2425,7 +2435,18 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb, netdev_features_t features) { features = vlan_features_check(skb, features); - return vxlan_features_check(skb, features); + features = vxlan_features_check(skb, features); + + /* The ConnectX-3 doesn't support outer IPv6 checksums but it does + * support inner IPv6 checksums and segmentation so we need to + * strip that feature if this is an IPv6 encapsulated frame. + */ + if (skb->encapsulation && + (skb->ip_summed == CHECKSUM_PARTIAL) && + (ip_hdr(skb)->version != 4)) + features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + + return features; } #endif @@ -2907,7 +2928,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, /* Allocate page for receive rings */ err = mlx4_alloc_hwq_res(mdev->dev, &priv->res, - MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE); + MLX4_EN_PAGE_SIZE); if (err) { en_err(priv, "Failed to allocate page for rx qps\n"); goto out; @@ -2990,8 +3011,13 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, } if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { - dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; - dev->features |= NETIF_F_GSO_UDP_TUNNEL; + dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; + dev->features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; + dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; } mdev->pndev[port] = dev; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index 02e925d6f734..a6b0db0e0383 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -107,37 +107,6 @@ int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp, return ret; } -int mlx4_en_map_buffer(struct mlx4_buf *buf) -{ - struct page **pages; - int i; - - if (BITS_PER_LONG == 64 || buf->nbufs == 1) - return 0; - - pages = kmalloc(sizeof *pages * buf->nbufs, GFP_KERNEL); - if (!pages) - return -ENOMEM; - - for (i = 0; i < buf->nbufs; ++i) - pages[i] = virt_to_page(buf->page_list[i].buf); - - buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, PAGE_KERNEL); - kfree(pages); - if (!buf->direct.buf) - return -ENOMEM; - - return 0; -} - -void mlx4_en_unmap_buffer(struct mlx4_buf *buf) -{ - if (BITS_PER_LONG == 64 || buf->nbufs == 1) - return; - - vunmap(buf->direct.buf); -} - void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event) { return; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index ca3a38421ee7..c1b3a9c8cf3b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -394,17 +394,11 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, /* Allocate HW buffers on provided NUMA node */ set_dev_node(&mdev->dev->persist->pdev->dev, node); - err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, - ring->buf_size, 2 * PAGE_SIZE); + err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) goto err_info; - err = mlx4_en_map_buffer(&ring->wqres.buf); - if (err) { - en_err(priv, "Failed to map RX buffer\n"); - goto err_hwq; - } ring->buf = ring->wqres.buf.direct.buf; ring->hwtstamp_rx_filter = priv->hwtstamp_config.rx_filter; @@ -412,8 +406,6 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, *pring = ring; return 0; -err_hwq: - mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); err_info: vfree(ring->rx_info); ring->rx_info = NULL; @@ -517,7 +509,6 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rx_ring *ring = *pring; - mlx4_en_unmap_buffer(&ring->wqres.buf); mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); vfree(ring->rx_info); ring->rx_info = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index a386f047c1af..f6e61570cb2c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -41,6 +41,7 @@ #include <linux/vmalloc.h> #include <linux/tcp.h> #include <linux/ip.h> +#include <linux/ipv6.h> #include <linux/moduleparam.h> #include "mlx4_en.h" @@ -93,20 +94,13 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, /* Allocate HW buffers on provided NUMA node */ set_dev_node(&mdev->dev->persist->pdev->dev, node); - err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, - 2 * PAGE_SIZE); + err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) { en_err(priv, "Failed allocating hwq resources\n"); goto err_bounce; } - err = mlx4_en_map_buffer(&ring->wqres.buf); - if (err) { - en_err(priv, "Failed to map TX buffer\n"); - goto err_hwq_res; - } - ring->buf = ring->wqres.buf.direct.buf; en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d buf_size:%d dma:%llx\n", @@ -117,7 +111,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, MLX4_RESERVE_ETH_BF_QP); if (err) { en_err(priv, "failed reserving qp for TX ring\n"); - goto err_map; + goto err_hwq_res; } err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL); @@ -154,8 +148,6 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, err_reserve: mlx4_qp_release_range(mdev->dev, ring->qpn, 1); -err_map: - mlx4_en_unmap_buffer(&ring->wqres.buf); err_hwq_res: mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); err_bounce: @@ -182,7 +174,6 @@ void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, mlx4_qp_remove(mdev->dev, &ring->qp); mlx4_qp_free(mdev->dev, &ring->qp); mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1); - mlx4_en_unmap_buffer(&ring->wqres.buf); mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); kfree(ring->bounce_buf); ring->bounce_buf = NULL; @@ -920,8 +911,18 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) tx_ind, fragptr); if (skb->encapsulation) { - struct iphdr *ipv4 = (struct iphdr *)skb_inner_network_header(skb); - if (ipv4->protocol == IPPROTO_TCP || ipv4->protocol == IPPROTO_UDP) + union { + struct iphdr *v4; + struct ipv6hdr *v6; + unsigned char *hdr; + } ip; + u8 proto; + + ip.hdr = skb_inner_network_header(skb); + proto = (ip.v4->version == 4) ? ip.v4->protocol : + ip.v6->nexthdr; + + if (proto == IPPROTO_TCP || proto == IPPROTO_UDP) op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP | MLX4_WQE_CTRL_ILP); else op_own |= cpu_to_be32(MLX4_WQE_CTRL_IIP); diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index 6aa73972d478..f2d0920018a5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1102,7 +1102,7 @@ int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], struct mlx4_cmd_mailbox *mailbox; struct mlx4_mgm *mgm; u32 members_count; - int index, prev; + int index = -1, prev; int link = 0; int i; int err; @@ -1181,7 +1181,7 @@ int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], goto out; out: - if (prot == MLX4_PROT_ETH) { + if (prot == MLX4_PROT_ETH && index != -1) { /* manage the steering entry for promisc mode */ if (new_entry) err = new_steering_entry(dev, port, steer, diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 63b1aeae2c03..cc84e09f324a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -672,8 +672,6 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, int is_tx, int rss, int qpn, int cqn, int user_prio, struct mlx4_qp_context *context); void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event); -int mlx4_en_map_buffer(struct mlx4_buf *buf); -void mlx4_en_unmap_buffer(struct mlx4_buf *buf); int mlx4_en_change_mcast_lb(struct mlx4_en_priv *priv, struct mlx4_qp *qp, int loopback); void mlx4_en_calc_rx_buf(struct net_device *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index f5c3b9465d8d..1cf722eba607 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -31,10 +31,3 @@ config MLX5_CORE_EN_DCB This flag is depended on the kernel's DCB support. If unsure, set to Y - -config MLX5_CORE_EN_VXLAN - bool "VXLAN offloads Support" - default y - depends on MLX5_CORE_EN && VXLAN && !(MLX5_CORE=y && VXLAN=m) - ---help--- - Say Y here if you want to use VXLAN offloads in the driver. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index bf65b71c7360..9ea7b583096a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -2,11 +2,10 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o + mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o fs_counters.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \ en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \ - en_txrx.o en_clock.o en_tc.o + en_txrx.o en_clock.o vxlan.o en_tc.o en_arfs.o -mlx5_core-$(CONFIG_MLX5_CORE_EN_VXLAN) += vxlan.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index eb926e1ee71c..dcd2df6518de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -294,6 +294,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DESTROY_FLOW_TABLE: case MLX5_CMD_OP_DESTROY_FLOW_GROUP: case MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_DEALLOC_FLOW_COUNTER: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -395,6 +396,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_FLOW_GROUP: case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: + case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: + case MLX5_CMD_OP_QUERY_FLOW_COUNTER: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -406,178 +409,142 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, const char *mlx5_command_str(int command) { - switch (command) { - case MLX5_CMD_OP_QUERY_HCA_VPORT_CONTEXT: - return "QUERY_HCA_VPORT_CONTEXT"; - - case MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT: - return "MODIFY_HCA_VPORT_CONTEXT"; - - case MLX5_CMD_OP_QUERY_HCA_CAP: - return "QUERY_HCA_CAP"; - - case MLX5_CMD_OP_SET_HCA_CAP: - return "SET_HCA_CAP"; - - case MLX5_CMD_OP_QUERY_ADAPTER: - return "QUERY_ADAPTER"; - - case MLX5_CMD_OP_INIT_HCA: - return "INIT_HCA"; - - case MLX5_CMD_OP_TEARDOWN_HCA: - return "TEARDOWN_HCA"; - - case MLX5_CMD_OP_ENABLE_HCA: - return "MLX5_CMD_OP_ENABLE_HCA"; - - case MLX5_CMD_OP_DISABLE_HCA: - return "MLX5_CMD_OP_DISABLE_HCA"; - - case MLX5_CMD_OP_QUERY_PAGES: - return "QUERY_PAGES"; - - case MLX5_CMD_OP_MANAGE_PAGES: - return "MANAGE_PAGES"; - - case MLX5_CMD_OP_CREATE_MKEY: - return "CREATE_MKEY"; - - case MLX5_CMD_OP_QUERY_MKEY: - return "QUERY_MKEY"; - - case MLX5_CMD_OP_DESTROY_MKEY: - return "DESTROY_MKEY"; - - case MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS: - return "QUERY_SPECIAL_CONTEXTS"; - - case MLX5_CMD_OP_CREATE_EQ: - return "CREATE_EQ"; - - case MLX5_CMD_OP_DESTROY_EQ: - return "DESTROY_EQ"; - - case MLX5_CMD_OP_QUERY_EQ: - return "QUERY_EQ"; - - case MLX5_CMD_OP_CREATE_CQ: - return "CREATE_CQ"; - - case MLX5_CMD_OP_DESTROY_CQ: - return "DESTROY_CQ"; - - case MLX5_CMD_OP_QUERY_CQ: - return "QUERY_CQ"; - - case MLX5_CMD_OP_MODIFY_CQ: - return "MODIFY_CQ"; - - case MLX5_CMD_OP_CREATE_QP: - return "CREATE_QP"; - - case MLX5_CMD_OP_DESTROY_QP: - return "DESTROY_QP"; - - case MLX5_CMD_OP_RST2INIT_QP: - return "RST2INIT_QP"; - - case MLX5_CMD_OP_INIT2RTR_QP: - return "INIT2RTR_QP"; - - case MLX5_CMD_OP_RTR2RTS_QP: - return "RTR2RTS_QP"; - - case MLX5_CMD_OP_RTS2RTS_QP: - return "RTS2RTS_QP"; - - case MLX5_CMD_OP_SQERR2RTS_QP: - return "SQERR2RTS_QP"; - - case MLX5_CMD_OP_2ERR_QP: - return "2ERR_QP"; - - case MLX5_CMD_OP_2RST_QP: - return "2RST_QP"; - - case MLX5_CMD_OP_QUERY_QP: - return "QUERY_QP"; - - case MLX5_CMD_OP_MAD_IFC: - return "MAD_IFC"; - - case MLX5_CMD_OP_INIT2INIT_QP: - return "INIT2INIT_QP"; - - case MLX5_CMD_OP_CREATE_PSV: - return "CREATE_PSV"; - - case MLX5_CMD_OP_DESTROY_PSV: - return "DESTROY_PSV"; - - case MLX5_CMD_OP_CREATE_SRQ: - return "CREATE_SRQ"; - - case MLX5_CMD_OP_DESTROY_SRQ: - return "DESTROY_SRQ"; - - case MLX5_CMD_OP_QUERY_SRQ: - return "QUERY_SRQ"; - - case MLX5_CMD_OP_ARM_RQ: - return "ARM_RQ"; - - case MLX5_CMD_OP_CREATE_XRC_SRQ: - return "CREATE_XRC_SRQ"; - - case MLX5_CMD_OP_DESTROY_XRC_SRQ: - return "DESTROY_XRC_SRQ"; - - case MLX5_CMD_OP_QUERY_XRC_SRQ: - return "QUERY_XRC_SRQ"; - - case MLX5_CMD_OP_ARM_XRC_SRQ: - return "ARM_XRC_SRQ"; - - case MLX5_CMD_OP_ALLOC_PD: - return "ALLOC_PD"; - - case MLX5_CMD_OP_DEALLOC_PD: - return "DEALLOC_PD"; - - case MLX5_CMD_OP_ALLOC_UAR: - return "ALLOC_UAR"; - - case MLX5_CMD_OP_DEALLOC_UAR: - return "DEALLOC_UAR"; - - case MLX5_CMD_OP_ATTACH_TO_MCG: - return "ATTACH_TO_MCG"; - - case MLX5_CMD_OP_DETTACH_FROM_MCG: - return "DETTACH_FROM_MCG"; - - case MLX5_CMD_OP_ALLOC_XRCD: - return "ALLOC_XRCD"; - - case MLX5_CMD_OP_DEALLOC_XRCD: - return "DEALLOC_XRCD"; - - case MLX5_CMD_OP_ACCESS_REG: - return "MLX5_CMD_OP_ACCESS_REG"; - - case MLX5_CMD_OP_SET_WOL_ROL: - return "SET_WOL_ROL"; - - case MLX5_CMD_OP_QUERY_WOL_ROL: - return "QUERY_WOL_ROL"; - - case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: - return "ADD_VXLAN_UDP_DPORT"; - - case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT: - return "DELETE_VXLAN_UDP_DPORT"; +#define MLX5_COMMAND_STR_CASE(__cmd) case MLX5_CMD_OP_ ## __cmd: return #__cmd + switch (command) { + MLX5_COMMAND_STR_CASE(QUERY_HCA_CAP); + MLX5_COMMAND_STR_CASE(QUERY_ADAPTER); + MLX5_COMMAND_STR_CASE(INIT_HCA); + MLX5_COMMAND_STR_CASE(TEARDOWN_HCA); + MLX5_COMMAND_STR_CASE(ENABLE_HCA); + MLX5_COMMAND_STR_CASE(DISABLE_HCA); + MLX5_COMMAND_STR_CASE(QUERY_PAGES); + MLX5_COMMAND_STR_CASE(MANAGE_PAGES); + MLX5_COMMAND_STR_CASE(SET_HCA_CAP); + MLX5_COMMAND_STR_CASE(QUERY_ISSI); + MLX5_COMMAND_STR_CASE(SET_ISSI); + MLX5_COMMAND_STR_CASE(CREATE_MKEY); + MLX5_COMMAND_STR_CASE(QUERY_MKEY); + MLX5_COMMAND_STR_CASE(DESTROY_MKEY); + MLX5_COMMAND_STR_CASE(QUERY_SPECIAL_CONTEXTS); + MLX5_COMMAND_STR_CASE(PAGE_FAULT_RESUME); + MLX5_COMMAND_STR_CASE(CREATE_EQ); + MLX5_COMMAND_STR_CASE(DESTROY_EQ); + MLX5_COMMAND_STR_CASE(QUERY_EQ); + MLX5_COMMAND_STR_CASE(GEN_EQE); + MLX5_COMMAND_STR_CASE(CREATE_CQ); + MLX5_COMMAND_STR_CASE(DESTROY_CQ); + MLX5_COMMAND_STR_CASE(QUERY_CQ); + MLX5_COMMAND_STR_CASE(MODIFY_CQ); + MLX5_COMMAND_STR_CASE(CREATE_QP); + MLX5_COMMAND_STR_CASE(DESTROY_QP); + MLX5_COMMAND_STR_CASE(RST2INIT_QP); + MLX5_COMMAND_STR_CASE(INIT2RTR_QP); + MLX5_COMMAND_STR_CASE(RTR2RTS_QP); + MLX5_COMMAND_STR_CASE(RTS2RTS_QP); + MLX5_COMMAND_STR_CASE(SQERR2RTS_QP); + MLX5_COMMAND_STR_CASE(2ERR_QP); + MLX5_COMMAND_STR_CASE(2RST_QP); + MLX5_COMMAND_STR_CASE(QUERY_QP); + MLX5_COMMAND_STR_CASE(SQD_RTS_QP); + MLX5_COMMAND_STR_CASE(INIT2INIT_QP); + MLX5_COMMAND_STR_CASE(CREATE_PSV); + MLX5_COMMAND_STR_CASE(DESTROY_PSV); + MLX5_COMMAND_STR_CASE(CREATE_SRQ); + MLX5_COMMAND_STR_CASE(DESTROY_SRQ); + MLX5_COMMAND_STR_CASE(QUERY_SRQ); + MLX5_COMMAND_STR_CASE(ARM_RQ); + MLX5_COMMAND_STR_CASE(CREATE_XRC_SRQ); + MLX5_COMMAND_STR_CASE(DESTROY_XRC_SRQ); + MLX5_COMMAND_STR_CASE(QUERY_XRC_SRQ); + MLX5_COMMAND_STR_CASE(ARM_XRC_SRQ); + MLX5_COMMAND_STR_CASE(CREATE_DCT); + MLX5_COMMAND_STR_CASE(DESTROY_DCT); + MLX5_COMMAND_STR_CASE(DRAIN_DCT); + MLX5_COMMAND_STR_CASE(QUERY_DCT); + MLX5_COMMAND_STR_CASE(ARM_DCT_FOR_KEY_VIOLATION); + MLX5_COMMAND_STR_CASE(QUERY_VPORT_STATE); + MLX5_COMMAND_STR_CASE(MODIFY_VPORT_STATE); + MLX5_COMMAND_STR_CASE(QUERY_ESW_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(MODIFY_ESW_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(QUERY_NIC_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(MODIFY_NIC_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(QUERY_ROCE_ADDRESS); + MLX5_COMMAND_STR_CASE(SET_ROCE_ADDRESS); + MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(MODIFY_HCA_VPORT_CONTEXT); + MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_GID); + MLX5_COMMAND_STR_CASE(QUERY_HCA_VPORT_PKEY); + MLX5_COMMAND_STR_CASE(QUERY_VPORT_COUNTER); + MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER); + MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER); + MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER); + MLX5_COMMAND_STR_CASE(ALLOC_PD); + MLX5_COMMAND_STR_CASE(DEALLOC_PD); + MLX5_COMMAND_STR_CASE(ALLOC_UAR); + MLX5_COMMAND_STR_CASE(DEALLOC_UAR); + MLX5_COMMAND_STR_CASE(CONFIG_INT_MODERATION); + MLX5_COMMAND_STR_CASE(ACCESS_REG); + MLX5_COMMAND_STR_CASE(ATTACH_TO_MCG); + MLX5_COMMAND_STR_CASE(DETTACH_FROM_MCG); + MLX5_COMMAND_STR_CASE(GET_DROPPED_PACKET_LOG); + MLX5_COMMAND_STR_CASE(MAD_IFC); + MLX5_COMMAND_STR_CASE(QUERY_MAD_DEMUX); + MLX5_COMMAND_STR_CASE(SET_MAD_DEMUX); + MLX5_COMMAND_STR_CASE(NOP); + MLX5_COMMAND_STR_CASE(ALLOC_XRCD); + MLX5_COMMAND_STR_CASE(DEALLOC_XRCD); + MLX5_COMMAND_STR_CASE(ALLOC_TRANSPORT_DOMAIN); + MLX5_COMMAND_STR_CASE(DEALLOC_TRANSPORT_DOMAIN); + MLX5_COMMAND_STR_CASE(QUERY_CONG_STATUS); + MLX5_COMMAND_STR_CASE(MODIFY_CONG_STATUS); + MLX5_COMMAND_STR_CASE(QUERY_CONG_PARAMS); + MLX5_COMMAND_STR_CASE(MODIFY_CONG_PARAMS); + MLX5_COMMAND_STR_CASE(QUERY_CONG_STATISTICS); + MLX5_COMMAND_STR_CASE(ADD_VXLAN_UDP_DPORT); + MLX5_COMMAND_STR_CASE(DELETE_VXLAN_UDP_DPORT); + MLX5_COMMAND_STR_CASE(SET_L2_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(QUERY_L2_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(DELETE_L2_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(SET_WOL_ROL); + MLX5_COMMAND_STR_CASE(QUERY_WOL_ROL); + MLX5_COMMAND_STR_CASE(CREATE_TIR); + MLX5_COMMAND_STR_CASE(MODIFY_TIR); + MLX5_COMMAND_STR_CASE(DESTROY_TIR); + MLX5_COMMAND_STR_CASE(QUERY_TIR); + MLX5_COMMAND_STR_CASE(CREATE_SQ); + MLX5_COMMAND_STR_CASE(MODIFY_SQ); + MLX5_COMMAND_STR_CASE(DESTROY_SQ); + MLX5_COMMAND_STR_CASE(QUERY_SQ); + MLX5_COMMAND_STR_CASE(CREATE_RQ); + MLX5_COMMAND_STR_CASE(MODIFY_RQ); + MLX5_COMMAND_STR_CASE(DESTROY_RQ); + MLX5_COMMAND_STR_CASE(QUERY_RQ); + MLX5_COMMAND_STR_CASE(CREATE_RMP); + MLX5_COMMAND_STR_CASE(MODIFY_RMP); + MLX5_COMMAND_STR_CASE(DESTROY_RMP); + MLX5_COMMAND_STR_CASE(QUERY_RMP); + MLX5_COMMAND_STR_CASE(CREATE_TIS); + MLX5_COMMAND_STR_CASE(MODIFY_TIS); + MLX5_COMMAND_STR_CASE(DESTROY_TIS); + MLX5_COMMAND_STR_CASE(QUERY_TIS); + MLX5_COMMAND_STR_CASE(CREATE_RQT); + MLX5_COMMAND_STR_CASE(MODIFY_RQT); + MLX5_COMMAND_STR_CASE(DESTROY_RQT); + MLX5_COMMAND_STR_CASE(QUERY_RQT); + MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ROOT); + MLX5_COMMAND_STR_CASE(CREATE_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(DESTROY_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(CREATE_FLOW_GROUP); + MLX5_COMMAND_STR_CASE(DESTROY_FLOW_GROUP); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_GROUP); + MLX5_COMMAND_STR_CASE(SET_FLOW_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(DELETE_FLOW_TABLE_ENTRY); + MLX5_COMMAND_STR_CASE(ALLOC_FLOW_COUNTER); + MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER); + MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER); default: return "unknown command opcode"; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 24344aafbd36..e8a6c3325b39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -46,6 +46,9 @@ #include <linux/rhashtable.h> #include "wq.h" #include "mlx5_core.h" +#include "en_stats.h" + +#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) #define MLX5E_MAX_NUM_TC 8 @@ -57,12 +60,30 @@ #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd +#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x1 +#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x4 +#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 + +#define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */ +#define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS 8 /* >= 6, HW restriction */ +#define MLX5_MPWRQ_LOG_WQE_SZ 17 +#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ + MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) +#define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) +#define MLX5_MPWRQ_STRIDES_PER_PAGE (MLX5_MPWRQ_NUM_STRIDES >> \ + MLX5_MPWRQ_WQE_PAGE_ORDER) +#define MLX5_CHANNEL_MAX_NUM_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8) * \ + BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW)) +#define MLX5_UMR_ALIGN (2048) +#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (128) + #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80 +#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2 #define MLX5E_LOG_INDIR_RQT_SIZE 0x7 #define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE) @@ -73,233 +94,70 @@ #define MLX5E_NUM_MAIN_GROUPS 9 +static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) +{ + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW, + wq_size / 2); + default: + return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES, + wq_size / 2); + } +} + +static inline int mlx5_min_log_rq_size(int wq_type) +{ + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW; + default: + return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; + } +} + +static inline int mlx5_max_log_rq_size(int wq_type) +{ + switch (wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + return MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW; + default: + return MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; + } +} + +struct mlx5e_tx_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_eth_seg eth; +}; + +struct mlx5e_rx_wqe { + struct mlx5_wqe_srq_next_seg next; + struct mlx5_wqe_data_seg data; +}; + +struct mlx5e_umr_wqe { + struct mlx5_wqe_ctrl_seg ctrl; + struct mlx5_wqe_umr_ctrl_seg uctrl; + struct mlx5_mkey_seg mkc; + struct mlx5_wqe_data_seg data; +}; + #ifdef CONFIG_MLX5_CORE_EN_DCB #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ #define MLX5E_MIN_BW_ALLOC 1 /* Min percentage of BW allocation */ #endif -static const char vport_strings[][ETH_GSTRING_LEN] = { - /* vport statistics */ - "rx_packets", - "rx_bytes", - "tx_packets", - "tx_bytes", - "rx_error_packets", - "rx_error_bytes", - "tx_error_packets", - "tx_error_bytes", - "rx_unicast_packets", - "rx_unicast_bytes", - "tx_unicast_packets", - "tx_unicast_bytes", - "rx_multicast_packets", - "rx_multicast_bytes", - "tx_multicast_packets", - "tx_multicast_bytes", - "rx_broadcast_packets", - "rx_broadcast_bytes", - "tx_broadcast_packets", - "tx_broadcast_bytes", - - /* SW counters */ - "tso_packets", - "tso_bytes", - "tso_inner_packets", - "tso_inner_bytes", - "lro_packets", - "lro_bytes", - "rx_csum_good", - "rx_csum_none", - "rx_csum_sw", - "tx_csum_offload", - "tx_csum_inner", - "tx_queue_stopped", - "tx_queue_wake", - "tx_queue_dropped", - "rx_wqe_err", -}; - -struct mlx5e_vport_stats { - /* HW counters */ - u64 rx_packets; - u64 rx_bytes; - u64 tx_packets; - u64 tx_bytes; - u64 rx_error_packets; - u64 rx_error_bytes; - u64 tx_error_packets; - u64 tx_error_bytes; - u64 rx_unicast_packets; - u64 rx_unicast_bytes; - u64 tx_unicast_packets; - u64 tx_unicast_bytes; - u64 rx_multicast_packets; - u64 rx_multicast_bytes; - u64 tx_multicast_packets; - u64 tx_multicast_bytes; - u64 rx_broadcast_packets; - u64 rx_broadcast_bytes; - u64 tx_broadcast_packets; - u64 tx_broadcast_bytes; - - /* SW counters */ - u64 tso_packets; - u64 tso_bytes; - u64 tso_inner_packets; - u64 tso_inner_bytes; - u64 lro_packets; - u64 lro_bytes; - u64 rx_csum_good; - u64 rx_csum_none; - u64 rx_csum_sw; - u64 tx_csum_offload; - u64 tx_csum_inner; - u64 tx_queue_stopped; - u64 tx_queue_wake; - u64 tx_queue_dropped; - u64 rx_wqe_err; - -#define NUM_VPORT_COUNTERS 35 -}; - -static const char pport_strings[][ETH_GSTRING_LEN] = { - /* IEEE802.3 counters */ - "frames_tx", - "frames_rx", - "check_seq_err", - "alignment_err", - "octets_tx", - "octets_received", - "multicast_xmitted", - "broadcast_xmitted", - "multicast_rx", - "broadcast_rx", - "in_range_len_errors", - "out_of_range_len", - "too_long_errors", - "symbol_err", - "mac_control_tx", - "mac_control_rx", - "unsupported_op_rx", - "pause_ctrl_rx", - "pause_ctrl_tx", - - /* RFC2863 counters */ - "in_octets", - "in_ucast_pkts", - "in_discards", - "in_errors", - "in_unknown_protos", - "out_octets", - "out_ucast_pkts", - "out_discards", - "out_errors", - "in_multicast_pkts", - "in_broadcast_pkts", - "out_multicast_pkts", - "out_broadcast_pkts", - - /* RFC2819 counters */ - "drop_events", - "octets", - "pkts", - "broadcast_pkts", - "multicast_pkts", - "crc_align_errors", - "undersize_pkts", - "oversize_pkts", - "fragments", - "jabbers", - "collisions", - "p64octets", - "p65to127octets", - "p128to255octets", - "p256to511octets", - "p512to1023octets", - "p1024to1518octets", - "p1519to2047octets", - "p2048to4095octets", - "p4096to8191octets", - "p8192to10239octets", -}; - -#define NUM_IEEE_802_3_COUNTERS 19 -#define NUM_RFC_2863_COUNTERS 13 -#define NUM_RFC_2819_COUNTERS 21 -#define NUM_PPORT_COUNTERS (NUM_IEEE_802_3_COUNTERS + \ - NUM_RFC_2863_COUNTERS + \ - NUM_RFC_2819_COUNTERS) - -struct mlx5e_pport_stats { - __be64 IEEE_802_3_counters[NUM_IEEE_802_3_COUNTERS]; - __be64 RFC_2863_counters[NUM_RFC_2863_COUNTERS]; - __be64 RFC_2819_counters[NUM_RFC_2819_COUNTERS]; -}; - -static const char rq_stats_strings[][ETH_GSTRING_LEN] = { - "packets", - "bytes", - "csum_none", - "csum_sw", - "lro_packets", - "lro_bytes", - "wqe_err" -}; - -struct mlx5e_rq_stats { - u64 packets; - u64 bytes; - u64 csum_none; - u64 csum_sw; - u64 lro_packets; - u64 lro_bytes; - u64 wqe_err; -#define NUM_RQ_STATS 7 -}; - -static const char sq_stats_strings[][ETH_GSTRING_LEN] = { - "packets", - "bytes", - "tso_packets", - "tso_bytes", - "tso_inner_packets", - "tso_inner_bytes", - "csum_offload_inner", - "nop", - "csum_offload_none", - "stopped", - "wake", - "dropped", -}; - -struct mlx5e_sq_stats { - /* commonly accessed in data path */ - u64 packets; - u64 bytes; - u64 tso_packets; - u64 tso_bytes; - u64 tso_inner_packets; - u64 tso_inner_bytes; - u64 csum_offload_inner; - u64 nop; - /* less likely accessed in data path */ - u64 csum_offload_none; - u64 stopped; - u64 wake; - u64 dropped; -#define NUM_SQ_STATS 12 -}; - -struct mlx5e_stats { - struct mlx5e_vport_stats vport; - struct mlx5e_pport_stats pport; -}; - struct mlx5e_params { u8 log_sq_size; + u8 rq_wq_type; + u8 mpwqe_log_stride_sz; + u8 mpwqe_log_num_strides; u8 log_rq_size; u16 num_channels; u8 num_tc; + bool rx_cqe_compress_admin; + bool rx_cqe_compress; u16 rx_cq_moderation_usec; u16 rx_cq_moderation_pkts; u16 tx_cq_moderation_usec; @@ -311,6 +169,7 @@ struct mlx5e_params { u8 rss_hfunc; u8 toeplitz_hash_key[40]; u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE]; + bool vlan_strip_disable; #ifdef CONFIG_MLX5_CORE_EN_DCB struct ieee_ets ets; #endif @@ -331,6 +190,7 @@ struct mlx5e_tstamp { enum { MLX5E_RQ_STATE_POST_WQES_ENABLE, + MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, }; struct mlx5e_cq { @@ -343,32 +203,88 @@ struct mlx5e_cq { struct mlx5e_channel *channel; struct mlx5e_priv *priv; + /* cqe decompression */ + struct mlx5_cqe64 title; + struct mlx5_mini_cqe8 mini_arr[MLX5_MINI_CQE_ARRAY_SIZE]; + u8 mini_arr_idx; + u16 decmprs_left; + u16 decmprs_wqe_counter; + /* control */ struct mlx5_wq_ctrl wq_ctrl; } ____cacheline_aligned_in_smp; +struct mlx5e_rq; +typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe); +typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, + u16 ix); + +struct mlx5e_dma_info { + struct page *page; + dma_addr_t addr; +}; + struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; u32 wqe_sz; struct sk_buff **skb; + struct mlx5e_mpw_info *wqe_info; + __be32 mkey_be; + __be32 umr_mkey_be; struct device *pdev; struct net_device *netdev; struct mlx5e_tstamp *tstamp; struct mlx5e_rq_stats stats; struct mlx5e_cq cq; + mlx5e_fp_handle_rx_cqe handle_rx_cqe; + mlx5e_fp_alloc_wqe alloc_wqe; unsigned long state; int ix; /* control */ struct mlx5_wq_ctrl wq_ctrl; + u8 wq_type; + u32 mpwqe_stride_sz; + u32 mpwqe_num_strides; u32 rqn; struct mlx5e_channel *channel; struct mlx5e_priv *priv; } ____cacheline_aligned_in_smp; +struct mlx5e_umr_dma_info { + __be64 *mtt; + __be64 *mtt_no_align; + dma_addr_t mtt_addr; + struct mlx5e_dma_info *dma_info; +}; + +struct mlx5e_mpw_info { + union { + struct mlx5e_dma_info dma_info; + struct mlx5e_umr_dma_info umr; + }; + u16 consumed_strides; + u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE]; + + void (*dma_pre_sync)(struct device *pdev, + struct mlx5e_mpw_info *wi, + u32 wqe_offset, u32 len); + void (*add_skb_frag)(struct mlx5e_rq *rq, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 frag_offset, u32 len); + void (*copy_skb_header)(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 offset, + u32 headlen); + void (*free_wqe)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); +}; + struct mlx5e_tx_wqe_info { u32 num_bytes; u8 num_wqebbs; @@ -391,6 +307,11 @@ enum { MLX5E_SQ_STATE_BF_ENABLE, }; +struct mlx5e_ico_wqe_info { + u8 opcode; + u8 num_wqebbs; +}; + struct mlx5e_sq { /* data path */ @@ -432,6 +353,7 @@ struct mlx5e_sq { struct mlx5_uar uar; struct mlx5e_channel *channel; int tc; + struct mlx5e_ico_wqe_info *ico_wqe_info; } ____cacheline_aligned_in_smp; static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) @@ -448,6 +370,7 @@ struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; struct mlx5e_sq sq[MLX5E_MAX_NUM_TC]; + struct mlx5e_sq icosq; /* internal control operations */ struct napi_struct napi; struct device *pdev; struct net_device *netdev; @@ -474,42 +397,42 @@ enum mlx5e_traffic_types { MLX5E_TT_IPV6, MLX5E_TT_ANY, MLX5E_NUM_TT, + MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY, }; -#define IS_HASHING_TT(tt) (tt != MLX5E_TT_ANY) +enum { + MLX5E_STATE_ASYNC_EVENTS_ENABLE, + MLX5E_STATE_OPENED, + MLX5E_STATE_DESTROYING, +}; -enum mlx5e_rqt_ix { - MLX5E_INDIRECTION_RQT, - MLX5E_SINGLE_RQ_RQT, - MLX5E_NUM_RQT, +struct mlx5e_vxlan_db { + spinlock_t lock; /* protect vxlan table */ + struct radix_tree_root tree; }; -struct mlx5e_eth_addr_info { +struct mlx5e_l2_rule { u8 addr[ETH_ALEN + 2]; - u32 tt_vec; - struct mlx5_flow_rule *ft_rule[MLX5E_NUM_TT]; + struct mlx5_flow_rule *rule; }; -#define MLX5E_ETH_ADDR_HASH_SIZE (1 << BITS_PER_BYTE) - -struct mlx5e_eth_addr_db { - struct hlist_head netdev_uc[MLX5E_ETH_ADDR_HASH_SIZE]; - struct hlist_head netdev_mc[MLX5E_ETH_ADDR_HASH_SIZE]; - struct mlx5e_eth_addr_info broadcast; - struct mlx5e_eth_addr_info allmulti; - struct mlx5e_eth_addr_info promisc; - bool broadcast_enabled; - bool allmulti_enabled; - bool promisc_enabled; +struct mlx5e_flow_table { + int num_groups; + struct mlx5_flow_table *t; + struct mlx5_flow_group **g; }; -enum { - MLX5E_STATE_ASYNC_EVENTS_ENABLE, - MLX5E_STATE_OPENED, - MLX5E_STATE_DESTROYING, +#define MLX5E_L2_ADDR_HASH_SIZE BIT(BITS_PER_BYTE) + +struct mlx5e_tc_table { + struct mlx5_flow_table *t; + + struct rhashtable_params ht_params; + struct rhashtable ht; }; -struct mlx5e_vlan_db { +struct mlx5e_vlan_table { + struct mlx5e_flow_table ft; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; struct mlx5_flow_rule *active_vlans_rule[VLAN_N_VID]; struct mlx5_flow_rule *untagged_rule; @@ -517,29 +440,74 @@ struct mlx5e_vlan_db { bool filter_disabled; }; -struct mlx5e_vxlan_db { - spinlock_t lock; /* protect vxlan table */ - struct radix_tree_root tree; +struct mlx5e_l2_table { + struct mlx5e_flow_table ft; + struct hlist_head netdev_uc[MLX5E_L2_ADDR_HASH_SIZE]; + struct hlist_head netdev_mc[MLX5E_L2_ADDR_HASH_SIZE]; + struct mlx5e_l2_rule broadcast; + struct mlx5e_l2_rule allmulti; + struct mlx5e_l2_rule promisc; + bool broadcast_enabled; + bool allmulti_enabled; + bool promisc_enabled; }; -struct mlx5e_flow_table { - int num_groups; - struct mlx5_flow_table *t; - struct mlx5_flow_group **g; +/* L3/L4 traffic type classifier */ +struct mlx5e_ttc_table { + struct mlx5e_flow_table ft; + struct mlx5_flow_rule *rules[MLX5E_NUM_TT]; }; -struct mlx5e_tc_flow_table { - struct mlx5_flow_table *t; +#define ARFS_HASH_SHIFT BITS_PER_BYTE +#define ARFS_HASH_SIZE BIT(BITS_PER_BYTE) +struct arfs_table { + struct mlx5e_flow_table ft; + struct mlx5_flow_rule *default_rule; + struct hlist_head rules_hash[ARFS_HASH_SIZE]; +}; - struct rhashtable_params ht_params; - struct rhashtable ht; +enum arfs_type { + ARFS_IPV4_TCP, + ARFS_IPV6_TCP, + ARFS_IPV4_UDP, + ARFS_IPV6_UDP, + ARFS_NUM_TYPES, +}; + +struct mlx5e_arfs_tables { + struct arfs_table arfs_tables[ARFS_NUM_TYPES]; + /* Protect aRFS rules list */ + spinlock_t arfs_lock; + struct list_head rules; + int last_filter_id; + struct workqueue_struct *wq; +}; + +/* NIC prio FTS */ +enum { + MLX5E_VLAN_FT_LEVEL = 0, + MLX5E_L2_FT_LEVEL, + MLX5E_TTC_FT_LEVEL, + MLX5E_ARFS_FT_LEVEL }; -struct mlx5e_flow_tables { - struct mlx5_flow_namespace *ns; - struct mlx5e_tc_flow_table tc; - struct mlx5e_flow_table vlan; - struct mlx5e_flow_table main; +struct mlx5e_flow_steering { + struct mlx5_flow_namespace *ns; + struct mlx5e_tc_table tc; + struct mlx5e_vlan_table vlan; + struct mlx5e_l2_table l2; + struct mlx5e_ttc_table ttc; + struct mlx5e_arfs_tables arfs; +}; + +struct mlx5e_direct_tir { + u32 tirn; + u32 rqtn; +}; + +enum { + MLX5E_TC_PRIO = 0, + MLX5E_NIC_PRIO }; struct mlx5e_priv { @@ -554,19 +522,17 @@ struct mlx5e_priv { u32 pdn; u32 tdn; struct mlx5_core_mkey mkey; + struct mlx5_core_mkey umr_mkey; struct mlx5e_rq drop_rq; struct mlx5e_channel **channel; u32 tisn[MLX5E_MAX_NUM_TC]; - u32 rqtn[MLX5E_NUM_RQT]; - u32 tirn[MLX5E_NUM_TT]; + u32 indir_rqtn; + u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_direct_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; - struct mlx5e_flow_tables fts; - struct mlx5e_eth_addr_db eth_addr; - struct mlx5e_vlan_db vlan; -#ifdef CONFIG_MLX5_CORE_EN_VXLAN + struct mlx5e_flow_steering fs; struct mlx5e_vxlan_db vxlan; -#endif struct mlx5e_params params; struct workqueue_struct *wq; @@ -578,18 +544,7 @@ struct mlx5e_priv { struct net_device *netdev; struct mlx5e_stats stats; struct mlx5e_tstamp tstamp; -}; - -#define MLX5E_NET_IP_ALIGN 2 - -struct mlx5e_tx_wqe { - struct mlx5_wqe_ctrl_seg ctrl; - struct mlx5_wqe_eth_seg eth; -}; - -struct mlx5e_rx_wqe { - struct mlx5_wqe_srq_next_seg next; - struct mlx5_wqe_data_seg data; + u16 q_counter; }; enum mlx5e_link_mode { @@ -634,14 +589,35 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); + +void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); +void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); +int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq); +void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u16 byte_cnt, + struct mlx5e_mpw_info *wi, + struct sk_buff *skb); +void mlx5e_complete_rx_fragmented_mpwqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u16 byte_cnt, + struct mlx5e_mpw_info *wi, + struct sk_buff *skb); +void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi); +void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi); struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); void mlx5e_update_stats(struct mlx5e_priv *priv); -int mlx5e_create_flow_tables(struct mlx5e_priv *priv); -void mlx5e_destroy_flow_tables(struct mlx5e_priv *priv); -void mlx5e_init_eth_addr(struct mlx5e_priv *priv); +int mlx5e_create_flow_steering(struct mlx5e_priv *priv); +void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); +void mlx5e_init_l2_addr(struct mlx5e_priv *priv); +void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft); void mlx5e_set_rx_mode_work(struct work_struct *work); void mlx5e_fill_hwstamp(struct mlx5e_tstamp *clock, u64 timestamp, @@ -650,6 +626,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv); void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv); int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr); int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr); +void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val); int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); @@ -658,16 +635,20 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv); void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv); -int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix); +int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd); + +int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix); void mlx5e_build_tir_ctx_hash(void *tirc, struct mlx5e_priv *priv); int mlx5e_open_locked(struct net_device *netdev); int mlx5e_close_locked(struct net_device *netdev); -void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, +void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, + u32 *indirection_rqt, int len, int num_channels); +int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, - struct mlx5e_tx_wqe *wqe, int bf_sz) + struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz) { u16 ofst = MLX5_BF_OFFSET + sq->bf_offset; @@ -681,9 +662,9 @@ static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, */ wmb(); if (bf_sz) - __iowrite64_copy(sq->uar_map + ofst, &wqe->ctrl, bf_sz); + __iowrite64_copy(sq->uar_map + ofst, ctrl, bf_sz); else - mlx5_write64((__be32 *)&wqe->ctrl, sq->uar_map + ofst, NULL); + mlx5_write64((__be32 *)ctrl, sq->uar_map + ofst, NULL); /* flush the write-combining mapped buffer */ wmb(); @@ -704,12 +685,43 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) MLX5E_MAX_NUM_CHANNELS); } +static inline int mlx5e_get_mtt_octw(int npages) +{ + return ALIGN(npages, 8) / 2; +} + extern const struct ethtool_ops mlx5e_ethtool_ops; #ifdef CONFIG_MLX5_CORE_EN_DCB extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops; int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets); #endif +#ifndef CONFIG_RFS_ACCEL +static inline int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) {} + +static inline int mlx5e_arfs_enable(struct mlx5e_priv *priv) +{ + return -ENOTSUPP; +} + +static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv) +{ + return -ENOTSUPP; +} +#else +int mlx5e_arfs_create_tables(struct mlx5e_priv *priv); +void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv); +int mlx5e_arfs_enable(struct mlx5e_priv *priv); +int mlx5e_arfs_disable(struct mlx5e_priv *priv); +int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id); +#endif + u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c new file mode 100644 index 000000000000..3515e78ba68f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -0,0 +1,752 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifdef CONFIG_RFS_ACCEL + +#include <linux/hash.h> +#include <linux/mlx5/fs.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "en.h" + +struct arfs_tuple { + __be16 etype; + u8 ip_proto; + union { + __be32 src_ipv4; + struct in6_addr src_ipv6; + }; + union { + __be32 dst_ipv4; + struct in6_addr dst_ipv6; + }; + __be16 src_port; + __be16 dst_port; +}; + +struct arfs_rule { + struct mlx5e_priv *priv; + struct work_struct arfs_work; + struct mlx5_flow_rule *rule; + struct hlist_node hlist; + int rxq; + /* Flow ID passed to ndo_rx_flow_steer */ + int flow_id; + /* Filter ID returned by ndo_rx_flow_steer */ + int filter_id; + struct arfs_tuple tuple; +}; + +#define mlx5e_for_each_arfs_rule(hn, tmp, arfs_tables, i, j) \ + for (i = 0; i < ARFS_NUM_TYPES; i++) \ + mlx5e_for_each_hash_arfs_rule(hn, tmp, arfs_tables[i].rules_hash, j) + +#define mlx5e_for_each_hash_arfs_rule(hn, tmp, hash, j) \ + for (j = 0; j < ARFS_HASH_SIZE; j++) \ + hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist) + +static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type) +{ + switch (type) { + case ARFS_IPV4_TCP: + return MLX5E_TT_IPV4_TCP; + case ARFS_IPV4_UDP: + return MLX5E_TT_IPV4_UDP; + case ARFS_IPV6_TCP: + return MLX5E_TT_IPV6_TCP; + case ARFS_IPV6_UDP: + return MLX5E_TT_IPV6_UDP; + default: + return -EINVAL; + } +} + +static int arfs_disable(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest; + u32 *tirn = priv->indir_tirn; + int err = 0; + int tt; + int i; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + for (i = 0; i < ARFS_NUM_TYPES; i++) { + dest.tir_num = tirn[i]; + tt = arfs_get_tt(i); + /* Modify ttc rules destination to bypass the aRFS tables*/ + err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt], + &dest); + if (err) { + netdev_err(priv->netdev, + "%s: modify ttc destination failed\n", + __func__); + return err; + } + } + return 0; +} + +static void arfs_del_rules(struct mlx5e_priv *priv); + +int mlx5e_arfs_disable(struct mlx5e_priv *priv) +{ + arfs_del_rules(priv); + + return arfs_disable(priv); +} + +int mlx5e_arfs_enable(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest; + int err = 0; + int tt; + int i; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + for (i = 0; i < ARFS_NUM_TYPES; i++) { + dest.ft = priv->fs.arfs.arfs_tables[i].ft.t; + tt = arfs_get_tt(i); + /* Modify ttc rules destination to point on the aRFS FTs */ + err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt], + &dest); + if (err) { + netdev_err(priv->netdev, + "%s: modify ttc destination failed err=%d\n", + __func__, err); + arfs_disable(priv); + return err; + } + } + return 0; +} + +static void arfs_destroy_table(struct arfs_table *arfs_t) +{ + mlx5_del_flow_rule(arfs_t->default_rule); + mlx5e_destroy_flow_table(&arfs_t->ft); +} + +void mlx5e_arfs_destroy_tables(struct mlx5e_priv *priv) +{ + int i; + + if (!(priv->netdev->hw_features & NETIF_F_NTUPLE)) + return; + + arfs_del_rules(priv); + destroy_workqueue(priv->fs.arfs.wq); + for (i = 0; i < ARFS_NUM_TYPES; i++) { + if (!IS_ERR_OR_NULL(priv->fs.arfs.arfs_tables[i].ft.t)) + arfs_destroy_table(&priv->fs.arfs.arfs_tables[i]); + } +} + +static int arfs_add_default_rule(struct mlx5e_priv *priv, + enum arfs_type type) +{ + struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type]; + struct mlx5_flow_destination dest; + u8 match_criteria_enable = 0; + u32 *tirn = priv->indir_tirn; + u32 *match_criteria; + u32 *match_value; + int err = 0; + + match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!match_value || !match_criteria) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto out; + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + switch (type) { + case ARFS_IPV4_TCP: + dest.tir_num = tirn[MLX5E_TT_IPV4_TCP]; + break; + case ARFS_IPV4_UDP: + dest.tir_num = tirn[MLX5E_TT_IPV4_UDP]; + break; + case ARFS_IPV6_TCP: + dest.tir_num = tirn[MLX5E_TT_IPV6_TCP]; + break; + case ARFS_IPV6_UDP: + dest.tir_num = tirn[MLX5E_TT_IPV6_UDP]; + break; + default: + err = -EINVAL; + goto out; + } + + arfs_t->default_rule = mlx5_add_flow_rule(arfs_t->ft.t, match_criteria_enable, + match_criteria, match_value, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + &dest); + if (IS_ERR(arfs_t->default_rule)) { + err = PTR_ERR(arfs_t->default_rule); + arfs_t->default_rule = NULL; + netdev_err(priv->netdev, "%s: add rule failed, arfs type=%d\n", + __func__, type); + } +out: + kvfree(match_criteria); + kvfree(match_value); + return err; +} + +#define MLX5E_ARFS_NUM_GROUPS 2 +#define MLX5E_ARFS_GROUP1_SIZE BIT(12) +#define MLX5E_ARFS_GROUP2_SIZE BIT(0) +#define MLX5E_ARFS_TABLE_SIZE (MLX5E_ARFS_GROUP1_SIZE +\ + MLX5E_ARFS_GROUP2_SIZE) +static int arfs_create_groups(struct mlx5e_flow_table *ft, + enum arfs_type type) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + void *outer_headers_c; + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_ARFS_NUM_GROUPS, + sizeof(*ft->g), GFP_KERNEL); + in = mlx5_vzalloc(inlen); + if (!in || !ft->g) { + kvfree(ft->g); + kvfree(in); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, + outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ethertype); + switch (type) { + case ARFS_IPV4_TCP: + case ARFS_IPV6_TCP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport); + break; + case ARFS_IPV4_UDP: + case ARFS_IPV6_UDP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_sport); + break; + default: + err = -EINVAL; + goto out; + } + + switch (type) { + case ARFS_IPV4_TCP: + case ARFS_IPV4_UDP: + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + break; + case ARFS_IPV6_TCP: + case ARFS_IPV6_UDP: + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, 16); + memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, 16); + break; + default: + err = -EINVAL; + goto out; + } + + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_ARFS_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5E_ARFS_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); + if (IS_ERR(ft->g[ft->num_groups])) + goto err; + ft->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; +out: + kvfree(in); + + return err; +} + +static int arfs_create_table(struct mlx5e_priv *priv, + enum arfs_type type) +{ + struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; + struct mlx5e_flow_table *ft = &arfs->arfs_tables[type].ft; + int err; + + ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, + MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = arfs_create_groups(ft, type); + if (err) + goto err; + + err = arfs_add_default_rule(priv, type); + if (err) + goto err; + + return 0; +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +int mlx5e_arfs_create_tables(struct mlx5e_priv *priv) +{ + int err = 0; + int i; + + if (!(priv->netdev->hw_features & NETIF_F_NTUPLE)) + return 0; + + spin_lock_init(&priv->fs.arfs.arfs_lock); + INIT_LIST_HEAD(&priv->fs.arfs.rules); + priv->fs.arfs.wq = create_singlethread_workqueue("mlx5e_arfs"); + if (!priv->fs.arfs.wq) + return -ENOMEM; + + for (i = 0; i < ARFS_NUM_TYPES; i++) { + err = arfs_create_table(priv, i); + if (err) + goto err; + } + return 0; +err: + mlx5e_arfs_destroy_tables(priv); + return err; +} + +#define MLX5E_ARFS_EXPIRY_QUOTA 60 + +static void arfs_may_expire_flow(struct mlx5e_priv *priv) +{ + struct arfs_rule *arfs_rule; + struct hlist_node *htmp; + int quota = 0; + int i; + int j; + + HLIST_HEAD(del_list); + spin_lock_bh(&priv->fs.arfs.arfs_lock); + mlx5e_for_each_arfs_rule(arfs_rule, htmp, priv->fs.arfs.arfs_tables, i, j) { + if (quota++ > MLX5E_ARFS_EXPIRY_QUOTA) + break; + if (!work_pending(&arfs_rule->arfs_work) && + rps_may_expire_flow(priv->netdev, + arfs_rule->rxq, arfs_rule->flow_id, + arfs_rule->filter_id)) { + hlist_del_init(&arfs_rule->hlist); + hlist_add_head(&arfs_rule->hlist, &del_list); + } + } + spin_unlock_bh(&priv->fs.arfs.arfs_lock); + hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) { + if (arfs_rule->rule) + mlx5_del_flow_rule(arfs_rule->rule); + hlist_del(&arfs_rule->hlist); + kfree(arfs_rule); + } +} + +static void arfs_del_rules(struct mlx5e_priv *priv) +{ + struct hlist_node *htmp; + struct arfs_rule *rule; + int i; + int j; + + HLIST_HEAD(del_list); + spin_lock_bh(&priv->fs.arfs.arfs_lock); + mlx5e_for_each_arfs_rule(rule, htmp, priv->fs.arfs.arfs_tables, i, j) { + hlist_del_init(&rule->hlist); + hlist_add_head(&rule->hlist, &del_list); + } + spin_unlock_bh(&priv->fs.arfs.arfs_lock); + + hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) { + cancel_work_sync(&rule->arfs_work); + if (rule->rule) + mlx5_del_flow_rule(rule->rule); + hlist_del(&rule->hlist); + kfree(rule); + } +} + +static struct hlist_head * +arfs_hash_bucket(struct arfs_table *arfs_t, __be16 src_port, + __be16 dst_port) +{ + unsigned long l; + int bucket_idx; + + l = (__force unsigned long)src_port | + ((__force unsigned long)dst_port << 2); + + bucket_idx = hash_long(l, ARFS_HASH_SHIFT); + + return &arfs_t->rules_hash[bucket_idx]; +} + +static u8 arfs_get_ip_proto(const struct sk_buff *skb) +{ + return (skb->protocol == htons(ETH_P_IP)) ? + ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr; +} + +static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs, + u8 ip_proto, __be16 etype) +{ + if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_TCP) + return &arfs->arfs_tables[ARFS_IPV4_TCP]; + if (etype == htons(ETH_P_IP) && ip_proto == IPPROTO_UDP) + return &arfs->arfs_tables[ARFS_IPV4_UDP]; + if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_TCP) + return &arfs->arfs_tables[ARFS_IPV6_TCP]; + if (etype == htons(ETH_P_IPV6) && ip_proto == IPPROTO_UDP) + return &arfs->arfs_tables[ARFS_IPV6_UDP]; + + return NULL; +} + +static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv, + struct arfs_rule *arfs_rule) +{ + struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; + struct arfs_tuple *tuple = &arfs_rule->tuple; + struct mlx5_flow_rule *rule = NULL; + struct mlx5_flow_destination dest; + struct arfs_table *arfs_table; + u8 match_criteria_enable = 0; + struct mlx5_flow_table *ft; + u32 *match_criteria; + u32 *match_value; + int err = 0; + + match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!match_value || !match_criteria) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto out; + } + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.ethertype); + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, + ntohs(tuple->etype)); + arfs_table = arfs_get_table(arfs, tuple->ip_proto, tuple->etype); + if (!arfs_table) { + err = -EINVAL; + goto out; + } + + ft = arfs_table->ft.t; + if (tuple->ip_proto == IPPROTO_TCP) { + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.tcp_dport); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.tcp_sport); + MLX5_SET(fte_match_param, match_value, outer_headers.tcp_dport, + ntohs(tuple->dst_port)); + MLX5_SET(fte_match_param, match_value, outer_headers.tcp_sport, + ntohs(tuple->src_port)); + } else { + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.udp_dport); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.udp_sport); + MLX5_SET(fte_match_param, match_value, outer_headers.udp_dport, + ntohs(tuple->dst_port)); + MLX5_SET(fte_match_param, match_value, outer_headers.udp_sport, + ntohs(tuple->src_port)); + } + if (tuple->etype == htons(ETH_P_IP)) { + memcpy(MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4), + &tuple->src_ipv4, + 4); + memcpy(MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4), + &tuple->dst_ipv4, + 4); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4); + } else { + memcpy(MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + &tuple->src_ipv6, + 16); + memcpy(MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + &tuple->dst_ipv6, + 16); + memset(MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6), + 0xff, + 16); + memset(MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6), + 0xff, + 16); + } + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn; + rule = mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria, + match_value, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + &dest); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n", + __func__, arfs_rule->filter_id, arfs_rule->rxq, err); + } + +out: + kvfree(match_criteria); + kvfree(match_value); + return err ? ERR_PTR(err) : rule; +} + +static void arfs_modify_rule_rq(struct mlx5e_priv *priv, + struct mlx5_flow_rule *rule, u16 rxq) +{ + struct mlx5_flow_destination dst; + int err = 0; + + dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dst.tir_num = priv->direct_tir[rxq].tirn; + err = mlx5_modify_rule_destination(rule, &dst); + if (err) + netdev_warn(priv->netdev, + "Failed to modfiy aRFS rule destination to rq=%d\n", rxq); +} + +static void arfs_handle_work(struct work_struct *work) +{ + struct arfs_rule *arfs_rule = container_of(work, + struct arfs_rule, + arfs_work); + struct mlx5e_priv *priv = arfs_rule->priv; + struct mlx5_flow_rule *rule; + + mutex_lock(&priv->state_lock); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + spin_lock_bh(&priv->fs.arfs.arfs_lock); + hlist_del(&arfs_rule->hlist); + spin_unlock_bh(&priv->fs.arfs.arfs_lock); + + mutex_unlock(&priv->state_lock); + kfree(arfs_rule); + goto out; + } + mutex_unlock(&priv->state_lock); + + if (!arfs_rule->rule) { + rule = arfs_add_rule(priv, arfs_rule); + if (IS_ERR(rule)) + goto out; + arfs_rule->rule = rule; + } else { + arfs_modify_rule_rq(priv, arfs_rule->rule, + arfs_rule->rxq); + } +out: + arfs_may_expire_flow(priv); +} + +/* return L4 destination port from ip4/6 packets */ +static __be16 arfs_get_dst_port(const struct sk_buff *skb) +{ + char *transport_header; + + transport_header = skb_transport_header(skb); + if (arfs_get_ip_proto(skb) == IPPROTO_TCP) + return ((struct tcphdr *)transport_header)->dest; + return ((struct udphdr *)transport_header)->dest; +} + +/* return L4 source port from ip4/6 packets */ +static __be16 arfs_get_src_port(const struct sk_buff *skb) +{ + char *transport_header; + + transport_header = skb_transport_header(skb); + if (arfs_get_ip_proto(skb) == IPPROTO_TCP) + return ((struct tcphdr *)transport_header)->source; + return ((struct udphdr *)transport_header)->source; +} + +static struct arfs_rule *arfs_alloc_rule(struct mlx5e_priv *priv, + struct arfs_table *arfs_t, + const struct sk_buff *skb, + u16 rxq, u32 flow_id) +{ + struct arfs_rule *rule; + struct arfs_tuple *tuple; + + rule = kzalloc(sizeof(*rule), GFP_ATOMIC); + if (!rule) + return NULL; + + rule->priv = priv; + rule->rxq = rxq; + INIT_WORK(&rule->arfs_work, arfs_handle_work); + + tuple = &rule->tuple; + tuple->etype = skb->protocol; + if (tuple->etype == htons(ETH_P_IP)) { + tuple->src_ipv4 = ip_hdr(skb)->saddr; + tuple->dst_ipv4 = ip_hdr(skb)->daddr; + } else { + memcpy(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr, + sizeof(struct in6_addr)); + memcpy(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr, + sizeof(struct in6_addr)); + } + tuple->ip_proto = arfs_get_ip_proto(skb); + tuple->src_port = arfs_get_src_port(skb); + tuple->dst_port = arfs_get_dst_port(skb); + + rule->flow_id = flow_id; + rule->filter_id = priv->fs.arfs.last_filter_id++ % RPS_NO_FILTER; + + hlist_add_head(&rule->hlist, + arfs_hash_bucket(arfs_t, tuple->src_port, + tuple->dst_port)); + return rule; +} + +static bool arfs_cmp_ips(struct arfs_tuple *tuple, + const struct sk_buff *skb) +{ + if (tuple->etype == htons(ETH_P_IP) && + tuple->src_ipv4 == ip_hdr(skb)->saddr && + tuple->dst_ipv4 == ip_hdr(skb)->daddr) + return true; + if (tuple->etype == htons(ETH_P_IPV6) && + (!memcmp(&tuple->src_ipv6, &ipv6_hdr(skb)->saddr, + sizeof(struct in6_addr))) && + (!memcmp(&tuple->dst_ipv6, &ipv6_hdr(skb)->daddr, + sizeof(struct in6_addr)))) + return true; + return false; +} + +static struct arfs_rule *arfs_find_rule(struct arfs_table *arfs_t, + const struct sk_buff *skb) +{ + struct arfs_rule *arfs_rule; + struct hlist_head *head; + __be16 src_port = arfs_get_src_port(skb); + __be16 dst_port = arfs_get_dst_port(skb); + + head = arfs_hash_bucket(arfs_t, src_port, dst_port); + hlist_for_each_entry(arfs_rule, head, hlist) { + if (arfs_rule->tuple.src_port == src_port && + arfs_rule->tuple.dst_port == dst_port && + arfs_cmp_ips(&arfs_rule->tuple, skb)) { + return arfs_rule; + } + } + + return NULL; +} + +int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, + u16 rxq_index, u32 flow_id) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_arfs_tables *arfs = &priv->fs.arfs; + struct arfs_table *arfs_t; + struct arfs_rule *arfs_rule; + + if (skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6)) + return -EPROTONOSUPPORT; + + arfs_t = arfs_get_table(arfs, arfs_get_ip_proto(skb), skb->protocol); + if (!arfs_t) + return -EPROTONOSUPPORT; + + spin_lock_bh(&arfs->arfs_lock); + arfs_rule = arfs_find_rule(arfs_t, skb); + if (arfs_rule) { + if (arfs_rule->rxq == rxq_index) { + spin_unlock_bh(&arfs->arfs_lock); + return arfs_rule->filter_id; + } + arfs_rule->rxq = rxq_index; + } else { + arfs_rule = arfs_alloc_rule(priv, arfs_t, skb, + rxq_index, flow_id); + if (!arfs_rule) { + spin_unlock_bh(&arfs->arfs_lock); + return -ENOMEM; + } + } + queue_work(priv->fs.arfs.wq, &arfs_rule->arfs_work); + spin_unlock_bh(&arfs->arfs_lock); + return arfs_rule->filter_id; +} +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index 2018eebe1531..847a8f3ac2b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -93,6 +93,8 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) /* RX HW timestamp */ switch (config.rx_filter) { case HWTSTAMP_FILTER_NONE: + /* Reset CQE compression to Admin default */ + mlx5e_modify_rx_cqe_compression(priv, priv->params.rx_cqe_compress_admin); break; case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: @@ -108,6 +110,8 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr) case HWTSTAMP_FILTER_PTP_V2_EVENT: case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + /* Disable CQE compression */ + mlx5e_modify_rx_cqe_compression(priv, false); config.rx_filter = HWTSTAMP_FILTER_ALL; break; default: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 3036f279a8fd..b2db180ae2a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -174,8 +174,14 @@ static int mlx5e_dcbnl_ieee_getpfc(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + int i; pfc->pfc_cap = mlx5_max_tc(mdev) + 1; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + pfc->requests[i] = PPORT_PER_PRIO_GET(pstats, i, tx_pause); + pfc->indications[i] = PPORT_PER_PRIO_GET(pstats, i, rx_pause); + } return mlx5_query_port_pfc(mdev, &pfc->pfc_en, NULL); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 3476ab844634..fc7dcc03b1de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -165,26 +165,112 @@ static const struct { }, }; +static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u8 pfc_en_tx; + u8 pfc_en_rx; + int err; + + err = mlx5_query_port_pfc(mdev, &pfc_en_tx, &pfc_en_rx); + + return err ? 0 : pfc_en_tx | pfc_en_rx; +} + +#define MLX5E_NUM_Q_CNTRS(priv) (NUM_Q_COUNTERS * (!!priv->q_counter)) +#define MLX5E_NUM_RQ_STATS(priv) \ + (NUM_RQ_STATS * priv->params.num_channels * \ + test_bit(MLX5E_STATE_OPENED, &priv->state)) +#define MLX5E_NUM_SQ_STATS(priv) \ + (NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \ + test_bit(MLX5E_STATE_OPENED, &priv->state)) +#define MLX5E_NUM_PFC_COUNTERS(priv) hweight8(mlx5e_query_pfc_combined(priv)) + static int mlx5e_get_sset_count(struct net_device *dev, int sset) { struct mlx5e_priv *priv = netdev_priv(dev); switch (sset) { case ETH_SS_STATS: - return NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS + - priv->params.num_channels * NUM_RQ_STATS + - priv->params.num_channels * priv->params.num_tc * - NUM_SQ_STATS; + return NUM_SW_COUNTERS + + MLX5E_NUM_Q_CNTRS(priv) + + NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS + + MLX5E_NUM_RQ_STATS(priv) + + MLX5E_NUM_SQ_STATS(priv) + + MLX5E_NUM_PFC_COUNTERS(priv); /* fallthrough */ default: return -EOPNOTSUPP; } } +static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) +{ + int i, j, tc, prio, idx = 0; + unsigned long pfc_combined; + + /* SW counters */ + for (i = 0; i < NUM_SW_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].name); + + /* Q counters */ + for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].name); + + /* VPORT counters */ + for (i = 0; i < NUM_VPORT_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + vport_stats_desc[i].name); + + /* PPORT counters */ + for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_802_3_stats_desc[i].name); + + for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_2863_stats_desc[i].name); + + for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) + strcpy(data + (idx++) * ETH_GSTRING_LEN, + pport_2819_stats_desc[i].name); + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, "prio%d_%s", + prio, + pport_per_prio_traffic_stats_desc[i].name); + } + + pfc_combined = mlx5e_query_pfc_combined(priv); + for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + sprintf(data + (idx++) * ETH_GSTRING_LEN, "prio%d_%s", + prio, pport_per_prio_pfc_stats_desc[i].name); + } + } + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return; + + /* per channel counters */ + for (i = 0; i < priv->params.num_channels; i++) + for (j = 0; j < NUM_RQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, "rx%d_%s", i, + rq_stats_desc[j].name); + + for (tc = 0; tc < priv->params.num_tc; tc++) + for (i = 0; i < priv->params.num_channels; i++) + for (j = 0; j < NUM_SQ_STATS; j++) + sprintf(data + (idx++) * ETH_GSTRING_LEN, + "tx%d_%s", + priv->channeltc_to_txq_map[i][tc], + sq_stats_desc[j].name); +} + static void mlx5e_get_strings(struct net_device *dev, uint32_t stringset, uint8_t *data) { - int i, j, tc, idx = 0; struct mlx5e_priv *priv = netdev_priv(dev); switch (stringset) { @@ -195,30 +281,7 @@ static void mlx5e_get_strings(struct net_device *dev, break; case ETH_SS_STATS: - /* VPORT counters */ - for (i = 0; i < NUM_VPORT_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - vport_strings[i]); - - /* PPORT counters */ - for (i = 0; i < NUM_PPORT_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_strings[i]); - - /* per channel counters */ - for (i = 0; i < priv->params.num_channels; i++) - for (j = 0; j < NUM_RQ_STATS; j++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, - "rx%d_%s", i, rq_stats_strings[j]); - - for (tc = 0; tc < priv->params.num_tc; tc++) - for (i = 0; i < priv->params.num_channels; i++) - for (j = 0; j < NUM_SQ_STATS; j++) - sprintf(data + - (idx++) * ETH_GSTRING_LEN, - "tx%d_%s", - priv->channeltc_to_txq_map[i][tc], - sq_stats_strings[j]); + mlx5e_fill_stats_strings(priv, data); break; } } @@ -227,7 +290,8 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct mlx5e_priv *priv = netdev_priv(dev); - int i, j, tc, idx = 0; + int i, j, tc, prio, idx = 0; + unsigned long pfc_combined; if (!data) return; @@ -237,33 +301,68 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev, mlx5e_update_stats(priv); mutex_unlock(&priv->state_lock); + for (i = 0; i < NUM_SW_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, + sw_stats_desc, i); + + for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) + data[idx++] = MLX5E_READ_CTR32_CPU(&priv->stats.qcnt, + q_stats_desc, i); + for (i = 0; i < NUM_VPORT_COUNTERS; i++) - data[idx++] = ((u64 *)&priv->stats.vport)[i]; + data[idx++] = MLX5E_READ_CTR64_BE(priv->stats.vport.query_vport_out, + vport_stats_desc, i); + + for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.IEEE_802_3_counters, + pport_802_3_stats_desc, i); - for (i = 0; i < NUM_PPORT_COUNTERS; i++) - data[idx++] = be64_to_cpu(((__be64 *)&priv->stats.pport)[i]); + for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2863_counters, + pport_2863_stats_desc, i); + + for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters, + pport_2819_stats_desc, i); + + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], + pport_per_prio_traffic_stats_desc, i); + } + + pfc_combined = mlx5e_query_pfc_combined(priv); + for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { + for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { + data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio], + pport_per_prio_pfc_stats_desc, i); + } + } + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return; /* per channel counters */ for (i = 0; i < priv->params.num_channels; i++) for (j = 0; j < NUM_RQ_STATS; j++) - data[idx++] = !test_bit(MLX5E_STATE_OPENED, - &priv->state) ? 0 : - ((u64 *)&priv->channel[i]->rq.stats)[j]; + data[idx++] = + MLX5E_READ_CTR64_CPU(&priv->channel[i]->rq.stats, + rq_stats_desc, j); for (tc = 0; tc < priv->params.num_tc; tc++) for (i = 0; i < priv->params.num_channels; i++) for (j = 0; j < NUM_SQ_STATS; j++) - data[idx++] = !test_bit(MLX5E_STATE_OPENED, - &priv->state) ? 0 : - ((u64 *)&priv->channel[i]->sq[tc].stats)[j]; + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->channel[i]->sq[tc].stats, + sq_stats_desc, j); } static void mlx5e_get_ringparam(struct net_device *dev, struct ethtool_ringparam *param) { struct mlx5e_priv *priv = netdev_priv(dev); + int rq_wq_type = priv->params.rq_wq_type; - param->rx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE; + param->rx_max_pending = 1 << mlx5_max_log_rq_size(rq_wq_type); param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE; param->rx_pending = 1 << priv->params.log_rq_size; param->tx_pending = 1 << priv->params.log_sq_size; @@ -274,6 +373,7 @@ static int mlx5e_set_ringparam(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); bool was_opened; + int rq_wq_type = priv->params.rq_wq_type; u16 min_rx_wqes; u8 log_rq_size; u8 log_sq_size; @@ -289,16 +389,16 @@ static int mlx5e_set_ringparam(struct net_device *dev, __func__); return -EINVAL; } - if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) { + if (param->rx_pending < (1 << mlx5_min_log_rq_size(rq_wq_type))) { netdev_info(dev, "%s: rx_pending (%d) < min (%d)\n", __func__, param->rx_pending, - 1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE); + 1 << mlx5_min_log_rq_size(rq_wq_type)); return -EINVAL; } - if (param->rx_pending > (1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE)) { + if (param->rx_pending > (1 << mlx5_max_log_rq_size(rq_wq_type))) { netdev_info(dev, "%s: rx_pending (%d) > max (%d)\n", __func__, param->rx_pending, - 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE); + 1 << mlx5_max_log_rq_size(rq_wq_type)); return -EINVAL; } if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) { @@ -316,8 +416,7 @@ static int mlx5e_set_ringparam(struct net_device *dev, log_rq_size = order_base_2(param->rx_pending); log_sq_size = order_base_2(param->tx_pending); - min_rx_wqes = min_t(u16, param->rx_pending - 1, - MLX5E_PARAMS_DEFAULT_MIN_RX_WQES); + min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, param->rx_pending); if (log_rq_size == priv->params.log_rq_size && log_sq_size == priv->params.log_sq_size && @@ -357,6 +456,7 @@ static int mlx5e_set_channels(struct net_device *dev, struct mlx5e_priv *priv = netdev_priv(dev); int ncv = mlx5e_get_max_num_channels(priv->mdev); unsigned int count = ch->combined_count; + bool arfs_enabled; bool was_opened; int err = 0; @@ -385,13 +485,27 @@ static int mlx5e_set_channels(struct net_device *dev, if (was_opened) mlx5e_close_locked(dev); + arfs_enabled = dev->features & NETIF_F_NTUPLE; + if (arfs_enabled) + mlx5e_arfs_disable(priv); + priv->params.num_channels = count; - mlx5e_build_default_indir_rqt(priv->params.indirection_rqt, + mlx5e_build_default_indir_rqt(priv->mdev, priv->params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, count); if (was_opened) err = mlx5e_open_locked(dev); + if (err) + goto out; + + if (arfs_enabled) { + err = mlx5e_arfs_enable(priv); + if (err) + netdev_err(dev, "%s: mlx5e_arfs_enable failed: %d\n", + __func__, err); + } +out: mutex_unlock(&priv->state_lock); return err; @@ -499,6 +613,25 @@ static u32 ptys2ethtool_supported_port(u32 eth_proto_cap) return 0; } +int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) +{ + u32 max_speed = 0; + u32 proto_cap; + int err; + int i; + + err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN); + if (err) + return err; + + for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) + if (proto_cap & MLX5E_PROT_MASK(i)) + max_speed = max(max_speed, ptys2ethtool_table[i].speed); + + *speed = max_speed; + return 0; +} + static void get_speed_duplex(struct net_device *netdev, u32 eth_proto_oper, struct ethtool_cmd *cmd) @@ -727,9 +860,8 @@ static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) MLX5_SET(modify_tir_in, in, bitmask.hash, 1); mlx5e_build_tir_ctx_hash(tirc, priv); - for (i = 0; i < MLX5E_NUM_TT; i++) - if (IS_HASHING_TT(i)) - mlx5_core_modify_tir(mdev, priv->tirn[i], in, inlen); + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) + mlx5_core_modify_tir(mdev, priv->indir_tirn[i], in, inlen); } static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, @@ -751,9 +883,11 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, mutex_lock(&priv->state_lock); if (indir) { + u32 rqtn = priv->indir_rqtn; + memcpy(priv->params.indirection_rqt, indir, sizeof(priv->params.indirection_rqt)); - mlx5e_redirect_rqt(priv, MLX5E_INDIRECTION_RQT); + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); } if (key) @@ -1036,6 +1170,108 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return mlx5_set_port_wol(mdev, mlx5_wol_mode); } +static int mlx5e_set_phys_id(struct net_device *dev, + enum ethtool_phys_id_state state) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 beacon_duration; + + if (!MLX5_CAP_GEN(mdev, beacon_led)) + return -EOPNOTSUPP; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + beacon_duration = MLX5_BEACON_DURATION_INF; + break; + case ETHTOOL_ID_INACTIVE: + beacon_duration = MLX5_BEACON_DURATION_OFF; + break; + default: + return -EOPNOTSUPP; + } + + return mlx5_set_port_beacon(mdev, beacon_duration); +} + +static int mlx5e_get_module_info(struct net_device *netdev, + struct ethtool_modinfo *modinfo) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *dev = priv->mdev; + int size_read = 0; + u8 data[4]; + + size_read = mlx5_query_module_eeprom(dev, 0, 2, data); + if (size_read < 2) + return -EIO; + + /* data[0] = identifier byte */ + switch (data[0]) { + case MLX5_MODULE_ID_QSFP: + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + break; + case MLX5_MODULE_ID_QSFP_PLUS: + case MLX5_MODULE_ID_QSFP28: + /* data[1] = revision id */ + if (data[0] == MLX5_MODULE_ID_QSFP28 || data[1] >= 0x3) { + modinfo->type = ETH_MODULE_SFF_8636; + modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; + } else { + modinfo->type = ETH_MODULE_SFF_8436; + modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; + } + break; + case MLX5_MODULE_ID_SFP: + modinfo->type = ETH_MODULE_SFF_8472; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + break; + default: + netdev_err(priv->netdev, "%s: cable type not recognized:0x%x\n", + __func__, data[0]); + return -EINVAL; + } + + return 0; +} + +static int mlx5e_get_module_eeprom(struct net_device *netdev, + struct ethtool_eeprom *ee, + u8 *data) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + int offset = ee->offset; + int size_read; + int i = 0; + + if (!ee->len) + return -EINVAL; + + memset(data, 0, ee->len); + + while (i < ee->len) { + size_read = mlx5_query_module_eeprom(mdev, offset, ee->len - i, + data + i); + + if (!size_read) + /* Done reading */ + return 0; + + if (size_read < 0) { + netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n", + __func__, size_read); + return 0; + } + + i += size_read; + offset += size_read; + } + + return 0; +} + const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, @@ -1060,6 +1296,9 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .get_pauseparam = mlx5e_get_pauseparam, .set_pauseparam = mlx5e_set_pauseparam, .get_ts_info = mlx5e_get_ts_info, + .set_phys_id = mlx5e_set_phys_id, .get_wol = mlx5e_get_wol, .set_wol = mlx5e_set_wol, + .get_module_info = mlx5e_get_module_info, + .get_module_eeprom = mlx5e_get_module_eeprom, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index d00a24203410..b32740092854 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -37,7 +37,10 @@ #include <linux/mlx5/fs.h> #include "en.h" -#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) +static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_l2_rule *ai, int type); +static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_l2_rule *ai); enum { MLX5E_FULLMATCH = 0, @@ -58,21 +61,21 @@ enum { MLX5E_ACTION_DEL = 2, }; -struct mlx5e_eth_addr_hash_node { +struct mlx5e_l2_hash_node { struct hlist_node hlist; u8 action; - struct mlx5e_eth_addr_info ai; + struct mlx5e_l2_rule ai; }; -static inline int mlx5e_hash_eth_addr(u8 *addr) +static inline int mlx5e_hash_l2(u8 *addr) { return addr[5]; } -static void mlx5e_add_eth_addr_to_hash(struct hlist_head *hash, u8 *addr) +static void mlx5e_add_l2_to_hash(struct hlist_head *hash, u8 *addr) { - struct mlx5e_eth_addr_hash_node *hn; - int ix = mlx5e_hash_eth_addr(addr); + struct mlx5e_l2_hash_node *hn; + int ix = mlx5e_hash_l2(addr); int found = 0; hlist_for_each_entry(hn, &hash[ix], hlist) @@ -96,371 +99,12 @@ static void mlx5e_add_eth_addr_to_hash(struct hlist_head *hash, u8 *addr) hlist_add_head(&hn->hlist, &hash[ix]); } -static void mlx5e_del_eth_addr_from_hash(struct mlx5e_eth_addr_hash_node *hn) +static void mlx5e_del_l2_from_hash(struct mlx5e_l2_hash_node *hn) { hlist_del(&hn->hlist); kfree(hn); } -static void mlx5e_del_eth_addr_from_flow_table(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_info *ai) -{ - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_TCP)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_TCP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_TCP)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_TCP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6_UDP)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6_UDP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4_UDP)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4_UDP]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV6)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV6]); - - if (ai->tt_vec & BIT(MLX5E_TT_IPV4)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_IPV4]); - - if (ai->tt_vec & BIT(MLX5E_TT_ANY)) - mlx5_del_flow_rule(ai->ft_rule[MLX5E_TT_ANY]); -} - -static int mlx5e_get_eth_addr_type(u8 *addr) -{ - if (is_unicast_ether_addr(addr)) - return MLX5E_UC; - - if ((addr[0] == 0x01) && - (addr[1] == 0x00) && - (addr[2] == 0x5e) && - !(addr[3] & 0x80)) - return MLX5E_MC_IPV4; - - if ((addr[0] == 0x33) && - (addr[1] == 0x33)) - return MLX5E_MC_IPV6; - - return MLX5E_MC_OTHER; -} - -static u32 mlx5e_get_tt_vec(struct mlx5e_eth_addr_info *ai, int type) -{ - int eth_addr_type; - u32 ret; - - switch (type) { - case MLX5E_FULLMATCH: - eth_addr_type = mlx5e_get_eth_addr_type(ai->addr); - switch (eth_addr_type) { - case MLX5E_UC: - ret = - BIT(MLX5E_TT_IPV4_TCP) | - BIT(MLX5E_TT_IPV6_TCP) | - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV4_IPSEC_AH) | - BIT(MLX5E_TT_IPV6_IPSEC_AH) | - BIT(MLX5E_TT_IPV4_IPSEC_ESP) | - BIT(MLX5E_TT_IPV6_IPSEC_ESP) | - BIT(MLX5E_TT_IPV4) | - BIT(MLX5E_TT_IPV6) | - BIT(MLX5E_TT_ANY) | - 0; - break; - - case MLX5E_MC_IPV4: - ret = - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV4) | - 0; - break; - - case MLX5E_MC_IPV6: - ret = - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV6) | - 0; - break; - - case MLX5E_MC_OTHER: - ret = - BIT(MLX5E_TT_ANY) | - 0; - break; - } - - break; - - case MLX5E_ALLMULTI: - ret = - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV4) | - BIT(MLX5E_TT_IPV6) | - BIT(MLX5E_TT_ANY) | - 0; - break; - - default: /* MLX5E_PROMISC */ - ret = - BIT(MLX5E_TT_IPV4_TCP) | - BIT(MLX5E_TT_IPV6_TCP) | - BIT(MLX5E_TT_IPV4_UDP) | - BIT(MLX5E_TT_IPV6_UDP) | - BIT(MLX5E_TT_IPV4_IPSEC_AH) | - BIT(MLX5E_TT_IPV6_IPSEC_AH) | - BIT(MLX5E_TT_IPV4_IPSEC_ESP) | - BIT(MLX5E_TT_IPV6_IPSEC_ESP) | - BIT(MLX5E_TT_IPV4) | - BIT(MLX5E_TT_IPV6) | - BIT(MLX5E_TT_ANY) | - 0; - break; - } - - return ret; -} - -static int __mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_info *ai, - int type, u32 *mc, u32 *mv) -{ - struct mlx5_flow_destination dest; - u8 match_criteria_enable = 0; - struct mlx5_flow_rule **rule_p; - struct mlx5_flow_table *ft = priv->fts.main.t; - u8 *mc_dmac = MLX5_ADDR_OF(fte_match_param, mc, - outer_headers.dmac_47_16); - u8 *mv_dmac = MLX5_ADDR_OF(fte_match_param, mv, - outer_headers.dmac_47_16); - u32 *tirn = priv->tirn; - u32 tt_vec; - int err = 0; - - dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - - switch (type) { - case MLX5E_FULLMATCH: - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - eth_broadcast_addr(mc_dmac); - ether_addr_copy(mv_dmac, ai->addr); - break; - - case MLX5E_ALLMULTI: - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - mc_dmac[0] = 0x01; - mv_dmac[0] = 0x01; - break; - - case MLX5E_PROMISC: - break; - } - - tt_vec = mlx5e_get_tt_vec(ai, type); - - if (tt_vec & BIT(MLX5E_TT_ANY)) { - rule_p = &ai->ft_rule[MLX5E_TT_ANY]; - dest.tir_num = tirn[MLX5E_TT_ANY]; - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_ANY); - } - - match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - - if (tt_vec & BIT(MLX5E_TT_IPV4)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV4]; - dest.tir_num = tirn[MLX5E_TT_IPV4]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IP); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4); - } - - if (tt_vec & BIT(MLX5E_TT_IPV6)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV6]; - dest.tir_num = tirn[MLX5E_TT_IPV6]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IPV6); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV6); - } - - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_UDP); - - if (tt_vec & BIT(MLX5E_TT_IPV4_UDP)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV4_UDP]; - dest.tir_num = tirn[MLX5E_TT_IPV4_UDP]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IP); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4_UDP); - } - - if (tt_vec & BIT(MLX5E_TT_IPV6_UDP)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV6_UDP]; - dest.tir_num = tirn[MLX5E_TT_IPV6_UDP]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IPV6); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV6_UDP); - } - - MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_TCP); - - if (tt_vec & BIT(MLX5E_TT_IPV4_TCP)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV4_TCP]; - dest.tir_num = tirn[MLX5E_TT_IPV4_TCP]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IP); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4_TCP); - } - - if (tt_vec & BIT(MLX5E_TT_IPV6_TCP)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV6_TCP]; - dest.tir_num = tirn[MLX5E_TT_IPV6_TCP]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IPV6); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - - ai->tt_vec |= BIT(MLX5E_TT_IPV6_TCP); - } - - MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_AH); - - if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_AH)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_AH]; - dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_AH]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IP); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_AH); - } - - if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_AH)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_AH]; - dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_AH]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IPV6); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_AH); - } - - MLX5_SET(fte_match_param, mv, outer_headers.ip_protocol, IPPROTO_ESP); - - if (tt_vec & BIT(MLX5E_TT_IPV4_IPSEC_ESP)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV4_IPSEC_ESP]; - dest.tir_num = tirn[MLX5E_TT_IPV4_IPSEC_ESP]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IP); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV4_IPSEC_ESP); - } - - if (tt_vec & BIT(MLX5E_TT_IPV6_IPSEC_ESP)) { - rule_p = &ai->ft_rule[MLX5E_TT_IPV6_IPSEC_ESP]; - dest.tir_num = tirn[MLX5E_TT_IPV6_IPSEC_ESP]; - MLX5_SET(fte_match_param, mv, outer_headers.ethertype, - ETH_P_IPV6); - *rule_p = mlx5_add_flow_rule(ft, match_criteria_enable, mc, mv, - MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, - MLX5_FS_DEFAULT_FLOW_TAG, &dest); - if (IS_ERR_OR_NULL(*rule_p)) - goto err_del_ai; - ai->tt_vec |= BIT(MLX5E_TT_IPV6_IPSEC_ESP); - } - - return 0; - -err_del_ai: - err = PTR_ERR(*rule_p); - *rule_p = NULL; - mlx5e_del_eth_addr_from_flow_table(priv, ai); - - return err; -} - -static int mlx5e_add_eth_addr_rule(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_info *ai, int type) -{ - u32 *match_criteria; - u32 *match_value; - int err = 0; - - match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); - if (!match_value || !match_criteria) { - netdev_err(priv->netdev, "%s: alloc failed\n", __func__); - err = -ENOMEM; - goto add_eth_addr_rule_out; - } - - err = __mlx5e_add_eth_addr_rule(priv, ai, type, match_criteria, - match_value); - -add_eth_addr_rule_out: - kvfree(match_criteria); - kvfree(match_value); - - return err; -} - static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) { struct net_device *ndev = priv->netdev; @@ -472,7 +116,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) int i; list_size = 0; - for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) + for_each_set_bit(vlan, priv->fs.vlan.active_vlans, VLAN_N_VID) list_size++; max_list_size = 1 << MLX5_CAP_GEN(priv->mdev, log_max_vlan_list); @@ -489,7 +133,7 @@ static int mlx5e_vport_context_update_vlans(struct mlx5e_priv *priv) return -ENOMEM; i = 0; - for_each_set_bit(vlan, priv->vlan.active_vlans, VLAN_N_VID) { + for_each_set_bit(vlan, priv->fs.vlan.active_vlans, VLAN_N_VID) { if (i >= list_size) break; vlans[i++] = vlan; @@ -514,28 +158,28 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv, enum mlx5e_vlan_rule_type rule_type, u16 vid, u32 *mc, u32 *mv) { - struct mlx5_flow_table *ft = priv->fts.vlan.t; + struct mlx5_flow_table *ft = priv->fs.vlan.ft.t; struct mlx5_flow_destination dest; u8 match_criteria_enable = 0; struct mlx5_flow_rule **rule_p; int err = 0; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = priv->fts.main.t; + dest.ft = priv->fs.l2.ft.t; match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.vlan_tag); switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: - rule_p = &priv->vlan.untagged_rule; + rule_p = &priv->fs.vlan.untagged_rule; break; case MLX5E_VLAN_RULE_TYPE_ANY_VID: - rule_p = &priv->vlan.any_vlan_rule; + rule_p = &priv->fs.vlan.any_vlan_rule; MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1); break; default: /* MLX5E_VLAN_RULE_TYPE_MATCH_VID */ - rule_p = &priv->vlan.active_vlans_rule[vid]; + rule_p = &priv->fs.vlan.active_vlans_rule[vid]; MLX5_SET(fte_match_param, mv, outer_headers.vlan_tag, 1); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.first_vid); MLX5_SET(fte_match_param, mv, outer_headers.first_vid, vid); @@ -589,22 +233,22 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, { switch (rule_type) { case MLX5E_VLAN_RULE_TYPE_UNTAGGED: - if (priv->vlan.untagged_rule) { - mlx5_del_flow_rule(priv->vlan.untagged_rule); - priv->vlan.untagged_rule = NULL; + if (priv->fs.vlan.untagged_rule) { + mlx5_del_flow_rule(priv->fs.vlan.untagged_rule); + priv->fs.vlan.untagged_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_ANY_VID: - if (priv->vlan.any_vlan_rule) { - mlx5_del_flow_rule(priv->vlan.any_vlan_rule); - priv->vlan.any_vlan_rule = NULL; + if (priv->fs.vlan.any_vlan_rule) { + mlx5_del_flow_rule(priv->fs.vlan.any_vlan_rule); + priv->fs.vlan.any_vlan_rule = NULL; } break; case MLX5E_VLAN_RULE_TYPE_MATCH_VID: mlx5e_vport_context_update_vlans(priv); - if (priv->vlan.active_vlans_rule[vid]) { - mlx5_del_flow_rule(priv->vlan.active_vlans_rule[vid]); - priv->vlan.active_vlans_rule[vid] = NULL; + if (priv->fs.vlan.active_vlans_rule[vid]) { + mlx5_del_flow_rule(priv->fs.vlan.active_vlans_rule[vid]); + priv->fs.vlan.active_vlans_rule[vid] = NULL; } mlx5e_vport_context_update_vlans(priv); break; @@ -613,10 +257,10 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv, void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) { - if (!priv->vlan.filter_disabled) + if (!priv->fs.vlan.filter_disabled) return; - priv->vlan.filter_disabled = false; + priv->fs.vlan.filter_disabled = false; if (priv->netdev->flags & IFF_PROMISC) return; mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); @@ -624,10 +268,10 @@ void mlx5e_enable_vlan_filter(struct mlx5e_priv *priv) void mlx5e_disable_vlan_filter(struct mlx5e_priv *priv) { - if (priv->vlan.filter_disabled) + if (priv->fs.vlan.filter_disabled) return; - priv->vlan.filter_disabled = true; + priv->fs.vlan.filter_disabled = true; if (priv->netdev->flags & IFF_PROMISC) return; mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); @@ -638,7 +282,7 @@ int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto, { struct mlx5e_priv *priv = netdev_priv(dev); - set_bit(vid, priv->vlan.active_vlans); + set_bit(vid, priv->fs.vlan.active_vlans); return mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); } @@ -648,7 +292,7 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, { struct mlx5e_priv *priv = netdev_priv(dev); - clear_bit(vid, priv->vlan.active_vlans); + clear_bit(vid, priv->fs.vlan.active_vlans); mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, vid); @@ -656,21 +300,21 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, } #define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ - for (i = 0; i < MLX5E_ETH_ADDR_HASH_SIZE; i++) \ + for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \ hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) -static void mlx5e_execute_action(struct mlx5e_priv *priv, - struct mlx5e_eth_addr_hash_node *hn) +static void mlx5e_execute_l2_action(struct mlx5e_priv *priv, + struct mlx5e_l2_hash_node *hn) { switch (hn->action) { case MLX5E_ACTION_ADD: - mlx5e_add_eth_addr_rule(priv, &hn->ai, MLX5E_FULLMATCH); + mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH); hn->action = MLX5E_ACTION_NONE; break; case MLX5E_ACTION_DEL: - mlx5e_del_eth_addr_from_flow_table(priv, &hn->ai); - mlx5e_del_eth_addr_from_hash(hn); + mlx5e_del_l2_flow_rule(priv, &hn->ai); + mlx5e_del_l2_from_hash(hn); break; } } @@ -682,14 +326,14 @@ static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv) netif_addr_lock_bh(netdev); - mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, - priv->netdev->dev_addr); + mlx5e_add_l2_to_hash(priv->fs.l2.netdev_uc, + priv->netdev->dev_addr); netdev_for_each_uc_addr(ha, netdev) - mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_uc, ha->addr); + mlx5e_add_l2_to_hash(priv->fs.l2.netdev_uc, ha->addr); netdev_for_each_mc_addr(ha, netdev) - mlx5e_add_eth_addr_to_hash(priv->eth_addr.netdev_mc, ha->addr); + mlx5e_add_l2_to_hash(priv->fs.l2.netdev_mc, ha->addr); netif_addr_unlock_bh(netdev); } @@ -699,17 +343,17 @@ static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type, { bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); struct net_device *ndev = priv->netdev; - struct mlx5e_eth_addr_hash_node *hn; + struct mlx5e_l2_hash_node *hn; struct hlist_head *addr_list; struct hlist_node *tmp; int i = 0; int hi; - addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc; + addr_list = is_uc ? priv->fs.l2.netdev_uc : priv->fs.l2.netdev_mc; if (is_uc) /* Make sure our own address is pushed first */ ether_addr_copy(addr_array[i++], ndev->dev_addr); - else if (priv->eth_addr.broadcast_enabled) + else if (priv->fs.l2.broadcast_enabled) ether_addr_copy(addr_array[i++], ndev->broadcast); mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) { @@ -725,7 +369,7 @@ static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv, int list_type) { bool is_uc = (list_type == MLX5_NVPRT_LIST_TYPE_UC); - struct mlx5e_eth_addr_hash_node *hn; + struct mlx5e_l2_hash_node *hn; u8 (*addr_array)[ETH_ALEN] = NULL; struct hlist_head *addr_list; struct hlist_node *tmp; @@ -734,12 +378,12 @@ static void mlx5e_vport_context_update_addr_list(struct mlx5e_priv *priv, int err; int hi; - size = is_uc ? 0 : (priv->eth_addr.broadcast_enabled ? 1 : 0); + size = is_uc ? 0 : (priv->fs.l2.broadcast_enabled ? 1 : 0); max_size = is_uc ? 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_uc_list) : 1 << MLX5_CAP_GEN(priv->mdev, log_max_current_mc_list); - addr_list = is_uc ? priv->eth_addr.netdev_uc : priv->eth_addr.netdev_mc; + addr_list = is_uc ? priv->fs.l2.netdev_uc : priv->fs.l2.netdev_mc; mlx5e_for_each_hash_node(hn, tmp, addr_list, hi) size++; @@ -770,7 +414,7 @@ out: static void mlx5e_vport_context_update(struct mlx5e_priv *priv) { - struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + struct mlx5e_l2_table *ea = &priv->fs.l2; mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_UC); mlx5e_vport_context_update_addr_list(priv, MLX5_NVPRT_LIST_TYPE_MC); @@ -781,26 +425,26 @@ static void mlx5e_vport_context_update(struct mlx5e_priv *priv) static void mlx5e_apply_netdev_addr(struct mlx5e_priv *priv) { - struct mlx5e_eth_addr_hash_node *hn; + struct mlx5e_l2_hash_node *hn; struct hlist_node *tmp; int i; - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) - mlx5e_execute_action(priv, hn); + mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_uc, i) + mlx5e_execute_l2_action(priv, hn); - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) - mlx5e_execute_action(priv, hn); + mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_mc, i) + mlx5e_execute_l2_action(priv, hn); } static void mlx5e_handle_netdev_addr(struct mlx5e_priv *priv) { - struct mlx5e_eth_addr_hash_node *hn; + struct mlx5e_l2_hash_node *hn; struct hlist_node *tmp; int i; - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_uc, i) + mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_uc, i) hn->action = MLX5E_ACTION_DEL; - mlx5e_for_each_hash_node(hn, tmp, priv->eth_addr.netdev_mc, i) + mlx5e_for_each_hash_node(hn, tmp, priv->fs.l2.netdev_mc, i) hn->action = MLX5E_ACTION_DEL; if (!test_bit(MLX5E_STATE_DESTROYING, &priv->state)) @@ -814,7 +458,7 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, set_rx_mode_work); - struct mlx5e_eth_addr_db *ea = &priv->eth_addr; + struct mlx5e_l2_table *ea = &priv->fs.l2; struct net_device *ndev = priv->netdev; bool rx_mode_enable = !test_bit(MLX5E_STATE_DESTROYING, &priv->state); @@ -830,27 +474,27 @@ void mlx5e_set_rx_mode_work(struct work_struct *work) bool disable_broadcast = ea->broadcast_enabled && !broadcast_enabled; if (enable_promisc) { - mlx5e_add_eth_addr_rule(priv, &ea->promisc, MLX5E_PROMISC); - if (!priv->vlan.filter_disabled) + mlx5e_add_l2_flow_rule(priv, &ea->promisc, MLX5E_PROMISC); + if (!priv->fs.vlan.filter_disabled) mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); } if (enable_allmulti) - mlx5e_add_eth_addr_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); + mlx5e_add_l2_flow_rule(priv, &ea->allmulti, MLX5E_ALLMULTI); if (enable_broadcast) - mlx5e_add_eth_addr_rule(priv, &ea->broadcast, MLX5E_FULLMATCH); + mlx5e_add_l2_flow_rule(priv, &ea->broadcast, MLX5E_FULLMATCH); mlx5e_handle_netdev_addr(priv); if (disable_broadcast) - mlx5e_del_eth_addr_from_flow_table(priv, &ea->broadcast); + mlx5e_del_l2_flow_rule(priv, &ea->broadcast); if (disable_allmulti) - mlx5e_del_eth_addr_from_flow_table(priv, &ea->allmulti); + mlx5e_del_l2_flow_rule(priv, &ea->allmulti); if (disable_promisc) { - if (!priv->vlan.filter_disabled) + if (!priv->fs.vlan.filter_disabled) mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); - mlx5e_del_eth_addr_from_flow_table(priv, &ea->promisc); + mlx5e_del_l2_flow_rule(priv, &ea->promisc); } ea->promisc_enabled = promisc_enabled; @@ -872,224 +516,454 @@ static void mlx5e_destroy_groups(struct mlx5e_flow_table *ft) ft->num_groups = 0; } -void mlx5e_init_eth_addr(struct mlx5e_priv *priv) +void mlx5e_init_l2_addr(struct mlx5e_priv *priv) { - ether_addr_copy(priv->eth_addr.broadcast.addr, priv->netdev->broadcast); + ether_addr_copy(priv->fs.l2.broadcast.addr, priv->netdev->broadcast); } -#define MLX5E_MAIN_GROUP0_SIZE BIT(3) -#define MLX5E_MAIN_GROUP1_SIZE BIT(1) -#define MLX5E_MAIN_GROUP2_SIZE BIT(0) -#define MLX5E_MAIN_GROUP3_SIZE BIT(14) -#define MLX5E_MAIN_GROUP4_SIZE BIT(13) -#define MLX5E_MAIN_GROUP5_SIZE BIT(11) -#define MLX5E_MAIN_GROUP6_SIZE BIT(2) -#define MLX5E_MAIN_GROUP7_SIZE BIT(1) -#define MLX5E_MAIN_GROUP8_SIZE BIT(0) -#define MLX5E_MAIN_TABLE_SIZE (MLX5E_MAIN_GROUP0_SIZE +\ - MLX5E_MAIN_GROUP1_SIZE +\ - MLX5E_MAIN_GROUP2_SIZE +\ - MLX5E_MAIN_GROUP3_SIZE +\ - MLX5E_MAIN_GROUP4_SIZE +\ - MLX5E_MAIN_GROUP5_SIZE +\ - MLX5E_MAIN_GROUP6_SIZE +\ - MLX5E_MAIN_GROUP7_SIZE +\ - MLX5E_MAIN_GROUP8_SIZE) - -static int __mlx5e_create_main_groups(struct mlx5e_flow_table *ft, u32 *in, - int inlen) +void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) { - u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); - u8 *dmac = MLX5_ADDR_OF(create_flow_group_in, in, - match_criteria.outer_headers.dmac_47_16); + mlx5e_destroy_groups(ft); + kfree(ft->g); + mlx5_destroy_flow_table(ft->t); + ft->t = NULL; +} + +static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc) +{ + int i; + + for (i = 0; i < MLX5E_NUM_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->rules[i])) { + mlx5_del_flow_rule(ttc->rules[i]); + ttc->rules[i] = NULL; + } + } +} + +static struct { + u16 etype; + u8 proto; +} ttc_rules[] = { + [MLX5E_TT_IPV4_TCP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_TCP, + }, + [MLX5E_TT_IPV6_TCP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_TCP, + }, + [MLX5E_TT_IPV4_UDP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_UDP, + }, + [MLX5E_TT_IPV6_UDP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_UDP, + }, + [MLX5E_TT_IPV4_IPSEC_AH] = { + .etype = ETH_P_IP, + .proto = IPPROTO_AH, + }, + [MLX5E_TT_IPV6_IPSEC_AH] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_AH, + }, + [MLX5E_TT_IPV4_IPSEC_ESP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_ESP, + }, + [MLX5E_TT_IPV6_IPSEC_ESP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_ESP, + }, + [MLX5E_TT_IPV4] = { + .etype = ETH_P_IP, + .proto = 0, + }, + [MLX5E_TT_IPV6] = { + .etype = ETH_P_IPV6, + .proto = 0, + }, + [MLX5E_TT_ANY] = { + .etype = 0, + .proto = 0, + }, +}; + +static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, + struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, + u16 etype, + u8 proto) +{ + struct mlx5_flow_rule *rule; + u8 match_criteria_enable = 0; + u32 *match_criteria; + u32 *match_value; + int err = 0; + + match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!match_value || !match_criteria) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto out; + } + + if (proto) { + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, match_value, outer_headers.ip_protocol, proto); + } + if (etype) { + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, match_value, outer_headers.ethertype, etype); + } + + rule = mlx5_add_flow_rule(ft, match_criteria_enable, + match_criteria, match_value, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, + dest); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + netdev_err(priv->netdev, "%s: add rule failed\n", __func__); + } +out: + kvfree(match_criteria); + kvfree(match_value); + return err ? ERR_PTR(err) : rule; +} + +static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) +{ + struct mlx5_flow_destination dest; + struct mlx5e_ttc_table *ttc; + struct mlx5_flow_rule **rules; + struct mlx5_flow_table *ft; + int tt; int err; + + ttc = &priv->fs.ttc; + ft = ttc->ft.t; + rules = ttc->rules; + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + for (tt = 0; tt < MLX5E_NUM_TT; tt++) { + if (tt == MLX5E_TT_ANY) + dest.tir_num = priv->direct_tir[0].tirn; + else + dest.tir_num = priv->indir_tirn[tt]; + rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rules[tt])) + goto del_rules; + } + + return 0; + +del_rules: + err = PTR_ERR(rules[tt]); + rules[tt] = NULL; + mlx5e_cleanup_ttc_rules(ttc); + return err; +} + +#define MLX5E_TTC_NUM_GROUPS 3 +#define MLX5E_TTC_GROUP1_SIZE BIT(3) +#define MLX5E_TTC_GROUP2_SIZE BIT(1) +#define MLX5E_TTC_GROUP3_SIZE BIT(0) +#define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\ + MLX5E_TTC_GROUP2_SIZE +\ + MLX5E_TTC_GROUP3_SIZE) +static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_flow_table *ft = &ttc->ft; int ix = 0; + u32 *in; + int err; + u8 *mc; - memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP0_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; - ft->num_groups++; + ft->g = kcalloc(MLX5E_TTC_NUM_GROUPS, + sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = mlx5_vzalloc(inlen); + if (!in) { + kfree(ft->g); + return -ENOMEM; + } - memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP1_SIZE; + ix += MLX5E_TTC_GROUP1_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; + goto err; ft->num_groups++; - memset(in, 0, inlen); + /* L3 Group */ + MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0); MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP2_SIZE; + ix += MLX5E_TTC_GROUP2_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; + goto err; ft->num_groups++; + /* Any Group */ memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - eth_broadcast_addr(dmac); MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP3_SIZE; + ix += MLX5E_TTC_GROUP3_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; + goto err; ft->num_groups++; - memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - eth_broadcast_addr(dmac); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP4_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; - ft->num_groups++; + kvfree(in); + return 0; - memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - eth_broadcast_addr(dmac); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP5_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err_destroy_groups; - ft->num_groups++; +err: + err = PTR_ERR(ft->g[ft->num_groups]); + ft->g[ft->num_groups] = NULL; + kvfree(in); - memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - dmac[0] = 0x01; + return err; +} + +static void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_ttc_table *ttc = &priv->fs.ttc; + + mlx5e_cleanup_ttc_rules(ttc); + mlx5e_destroy_flow_table(&ttc->ft); +} + +static int mlx5e_create_ttc_table(struct mlx5e_priv *priv) +{ + struct mlx5e_ttc_table *ttc = &priv->fs.ttc; + struct mlx5e_flow_table *ft = &ttc->ft; + int err; + + ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, + MLX5E_TTC_TABLE_SIZE, MLX5E_TTC_FT_LEVEL); + if (IS_ERR(ft->t)) { + err = PTR_ERR(ft->t); + ft->t = NULL; + return err; + } + + err = mlx5e_create_ttc_table_groups(ttc); + if (err) + goto err; + + err = mlx5e_generate_ttc_table_rules(priv); + if (err) + goto err; + + return 0; +err: + mlx5e_destroy_flow_table(ft); + return err; +} + +static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_l2_rule *ai) +{ + if (!IS_ERR_OR_NULL(ai->rule)) { + mlx5_del_flow_rule(ai->rule); + ai->rule = NULL; + } +} + +static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, + struct mlx5e_l2_rule *ai, int type) +{ + struct mlx5_flow_table *ft = priv->fs.l2.ft.t; + struct mlx5_flow_destination dest; + u8 match_criteria_enable = 0; + u32 *match_criteria; + u32 *match_value; + int err = 0; + u8 *mc_dmac; + u8 *mv_dmac; + + match_value = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + match_criteria = mlx5_vzalloc(MLX5_ST_SZ_BYTES(fte_match_param)); + if (!match_value || !match_criteria) { + netdev_err(priv->netdev, "%s: alloc failed\n", __func__); + err = -ENOMEM; + goto add_l2_rule_out; + } + + mc_dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, + outer_headers.dmac_47_16); + mv_dmac = MLX5_ADDR_OF(fte_match_param, match_value, + outer_headers.dmac_47_16); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fs.ttc.ft.t; + + switch (type) { + case MLX5E_FULLMATCH: + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + eth_broadcast_addr(mc_dmac); + ether_addr_copy(mv_dmac, ai->addr); + break; + + case MLX5E_ALLMULTI: + match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + mc_dmac[0] = 0x01; + mv_dmac[0] = 0x01; + break; + + case MLX5E_PROMISC: + break; + } + + ai->rule = mlx5_add_flow_rule(ft, match_criteria_enable, match_criteria, + match_value, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + MLX5_FS_DEFAULT_FLOW_TAG, &dest); + if (IS_ERR(ai->rule)) { + netdev_err(priv->netdev, "%s: add l2 rule(mac:%pM) failed\n", + __func__, mv_dmac); + err = PTR_ERR(ai->rule); + ai->rule = NULL; + } + +add_l2_rule_out: + kvfree(match_criteria); + kvfree(match_value); + + return err; +} + +#define MLX5E_NUM_L2_GROUPS 3 +#define MLX5E_L2_GROUP1_SIZE BIT(0) +#define MLX5E_L2_GROUP2_SIZE BIT(15) +#define MLX5E_L2_GROUP3_SIZE BIT(0) +#define MLX5E_L2_TABLE_SIZE (MLX5E_L2_GROUP1_SIZE +\ + MLX5E_L2_GROUP2_SIZE +\ + MLX5E_L2_GROUP3_SIZE) +static int mlx5e_create_l2_table_groups(struct mlx5e_l2_table *l2_table) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5e_flow_table *ft = &l2_table->ft; + int ix = 0; + u8 *mc_dmac; + u32 *in; + int err; + u8 *mc; + + ft->g = kcalloc(MLX5E_NUM_L2_GROUPS, sizeof(*ft->g), GFP_KERNEL); + if (!ft->g) + return -ENOMEM; + in = mlx5_vzalloc(inlen); + if (!in) { + kfree(ft->g); + return -ENOMEM; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + mc_dmac = MLX5_ADDR_OF(fte_match_param, mc, + outer_headers.dmac_47_16); + /* Flow Group for promiscuous */ MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP6_SIZE; + ix += MLX5E_L2_GROUP1_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destroy_groups; ft->num_groups++; - memset(in, 0, inlen); + /* Flow Group for full match */ + eth_broadcast_addr(mc_dmac); MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - dmac[0] = 0x01; MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP7_SIZE; + ix += MLX5E_L2_GROUP2_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destroy_groups; ft->num_groups++; - memset(in, 0, inlen); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - dmac[0] = 0x01; + /* Flow Group for allmulti */ + eth_zero_addr(mc_dmac); + mc_dmac[0] = 0x01; MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_MAIN_GROUP8_SIZE; + ix += MLX5E_L2_GROUP3_SIZE; MLX5_SET_CFG(in, end_flow_index, ix - 1); ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); if (IS_ERR(ft->g[ft->num_groups])) goto err_destroy_groups; ft->num_groups++; + kvfree(in); return 0; err_destroy_groups: err = PTR_ERR(ft->g[ft->num_groups]); ft->g[ft->num_groups] = NULL; mlx5e_destroy_groups(ft); + kvfree(in); return err; } -static int mlx5e_create_main_groups(struct mlx5e_flow_table *ft) +static void mlx5e_destroy_l2_table(struct mlx5e_priv *priv) { - u32 *in; - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - int err; - - in = mlx5_vzalloc(inlen); - if (!in) - return -ENOMEM; - - err = __mlx5e_create_main_groups(ft, in, inlen); - - kvfree(in); - return err; + mlx5e_destroy_flow_table(&priv->fs.l2.ft); } -static int mlx5e_create_main_flow_table(struct mlx5e_priv *priv) +static int mlx5e_create_l2_table(struct mlx5e_priv *priv) { - struct mlx5e_flow_table *ft = &priv->fts.main; + struct mlx5e_l2_table *l2_table = &priv->fs.l2; + struct mlx5e_flow_table *ft = &l2_table->ft; int err; ft->num_groups = 0; - ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_MAIN_TABLE_SIZE); + ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, + MLX5E_L2_TABLE_SIZE, MLX5E_L2_FT_LEVEL); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); ft->t = NULL; return err; } - ft->g = kcalloc(MLX5E_NUM_MAIN_GROUPS, sizeof(*ft->g), GFP_KERNEL); - if (!ft->g) { - err = -ENOMEM; - goto err_destroy_main_flow_table; - } - err = mlx5e_create_main_groups(ft); + err = mlx5e_create_l2_table_groups(l2_table); if (err) - goto err_free_g; - return 0; + goto err_destroy_flow_table; -err_free_g: - kfree(ft->g); + return 0; -err_destroy_main_flow_table: +err_destroy_flow_table: mlx5_destroy_flow_table(ft->t); ft->t = NULL; return err; } -static void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) -{ - mlx5e_destroy_groups(ft); - kfree(ft->g); - mlx5_destroy_flow_table(ft->t); - ft->t = NULL; -} - -static void mlx5e_destroy_main_flow_table(struct mlx5e_priv *priv) -{ - mlx5e_destroy_flow_table(&priv->fts.main); -} - #define MLX5E_NUM_VLAN_GROUPS 2 #define MLX5E_VLAN_GROUP0_SIZE BIT(12) #define MLX5E_VLAN_GROUP1_SIZE BIT(1) #define MLX5E_VLAN_TABLE_SIZE (MLX5E_VLAN_GROUP0_SIZE +\ MLX5E_VLAN_GROUP1_SIZE) -static int __mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft, u32 *in, - int inlen) +static int __mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft, u32 *in, + int inlen) { int err; int ix = 0; @@ -1128,7 +1002,7 @@ err_destroy_groups: return err; } -static int mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft) +static int mlx5e_create_vlan_table_groups(struct mlx5e_flow_table *ft) { u32 *in; int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -1138,19 +1012,20 @@ static int mlx5e_create_vlan_groups(struct mlx5e_flow_table *ft) if (!in) return -ENOMEM; - err = __mlx5e_create_vlan_groups(ft, in, inlen); + err = __mlx5e_create_vlan_table_groups(ft, in, inlen); kvfree(in); return err; } -static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) +static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) { - struct mlx5e_flow_table *ft = &priv->fts.vlan; + struct mlx5e_flow_table *ft = &priv->fs.vlan.ft; int err; ft->num_groups = 0; - ft->t = mlx5_create_flow_table(priv->fts.ns, 1, MLX5E_VLAN_TABLE_SIZE); + ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO, + MLX5E_VLAN_TABLE_SIZE, MLX5E_VLAN_FT_LEVEL); if (IS_ERR(ft->t)) { err = PTR_ERR(ft->t); @@ -1160,65 +1035,90 @@ static int mlx5e_create_vlan_flow_table(struct mlx5e_priv *priv) ft->g = kcalloc(MLX5E_NUM_VLAN_GROUPS, sizeof(*ft->g), GFP_KERNEL); if (!ft->g) { err = -ENOMEM; - goto err_destroy_vlan_flow_table; + goto err_destroy_vlan_table; } - err = mlx5e_create_vlan_groups(ft); + err = mlx5e_create_vlan_table_groups(ft); if (err) goto err_free_g; + err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + if (err) + goto err_destroy_vlan_flow_groups; + return 0; +err_destroy_vlan_flow_groups: + mlx5e_destroy_groups(ft); err_free_g: kfree(ft->g); - -err_destroy_vlan_flow_table: +err_destroy_vlan_table: mlx5_destroy_flow_table(ft->t); ft->t = NULL; return err; } -static void mlx5e_destroy_vlan_flow_table(struct mlx5e_priv *priv) +static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv) { - mlx5e_destroy_flow_table(&priv->fts.vlan); + mlx5e_destroy_flow_table(&priv->fs.vlan.ft); } -int mlx5e_create_flow_tables(struct mlx5e_priv *priv) +int mlx5e_create_flow_steering(struct mlx5e_priv *priv) { int err; - priv->fts.ns = mlx5_get_flow_namespace(priv->mdev, + priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); - if (!priv->fts.ns) + if (!priv->fs.ns) return -EINVAL; - err = mlx5e_create_vlan_flow_table(priv); - if (err) - return err; + err = mlx5e_arfs_create_tables(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create arfs tables, err=%d\n", + err); + priv->netdev->hw_features &= ~NETIF_F_NTUPLE; + } - err = mlx5e_create_main_flow_table(priv); - if (err) - goto err_destroy_vlan_flow_table; + err = mlx5e_create_ttc_table(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", + err); + goto err_destroy_arfs_tables; + } - err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - if (err) - goto err_destroy_main_flow_table; + err = mlx5e_create_l2_table(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create l2 table, err=%d\n", + err); + goto err_destroy_ttc_table; + } + + err = mlx5e_create_vlan_table(priv); + if (err) { + netdev_err(priv->netdev, "Failed to create vlan table, err=%d\n", + err); + goto err_destroy_l2_table; + } return 0; -err_destroy_main_flow_table: - mlx5e_destroy_main_flow_table(priv); -err_destroy_vlan_flow_table: - mlx5e_destroy_vlan_flow_table(priv); +err_destroy_l2_table: + mlx5e_destroy_l2_table(priv); +err_destroy_ttc_table: + mlx5e_destroy_ttc_table(priv); +err_destroy_arfs_tables: + mlx5e_arfs_destroy_tables(priv); return err; } -void mlx5e_destroy_flow_tables(struct mlx5e_priv *priv) +void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) { mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - mlx5e_destroy_main_flow_table(priv); - mlx5e_destroy_vlan_flow_table(priv); + mlx5e_destroy_vlan_table(priv); + mlx5e_destroy_l2_table(priv); + mlx5e_destroy_ttc_table(priv); + mlx5e_arfs_destroy_tables(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 94fef705890b..fd4392999eee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -48,6 +48,7 @@ struct mlx5e_sq_param { u32 sqc[MLX5_ST_SZ_DW(sqc)]; struct mlx5_wq_param wq; u16 max_inline; + bool icosq; }; struct mlx5e_cq_param { @@ -59,8 +60,10 @@ struct mlx5e_cq_param { struct mlx5e_channel_param { struct mlx5e_rq_param rq; struct mlx5e_sq_param sq; + struct mlx5e_sq_param icosq; struct mlx5e_cq_param rx_cq; struct mlx5e_cq_param tx_cq; + struct mlx5e_cq_param icosq_cq; }; static void mlx5e_update_carrier(struct mlx5e_priv *priv) @@ -88,82 +91,15 @@ static void mlx5e_update_carrier_work(struct work_struct *work) mutex_unlock(&priv->state_lock); } -static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) +static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) { - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_pport_stats *s = &priv->stats.pport; - u32 *in; - u32 *out; - int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); - - in = mlx5_vzalloc(sz); - out = mlx5_vzalloc(sz); - if (!in || !out) - goto free_out; - - MLX5_SET(ppcnt_reg, in, local_port, 1); - - MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, - sz, MLX5_REG_PPCNT, 0, 0); - memcpy(s->IEEE_802_3_counters, - MLX5_ADDR_OF(ppcnt_reg, out, counter_set), - sizeof(s->IEEE_802_3_counters)); - - MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, - sz, MLX5_REG_PPCNT, 0, 0); - memcpy(s->RFC_2863_counters, - MLX5_ADDR_OF(ppcnt_reg, out, counter_set), - sizeof(s->RFC_2863_counters)); - - MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); - mlx5_core_access_reg(mdev, in, sz, out, - sz, MLX5_REG_PPCNT, 0, 0); - memcpy(s->RFC_2819_counters, - MLX5_ADDR_OF(ppcnt_reg, out, counter_set), - sizeof(s->RFC_2819_counters)); - -free_out: - kvfree(in); - kvfree(out); -} - -void mlx5e_update_stats(struct mlx5e_priv *priv) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_vport_stats *s = &priv->stats.vport; + struct mlx5e_sw_stats *s = &priv->stats.sw; struct mlx5e_rq_stats *rq_stats; struct mlx5e_sq_stats *sq_stats; - u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; - u32 *out; - int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); - u64 tx_offload_none; + u64 tx_offload_none = 0; int i, j; - out = mlx5_vzalloc(outlen); - if (!out) - return; - - /* Collect firts the SW counters and then HW for consistency */ - s->rx_packets = 0; - s->rx_bytes = 0; - s->tx_packets = 0; - s->tx_bytes = 0; - s->tso_packets = 0; - s->tso_bytes = 0; - s->tso_inner_packets = 0; - s->tso_inner_bytes = 0; - s->tx_queue_stopped = 0; - s->tx_queue_wake = 0; - s->tx_queue_dropped = 0; - s->tx_csum_inner = 0; - tx_offload_none = 0; - s->lro_packets = 0; - s->lro_bytes = 0; - s->rx_csum_none = 0; - s->rx_csum_sw = 0; - s->rx_wqe_err = 0; + memset(s, 0, sizeof(*s)); for (i = 0; i < priv->params.num_channels; i++) { rq_stats = &priv->channel[i]->rq.stats; @@ -173,7 +109,13 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) s->lro_bytes += rq_stats->lro_bytes; s->rx_csum_none += rq_stats->csum_none; s->rx_csum_sw += rq_stats->csum_sw; + s->rx_csum_inner += rq_stats->csum_inner; s->rx_wqe_err += rq_stats->wqe_err; + s->rx_mpwqe_filler += rq_stats->mpwqe_filler; + s->rx_mpwqe_frag += rq_stats->mpwqe_frag; + s->rx_buff_alloc_err += rq_stats->buff_alloc_err; + s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; + s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; for (j = 0; j < priv->params.num_tc; j++) { sq_stats = &priv->channel[i]->sq[j].stats; @@ -192,7 +134,23 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) } } - /* HW counters */ + /* Update calculated offload counters */ + s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner; + s->rx_csum_good = s->rx_packets - s->rx_csum_none - + s->rx_csum_sw; + + s->link_down_events = MLX5_GET(ppcnt_reg, + priv->stats.pport.phy_counters, + counter_set.phys_layer_cntrs.link_down_events); +} + +static void mlx5e_update_vport_counters(struct mlx5e_priv *priv) +{ + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); + u32 *out = (u32 *)priv->stats.vport.query_vport_out; + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; + struct mlx5_core_dev *mdev = priv->mdev; + memset(in, 0, sizeof(in)); MLX5_SET(query_vport_counter_in, in, opcode, @@ -202,56 +160,69 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) memset(out, 0, outlen); - if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen)) + mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); +} + +static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) +{ + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + struct mlx5_core_dev *mdev = priv->mdev; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + int prio; + void *out; + u32 *in; + + in = mlx5_vzalloc(sz); + if (!in) goto free_out; -#define MLX5_GET_CTR(p, x) \ - MLX5_GET64(query_vport_counter_out, p, x) - - s->rx_error_packets = - MLX5_GET_CTR(out, received_errors.packets); - s->rx_error_bytes = - MLX5_GET_CTR(out, received_errors.octets); - s->tx_error_packets = - MLX5_GET_CTR(out, transmit_errors.packets); - s->tx_error_bytes = - MLX5_GET_CTR(out, transmit_errors.octets); - - s->rx_unicast_packets = - MLX5_GET_CTR(out, received_eth_unicast.packets); - s->rx_unicast_bytes = - MLX5_GET_CTR(out, received_eth_unicast.octets); - s->tx_unicast_packets = - MLX5_GET_CTR(out, transmitted_eth_unicast.packets); - s->tx_unicast_bytes = - MLX5_GET_CTR(out, transmitted_eth_unicast.octets); - - s->rx_multicast_packets = - MLX5_GET_CTR(out, received_eth_multicast.packets); - s->rx_multicast_bytes = - MLX5_GET_CTR(out, received_eth_multicast.octets); - s->tx_multicast_packets = - MLX5_GET_CTR(out, transmitted_eth_multicast.packets); - s->tx_multicast_bytes = - MLX5_GET_CTR(out, transmitted_eth_multicast.octets); - - s->rx_broadcast_packets = - MLX5_GET_CTR(out, received_eth_broadcast.packets); - s->rx_broadcast_bytes = - MLX5_GET_CTR(out, received_eth_broadcast.octets); - s->tx_broadcast_packets = - MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); - s->tx_broadcast_bytes = - MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + MLX5_SET(ppcnt_reg, in, local_port, 1); - /* Update calculated offload counters */ - s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner; - s->rx_csum_good = s->rx_packets - s->rx_csum_none - - s->rx_csum_sw; + out = pstats->IEEE_802_3_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + + out = pstats->RFC_2863_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + + out = pstats->RFC_2819_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + + out = pstats->phy_counters; + MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); + mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); + + MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); + for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { + out = pstats->per_prio_counters[prio]; + MLX5_SET(ppcnt_reg, in, prio_tc, prio); + mlx5_core_access_reg(mdev, in, sz, out, sz, + MLX5_REG_PPCNT, 0, 0); + } - mlx5e_update_pport_counters(priv); free_out: - kvfree(out); + kvfree(in); +} + +static void mlx5e_update_q_counter(struct mlx5e_priv *priv) +{ + struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt; + + if (!priv->q_counter) + return; + + mlx5_core_query_out_of_buffer(priv->mdev, priv->q_counter, + &qcnt->rx_out_of_buffer); +} + +void mlx5e_update_stats(struct mlx5e_priv *priv) +{ + mlx5e_update_q_counter(priv); + mlx5e_update_vport_counters(priv); + mlx5e_update_pport_counters(priv); + mlx5e_update_sw_counters(priv); } static void mlx5e_update_stats_work(struct work_struct *work) @@ -309,6 +280,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = priv->mdev; void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); + u32 byte_count; int wq_sz; int err; int i; @@ -323,32 +295,56 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->wq.db = &rq->wq.db[MLX5_RCV_DBR]; wq_sz = mlx5_wq_ll_get_size(&rq->wq); - rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, - cpu_to_node(c->cpu)); - if (!rq->skb) { - err = -ENOMEM; - goto err_rq_wq_destroy; - } - rq->wqe_sz = (priv->params.lro_en) ? priv->params.lro_wqe_sz : - MLX5E_SW2HW_MTU(priv->netdev->mtu); - rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz + MLX5E_NET_IP_ALIGN); + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->wqe_info) { + err = -ENOMEM; + goto err_rq_wq_destroy; + } + rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; + rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; + + rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); + rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); + rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; + byte_count = rq->wqe_sz; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, + cpu_to_node(c->cpu)); + if (!rq->skb) { + err = -ENOMEM; + goto err_rq_wq_destroy; + } + rq->handle_rx_cqe = mlx5e_handle_rx_cqe; + rq->alloc_wqe = mlx5e_alloc_rx_wqe; + + rq->wqe_sz = (priv->params.lro_en) ? + priv->params.lro_wqe_sz : + MLX5E_SW2HW_MTU(priv->netdev->mtu); + rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz); + byte_count = rq->wqe_sz; + byte_count |= MLX5_HW_START_PADDING; + } for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); - u32 byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN; - wqe->data.lkey = c->mkey_be; - wqe->data.byte_count = - cpu_to_be32(byte_count | MLX5_HW_START_PADDING); + wqe->data.byte_count = cpu_to_be32(byte_count); } + rq->wq_type = priv->params.rq_wq_type; rq->pdev = c->pdev; rq->netdev = c->netdev; rq->tstamp = &priv->tstamp; rq->channel = c; rq->ix = c->ix; rq->priv = c->priv; + rq->mkey_be = c->mkey_be; + rq->umr_mkey_be = cpu_to_be32(c->priv->umr_mkey.key); return 0; @@ -360,7 +356,14 @@ err_rq_wq_destroy: static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { - kfree(rq->skb); + switch (rq->wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + kfree(rq->wqe_info); + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + kfree(rq->skb); + } + mlx5_wq_destroy(&rq->wq_ctrl); } @@ -389,6 +392,7 @@ static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) MLX5_SET(rqc, rqc, cqn, rq->cq.mcq.cqn); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); MLX5_SET(rqc, rqc, flush_in_error_en, 1); + MLX5_SET(rqc, rqc, vsd, priv->params.vlan_strip_disable); MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); @@ -403,7 +407,8 @@ static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) return err; } -static int mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state) +static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, + int next_state) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; @@ -431,6 +436,36 @@ static int mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state) return err; } +static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) +{ + struct mlx5e_channel *c = rq->channel; + struct mlx5e_priv *priv = c->priv; + struct mlx5_core_dev *mdev = priv->mdev; + + void *in; + void *rqc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_rq_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); + + MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); + MLX5_SET64(modify_rq_in, in, modify_bitmask, MLX5_RQ_BITMASK_VSD); + MLX5_SET(rqc, rqc, vsd, vsd); + MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); + + err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen); + + kvfree(in); + + return err; +} + static void mlx5e_disable_rq(struct mlx5e_rq *rq) { mlx5_core_destroy_rq(rq->priv->mdev, rq->rqn); @@ -457,6 +492,8 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) { + struct mlx5e_sq *sq = &c->icosq; + u16 pi = sq->pc & sq->wq.sz_m1; int err; err = mlx5e_create_rq(c, param, rq); @@ -467,12 +504,15 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (err) goto err_destroy_rq; - err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); + err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err) goto err_disable_rq; set_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); - mlx5e_send_nop(&c->sq[0], true); /* trigger mlx5e_post_rx_wqes() */ + + sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; + sq->ico_wqe_info[pi].num_wqebbs = 1; + mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */ return 0; @@ -489,7 +529,7 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) clear_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */ - mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); + mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); while (!mlx5_wq_ll_is_empty(&rq->wq)) msleep(20); @@ -538,7 +578,6 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, void *sqc = param->sqc; void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); - int txq_ix; int err; err = mlx5_alloc_map_uar(mdev, &sq->uar, true); @@ -566,8 +605,24 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - txq_ix = c->ix + tc * priv->params.num_channels; - sq->txq = netdev_get_tx_queue(priv->netdev, txq_ix); + if (param->icosq) { + u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + + sq->ico_wqe_info = kzalloc_node(sizeof(*sq->ico_wqe_info) * + wq_sz, + GFP_KERNEL, + cpu_to_node(c->cpu)); + if (!sq->ico_wqe_info) { + err = -ENOMEM; + goto err_free_sq_db; + } + } else { + int txq_ix; + + txq_ix = c->ix + tc * priv->params.num_channels; + sq->txq = netdev_get_tx_queue(priv->netdev, txq_ix); + priv->txq_to_sq_map[txq_ix] = sq; + } sq->pdev = c->pdev; sq->tstamp = &priv->tstamp; @@ -576,10 +631,12 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, sq->tc = tc; sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS; sq->bf_budget = MLX5E_SQ_BF_BUDGET; - priv->txq_to_sq_map[txq_ix] = sq; return 0; +err_free_sq_db: + mlx5e_free_sq_db(sq); + err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); @@ -594,6 +651,7 @@ static void mlx5e_destroy_sq(struct mlx5e_sq *sq) struct mlx5e_channel *c = sq->channel; struct mlx5e_priv *priv = c->priv; + kfree(sq->ico_wqe_info); mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); mlx5_unmap_free_uar(priv->mdev, &sq->uar); @@ -622,10 +680,10 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) memcpy(sqc, param->sqc, sizeof(param->sqc)); - MLX5_SET(sqc, sqc, tis_num_0, priv->tisn[sq->tc]); - MLX5_SET(sqc, sqc, cqn, c->sq[sq->tc].cq.mcq.cqn); + MLX5_SET(sqc, sqc, tis_num_0, param->icosq ? 0 : priv->tisn[sq->tc]); + MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); - MLX5_SET(sqc, sqc, tis_lst_sz, 1); + MLX5_SET(sqc, sqc, tis_lst_sz, param->icosq ? 0 : 1); MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); @@ -700,9 +758,11 @@ static int mlx5e_open_sq(struct mlx5e_channel *c, if (err) goto err_disable_sq; - set_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); - netdev_tx_reset_queue(sq->txq); - netif_tx_start_queue(sq->txq); + if (sq->txq) { + set_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); + netdev_tx_reset_queue(sq->txq); + netif_tx_start_queue(sq->txq); + } return 0; @@ -723,15 +783,19 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq) static void mlx5e_close_sq(struct mlx5e_sq *sq) { - clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); - napi_synchronize(&sq->channel->napi); /* prevent netif_tx_wake_queue */ - netif_tx_disable_queue(sq->txq); + if (sq->txq) { + clear_bit(MLX5E_SQ_STATE_WAKE_TXQ_ENABLE, &sq->state); + /* prevent netif_tx_wake_queue */ + napi_synchronize(&sq->channel->napi); + netif_tx_disable_queue(sq->txq); - /* ensure hw is notified of all pending wqes */ - if (mlx5e_sq_has_room_for(sq, 1)) - mlx5e_send_nop(sq, true); + /* ensure hw is notified of all pending wqes */ + if (mlx5e_sq_has_room_for(sq, 1)) + mlx5e_send_nop(sq, true); + + mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); + } - mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); while (sq->cc != sq->pc) /* wait till sq is empty */ msleep(20); @@ -985,10 +1049,14 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); - err = mlx5e_open_tx_cqs(c, cparam); + err = mlx5e_open_cq(c, &cparam->icosq_cq, &c->icosq.cq, 0, 0); if (err) goto err_napi_del; + err = mlx5e_open_tx_cqs(c, cparam); + if (err) + goto err_close_icosq_cq; + err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq, priv->params.rx_cq_moderation_usec, priv->params.rx_cq_moderation_pkts); @@ -997,10 +1065,14 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, napi_enable(&c->napi); - err = mlx5e_open_sqs(c, cparam); + err = mlx5e_open_sq(c, 0, &cparam->icosq, &c->icosq); if (err) goto err_disable_napi; + err = mlx5e_open_sqs(c, cparam); + if (err) + goto err_close_icosq; + err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) goto err_close_sqs; @@ -1013,6 +1085,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, err_close_sqs: mlx5e_close_sqs(c); +err_close_icosq: + mlx5e_close_sq(&c->icosq); + err_disable_napi: napi_disable(&c->napi); mlx5e_close_cq(&c->rq.cq); @@ -1020,6 +1095,9 @@ err_disable_napi: err_close_tx_cqs: mlx5e_close_tx_cqs(c); +err_close_icosq_cq: + mlx5e_close_cq(&c->icosq.cq); + err_napi_del: netif_napi_del(&c->napi); napi_hash_del(&c->napi); @@ -1032,9 +1110,11 @@ static void mlx5e_close_channel(struct mlx5e_channel *c) { mlx5e_close_rq(&c->rq); mlx5e_close_sqs(c); + mlx5e_close_sq(&c->icosq); napi_disable(&c->napi); mlx5e_close_cq(&c->rq.cq); mlx5e_close_tx_cqs(c); + mlx5e_close_cq(&c->icosq.cq); netif_napi_del(&c->napi); napi_hash_del(&c->napi); @@ -1049,11 +1129,23 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); - MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + MLX5_SET(wq, wq, log_wqe_num_of_strides, + priv->params.mpwqe_log_num_strides - 9); + MLX5_SET(wq, wq, log_wqe_stride_size, + priv->params.mpwqe_log_stride_sz - 6); + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ); + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); + } + MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); MLX5_SET(wq, wq, pd, priv->pdn); + MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); param->wq.linear = 1; @@ -1068,17 +1160,27 @@ static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param) MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); } -static void mlx5e_build_sq_param(struct mlx5e_priv *priv, - struct mlx5e_sq_param *param) +static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); - MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, pd, priv->pdn); param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); +} + +static void mlx5e_build_sq_param(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); + param->max_inline = priv->params.tx_max_inline; } @@ -1094,8 +1196,22 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { void *cqc = param->cqc; + u8 log_cq_size; - MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size); + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + log_cq_size = priv->params.log_rq_size + + priv->params.mpwqe_log_num_strides; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + log_cq_size = priv->params.log_rq_size; + } + + MLX5_SET(cqc, cqc, log_cq_size, log_cq_size); + if (priv->params.rx_cqe_compress) { + MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM); + MLX5_SET(cqc, cqc, cqe_comp_en, 1); + } mlx5e_build_common_cq_param(priv, param); } @@ -1105,25 +1221,52 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, { void *cqc = param->cqc; - MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); + MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); mlx5e_build_common_cq_param(priv, param); } -static void mlx5e_build_channel_param(struct mlx5e_priv *priv, - struct mlx5e_channel_param *cparam) +static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, + struct mlx5e_cq_param *param, + u8 log_wq_size) { - memset(cparam, 0, sizeof(*cparam)); + void *cqc = param->cqc; + + MLX5_SET(cqc, cqc, log_cq_size, log_wq_size); + + mlx5e_build_common_cq_param(priv, param); +} + +static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param, + u8 log_wq_size) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + + MLX5_SET(wq, wq, log_wq_sz, log_wq_size); + MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); + + param->icosq = true; +} + +static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) +{ + u8 icosq_log_wq_sz = MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; mlx5e_build_rq_param(priv, &cparam->rq); mlx5e_build_sq_param(priv, &cparam->sq); + mlx5e_build_icosq_param(priv, &cparam->icosq, icosq_log_wq_sz); mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); + mlx5e_build_ico_cq_param(priv, &cparam->icosq_cq, icosq_log_wq_sz); } static int mlx5e_open_channels(struct mlx5e_priv *priv) { - struct mlx5e_channel_param cparam; + struct mlx5e_channel_param *cparam; int nch = priv->params.num_channels; int err = -ENOMEM; int i; @@ -1135,12 +1278,15 @@ static int mlx5e_open_channels(struct mlx5e_priv *priv) priv->txq_to_sq_map = kcalloc(nch * priv->params.num_tc, sizeof(struct mlx5e_sq *), GFP_KERNEL); - if (!priv->channel || !priv->txq_to_sq_map) + cparam = kzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL); + + if (!priv->channel || !priv->txq_to_sq_map || !cparam) goto err_free_txq_to_sq_map; - mlx5e_build_channel_param(priv, &cparam); + mlx5e_build_channel_param(priv, cparam); + for (i = 0; i < nch; i++) { - err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]); + err = mlx5e_open_channel(priv, i, cparam, &priv->channel[i]); if (err) goto err_close_channels; } @@ -1151,6 +1297,7 @@ static int mlx5e_open_channels(struct mlx5e_priv *priv) goto err_close_channels; } + kfree(cparam); return 0; err_close_channels: @@ -1160,6 +1307,7 @@ err_close_channels: err_free_txq_to_sq_map: kfree(priv->txq_to_sq_map); kfree(priv->channel); + kfree(cparam); return err; } @@ -1199,48 +1347,36 @@ static void mlx5e_fill_indir_rqt_rqns(struct mlx5e_priv *priv, void *rqtc) for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) { int ix = i; + u32 rqn; if (priv->params.rss_hfunc == ETH_RSS_HASH_XOR) ix = mlx5e_bits_invert(i, MLX5E_LOG_INDIR_RQT_SIZE); ix = priv->params.indirection_rqt[ix]; - MLX5_SET(rqtc, rqtc, rq_num[i], - test_bit(MLX5E_STATE_OPENED, &priv->state) ? - priv->channel[ix]->rq.rqn : - priv->drop_rq.rqn); + rqn = test_bit(MLX5E_STATE_OPENED, &priv->state) ? + priv->channel[ix]->rq.rqn : + priv->drop_rq.rqn; + MLX5_SET(rqtc, rqtc, rq_num[i], rqn); } } -static void mlx5e_fill_rqt_rqns(struct mlx5e_priv *priv, void *rqtc, - enum mlx5e_rqt_ix rqt_ix) +static void mlx5e_fill_direct_rqt_rqn(struct mlx5e_priv *priv, void *rqtc, + int ix) { + u32 rqn = test_bit(MLX5E_STATE_OPENED, &priv->state) ? + priv->channel[ix]->rq.rqn : + priv->drop_rq.rqn; - switch (rqt_ix) { - case MLX5E_INDIRECTION_RQT: - mlx5e_fill_indir_rqt_rqns(priv, rqtc); - - break; - - default: /* MLX5E_SINGLE_RQ_RQT */ - MLX5_SET(rqtc, rqtc, rq_num[0], - test_bit(MLX5E_STATE_OPENED, &priv->state) ? - priv->channel[0]->rq.rqn : - priv->drop_rq.rqn); - - break; - } + MLX5_SET(rqtc, rqtc, rq_num[0], rqn); } -static int mlx5e_create_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) +static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, int ix, u32 *rqtn) { struct mlx5_core_dev *mdev = priv->mdev; - u32 *in; void *rqtc; int inlen; - int sz; int err; - - sz = (rqt_ix == MLX5E_SINGLE_RQ_RQT) ? 1 : MLX5E_INDIR_RQT_SIZE; + u32 *in; inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; in = mlx5_vzalloc(inlen); @@ -1252,26 +1388,73 @@ static int mlx5e_create_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); MLX5_SET(rqtc, rqtc, rqt_max_size, sz); - mlx5e_fill_rqt_rqns(priv, rqtc, rqt_ix); + if (sz > 1) /* RSS */ + mlx5e_fill_indir_rqt_rqns(priv, rqtc); + else + mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix); - err = mlx5_core_create_rqt(mdev, in, inlen, &priv->rqtn[rqt_ix]); + err = mlx5_core_create_rqt(mdev, in, inlen, rqtn); kvfree(in); + return err; +} + +static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, u32 rqtn) +{ + mlx5_core_destroy_rqt(priv->mdev, rqtn); +} + +static int mlx5e_create_rqts(struct mlx5e_priv *priv) +{ + int nch = mlx5e_get_max_num_channels(priv->mdev); + u32 *rqtn; + int err; + int ix; + + /* Indirect RQT */ + rqtn = &priv->indir_rqtn; + err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqtn); + if (err) + return err; + + /* Direct RQTs */ + for (ix = 0; ix < nch; ix++) { + rqtn = &priv->direct_tir[ix].rqtn; + err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqtn); + if (err) + goto err_destroy_rqts; + } + + return 0; + +err_destroy_rqts: + for (ix--; ix >= 0; ix--) + mlx5e_destroy_rqt(priv, priv->direct_tir[ix].rqtn); + + mlx5e_destroy_rqt(priv, priv->indir_rqtn); return err; } -int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) +static void mlx5e_destroy_rqts(struct mlx5e_priv *priv) +{ + int nch = mlx5e_get_max_num_channels(priv->mdev); + int i; + + for (i = 0; i < nch; i++) + mlx5e_destroy_rqt(priv, priv->direct_tir[i].rqtn); + + mlx5e_destroy_rqt(priv, priv->indir_rqtn); +} + +int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix) { struct mlx5_core_dev *mdev = priv->mdev; - u32 *in; void *rqtc; int inlen; - int sz; + u32 *in; int err; - sz = (rqt_ix == MLX5E_SINGLE_RQ_RQT) ? 1 : MLX5E_INDIR_RQT_SIZE; - inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + sizeof(u32) * sz; in = mlx5_vzalloc(inlen); if (!in) @@ -1280,27 +1463,31 @@ int mlx5e_redirect_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx); MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); - - mlx5e_fill_rqt_rqns(priv, rqtc, rqt_ix); + if (sz > 1) /* RSS */ + mlx5e_fill_indir_rqt_rqns(priv, rqtc); + else + mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix); MLX5_SET(modify_rqt_in, in, bitmask.rqn_list, 1); - err = mlx5_core_modify_rqt(mdev, priv->rqtn[rqt_ix], in, inlen); + err = mlx5_core_modify_rqt(mdev, rqtn, in, inlen); kvfree(in); return err; } -static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, enum mlx5e_rqt_ix rqt_ix) -{ - mlx5_core_destroy_rqt(priv->mdev, priv->rqtn[rqt_ix]); -} - static void mlx5e_redirect_rqts(struct mlx5e_priv *priv) { - mlx5e_redirect_rqt(priv, MLX5E_INDIRECTION_RQT); - mlx5e_redirect_rqt(priv, MLX5E_SINGLE_RQ_RQT); + u32 rqtn; + int ix; + + rqtn = priv->indir_rqtn; + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); + for (ix = 0; ix < priv->params.num_channels; ix++) { + rqtn = priv->direct_tir[ix].rqtn; + mlx5e_redirect_rqt(priv, rqtn, 1, ix); + } } static void mlx5e_build_tir_ctx_lro(void *tirc, struct mlx5e_priv *priv) @@ -1345,6 +1532,7 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) int inlen; int err; int tt; + int ix; inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = mlx5_vzalloc(inlen); @@ -1356,23 +1544,32 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) mlx5e_build_tir_ctx_lro(tirc, priv); - for (tt = 0; tt < MLX5E_NUM_TT; tt++) { - err = mlx5_core_modify_tir(mdev, priv->tirn[tt], in, inlen); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + err = mlx5_core_modify_tir(mdev, priv->indir_tirn[tt], in, + inlen); if (err) - break; + goto free_in; } + for (ix = 0; ix < mlx5e_get_max_num_channels(mdev); ix++) { + err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn, + in, inlen); + if (err) + goto free_in; + } + +free_in: kvfree(in); return err; } -static int mlx5e_refresh_tir_self_loopback_enable(struct mlx5_core_dev *mdev, - u32 tirn) +static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv) { void *in; int inlen; int err; + int i; inlen = MLX5_ST_SZ_BYTES(modify_tir_in); in = mlx5_vzalloc(inlen); @@ -1381,25 +1578,23 @@ static int mlx5e_refresh_tir_self_loopback_enable(struct mlx5_core_dev *mdev, MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); - err = mlx5_core_modify_tir(mdev, tirn, in, inlen); - - kvfree(in); - - return err; -} - -static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv) -{ - int err; - int i; + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) { + err = mlx5_core_modify_tir(priv->mdev, priv->indir_tirn[i], in, + inlen); + if (err) + return err; + } - for (i = 0; i < MLX5E_NUM_TT; i++) { - err = mlx5e_refresh_tir_self_loopback_enable(priv->mdev, - priv->tirn[i]); + for (i = 0; i < priv->params.num_channels; i++) { + err = mlx5_core_modify_tir(priv->mdev, + priv->direct_tir[i].tirn, in, + inlen); if (err) return err; } + kvfree(in); + return 0; } @@ -1503,6 +1698,9 @@ int mlx5e_open_locked(struct net_device *netdev) mlx5e_redirect_rqts(priv); mlx5e_update_carrier(priv); mlx5e_timestamp_init(priv); +#ifdef CONFIG_RFS_ACCEL + priv->netdev->rx_cpu_rmap = priv->mdev->rmap; +#endif queue_delayed_work(priv->wq, &priv->update_stats_work, 0); @@ -1710,7 +1908,8 @@ static void mlx5e_destroy_tises(struct mlx5e_priv *priv) mlx5e_destroy_tis(priv, tc); } -static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, int tt) +static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, + enum mlx5e_traffic_types tt) { void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); @@ -1731,19 +1930,8 @@ static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, int tt) mlx5e_build_tir_ctx_lro(tirc, priv); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); - - switch (tt) { - case MLX5E_TT_ANY: - MLX5_SET(tirc, tirc, indirect_table, - priv->rqtn[MLX5E_SINGLE_RQ_RQT]); - MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); - break; - default: - MLX5_SET(tirc, tirc, indirect_table, - priv->rqtn[MLX5E_INDIRECTION_RQT]); - mlx5e_build_tir_ctx_hash(tirc, priv); - break; - } + MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqtn); + mlx5e_build_tir_ctx_hash(tirc, priv); switch (tt) { case MLX5E_TT_IPV4_TCP: @@ -1823,64 +2011,107 @@ static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, int tt) MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); break; + default: + WARN_ONCE(true, + "mlx5e_build_indir_tir_ctx: bad traffic type!\n"); } } -static int mlx5e_create_tir(struct mlx5e_priv *priv, int tt) +static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, + u32 rqtn) { - struct mlx5_core_dev *mdev = priv->mdev; - u32 *in; + MLX5_SET(tirc, tirc, transport_domain, priv->tdn); + + mlx5e_build_tir_ctx_lro(tirc, priv); + + MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); + MLX5_SET(tirc, tirc, indirect_table, rqtn); + MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); +} + +static int mlx5e_create_tirs(struct mlx5e_priv *priv) +{ + int nch = mlx5e_get_max_num_channels(priv->mdev); void *tirc; int inlen; + u32 *tirn; int err; + u32 *in; + int ix; + int tt; inlen = MLX5_ST_SZ_BYTES(create_tir_in); in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; - tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + /* indirect tirs */ + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + memset(in, 0, inlen); + tirn = &priv->indir_tirn[tt]; + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + mlx5e_build_indir_tir_ctx(priv, tirc, tt); + err = mlx5_core_create_tir(priv->mdev, in, inlen, tirn); + if (err) + goto err_destroy_tirs; + } + + /* direct tirs */ + for (ix = 0; ix < nch; ix++) { + memset(in, 0, inlen); + tirn = &priv->direct_tir[ix].tirn; + tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); + mlx5e_build_direct_tir_ctx(priv, tirc, + priv->direct_tir[ix].rqtn); + err = mlx5_core_create_tir(priv->mdev, in, inlen, tirn); + if (err) + goto err_destroy_ch_tirs; + } - mlx5e_build_tir_ctx(priv, tirc, tt); + kvfree(in); - err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]); + return 0; + +err_destroy_ch_tirs: + for (ix--; ix >= 0; ix--) + mlx5_core_destroy_tir(priv->mdev, priv->direct_tir[ix].tirn); + +err_destroy_tirs: + for (tt--; tt >= 0; tt--) + mlx5_core_destroy_tir(priv->mdev, priv->indir_tirn[tt]); kvfree(in); return err; } -static void mlx5e_destroy_tir(struct mlx5e_priv *priv, int tt) +static void mlx5e_destroy_tirs(struct mlx5e_priv *priv) { - mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]); + int nch = mlx5e_get_max_num_channels(priv->mdev); + int i; + + for (i = 0; i < nch; i++) + mlx5_core_destroy_tir(priv->mdev, priv->direct_tir[i].tirn); + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) + mlx5_core_destroy_tir(priv->mdev, priv->indir_tirn[i]); } -static int mlx5e_create_tirs(struct mlx5e_priv *priv) +int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd) { - int err; + int err = 0; int i; - for (i = 0; i < MLX5E_NUM_TT; i++) { - err = mlx5e_create_tir(priv, i); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + return 0; + + for (i = 0; i < priv->params.num_channels; i++) { + err = mlx5e_modify_rq_vsd(&priv->channel[i]->rq, vsd); if (err) - goto err_destroy_tirs; + return err; } return 0; - -err_destroy_tirs: - for (i--; i >= 0; i--) - mlx5e_destroy_tir(priv, i); - - return err; -} - -static void mlx5e_destroy_tirs(struct mlx5e_priv *priv) -{ - int i; - - for (i = 0; i < MLX5E_NUM_TT; i++) - mlx5e_destroy_tir(priv, i); } static int mlx5e_setup_tc(struct net_device *netdev, u8 tc) @@ -1923,6 +2154,8 @@ static int mlx5e_ndo_setup_tc(struct net_device *dev, u32 handle, return mlx5e_configure_flower(priv, proto, tc->cls_flower); case TC_CLSFLOWER_DESTROY: return mlx5e_delete_flower(priv, tc->cls_flower); + case TC_CLSFLOWER_STATS: + return mlx5e_stats_flower(priv, tc->cls_flower); } default: return -EOPNOTSUPP; @@ -1939,19 +2172,37 @@ static struct rtnl_link_stats64 * mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_sw_stats *sstats = &priv->stats.sw; struct mlx5e_vport_stats *vstats = &priv->stats.vport; - - stats->rx_packets = vstats->rx_packets; - stats->rx_bytes = vstats->rx_bytes; - stats->tx_packets = vstats->tx_packets; - stats->tx_bytes = vstats->tx_bytes; - stats->multicast = vstats->rx_multicast_packets + - vstats->tx_multicast_packets; - stats->tx_errors = vstats->tx_error_packets; - stats->rx_errors = vstats->rx_error_packets; - stats->tx_dropped = vstats->tx_queue_dropped; - stats->rx_crc_errors = 0; - stats->rx_length_errors = 0; + struct mlx5e_pport_stats *pstats = &priv->stats.pport; + + stats->rx_packets = sstats->rx_packets; + stats->rx_bytes = sstats->rx_bytes; + stats->tx_packets = sstats->tx_packets; + stats->tx_bytes = sstats->tx_bytes; + + stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer; + stats->tx_dropped = sstats->tx_queue_dropped; + + stats->rx_length_errors = + PPORT_802_3_GET(pstats, a_in_range_length_errors) + + PPORT_802_3_GET(pstats, a_out_of_range_length_field) + + PPORT_802_3_GET(pstats, a_frame_too_long_errors); + stats->rx_crc_errors = + PPORT_802_3_GET(pstats, a_frame_check_sequence_errors); + stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors); + stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards); + stats->tx_carrier_errors = + PPORT_802_3_GET(pstats, a_symbol_error_during_carrier); + stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors + + stats->rx_frame_errors; + stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors; + + /* vport multicast also counts packets that are dropped due to steering + * or rx out of buffer + */ + stats->multicast = + VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); return stats; } @@ -1980,49 +2231,153 @@ static int mlx5e_set_mac(struct net_device *netdev, void *addr) return 0; } -static int mlx5e_set_features(struct net_device *netdev, - netdev_features_t features) +#define MLX5E_SET_FEATURE(netdev, feature, enable) \ + do { \ + if (enable) \ + netdev->features |= feature; \ + else \ + netdev->features &= ~feature; \ + } while (0) + +typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable); + +static int set_feature_lro(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); - int err = 0; - netdev_features_t changes = features ^ netdev->features; + bool was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + int err; mutex_lock(&priv->state_lock); - if (changes & NETIF_F_LRO) { - bool was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); - - if (was_opened) - mlx5e_close_locked(priv->netdev); - - priv->params.lro_en = !!(features & NETIF_F_LRO); - err = mlx5e_modify_tirs_lro(priv); - if (err) - mlx5_core_warn(priv->mdev, "lro modify failed, %d\n", - err); + if (was_opened && (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST)) + mlx5e_close_locked(priv->netdev); - if (was_opened) - err = mlx5e_open_locked(priv->netdev); + priv->params.lro_en = enable; + err = mlx5e_modify_tirs_lro(priv); + if (err) { + netdev_err(netdev, "lro modify failed, %d\n", err); + priv->params.lro_en = !enable; } + if (was_opened && (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST)) + mlx5e_open_locked(priv->netdev); + mutex_unlock(&priv->state_lock); - if (changes & NETIF_F_HW_VLAN_CTAG_FILTER) { - if (features & NETIF_F_HW_VLAN_CTAG_FILTER) - mlx5e_enable_vlan_filter(priv); - else - mlx5e_disable_vlan_filter(priv); - } + return err; +} + +static int set_feature_vlan_filter(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (enable) + mlx5e_enable_vlan_filter(priv); + else + mlx5e_disable_vlan_filter(priv); + + return 0; +} - if ((changes & NETIF_F_HW_TC) && !(features & NETIF_F_HW_TC) && - mlx5e_tc_num_filters(priv)) { +static int set_feature_tc_num_filters(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + if (!enable && mlx5e_tc_num_filters(priv)) { netdev_err(netdev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); return -EINVAL; } + return 0; +} + +static int set_feature_rx_all(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_set_port_fcs(mdev, !enable); +} + +static int set_feature_rx_vlan(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mutex_lock(&priv->state_lock); + + priv->params.vlan_strip_disable = !enable; + err = mlx5e_modify_rqs_vsd(priv, !enable); + if (err) + priv->params.vlan_strip_disable = enable; + + mutex_unlock(&priv->state_lock); + + return err; +} + +#ifdef CONFIG_RFS_ACCEL +static int set_feature_arfs(struct net_device *netdev, bool enable) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + if (enable) + err = mlx5e_arfs_enable(priv); + else + err = mlx5e_arfs_disable(priv); + return err; } +#endif + +static int mlx5e_handle_feature(struct net_device *netdev, + netdev_features_t wanted_features, + netdev_features_t feature, + mlx5e_feature_handler feature_handler) +{ + netdev_features_t changes = wanted_features ^ netdev->features; + bool enable = !!(wanted_features & feature); + int err; + + if (!(changes & feature)) + return 0; + + err = feature_handler(netdev, enable); + if (err) { + netdev_err(netdev, "%s feature 0x%llx failed err %d\n", + enable ? "Enable" : "Disable", feature, err); + return err; + } + + MLX5E_SET_FEATURE(netdev, feature, enable); + return 0; +} + +static int mlx5e_set_features(struct net_device *netdev, + netdev_features_t features) +{ + int err; + + err = mlx5e_handle_feature(netdev, features, NETIF_F_LRO, + set_feature_lro); + err |= mlx5e_handle_feature(netdev, features, + NETIF_F_HW_VLAN_CTAG_FILTER, + set_feature_vlan_filter); + err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_TC, + set_feature_tc_num_filters); + err |= mlx5e_handle_feature(netdev, features, NETIF_F_RXALL, + set_feature_rx_all); + err |= mlx5e_handle_feature(netdev, features, NETIF_F_HW_VLAN_CTAG_RX, + set_feature_rx_vlan); +#ifdef CONFIG_RFS_ACCEL + err |= mlx5e_handle_feature(netdev, features, NETIF_F_NTUPLE, + set_feature_arfs); +#endif + + return err ? -EINVAL : 0; +} #define MXL5_HW_MIN_MTU 64 #define MXL5E_MIN_MTU (MXL5_HW_MIN_MTU + ETH_FCS_LEN) @@ -2093,6 +2448,21 @@ static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) vlan, qos); } +static int mlx5e_set_vf_spoofchk(struct net_device *dev, int vf, bool setting) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_spoofchk(mdev->priv.eswitch, vf + 1, setting); +} + +static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + + return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting); +} static int mlx5_vport_link2ifla(u8 esw_link) { switch (esw_link) { @@ -2149,7 +2519,6 @@ static int mlx5e_get_vf_stats(struct net_device *dev, vf_stats); } -#if IS_ENABLED(CONFIG_MLX5_CORE_EN_VXLAN) static void mlx5e_add_vxlan_port(struct net_device *netdev, sa_family_t sa_family, __be16 port) { @@ -2221,7 +2590,6 @@ static netdev_features_t mlx5e_features_check(struct sk_buff *skb, return features; } -#endif static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_open = mlx5e_open, @@ -2237,6 +2605,9 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_set_features = mlx5e_set_features, .ndo_change_mtu = mlx5e_change_mtu, .ndo_do_ioctl = mlx5e_ioctl, +#ifdef CONFIG_RFS_ACCEL + .ndo_rx_flow_steer = mlx5e_rx_flow_steer, +#endif }; static const struct net_device_ops mlx5e_netdev_ops_sriov = { @@ -2253,13 +2624,16 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_set_features = mlx5e_set_features, .ndo_change_mtu = mlx5e_change_mtu, .ndo_do_ioctl = mlx5e_ioctl, -#ifdef CONFIG_MLX5_CORE_EN_VXLAN .ndo_add_vxlan_port = mlx5e_add_vxlan_port, .ndo_del_vxlan_port = mlx5e_del_vxlan_port, .ndo_features_check = mlx5e_features_check, +#ifdef CONFIG_RFS_ACCEL + .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif .ndo_set_vf_mac = mlx5e_set_vf_mac, .ndo_set_vf_vlan = mlx5e_set_vf_vlan, + .ndo_set_vf_spoofchk = mlx5e_set_vf_spoofchk, + .ndo_set_vf_trust = mlx5e_set_vf_trust, .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, @@ -2317,25 +2691,121 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv) } #endif -void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, +void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, + u32 *indirection_rqt, int len, int num_channels) { + int node = mdev->priv.numa_node; + int node_num_of_cores; int i; + if (node == -1) + node = first_online_node; + + node_num_of_cores = cpumask_weight(cpumask_of_node(node)); + + if (node_num_of_cores) + num_channels = min_t(int, num_channels, node_num_of_cores); + for (i = 0; i < len; i++) indirection_rqt[i] = i % num_channels; } +static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_GEN(mdev, striding_rq) && + MLX5_CAP_GEN(mdev, umr_ptr_rlky) && + MLX5_CAP_ETH(mdev, reg_umr_sq); +} + +static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw) +{ + enum pcie_link_width width; + enum pci_bus_speed speed; + int err = 0; + + err = pcie_get_minimum_link(mdev->pdev, &speed, &width); + if (err) + return err; + + if (speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN) + return -EINVAL; + + switch (speed) { + case PCIE_SPEED_2_5GT: + *pci_bw = 2500 * width; + break; + case PCIE_SPEED_5_0GT: + *pci_bw = 5000 * width; + break; + case PCIE_SPEED_8_0GT: + *pci_bw = 8000 * width; + break; + default: + return -EINVAL; + } + + return 0; +} + +static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw) +{ + return (link_speed && pci_bw && + (pci_bw < 40000) && (pci_bw < link_speed)); +} + static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, struct net_device *netdev, int num_channels) { struct mlx5e_priv *priv = netdev_priv(netdev); + u32 link_speed = 0; + u32 pci_bw = 0; priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; - priv->params.log_rq_size = - MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + priv->params.rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) ? + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : + MLX5_WQ_TYPE_LINKED_LIST; + + /* set CQE compression */ + priv->params.rx_cqe_compress_admin = false; + if (MLX5_CAP_GEN(mdev, cqe_compression) && + MLX5_CAP_GEN(mdev, vport_group_manager)) { + mlx5e_get_max_linkspeed(mdev, &link_speed); + mlx5e_get_pci_bw(mdev, &pci_bw); + mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n", + link_speed, pci_bw); + priv->params.rx_cqe_compress_admin = + cqe_compress_heuristic(link_speed, pci_bw); + } + + priv->params.rx_cqe_compress = priv->params.rx_cqe_compress_admin; + + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; + priv->params.mpwqe_log_stride_sz = + priv->params.rx_cqe_compress ? + MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS : + MLX5_MPWRQ_LOG_STRIDE_SIZE; + priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - + priv->params.mpwqe_log_stride_sz; + priv->params.lro_en = true; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + } + + mlx5_core_info(mdev, + "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", + priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, + BIT(priv->params.log_rq_size), + BIT(priv->params.mpwqe_log_stride_sz), + priv->params.rx_cqe_compress_admin); + + priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, + BIT(priv->params.log_rq_size)); priv->params.rx_cq_moderation_usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; priv->params.rx_cq_moderation_pkts = @@ -2345,15 +2815,13 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, priv->params.tx_cq_moderation_pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); - priv->params.min_rx_wqes = - MLX5E_PARAMS_DEFAULT_MIN_RX_WQES; priv->params.num_tc = 1; priv->params.rss_hfunc = ETH_RSS_HASH_XOR; netdev_rss_key_fill(priv->params.toeplitz_hash_key, sizeof(priv->params.toeplitz_hash_key)); - mlx5e_build_default_indir_rqt(priv->params.indirection_rqt, + mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, num_channels); priv->params.lro_wqe_sz = @@ -2390,6 +2858,8 @@ static void mlx5e_build_netdev(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + bool fcs_supported; + bool fcs_enabled; SET_NETDEV_DEV(netdev, &mdev->pdev->dev); @@ -2424,25 +2894,41 @@ static void mlx5e_build_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; if (mlx5e_vxlan_allowed(mdev)) { - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; netdev->hw_enc_features |= NETIF_F_IP_CSUM; - netdev->hw_enc_features |= NETIF_F_RXCSUM; + netdev->hw_enc_features |= NETIF_F_IPV6_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; netdev->hw_enc_features |= NETIF_F_TSO6; - netdev->hw_enc_features |= NETIF_F_RXHASH; netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; + netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; } + mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled); + + if (fcs_supported) + netdev->hw_features |= NETIF_F_RXALL; + netdev->features = netdev->hw_features; if (!priv->params.lro_en) netdev->features &= ~NETIF_F_LRO; + if (fcs_enabled) + netdev->features &= ~NETIF_F_RXALL; + #define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f) if (FT_CAP(flow_modify_en) && FT_CAP(modify_root) && FT_CAP(identified_miss_table_mode) && - FT_CAP(flow_table_modify)) - priv->netdev->hw_features |= NETIF_F_HW_TC; + FT_CAP(flow_table_modify)) { + netdev->hw_features |= NETIF_F_HW_TC; +#ifdef CONFIG_RFS_ACCEL + netdev->hw_features |= NETIF_F_NTUPLE; +#endif + } netdev->features |= NETIF_F_HIGHDMA; @@ -2476,6 +2962,61 @@ static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, return err; } +static void mlx5e_create_q_counter(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + + err = mlx5_core_alloc_q_counter(mdev, &priv->q_counter); + if (err) { + mlx5_core_warn(mdev, "alloc queue counter failed, %d\n", err); + priv->q_counter = 0; + } +} + +static void mlx5e_destroy_q_counter(struct mlx5e_priv *priv) +{ + if (!priv->q_counter) + return; + + mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter); +} + +static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_create_mkey_mbox_in *in; + struct mlx5_mkey_seg *mkc; + int inlen = sizeof(*in); + u64 npages = + mlx5e_get_max_num_channels(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS; + int err; + + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + mkc = &in->seg; + mkc->status = MLX5_MKEY_STATUS_FREE; + mkc->flags = MLX5_PERM_UMR_EN | + MLX5_PERM_LOCAL_READ | + MLX5_PERM_LOCAL_WRITE | + MLX5_ACCESS_MODE_MTT; + + mkc->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + mkc->flags_pd = cpu_to_be32(priv->pdn); + mkc->len = cpu_to_be64(npages << PAGE_SHIFT); + mkc->xlt_oct_size = cpu_to_be32(mlx5e_get_mtt_octw(npages)); + mkc->log2_page_size = PAGE_SHIFT; + + err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen, NULL, + NULL, NULL); + + kvfree(in); + + return err; +} + static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) { struct net_device *netdev; @@ -2529,10 +3070,16 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) goto err_dealloc_transport_domain; } + err = mlx5e_create_umr_mkey(priv); + if (err) { + mlx5_core_err(mdev, "create umr mkey failed, %d\n", err); + goto err_destroy_mkey; + } + err = mlx5e_create_tises(priv); if (err) { mlx5_core_warn(mdev, "create tises failed, %d\n", err); - goto err_destroy_mkey; + goto err_destroy_umr_mkey; } err = mlx5e_open_drop_rq(priv); @@ -2541,37 +3088,33 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) goto err_destroy_tises; } - err = mlx5e_create_rqt(priv, MLX5E_INDIRECTION_RQT); + err = mlx5e_create_rqts(priv); if (err) { - mlx5_core_warn(mdev, "create rqt(INDIR) failed, %d\n", err); + mlx5_core_warn(mdev, "create rqts failed, %d\n", err); goto err_close_drop_rq; } - err = mlx5e_create_rqt(priv, MLX5E_SINGLE_RQ_RQT); - if (err) { - mlx5_core_warn(mdev, "create rqt(SINGLE) failed, %d\n", err); - goto err_destroy_rqt_indir; - } - err = mlx5e_create_tirs(priv); if (err) { mlx5_core_warn(mdev, "create tirs failed, %d\n", err); - goto err_destroy_rqt_single; + goto err_destroy_rqts; } - err = mlx5e_create_flow_tables(priv); + err = mlx5e_create_flow_steering(priv); if (err) { - mlx5_core_warn(mdev, "create flow tables failed, %d\n", err); + mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); goto err_destroy_tirs; } - mlx5e_init_eth_addr(priv); + mlx5e_create_q_counter(priv); + + mlx5e_init_l2_addr(priv); mlx5e_vxlan_init(priv); err = mlx5e_tc_init(priv); if (err) - goto err_destroy_flow_tables; + goto err_dealloc_q_counters; #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets); @@ -2583,8 +3126,11 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) goto err_tc_cleanup; } - if (mlx5e_vxlan_allowed(mdev)) + if (mlx5e_vxlan_allowed(mdev)) { + rtnl_lock(); vxlan_get_rx_port(netdev); + rtnl_unlock(); + } mlx5e_enable_async_events(priv); queue_work(priv->wq, &priv->set_rx_mode_work); @@ -2594,17 +3140,15 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) err_tc_cleanup: mlx5e_tc_cleanup(priv); -err_destroy_flow_tables: - mlx5e_destroy_flow_tables(priv); +err_dealloc_q_counters: + mlx5e_destroy_q_counter(priv); + mlx5e_destroy_flow_steering(priv); err_destroy_tirs: mlx5e_destroy_tirs(priv); -err_destroy_rqt_single: - mlx5e_destroy_rqt(priv, MLX5E_SINGLE_RQ_RQT); - -err_destroy_rqt_indir: - mlx5e_destroy_rqt(priv, MLX5E_INDIRECTION_RQT); +err_destroy_rqts: + mlx5e_destroy_rqts(priv); err_close_drop_rq: mlx5e_close_drop_rq(priv); @@ -2612,6 +3156,9 @@ err_close_drop_rq: err_destroy_tises: mlx5e_destroy_tises(priv); +err_destroy_umr_mkey: + mlx5_core_destroy_mkey(mdev, &priv->umr_mkey); + err_destroy_mkey: mlx5_core_destroy_mkey(mdev, &priv->mkey); @@ -2655,12 +3202,13 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) mlx5e_tc_cleanup(priv); mlx5e_vxlan_cleanup(priv); - mlx5e_destroy_flow_tables(priv); + mlx5e_destroy_q_counter(priv); + mlx5e_destroy_flow_steering(priv); mlx5e_destroy_tirs(priv); - mlx5e_destroy_rqt(priv, MLX5E_SINGLE_RQ_RQT); - mlx5e_destroy_rqt(priv, MLX5E_INDIRECTION_RQT); + mlx5e_destroy_rqts(priv); mlx5e_close_drop_rq(priv); mlx5e_destroy_tises(priv); + mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey); mlx5_core_destroy_mkey(priv->mdev, &priv->mkey); mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn); mlx5_core_dealloc_pd(priv->mdev, priv->pdn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 58d4e2f962c3..f3456798c596 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -42,13 +42,149 @@ static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) return tstamp->hwtstamp_config.rx_filter == HWTSTAMP_FILTER_ALL; } -static inline int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, - struct mlx5e_rx_wqe *wqe, u16 ix) +static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc, + void *data) +{ + u32 ci = cqcc & cq->wq.sz_m1; + + memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64)); +} + +static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq, + struct mlx5e_cq *cq, u32 cqcc) +{ + mlx5e_read_cqe_slot(cq, cqcc, &cq->title); + cq->decmprs_left = be32_to_cpu(cq->title.byte_cnt); + cq->decmprs_wqe_counter = be16_to_cpu(cq->title.wqe_counter); + rq->stats.cqe_compress_blks++; +} + +static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc) +{ + mlx5e_read_cqe_slot(cq, cqcc, cq->mini_arr); + cq->mini_arr_idx = 0; +} + +static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n) +{ + u8 op_own = (cqcc >> cq->wq.log_sz) & 1; + u32 wq_sz = 1 << cq->wq.log_sz; + u32 ci = cqcc & cq->wq.sz_m1; + u32 ci_top = min_t(u32, wq_sz, ci + n); + + for (; ci < ci_top; ci++, n--) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci); + + cqe->op_own = op_own; + } + + if (unlikely(ci == wq_sz)) { + op_own = !op_own; + for (ci = 0; ci < n; ci++) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci); + + cqe->op_own = op_own; + } + } +} + +static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq, + struct mlx5e_cq *cq, u32 cqcc) +{ + u16 wqe_cnt_step; + + cq->title.byte_cnt = cq->mini_arr[cq->mini_arr_idx].byte_cnt; + cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum; + cq->title.op_own &= 0xf0; + cq->title.op_own |= 0x01 & (cqcc >> cq->wq.log_sz); + cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter); + + wqe_cnt_step = + rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ? + mpwrq_get_cqe_consumed_strides(&cq->title) : 1; + cq->decmprs_wqe_counter = + (cq->decmprs_wqe_counter + wqe_cnt_step) & rq->wq.sz_m1; +} + +static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq, + struct mlx5e_cq *cq, u32 cqcc) +{ + mlx5e_decompress_cqe(rq, cq, cqcc); + cq->title.rss_hash_type = 0; + cq->title.rss_hash_result = 0; +} + +static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq, + struct mlx5e_cq *cq, + int update_owner_only, + int budget_rem) +{ + u32 cqcc = cq->wq.cc + update_owner_only; + u32 cqe_count; + u32 i; + + cqe_count = min_t(u32, cq->decmprs_left, budget_rem); + + for (i = update_owner_only; i < cqe_count; + i++, cq->mini_arr_idx++, cqcc++) { + if (cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE) + mlx5e_read_mini_arr_slot(cq, cqcc); + + mlx5e_decompress_cqe_no_hash(rq, cq, cqcc); + rq->handle_rx_cqe(rq, &cq->title); + } + mlx5e_cqes_update_owner(cq, cq->wq.cc, cqcc - cq->wq.cc); + cq->wq.cc = cqcc; + cq->decmprs_left -= cqe_count; + rq->stats.cqe_compress_pkts += cqe_count; + + return cqe_count; +} + +static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, + struct mlx5e_cq *cq, + int budget_rem) +{ + mlx5e_read_title_slot(rq, cq, cq->wq.cc); + mlx5e_read_mini_arr_slot(cq, cq->wq.cc + 1); + mlx5e_decompress_cqe(rq, cq, cq->wq.cc); + rq->handle_rx_cqe(rq, &cq->title); + cq->mini_arr_idx++; + + return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1; +} + +void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val) +{ + bool was_opened; + + if (!MLX5_CAP_GEN(priv->mdev, cqe_compression)) + return; + + mutex_lock(&priv->state_lock); + + if (priv->params.rx_cqe_compress == val) + goto unlock; + + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (was_opened) + mlx5e_close_locked(priv->netdev); + + priv->params.rx_cqe_compress = val; + + if (was_opened) + mlx5e_open_locked(priv->netdev); + +unlock: + mutex_unlock(&priv->state_lock); +} + +int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { struct sk_buff *skb; dma_addr_t dma_addr; - skb = netdev_alloc_skb(rq->netdev, rq->wqe_sz); + skb = napi_alloc_skb(rq->cq.napi, rq->wqe_sz); if (unlikely(!skb)) return -ENOMEM; @@ -62,10 +198,9 @@ static inline int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, if (unlikely(dma_mapping_error(rq->pdev, dma_addr))) goto err_free_skb; - skb_reserve(skb, MLX5E_NET_IP_ALIGN); - *((dma_addr_t *)skb->cb) = dma_addr; - wqe->data.addr = cpu_to_be64(dma_addr + MLX5E_NET_IP_ALIGN); + wqe->data.addr = cpu_to_be64(dma_addr); + wqe->data.lkey = rq->mkey_be; rq->skb[ix] = skb; @@ -77,18 +212,389 @@ err_free_skb: return -ENOMEM; } +static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) +{ + return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER; +} + +static inline void +mlx5e_dma_pre_sync_linear_mpwqe(struct device *pdev, + struct mlx5e_mpw_info *wi, + u32 wqe_offset, u32 len) +{ + dma_sync_single_for_cpu(pdev, wi->dma_info.addr + wqe_offset, + len, DMA_FROM_DEVICE); +} + +static inline void +mlx5e_dma_pre_sync_fragmented_mpwqe(struct device *pdev, + struct mlx5e_mpw_info *wi, + u32 wqe_offset, u32 len) +{ + /* No dma pre sync for fragmented MPWQE */ +} + +static inline void +mlx5e_add_skb_frag_linear_mpwqe(struct mlx5e_rq *rq, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 frag_offset, + u32 len) +{ + unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz); + + wi->skbs_frags[page_idx]++; + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + &wi->dma_info.page[page_idx], frag_offset, + len, truesize); +} + +static inline void +mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 frag_offset, + u32 len) +{ + unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz); + + dma_sync_single_for_cpu(rq->pdev, + wi->umr.dma_info[page_idx].addr + frag_offset, + len, DMA_FROM_DEVICE); + wi->skbs_frags[page_idx]++; + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + wi->umr.dma_info[page_idx].page, frag_offset, + len, truesize); +} + +static inline void +mlx5e_copy_skb_header_linear_mpwqe(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 offset, + u32 headlen) +{ + struct page *page = &wi->dma_info.page[page_idx]; + + skb_copy_to_linear_data(skb, page_address(page) + offset, + ALIGN(headlen, sizeof(long))); +} + +static inline void +mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 offset, + u32 headlen) +{ + u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[page_idx]; + unsigned int len; + + /* Aligning len to sizeof(long) optimizes memcpy performance */ + len = ALIGN(headlen_pg, sizeof(long)); + dma_sync_single_for_cpu(pdev, dma_info->addr + offset, len, + DMA_FROM_DEVICE); + skb_copy_to_linear_data_offset(skb, 0, + page_address(dma_info->page) + offset, + len); + if (unlikely(offset + headlen > PAGE_SIZE)) { + dma_info++; + headlen_pg = len; + len = ALIGN(headlen - headlen_pg, sizeof(long)); + dma_sync_single_for_cpu(pdev, dma_info->addr, len, + DMA_FROM_DEVICE); + skb_copy_to_linear_data_offset(skb, headlen_pg, + page_address(dma_info->page), + len); + } +} + +static u16 mlx5e_get_wqe_mtt_offset(u16 rq_ix, u16 wqe_ix) +{ + return rq_ix * MLX5_CHANNEL_MAX_NUM_MTTS + + wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); +} + +static void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, + struct mlx5e_sq *sq, + struct mlx5e_umr_wqe *wqe, + u16 ix) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; + struct mlx5_wqe_data_seg *dseg = &wqe->data; + struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); + u16 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq->ix, ix); + + memset(wqe, 0, sizeof(*wqe)); + cseg->opmod_idx_opcode = + cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | + MLX5_OPCODE_UMR); + cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + ds_cnt); + cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + cseg->imm = rq->umr_mkey_be; + + ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN; + ucseg->klm_octowords = + cpu_to_be16(mlx5e_get_mtt_octw(MLX5_MPWRQ_PAGES_PER_WQE)); + ucseg->bsf_octowords = + cpu_to_be16(mlx5e_get_mtt_octw(umr_wqe_mtt_offset)); + ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + + dseg->lkey = sq->mkey_be; + dseg->addr = cpu_to_be64(wi->umr.mtt_addr); +} + +static void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) +{ + struct mlx5e_sq *sq = &rq->channel->icosq; + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_umr_wqe *wqe; + u8 num_wqebbs = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_BB); + u16 pi; + + /* fill sq edge with nops to avoid wqe wrap around */ + while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { + sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; + sq->ico_wqe_info[pi].num_wqebbs = 1; + mlx5e_send_nop(sq, true); + } + + wqe = mlx5_wq_cyc_get_wqe(wq, pi); + mlx5e_build_umr_wqe(rq, sq, wqe, ix); + sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_UMR; + sq->ico_wqe_info[pi].num_wqebbs = num_wqebbs; + sq->pc += num_wqebbs; + mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); +} + +static inline int mlx5e_get_wqe_mtt_sz(void) +{ + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. + * To avoid copying garbage after the mtt array, we allocate + * a little more. + */ + return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64), + MLX5_UMR_MTT_ALIGNMENT); +} + +static int mlx5e_alloc_and_map_page(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi, + int i) +{ + struct page *page; + + page = dev_alloc_page(); + if (unlikely(!page)) + return -ENOMEM; + + wi->umr.dma_info[i].page = page; + wi->umr.dma_info[i].addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + if (unlikely(dma_mapping_error(rq->pdev, wi->umr.dma_info[i].addr))) { + put_page(page); + return -ENOMEM; + } + wi->umr.mtt[i] = cpu_to_be64(wi->umr.dma_info[i].addr | MLX5_EN_WR); + + return 0; +} + +static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_rx_wqe *wqe, + u16 ix) +{ + struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + int mtt_sz = mlx5e_get_wqe_mtt_sz(); + u32 dma_offset = mlx5e_get_wqe_mtt_offset(rq->ix, ix) << PAGE_SHIFT; + int i; + + wi->umr.dma_info = kmalloc(sizeof(*wi->umr.dma_info) * + MLX5_MPWRQ_PAGES_PER_WQE, + GFP_ATOMIC); + if (unlikely(!wi->umr.dma_info)) + goto err_out; + + /* We allocate more than mtt_sz as we will align the pointer */ + wi->umr.mtt_no_align = kzalloc(mtt_sz + MLX5_UMR_ALIGN - 1, + GFP_ATOMIC); + if (unlikely(!wi->umr.mtt_no_align)) + goto err_free_umr; + + wi->umr.mtt = PTR_ALIGN(wi->umr.mtt_no_align, MLX5_UMR_ALIGN); + wi->umr.mtt_addr = dma_map_single(rq->pdev, wi->umr.mtt, mtt_sz, + PCI_DMA_TODEVICE); + if (unlikely(dma_mapping_error(rq->pdev, wi->umr.mtt_addr))) + goto err_free_mtt; + + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + if (unlikely(mlx5e_alloc_and_map_page(rq, wi, i))) + goto err_unmap; + atomic_add(mlx5e_mpwqe_strides_per_page(rq), + &wi->umr.dma_info[i].page->_count); + wi->skbs_frags[i] = 0; + } + + wi->consumed_strides = 0; + wi->dma_pre_sync = mlx5e_dma_pre_sync_fragmented_mpwqe; + wi->add_skb_frag = mlx5e_add_skb_frag_fragmented_mpwqe; + wi->copy_skb_header = mlx5e_copy_skb_header_fragmented_mpwqe; + wi->free_wqe = mlx5e_free_rx_fragmented_mpwqe; + wqe->data.lkey = rq->umr_mkey_be; + wqe->data.addr = cpu_to_be64(dma_offset); + + return 0; + +err_unmap: + while (--i >= 0) { + dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + atomic_sub(mlx5e_mpwqe_strides_per_page(rq), + &wi->umr.dma_info[i].page->_count); + put_page(wi->umr.dma_info[i].page); + } + dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); + +err_free_mtt: + kfree(wi->umr.mtt_no_align); + +err_free_umr: + kfree(wi->umr.dma_info); + +err_out: + return -ENOMEM; +} + +void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi) +{ + int mtt_sz = mlx5e_get_wqe_mtt_sz(); + int i; + + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, + PCI_DMA_FROMDEVICE); + atomic_sub(mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i], + &wi->umr.dma_info[i].page->_count); + put_page(wi->umr.dma_info[i].page); + } + dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); + kfree(wi->umr.mtt_no_align); + kfree(wi->umr.dma_info); +} + +void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = &rq->wq; + struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); + + clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); + rq->stats.mpwqe_frag++; + + /* ensure wqes are visible to device before updating doorbell record */ + dma_wmb(); + + mlx5_wq_ll_update_db_record(wq); +} + +static int mlx5e_alloc_rx_linear_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_rx_wqe *wqe, + u16 ix) +{ + struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + gfp_t gfp_mask; + int i; + + gfp_mask = GFP_ATOMIC | __GFP_COLD | __GFP_MEMALLOC; + wi->dma_info.page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, + MLX5_MPWRQ_WQE_PAGE_ORDER); + if (unlikely(!wi->dma_info.page)) + return -ENOMEM; + + wi->dma_info.addr = dma_map_page(rq->pdev, wi->dma_info.page, 0, + rq->wqe_sz, PCI_DMA_FROMDEVICE); + if (unlikely(dma_mapping_error(rq->pdev, wi->dma_info.addr))) { + put_page(wi->dma_info.page); + return -ENOMEM; + } + + /* We split the high-order page into order-0 ones and manage their + * reference counter to minimize the memory held by small skb fragments + */ + split_page(wi->dma_info.page, MLX5_MPWRQ_WQE_PAGE_ORDER); + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + atomic_add(mlx5e_mpwqe_strides_per_page(rq), + &wi->dma_info.page[i]._count); + wi->skbs_frags[i] = 0; + } + + wi->consumed_strides = 0; + wi->dma_pre_sync = mlx5e_dma_pre_sync_linear_mpwqe; + wi->add_skb_frag = mlx5e_add_skb_frag_linear_mpwqe; + wi->copy_skb_header = mlx5e_copy_skb_header_linear_mpwqe; + wi->free_wqe = mlx5e_free_rx_linear_mpwqe; + wqe->data.lkey = rq->mkey_be; + wqe->data.addr = cpu_to_be64(wi->dma_info.addr); + + return 0; +} + +void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_mpw_info *wi) +{ + int i; + + dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz, + PCI_DMA_FROMDEVICE); + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { + atomic_sub(mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i], + &wi->dma_info.page[i]._count); + put_page(&wi->dma_info.page[i]); + } +} + +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +{ + int err; + + err = mlx5e_alloc_rx_linear_mpwqe(rq, wqe, ix); + if (unlikely(err)) { + err = mlx5e_alloc_rx_fragmented_mpwqe(rq, wqe, ix); + if (unlikely(err)) + return err; + set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + mlx5e_post_umr_wqe(rq, ix); + return -EBUSY; + } + + return 0; +} + +#define RQ_CANNOT_POST(rq) \ + (!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state) || \ + test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) + bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; - if (unlikely(!test_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state))) + if (unlikely(RQ_CANNOT_POST(rq))) return false; while (!mlx5_wq_ll_is_full(wq)) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); + int err; - if (unlikely(mlx5e_alloc_rx_wqe(rq, wqe, wq->head))) + err = rq->alloc_wqe(rq, wqe, wq->head); + if (unlikely(err)) { + if (err != -EBUSY) + rq->stats.buff_alloc_err++; break; + } mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); } @@ -101,7 +607,8 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) return !mlx5_wq_ll_is_full(wq); } -static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe) +static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, + u32 cqe_bcnt) { struct ethhdr *eth = (struct ethhdr *)(skb->data); struct iphdr *ipv4 = (struct iphdr *)(skb->data + ETH_HLEN); @@ -112,7 +619,7 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe) int tcp_ack = ((CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA == l4_hdr_type) || (CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA == l4_hdr_type)); - u16 tot_len = be32_to_cpu(cqe->byte_cnt) - ETH_HLEN; + u16 tot_len = cqe_bcnt - ETH_HLEN; if (eth->h_proto == htons(ETH_P_IP)) { tcp = (struct tcphdr *)(skb->data + ETH_HLEN + @@ -176,35 +683,43 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, if (lro) { skb->ip_summed = CHECKSUM_UNNECESSARY; - } else if (likely(is_first_ethertype_ip(skb))) { + return; + } + + if (is_first_ethertype_ip(skb)) { skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); rq->stats.csum_sw++; - } else { - goto csum_none; + return; } - return; - + if (likely((cqe->hds_ip_ext & CQE_L3_OK) && + (cqe->hds_ip_ext & CQE_L4_OK))) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + if (cqe_is_tunneled(cqe)) { + skb->csum_level = 1; + skb->encapsulation = 1; + rq->stats.csum_inner++; + } + return; + } csum_none: skb->ip_summed = CHECKSUM_NONE; rq->stats.csum_none++; } static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, struct mlx5e_rq *rq, struct sk_buff *skb) { struct net_device *netdev = rq->netdev; - u32 cqe_bcnt = be32_to_cpu(cqe->byte_cnt); struct mlx5e_tstamp *tstamp = rq->tstamp; int lro_num_seg; - skb_put(skb, cqe_bcnt); - lro_num_seg = be32_to_cpu(cqe->srqn) >> 24; if (lro_num_seg > 1) { - mlx5e_lro_update_hdr(skb, cqe); + mlx5e_lro_update_hdr(skb, cqe, cqe_bcnt); skb_shinfo(skb)->gso_size = DIV_ROUND_UP(cqe_bcnt, lro_num_seg); rq->stats.lro_packets++; rq->stats.lro_bytes += cqe_bcnt; @@ -213,10 +728,6 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, if (unlikely(mlx5e_rx_hw_stamp(tstamp))) mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb)); - mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg); - - skb->protocol = eth_type_trans(skb, netdev); - skb_record_rx_queue(skb, rq->ix); if (likely(netdev->features & NETIF_F_RXHASH)) @@ -227,52 +738,165 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, be16_to_cpu(cqe->vlan_info)); skb->mark = be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK; + + mlx5e_handle_csum(netdev, cqe, rq, skb, !!lro_num_seg); + skb->protocol = eth_type_trans(skb, netdev); +} + +static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + rq->stats.packets++; + rq->stats.bytes += cqe_bcnt; + mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); + napi_gro_receive(rq->cq.napi, skb); +} + +void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct mlx5e_rx_wqe *wqe; + struct sk_buff *skb; + __be16 wqe_counter_be; + u16 wqe_counter; + u32 cqe_bcnt; + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + skb = rq->skb[wqe_counter]; + prefetch(skb->data); + rq->skb[wqe_counter] = NULL; + + dma_unmap_single(rq->pdev, + *((dma_addr_t *)skb->cb), + rq->wqe_sz, + DMA_FROM_DEVICE); + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + rq->stats.wqe_err++; + dev_kfree_skb(skb); + goto wq_ll_pop; + } + + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + skb_put(skb, cqe_bcnt); + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + +wq_ll_pop: + mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, + &wqe->next.next_wqe_index); +} + +static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + struct mlx5e_mpw_info *wi, + u32 cqe_bcnt, + struct sk_buff *skb) +{ + u32 consumed_bytes = ALIGN(cqe_bcnt, rq->mpwqe_stride_sz); + u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); + u32 wqe_offset = stride_ix * rq->mpwqe_stride_sz; + u32 head_offset = wqe_offset & (PAGE_SIZE - 1); + u32 page_idx = wqe_offset >> PAGE_SHIFT; + u32 head_page_idx = page_idx; + u16 headlen = min_t(u16, MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, cqe_bcnt); + u32 frag_offset = head_offset + headlen; + u16 byte_cnt = cqe_bcnt - headlen; + + if (unlikely(frag_offset >= PAGE_SIZE)) { + page_idx++; + frag_offset -= PAGE_SIZE; + } + wi->dma_pre_sync(rq->pdev, wi, wqe_offset, consumed_bytes); + + while (byte_cnt) { + u32 pg_consumed_bytes = + min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); + + wi->add_skb_frag(rq, skb, wi, page_idx, frag_offset, + pg_consumed_bytes); + byte_cnt -= pg_consumed_bytes; + frag_offset = 0; + page_idx++; + } + /* copy header */ + wi->copy_skb_header(rq->pdev, skb, wi, head_page_idx, head_offset, + headlen); + /* skb linear part was allocated with headlen and aligned to long */ + skb->tail += headlen; + skb->len += headlen; +} + +void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); + u16 wqe_id = be16_to_cpu(cqe->wqe_id); + struct mlx5e_mpw_info *wi = &rq->wqe_info[wqe_id]; + struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id); + struct sk_buff *skb; + u16 cqe_bcnt; + + wi->consumed_strides += cstrides; + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + rq->stats.wqe_err++; + goto mpwrq_cqe_out; + } + + if (unlikely(mpwrq_is_filler_cqe(cqe))) { + rq->stats.mpwqe_filler++; + goto mpwrq_cqe_out; + } + + skb = napi_alloc_skb(rq->cq.napi, + ALIGN(MLX5_MPWRQ_SMALL_PACKET_THRESHOLD, + sizeof(long))); + if (unlikely(!skb)) { + rq->stats.buff_alloc_err++; + goto mpwrq_cqe_out; + } + + prefetch(skb->data); + cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); + + mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb); + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + +mpwrq_cqe_out: + if (likely(wi->consumed_strides < rq->mpwqe_num_strides)) + return; + + wi->free_wqe(rq, wi); + mlx5_wq_ll_pop(&rq->wq, cqe->wqe_id, &wqe->next.next_wqe_index); } int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); - int work_done; + int work_done = 0; + + if (cq->decmprs_left) + work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget); - for (work_done = 0; work_done < budget; work_done++) { - struct mlx5e_rx_wqe *wqe; - struct mlx5_cqe64 *cqe; - struct sk_buff *skb; - __be16 wqe_counter_be; - u16 wqe_counter; + for (; work_done < budget; work_done++) { + struct mlx5_cqe64 *cqe = mlx5e_get_cqe(cq); - cqe = mlx5e_get_cqe(cq); if (!cqe) break; - mlx5_cqwq_pop(&cq->wq); - - wqe_counter_be = cqe->wqe_counter; - wqe_counter = be16_to_cpu(wqe_counter_be); - wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); - skb = rq->skb[wqe_counter]; - prefetch(skb->data); - rq->skb[wqe_counter] = NULL; - - dma_unmap_single(rq->pdev, - *((dma_addr_t *)skb->cb), - rq->wqe_sz, - DMA_FROM_DEVICE); - - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { - rq->stats.wqe_err++; - dev_kfree_skb(skb); - goto wq_ll_pop; + if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) { + work_done += + mlx5e_decompress_cqes_start(rq, cq, + budget - work_done); + continue; } - mlx5e_build_rx_skb(cqe, rq, skb); - rq->stats.packets++; - rq->stats.bytes += be32_to_cpu(cqe->byte_cnt); - napi_gro_receive(cq->napi, skb); + mlx5_cqwq_pop(&cq->wq); -wq_ll_pop: - mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, - &wqe->next.next_wqe_index); + rq->handle_rx_cqe(rq, cqe); } mlx5_cqwq_update_db_record(&cq->wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h new file mode 100644 index 000000000000..83bc32b25849 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -0,0 +1,367 @@ +/* + * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __MLX5_EN_STATS_H__ +#define __MLX5_EN_STATS_H__ + +#define MLX5E_READ_CTR64_CPU(ptr, dsc, i) \ + (*(u64 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR64_BE(ptr, dsc, i) \ + be64_to_cpu(*(__be64 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \ + (*(u32 *)((char *)ptr + dsc[i].offset)) +#define MLX5E_READ_CTR32_BE(ptr, dsc, i) \ + be64_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) + +#define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld) + +struct counter_desc { + char name[ETH_GSTRING_LEN]; + int offset; /* Byte offset */ +}; + +struct mlx5e_sw_stats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + u64 tso_packets; + u64 tso_bytes; + u64 tso_inner_packets; + u64 tso_inner_bytes; + u64 lro_packets; + u64 lro_bytes; + u64 rx_csum_good; + u64 rx_csum_none; + u64 rx_csum_sw; + u64 rx_csum_inner; + u64 tx_csum_offload; + u64 tx_csum_inner; + u64 tx_queue_stopped; + u64 tx_queue_wake; + u64 tx_queue_dropped; + u64 rx_wqe_err; + u64 rx_mpwqe_filler; + u64 rx_mpwqe_frag; + u64 rx_buff_alloc_err; + u64 rx_cqe_compress_blks; + u64 rx_cqe_compress_pkts; + + /* Special handling counters */ + u64 link_down_events; +}; + +static const struct counter_desc sw_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_inner_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, lro_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_good) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_sw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_offload) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events) }, +}; + +struct mlx5e_qcounter_stats { + u32 rx_out_of_buffer; +}; + +static const struct counter_desc q_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_qcounter_stats, rx_out_of_buffer) }, +}; + +#define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c) +#define VPORT_COUNTER_GET(vstats, c) MLX5_GET64(query_vport_counter_out, \ + vstats->query_vport_out, c) + +struct mlx5e_vport_stats { + __be64 query_vport_out[MLX5_ST_SZ_QW(query_vport_counter_out)]; +}; + +static const struct counter_desc vport_stats_desc[] = { + { "rx_vport_error_packets", + VPORT_COUNTER_OFF(received_errors.packets) }, + { "rx_vport_error_bytes", VPORT_COUNTER_OFF(received_errors.octets) }, + { "tx_vport_error_packets", + VPORT_COUNTER_OFF(transmit_errors.packets) }, + { "tx_vport_error_bytes", VPORT_COUNTER_OFF(transmit_errors.octets) }, + { "rx_vport_unicast_packets", + VPORT_COUNTER_OFF(received_eth_unicast.packets) }, + { "rx_vport_unicast_bytes", + VPORT_COUNTER_OFF(received_eth_unicast.octets) }, + { "tx_vport_unicast_packets", + VPORT_COUNTER_OFF(transmitted_eth_unicast.packets) }, + { "tx_vport_unicast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_unicast.octets) }, + { "rx_vport_multicast_packets", + VPORT_COUNTER_OFF(received_eth_multicast.packets) }, + { "rx_vport_multicast_bytes", + VPORT_COUNTER_OFF(received_eth_multicast.octets) }, + { "tx_vport_multicast_packets", + VPORT_COUNTER_OFF(transmitted_eth_multicast.packets) }, + { "tx_vport_multicast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_multicast.octets) }, + { "rx_vport_broadcast_packets", + VPORT_COUNTER_OFF(received_eth_broadcast.packets) }, + { "rx_vport_broadcast_bytes", + VPORT_COUNTER_OFF(received_eth_broadcast.octets) }, + { "tx_vport_broadcast_packets", + VPORT_COUNTER_OFF(transmitted_eth_broadcast.packets) }, + { "tx_vport_broadcast_bytes", + VPORT_COUNTER_OFF(transmitted_eth_broadcast.octets) }, +}; + +#define PPORT_802_3_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) +#define PPORT_802_3_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->IEEE_802_3_counters, \ + counter_set.eth_802_3_cntrs_grp_data_layout.c##_high) +#define PPORT_2863_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_2863_cntrs_grp_data_layout.c##_high) +#define PPORT_2863_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->RFC_2863_counters, \ + counter_set.eth_2863_cntrs_grp_data_layout.c##_high) +#define PPORT_2819_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_2819_cntrs_grp_data_layout.c##_high) +#define PPORT_2819_GET(pstats, c) \ + MLX5_GET64(ppcnt_reg, pstats->RFC_2819_counters, \ + counter_set.eth_2819_cntrs_grp_data_layout.c##_high) +#define PPORT_PER_PRIO_OFF(c) \ + MLX5_BYTE_OFF(ppcnt_reg, \ + counter_set.eth_per_prio_grp_data_layout.c##_high) +#define PPORT_PER_PRIO_GET(pstats, prio, c) \ + MLX5_GET64(ppcnt_reg, pstats->per_prio_counters[prio], \ + counter_set.eth_per_prio_grp_data_layout.c##_high) +#define NUM_PPORT_PRIO 8 + +struct mlx5e_pport_stats { + __be64 IEEE_802_3_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 RFC_2863_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 RFC_2819_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 per_prio_counters[NUM_PPORT_PRIO][MLX5_ST_SZ_QW(ppcnt_reg)]; + __be64 phy_counters[MLX5_ST_SZ_QW(ppcnt_reg)]; +}; + +static const struct counter_desc pport_802_3_stats_desc[] = { + { "frames_tx", PPORT_802_3_OFF(a_frames_transmitted_ok) }, + { "frames_rx", PPORT_802_3_OFF(a_frames_received_ok) }, + { "check_seq_err", PPORT_802_3_OFF(a_frame_check_sequence_errors) }, + { "alignment_err", PPORT_802_3_OFF(a_alignment_errors) }, + { "octets_tx", PPORT_802_3_OFF(a_octets_transmitted_ok) }, + { "octets_received", PPORT_802_3_OFF(a_octets_received_ok) }, + { "multicast_xmitted", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) }, + { "broadcast_xmitted", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) }, + { "multicast_rx", PPORT_802_3_OFF(a_multicast_frames_received_ok) }, + { "broadcast_rx", PPORT_802_3_OFF(a_broadcast_frames_received_ok) }, + { "in_range_len_errors", PPORT_802_3_OFF(a_in_range_length_errors) }, + { "out_of_range_len", PPORT_802_3_OFF(a_out_of_range_length_field) }, + { "too_long_errors", PPORT_802_3_OFF(a_frame_too_long_errors) }, + { "symbol_err", PPORT_802_3_OFF(a_symbol_error_during_carrier) }, + { "mac_control_tx", PPORT_802_3_OFF(a_mac_control_frames_transmitted) }, + { "mac_control_rx", PPORT_802_3_OFF(a_mac_control_frames_received) }, + { "unsupported_op_rx", + PPORT_802_3_OFF(a_unsupported_opcodes_received) }, + { "pause_ctrl_rx", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) }, + { "pause_ctrl_tx", + PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) }, +}; + +static const struct counter_desc pport_2863_stats_desc[] = { + { "in_octets", PPORT_2863_OFF(if_in_octets) }, + { "in_ucast_pkts", PPORT_2863_OFF(if_in_ucast_pkts) }, + { "in_discards", PPORT_2863_OFF(if_in_discards) }, + { "in_errors", PPORT_2863_OFF(if_in_errors) }, + { "in_unknown_protos", PPORT_2863_OFF(if_in_unknown_protos) }, + { "out_octets", PPORT_2863_OFF(if_out_octets) }, + { "out_ucast_pkts", PPORT_2863_OFF(if_out_ucast_pkts) }, + { "out_discards", PPORT_2863_OFF(if_out_discards) }, + { "out_errors", PPORT_2863_OFF(if_out_errors) }, + { "in_multicast_pkts", PPORT_2863_OFF(if_in_multicast_pkts) }, + { "in_broadcast_pkts", PPORT_2863_OFF(if_in_broadcast_pkts) }, + { "out_multicast_pkts", PPORT_2863_OFF(if_out_multicast_pkts) }, + { "out_broadcast_pkts", PPORT_2863_OFF(if_out_broadcast_pkts) }, +}; + +static const struct counter_desc pport_2819_stats_desc[] = { + { "drop_events", PPORT_2819_OFF(ether_stats_drop_events) }, + { "octets", PPORT_2819_OFF(ether_stats_octets) }, + { "pkts", PPORT_2819_OFF(ether_stats_pkts) }, + { "broadcast_pkts", PPORT_2819_OFF(ether_stats_broadcast_pkts) }, + { "multicast_pkts", PPORT_2819_OFF(ether_stats_multicast_pkts) }, + { "crc_align_errors", PPORT_2819_OFF(ether_stats_crc_align_errors) }, + { "undersize_pkts", PPORT_2819_OFF(ether_stats_undersize_pkts) }, + { "oversize_pkts", PPORT_2819_OFF(ether_stats_oversize_pkts) }, + { "fragments", PPORT_2819_OFF(ether_stats_fragments) }, + { "jabbers", PPORT_2819_OFF(ether_stats_jabbers) }, + { "collisions", PPORT_2819_OFF(ether_stats_collisions) }, + { "p64octets", PPORT_2819_OFF(ether_stats_pkts64octets) }, + { "p65to127octets", PPORT_2819_OFF(ether_stats_pkts65to127octets) }, + { "p128to255octets", PPORT_2819_OFF(ether_stats_pkts128to255octets) }, + { "p256to511octets", PPORT_2819_OFF(ether_stats_pkts256to511octets) }, + { "p512to1023octets", PPORT_2819_OFF(ether_stats_pkts512to1023octets) }, + { "p1024to1518octets", + PPORT_2819_OFF(ether_stats_pkts1024to1518octets) }, + { "p1519to2047octets", + PPORT_2819_OFF(ether_stats_pkts1519to2047octets) }, + { "p2048to4095octets", + PPORT_2819_OFF(ether_stats_pkts2048to4095octets) }, + { "p4096to8191octets", + PPORT_2819_OFF(ether_stats_pkts4096to8191octets) }, + { "p8192to10239octets", + PPORT_2819_OFF(ether_stats_pkts8192to10239octets) }, +}; + +static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { + { "rx_octets", PPORT_PER_PRIO_OFF(rx_octets) }, + { "rx_frames", PPORT_PER_PRIO_OFF(rx_frames) }, + { "tx_octets", PPORT_PER_PRIO_OFF(tx_octets) }, + { "tx_frames", PPORT_PER_PRIO_OFF(tx_frames) }, +}; + +static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { + { "rx_pause", PPORT_PER_PRIO_OFF(rx_pause) }, + { "rx_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, + { "tx_pause", PPORT_PER_PRIO_OFF(tx_pause) }, + { "tx_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, + { "rx_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, +}; + +struct mlx5e_rq_stats { + u64 packets; + u64 bytes; + u64 csum_sw; + u64 csum_inner; + u64 csum_none; + u64 lro_packets; + u64 lro_bytes; + u64 wqe_err; + u64 mpwqe_filler; + u64 mpwqe_frag; + u64 buff_alloc_err; + u64 cqe_compress_blks; + u64 cqe_compress_pkts; +}; + +static const struct counter_desc rq_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_sw) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_filler) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_frag) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, +}; + +struct mlx5e_sq_stats { + /* commonly accessed in data path */ + u64 packets; + u64 bytes; + u64 tso_packets; + u64 tso_bytes; + u64 tso_inner_packets; + u64 tso_inner_bytes; + u64 csum_offload_inner; + u64 nop; + /* less likely accessed in data path */ + u64 csum_offload_none; + u64 stopped; + u64 wake; + u64 dropped; +}; + +static const struct counter_desc sq_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, csum_offload_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, csum_offload_none) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, dropped) }, +}; + +#define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) +#define NUM_Q_COUNTERS ARRAY_SIZE(q_stats_desc) +#define NUM_VPORT_COUNTERS ARRAY_SIZE(vport_stats_desc) +#define NUM_PPORT_802_3_COUNTERS ARRAY_SIZE(pport_802_3_stats_desc) +#define NUM_PPORT_2863_COUNTERS ARRAY_SIZE(pport_2863_stats_desc) +#define NUM_PPORT_2819_COUNTERS ARRAY_SIZE(pport_2819_stats_desc) +#define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \ + ARRAY_SIZE(pport_per_prio_traffic_stats_desc) +#define NUM_PPORT_PER_PRIO_PFC_COUNTERS \ + ARRAY_SIZE(pport_per_prio_pfc_stats_desc) +#define NUM_PPORT_COUNTERS (NUM_PPORT_802_3_COUNTERS + \ + NUM_PPORT_2863_COUNTERS + \ + NUM_PPORT_2819_COUNTERS + \ + NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \ + NUM_PPORT_PRIO) +#define NUM_RQ_STATS ARRAY_SIZE(rq_stats_desc) +#define NUM_SQ_STATS ARRAY_SIZE(sq_stats_desc) + +struct mlx5e_stats { + struct mlx5e_sw_stats sw; + struct mlx5e_qcounter_stats qcnt; + struct mlx5e_vport_stats vport; + struct mlx5e_pport_stats pport; +}; + +#endif /* __MLX5_EN_STATS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index b3de09f13425..704c3d30493e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -46,43 +46,65 @@ struct mlx5e_tc_flow { struct mlx5_flow_rule *rule; }; -#define MLX5E_TC_FLOW_TABLE_NUM_ENTRIES 1024 -#define MLX5E_TC_FLOW_TABLE_NUM_GROUPS 4 +#define MLX5E_TC_TABLE_NUM_ENTRIES 1024 +#define MLX5E_TC_TABLE_NUM_GROUPS 4 static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv, u32 *match_c, u32 *match_v, u32 action, u32 flow_tag) { - struct mlx5_flow_destination dest = { - .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE, - {.ft = priv->fts.vlan.t}, - }; + struct mlx5_core_dev *dev = priv->mdev; + struct mlx5_flow_destination dest = { 0 }; + struct mlx5_fc *counter = NULL; struct mlx5_flow_rule *rule; bool table_created = false; - if (IS_ERR_OR_NULL(priv->fts.tc.t)) { - priv->fts.tc.t = - mlx5_create_auto_grouped_flow_table(priv->fts.ns, 0, - MLX5E_TC_FLOW_TABLE_NUM_ENTRIES, - MLX5E_TC_FLOW_TABLE_NUM_GROUPS); - if (IS_ERR(priv->fts.tc.t)) { + if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = priv->fs.vlan.ft.t; + } else { + counter = mlx5_fc_create(dev, true); + if (IS_ERR(counter)) + return ERR_CAST(counter); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; + dest.counter = counter; + } + + if (IS_ERR_OR_NULL(priv->fs.tc.t)) { + priv->fs.tc.t = + mlx5_create_auto_grouped_flow_table(priv->fs.ns, + MLX5E_TC_PRIO, + MLX5E_TC_TABLE_NUM_ENTRIES, + MLX5E_TC_TABLE_NUM_GROUPS, + 0); + if (IS_ERR(priv->fs.tc.t)) { netdev_err(priv->netdev, "Failed to create tc offload table\n"); - return ERR_CAST(priv->fts.tc.t); + rule = ERR_CAST(priv->fs.tc.t); + goto err_create_ft; } table_created = true; } - rule = mlx5_add_flow_rule(priv->fts.tc.t, MLX5_MATCH_OUTER_HEADERS, + rule = mlx5_add_flow_rule(priv->fs.tc.t, MLX5_MATCH_OUTER_HEADERS, match_c, match_v, action, flow_tag, - action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST ? &dest : NULL); + &dest); + + if (IS_ERR(rule)) + goto err_add_rule; + + return rule; - if (IS_ERR(rule) && table_created) { - mlx5_destroy_flow_table(priv->fts.tc.t); - priv->fts.tc.t = NULL; +err_add_rule: + if (table_created) { + mlx5_destroy_flow_table(priv->fs.tc.t); + priv->fs.tc.t = NULL; } +err_create_ft: + mlx5_fc_destroy(dev, counter); return rule; } @@ -90,11 +112,17 @@ static struct mlx5_flow_rule *mlx5e_tc_add_flow(struct mlx5e_priv *priv, static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5_flow_rule *rule) { + struct mlx5_fc *counter = NULL; + + counter = mlx5_flow_rule_counter(rule); + mlx5_del_flow_rule(rule); + mlx5_fc_destroy(priv->mdev, counter); + if (!mlx5e_tc_num_filters(priv)) { - mlx5_destroy_flow_table(priv->fts.tc.t); - priv->fts.tc.t = NULL; + mlx5_destroy_flow_table(priv->fs.tc.t); + priv->fs.tc.t = NULL; } } @@ -284,6 +312,9 @@ static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (is_tcf_gact_shot(a)) { *action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + if (MLX5_CAP_FLOWTABLE(priv->mdev, + flow_table_properties_nic_receive.flow_counter)) + *action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; continue; } @@ -310,7 +341,7 @@ static int parse_tc_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, struct tc_cls_flower_offload *f) { - struct mlx5e_tc_flow_table *tc = &priv->fts.tc; + struct mlx5e_tc_table *tc = &priv->fs.tc; u32 *match_c; u32 *match_v; int err = 0; @@ -376,7 +407,7 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f) { struct mlx5e_tc_flow *flow; - struct mlx5e_tc_flow_table *tc = &priv->fts.tc; + struct mlx5e_tc_table *tc = &priv->fs.tc; flow = rhashtable_lookup_fast(&tc->ht, &f->cookie, tc->ht_params); @@ -392,6 +423,34 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, return 0; } +int mlx5e_stats_flower(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f) +{ + struct mlx5e_tc_table *tc = &priv->fs.tc; + struct mlx5e_tc_flow *flow; + struct tc_action *a; + struct mlx5_fc *counter; + u64 bytes; + u64 packets; + u64 lastuse; + + flow = rhashtable_lookup_fast(&tc->ht, &f->cookie, + tc->ht_params); + if (!flow) + return -EINVAL; + + counter = mlx5_flow_rule_counter(flow->rule); + if (!counter) + return 0; + + mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); + + tc_for_each_action(a, f->exts) + tcf_action_stats_update(a, bytes, packets, lastuse); + + return 0; +} + static const struct rhashtable_params mlx5e_tc_flow_ht_params = { .head_offset = offsetof(struct mlx5e_tc_flow, node), .key_offset = offsetof(struct mlx5e_tc_flow, cookie), @@ -401,7 +460,7 @@ static const struct rhashtable_params mlx5e_tc_flow_ht_params = { int mlx5e_tc_init(struct mlx5e_priv *priv) { - struct mlx5e_tc_flow_table *tc = &priv->fts.tc; + struct mlx5e_tc_table *tc = &priv->fs.tc; tc->ht_params = mlx5e_tc_flow_ht_params; return rhashtable_init(&tc->ht, &tc->ht_params); @@ -418,12 +477,12 @@ static void _mlx5e_tc_del_flow(void *ptr, void *arg) void mlx5e_tc_cleanup(struct mlx5e_priv *priv) { - struct mlx5e_tc_flow_table *tc = &priv->fts.tc; + struct mlx5e_tc_table *tc = &priv->fs.tc; rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, priv); - if (!IS_ERR_OR_NULL(priv->fts.tc.t)) { - mlx5_destroy_flow_table(priv->fts.tc.t); - priv->fts.tc.t = NULL; + if (!IS_ERR_OR_NULL(tc->t)) { + mlx5_destroy_flow_table(tc->t); + tc->t = NULL; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index d677428dc10f..34bf903fc886 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -43,9 +43,12 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, int mlx5e_delete_flower(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f); +int mlx5e_stats_flower(struct mlx5e_priv *priv, + struct tc_cls_flower_offload *f); + static inline int mlx5e_tc_num_filters(struct mlx5e_priv *priv) { - return atomic_read(&priv->fts.tc.ht.nelems); + return atomic_read(&priv->fs.tc.ht.nelems); } #endif /* __MLX5_EN_TC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 1ffc7cb6f78c..229ab16fb8d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -54,10 +54,11 @@ void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw) sq->skb[pi] = NULL; sq->pc++; + sq->stats.nop++; if (notify_hw) { cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, wqe, 0); + mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } } @@ -309,7 +310,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) bf_sz = wi->num_wqebbs << 3; cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - mlx5e_tx_notify_hw(sq, wqe, bf_sz); + mlx5e_tx_notify_hw(sq, &wqe->ctrl, bf_sz); } /* fill sq edge with nops to avoid wqe wrap around */ @@ -387,7 +388,6 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) wi = &sq->wqe_info[ci]; if (unlikely(!skb)) { /* nop */ - sq->stats.nop++; sqcc++; continue; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 9bb4395aceeb..c38781fa567d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -49,6 +49,60 @@ struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq) return cqe; } +static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) +{ + struct mlx5_wq_cyc *wq; + struct mlx5_cqe64 *cqe; + struct mlx5e_sq *sq; + u16 sqcc; + + cqe = mlx5e_get_cqe(cq); + if (likely(!cqe)) + return; + + sq = container_of(cq, struct mlx5e_sq, cq); + wq = &sq->wq; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + do { + u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; + struct mlx5e_ico_wqe_info *icowi = &sq->ico_wqe_info[ci]; + + mlx5_cqwq_pop(&cq->wq); + sqcc += icowi->num_wqebbs; + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { + WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", + cqe->op_own); + break; + } + + switch (icowi->opcode) { + case MLX5_OPCODE_NOP: + break; + case MLX5_OPCODE_UMR: + mlx5e_post_rx_fragmented_mpwqe(&sq->channel->rq); + break; + default: + WARN_ONCE(true, + "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", + icowi->opcode); + } + + } while ((cqe = mlx5e_get_cqe(cq))); + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc = sqcc; +} + int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, @@ -64,6 +118,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); busy |= work_done == budget; + + mlx5e_poll_ico_cq(&c->icosq.cq); + busy |= mlx5e_post_rx_wqes(&c->rq); if (busy) @@ -80,6 +137,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) mlx5e_cq_arm(&c->sq[i].cq); mlx5e_cq_arm(&c->rq.cq); + mlx5e_cq_arm(&c->icosq.cq); return work_done; } @@ -89,7 +147,6 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq) struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); set_bit(MLX5E_CHANNEL_NAPI_SCHED, &cq->channel->flags); - barrier(); napi_schedule(cq->napi); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index bc3d9f8a75c1..b84a6918a700 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -77,16 +77,20 @@ struct vport_addr { u8 action; u32 vport; struct mlx5_flow_rule *flow_rule; /* SRIOV only */ + /* A flag indicating that mac was added due to mc promiscuous vport */ + bool mc_promisc; }; enum { UC_ADDR_CHANGE = BIT(0), MC_ADDR_CHANGE = BIT(1), + PROMISC_CHANGE = BIT(3), }; /* Vport context events */ #define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \ - MC_ADDR_CHANGE) + MC_ADDR_CHANGE | \ + PROMISC_CHANGE) static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) @@ -116,6 +120,9 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, if (events_mask & MC_ADDR_CHANGE) MLX5_SET(nic_vport_context, nic_vport_ctx, event_on_mc_address_change, 1); + if (events_mask & PROMISC_CHANGE) + MLX5_SET(nic_vport_context, nic_vport_ctx, + event_on_promisc_change, 1); err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) @@ -323,30 +330,45 @@ static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index) /* E-Switch FDB */ static struct mlx5_flow_rule * -esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, + u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN]) { - int match_header = MLX5_MATCH_OUTER_HEADERS; - struct mlx5_flow_destination dest; + int match_header = (is_zero_ether_addr(mac_c) ? 0 : + MLX5_MATCH_OUTER_HEADERS); struct mlx5_flow_rule *flow_rule = NULL; + struct mlx5_flow_destination dest; + void *mv_misc = NULL; + void *mc_misc = NULL; + u8 *dmac_v = NULL; + u8 *dmac_c = NULL; u32 *match_v; u32 *match_c; - u8 *dmac_v; - u8 *dmac_c; + if (rx_rule) + match_header |= MLX5_MATCH_MISC_PARAMETERS; match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); if (!match_v || !match_c) { pr_warn("FDB: Failed to alloc match parameters\n"); goto out; } + dmac_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers.dmac_47_16); dmac_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers.dmac_47_16); - ether_addr_copy(dmac_v, mac); - /* Match criteria mask */ - memset(dmac_c, 0xff, 6); + if (match_header & MLX5_MATCH_OUTER_HEADERS) { + ether_addr_copy(dmac_v, mac_v); + ether_addr_copy(dmac_c, mac_c); + } + + if (match_header & MLX5_MATCH_MISC_PARAMETERS) { + mv_misc = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters); + mc_misc = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); + MLX5_SET(fte_match_set_misc, mv_misc, source_port, UPLINK_VPORT); + MLX5_SET_TO_ONES(fte_match_set_misc, mc_misc, source_port); + } dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport_num = vport; @@ -373,6 +395,39 @@ out: return flow_rule; } +static struct mlx5_flow_rule * +esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport) +{ + u8 mac_c[ETH_ALEN]; + + eth_broadcast_addr(mac_c); + return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac); +} + +static struct mlx5_flow_rule * +esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport) +{ + u8 mac_c[ETH_ALEN]; + u8 mac_v[ETH_ALEN]; + + eth_zero_addr(mac_c); + eth_zero_addr(mac_v); + mac_c[0] = 0x01; + mac_v[0] = 0x01; + return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac_v); +} + +static struct mlx5_flow_rule * +esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) +{ + u8 mac_c[ETH_ALEN]; + u8 mac_v[ETH_ALEN]; + + eth_zero_addr(mac_c); + eth_zero_addr(mac_v); + return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v); +} + static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); @@ -401,34 +456,80 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) memset(flow_group_in, 0, inlen); table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); - fdb = mlx5_create_flow_table(root_ns, 0, table_size); + fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0); if (IS_ERR_OR_NULL(fdb)) { err = PTR_ERR(fdb); esw_warn(dev, "Failed to create FDB Table err %d\n", err); goto out; } + esw->fdb_table.fdb = fdb; + /* Addresses group : Full match unicast/multicast addresses */ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1); + /* Preserve 2 entries for allmulti and promisc rules*/ + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3); eth_broadcast_addr(dmac); - g = mlx5_create_flow_group(fdb, flow_group_in); if (IS_ERR_OR_NULL(g)) { err = PTR_ERR(g); esw_warn(dev, "Failed to create flow group err(%d)\n", err); goto out; } - esw->fdb_table.addr_grp = g; - esw->fdb_table.fdb = fdb; + + /* Allmulti group : One rule that forwards any mcast traffic */ + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 2); + eth_zero_addr(dmac); + dmac[0] = 0x01; + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR_OR_NULL(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.allmulti_grp = g; + + /* Promiscuous group : + * One rule that forward all unmatched traffic from previous groups + */ + eth_zero_addr(dmac); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1); + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR_OR_NULL(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err); + goto out; + } + esw->fdb_table.promisc_grp = g; + out: + if (err) { + if (!IS_ERR_OR_NULL(esw->fdb_table.allmulti_grp)) { + mlx5_destroy_flow_group(esw->fdb_table.allmulti_grp); + esw->fdb_table.allmulti_grp = NULL; + } + if (!IS_ERR_OR_NULL(esw->fdb_table.addr_grp)) { + mlx5_destroy_flow_group(esw->fdb_table.addr_grp); + esw->fdb_table.addr_grp = NULL; + } + if (!IS_ERR_OR_NULL(esw->fdb_table.fdb)) { + mlx5_destroy_flow_table(esw->fdb_table.fdb); + esw->fdb_table.fdb = NULL; + } + } + kfree(flow_group_in); - if (err && !IS_ERR_OR_NULL(fdb)) - mlx5_destroy_flow_table(fdb); return err; } @@ -438,10 +539,14 @@ static void esw_destroy_fdb_table(struct mlx5_eswitch *esw) return; esw_debug(esw->dev, "Destroy FDB Table\n"); + mlx5_destroy_flow_group(esw->fdb_table.promisc_grp); + mlx5_destroy_flow_group(esw->fdb_table.allmulti_grp); mlx5_destroy_flow_group(esw->fdb_table.addr_grp); mlx5_destroy_flow_table(esw->fdb_table.fdb); esw->fdb_table.fdb = NULL; esw->fdb_table.addr_grp = NULL; + esw->fdb_table.allmulti_grp = NULL; + esw->fdb_table.promisc_grp = NULL; } /* E-Switch vport UC/MC lists management */ @@ -511,6 +616,52 @@ static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) return 0; } +static void update_allmulti_vports(struct mlx5_eswitch *esw, + struct vport_addr *vaddr, + struct esw_mc_addr *esw_mc) +{ + u8 *mac = vaddr->node.addr; + u32 vport_idx = 0; + + for (vport_idx = 0; vport_idx < esw->total_vports; vport_idx++) { + struct mlx5_vport *vport = &esw->vports[vport_idx]; + struct hlist_head *vport_hash = vport->mc_list; + struct vport_addr *iter_vaddr = + l2addr_hash_find(vport_hash, + mac, + struct vport_addr); + if (IS_ERR_OR_NULL(vport->allmulti_rule) || + vaddr->vport == vport_idx) + continue; + switch (vaddr->action) { + case MLX5_ACTION_ADD: + if (iter_vaddr) + continue; + iter_vaddr = l2addr_hash_add(vport_hash, mac, + struct vport_addr, + GFP_KERNEL); + if (!iter_vaddr) { + esw_warn(esw->dev, + "ALL-MULTI: Failed to add MAC(%pM) to vport[%d] DB\n", + mac, vport_idx); + continue; + } + iter_vaddr->vport = vport_idx; + iter_vaddr->flow_rule = + esw_fdb_set_vport_rule(esw, + mac, + vport_idx); + break; + case MLX5_ACTION_DEL: + if (!iter_vaddr) + continue; + mlx5_del_flow_rule(iter_vaddr->flow_rule); + l2addr_hash_del(iter_vaddr); + break; + } + } +} + static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) { struct hlist_head *hash = esw->mc_table; @@ -531,8 +682,17 @@ static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) esw_mc->uplink_rule = /* Forward MC MAC to Uplink */ esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT); + + /* Add this multicast mac to all the mc promiscuous vports */ + update_allmulti_vports(esw, vaddr, esw_mc); + add: - esw_mc->refcnt++; + /* If the multicast mac is added as a result of mc promiscuous vport, + * don't increment the multicast ref count + */ + if (!vaddr->mc_promisc) + esw_mc->refcnt++; + /* Forward MC MAC to vport */ vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); esw_debug(esw->dev, @@ -568,9 +728,15 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) mlx5_del_flow_rule(vaddr->flow_rule); vaddr->flow_rule = NULL; - if (--esw_mc->refcnt) + /* If the multicast mac is added as a result of mc promiscuous vport, + * don't decrement the multicast ref count. + */ + if (vaddr->mc_promisc || (--esw_mc->refcnt > 0)) return 0; + /* Remove this multicast mac from all the mc promiscuous vports */ + update_allmulti_vports(esw, vaddr, esw_mc); + if (esw_mc->uplink_rule) mlx5_del_flow_rule(esw_mc->uplink_rule); @@ -643,10 +809,13 @@ static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, addr->action = MLX5_ACTION_DEL; } + if (!vport->enabled) + goto out; + err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, list_type, mac_list, &size); if (err) - return; + goto out; esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n", vport_num, is_uc ? "UC" : "MC", size); @@ -660,6 +829,24 @@ static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr); if (addr) { addr->action = MLX5_ACTION_NONE; + /* If this mac was previously added because of allmulti + * promiscuous rx mode, its now converted to be original + * vport mac. + */ + if (addr->mc_promisc) { + struct esw_mc_addr *esw_mc = + l2addr_hash_find(esw->mc_table, + mac_list[i], + struct esw_mc_addr); + if (!esw_mc) { + esw_warn(esw->dev, + "Failed to MAC(%pM) in mcast DB\n", + mac_list[i]); + continue; + } + esw_mc->refcnt++; + addr->mc_promisc = false; + } continue; } @@ -674,13 +861,121 @@ static void esw_update_vport_addr_list(struct mlx5_eswitch *esw, addr->vport = vport_num; addr->action = MLX5_ACTION_ADD; } +out: kfree(mac_list); } -static void esw_vport_change_handler(struct work_struct *work) +/* Sync vport UC/MC list from vport context + * Must be called after esw_update_vport_addr_list + */ +static void esw_update_vport_mc_promisc(struct mlx5_eswitch *esw, u32 vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + struct l2addr_node *node; + struct vport_addr *addr; + struct hlist_head *hash; + struct hlist_node *tmp; + int hi; + + hash = vport->mc_list; + + for_each_l2hash_node(node, tmp, esw->mc_table, hi) { + u8 *mac = node->addr; + + addr = l2addr_hash_find(hash, mac, struct vport_addr); + if (addr) { + if (addr->action == MLX5_ACTION_DEL) + addr->action = MLX5_ACTION_NONE; + continue; + } + addr = l2addr_hash_add(hash, mac, struct vport_addr, + GFP_KERNEL); + if (!addr) { + esw_warn(esw->dev, + "Failed to add allmulti MAC(%pM) to vport[%d] DB\n", + mac, vport_num); + continue; + } + addr->vport = vport_num; + addr->action = MLX5_ACTION_ADD; + addr->mc_promisc = true; + } +} + +/* Apply vport rx mode to HW FDB table */ +static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num, + bool promisc, bool mc_promisc) +{ + struct esw_mc_addr *allmulti_addr = esw->mc_promisc; + struct mlx5_vport *vport = &esw->vports[vport_num]; + + if (IS_ERR_OR_NULL(vport->allmulti_rule) != mc_promisc) + goto promisc; + + if (mc_promisc) { + vport->allmulti_rule = + esw_fdb_set_vport_allmulti_rule(esw, vport_num); + if (!allmulti_addr->uplink_rule) + allmulti_addr->uplink_rule = + esw_fdb_set_vport_allmulti_rule(esw, + UPLINK_VPORT); + allmulti_addr->refcnt++; + } else if (vport->allmulti_rule) { + mlx5_del_flow_rule(vport->allmulti_rule); + vport->allmulti_rule = NULL; + + if (--allmulti_addr->refcnt > 0) + goto promisc; + + if (allmulti_addr->uplink_rule) + mlx5_del_flow_rule(allmulti_addr->uplink_rule); + allmulti_addr->uplink_rule = NULL; + } + +promisc: + if (IS_ERR_OR_NULL(vport->promisc_rule) != promisc) + return; + + if (promisc) { + vport->promisc_rule = esw_fdb_set_vport_promisc_rule(esw, + vport_num); + } else if (vport->promisc_rule) { + mlx5_del_flow_rule(vport->promisc_rule); + vport->promisc_rule = NULL; + } +} + +/* Sync vport rx mode from vport context */ +static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num) +{ + struct mlx5_vport *vport = &esw->vports[vport_num]; + int promisc_all = 0; + int promisc_uc = 0; + int promisc_mc = 0; + int err; + + err = mlx5_query_nic_vport_promisc(esw->dev, + vport_num, + &promisc_uc, + &promisc_mc, + &promisc_all); + if (err) + return; + esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n", + vport_num, promisc_all, promisc_mc); + + if (!vport->trusted || !vport->enabled) { + promisc_uc = 0; + promisc_mc = 0; + promisc_all = 0; + } + + esw_apply_vport_rx_mode(esw, vport_num, promisc_all, + (promisc_all || promisc_mc)); +} + +static void esw_vport_change_handle_locked(struct mlx5_vport *vport) { - struct mlx5_vport *vport = - container_of(work, struct mlx5_vport, vport_change_handler); struct mlx5_core_dev *dev = vport->dev; struct mlx5_eswitch *esw = dev->priv.eswitch; u8 mac[ETH_ALEN]; @@ -699,6 +994,15 @@ static void esw_vport_change_handler(struct work_struct *work) if (vport->enabled_events & MC_ADDR_CHANGE) { esw_update_vport_addr_list(esw, vport->vport, MLX5_NVPRT_LIST_TYPE_MC); + } + + if (vport->enabled_events & PROMISC_CHANGE) { + esw_update_vport_rx_mode(esw, vport->vport); + if (!IS_ERR_OR_NULL(vport->allmulti_rule)) + esw_update_vport_mc_promisc(esw, vport->vport); + } + + if (vport->enabled_events & (PROMISC_CHANGE | MC_ADDR_CHANGE)) { esw_apply_vport_addr_list(esw, vport->vport, MLX5_NVPRT_LIST_TYPE_MC); } @@ -709,15 +1013,477 @@ static void esw_vport_change_handler(struct work_struct *work) vport->enabled_events); } +static void esw_vport_change_handler(struct work_struct *work) +{ + struct mlx5_vport *vport = + container_of(work, struct mlx5_vport, vport_change_handler); + struct mlx5_eswitch *esw = vport->dev->priv.eswitch; + + mutex_lock(&esw->state_lock); + esw_vport_change_handle_locked(vport); + mutex_unlock(&esw->state_lock); +} + +static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *vlan_grp = NULL; + struct mlx5_flow_group *drop_grp = NULL; + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *acl; + void *match_criteria; + u32 *flow_group_in; + /* The egress acl table contains 2 rules: + * 1)Allow traffic with vlan_tag=vst_vlan_id + * 2)Drop all other traffic. + */ + int table_size = 2; + int err = 0; + + if (!MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support) || + !IS_ERR_OR_NULL(vport->egress.acl)) + return; + + esw_debug(dev, "Create vport[%d] egress ACL log_max_size(%d)\n", + vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size)); + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS); + if (!root_ns) { + esw_warn(dev, "Failed to get E-Switch egress flow namespace\n"); + return; + } + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return; + + acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); + if (IS_ERR_OR_NULL(acl)) { + err = PTR_ERR(acl); + esw_warn(dev, "Failed to create E-Switch vport[%d] egress flow Table, err(%d)\n", + vport->vport, err); + goto out; + } + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + + vlan_grp = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR_OR_NULL(vlan_grp)) { + err = PTR_ERR(vlan_grp); + esw_warn(dev, "Failed to create E-Switch vport[%d] egress allowed vlans flow group, err(%d)\n", + vport->vport, err); + goto out; + } + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + drop_grp = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR_OR_NULL(drop_grp)) { + err = PTR_ERR(drop_grp); + esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n", + vport->vport, err); + goto out; + } + + vport->egress.acl = acl; + vport->egress.drop_grp = drop_grp; + vport->egress.allowed_vlans_grp = vlan_grp; +out: + kfree(flow_group_in); + if (err && !IS_ERR_OR_NULL(vlan_grp)) + mlx5_destroy_flow_group(vlan_grp); + if (err && !IS_ERR_OR_NULL(acl)) + mlx5_destroy_flow_table(acl); +} + +static void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) + mlx5_del_flow_rule(vport->egress.allowed_vlan); + + if (!IS_ERR_OR_NULL(vport->egress.drop_rule)) + mlx5_del_flow_rule(vport->egress.drop_rule); + + vport->egress.allowed_vlan = NULL; + vport->egress.drop_rule = NULL; +} + +static void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (IS_ERR_OR_NULL(vport->egress.acl)) + return; + + esw_debug(esw->dev, "Destroy vport[%d] E-Switch egress ACL\n", vport->vport); + + esw_vport_cleanup_egress_rules(esw, vport); + mlx5_destroy_flow_group(vport->egress.allowed_vlans_grp); + mlx5_destroy_flow_group(vport->egress.drop_grp); + mlx5_destroy_flow_table(vport->egress.acl); + vport->egress.allowed_vlans_grp = NULL; + vport->egress.drop_grp = NULL; + vport->egress.acl = NULL; +} + +static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *acl; + struct mlx5_flow_group *g; + void *match_criteria; + u32 *flow_group_in; + /* The ingress acl table contains 4 groups + * (2 active rules at the same time - + * 1 allow rule from one of the first 3 groups. + * 1 drop rule from the last group): + * 1)Allow untagged traffic with smac=original mac. + * 2)Allow untagged traffic. + * 3)Allow traffic with smac=original mac. + * 4)Drop all other traffic. + */ + int table_size = 4; + int err = 0; + + if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support) || + !IS_ERR_OR_NULL(vport->ingress.acl)) + return; + + esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n", + vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size)); + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS); + if (!root_ns) { + esw_warn(dev, "Failed to get E-Switch ingress flow namespace\n"); + return; + } + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return; + + acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport); + if (IS_ERR_OR_NULL(acl)) { + err = PTR_ERR(acl); + esw_warn(dev, "Failed to create E-Switch vport[%d] ingress flow Table, err(%d)\n", + vport->vport, err); + goto out; + } + vport->ingress.acl = acl; + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + + g = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR_OR_NULL(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged spoofchk flow group, err(%d)\n", + vport->vport, err); + goto out; + } + vport->ingress.allow_untagged_spoofchk_grp = g; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.vlan_tag); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + + g = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR_OR_NULL(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged flow group, err(%d)\n", + vport->vport, err); + goto out; + } + vport->ingress.allow_untagged_only_grp = g; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2); + + g = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR_OR_NULL(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create E-Switch vport[%d] ingress spoofchk flow group, err(%d)\n", + vport->vport, err); + goto out; + } + vport->ingress.allow_spoofchk_only_grp = g; + + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 3); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3); + + g = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR_OR_NULL(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create E-Switch vport[%d] ingress drop flow group, err(%d)\n", + vport->vport, err); + goto out; + } + vport->ingress.drop_grp = g; + +out: + if (err) { + if (!IS_ERR_OR_NULL(vport->ingress.allow_spoofchk_only_grp)) + mlx5_destroy_flow_group( + vport->ingress.allow_spoofchk_only_grp); + if (!IS_ERR_OR_NULL(vport->ingress.allow_untagged_only_grp)) + mlx5_destroy_flow_group( + vport->ingress.allow_untagged_only_grp); + if (!IS_ERR_OR_NULL(vport->ingress.allow_untagged_spoofchk_grp)) + mlx5_destroy_flow_group( + vport->ingress.allow_untagged_spoofchk_grp); + if (!IS_ERR_OR_NULL(vport->ingress.acl)) + mlx5_destroy_flow_table(vport->ingress.acl); + } + + kfree(flow_group_in); +} + +static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (!IS_ERR_OR_NULL(vport->ingress.drop_rule)) + mlx5_del_flow_rule(vport->ingress.drop_rule); + + if (!IS_ERR_OR_NULL(vport->ingress.allow_rule)) + mlx5_del_flow_rule(vport->ingress.allow_rule); + + vport->ingress.drop_rule = NULL; + vport->ingress.allow_rule = NULL; +} + +static void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + if (IS_ERR_OR_NULL(vport->ingress.acl)) + return; + + esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport); + + esw_vport_cleanup_ingress_rules(esw, vport); + mlx5_destroy_flow_group(vport->ingress.allow_spoofchk_only_grp); + mlx5_destroy_flow_group(vport->ingress.allow_untagged_only_grp); + mlx5_destroy_flow_group(vport->ingress.allow_untagged_spoofchk_grp); + mlx5_destroy_flow_group(vport->ingress.drop_grp); + mlx5_destroy_flow_table(vport->ingress.acl); + vport->ingress.acl = NULL; + vport->ingress.drop_grp = NULL; + vport->ingress.allow_spoofchk_only_grp = NULL; + vport->ingress.allow_untagged_only_grp = NULL; + vport->ingress.allow_untagged_spoofchk_grp = NULL; +} + +static int esw_vport_ingress_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + u8 smac[ETH_ALEN]; + u32 *match_v; + u32 *match_c; + int err = 0; + u8 *smac_v; + + if (vport->spoofchk) { + err = mlx5_query_nic_vport_mac_address(esw->dev, vport->vport, smac); + if (err) { + esw_warn(esw->dev, + "vport[%d] configure ingress rules failed, query smac failed, err(%d)\n", + vport->vport, err); + return err; + } + + if (!is_valid_ether_addr(smac)) { + mlx5_core_warn(esw->dev, + "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n", + vport->vport); + return -EPERM; + } + } + + esw_vport_cleanup_ingress_rules(esw, vport); + + if (!vport->vlan && !vport->qos && !vport->spoofchk) { + esw_vport_disable_ingress_acl(esw, vport); + return 0; + } + + esw_vport_enable_ingress_acl(esw, vport); + + esw_debug(esw->dev, + "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n", + vport->vport, vport->vlan, vport->qos); + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + err = -ENOMEM; + esw_warn(esw->dev, "vport[%d] configure ingress rules failed, err(%d)\n", + vport->vport, err); + goto out; + } + + if (vport->vlan || vport->qos) + MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.vlan_tag); + + if (vport->spoofchk) { + MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.smac_15_0); + smac_v = MLX5_ADDR_OF(fte_match_param, + match_v, + outer_headers.smac_47_16); + ether_addr_copy(smac_v, smac); + } + + vport->ingress.allow_rule = + mlx5_add_flow_rule(vport->ingress.acl, + MLX5_MATCH_OUTER_HEADERS, + match_c, + match_v, + MLX5_FLOW_CONTEXT_ACTION_ALLOW, + 0, NULL); + if (IS_ERR_OR_NULL(vport->ingress.allow_rule)) { + err = PTR_ERR(vport->ingress.allow_rule); + pr_warn("vport[%d] configure ingress allow rule, err(%d)\n", + vport->vport, err); + vport->ingress.allow_rule = NULL; + goto out; + } + + memset(match_c, 0, MLX5_ST_SZ_BYTES(fte_match_param)); + memset(match_v, 0, MLX5_ST_SZ_BYTES(fte_match_param)); + vport->ingress.drop_rule = + mlx5_add_flow_rule(vport->ingress.acl, + 0, + match_c, + match_v, + MLX5_FLOW_CONTEXT_ACTION_DROP, + 0, NULL); + if (IS_ERR_OR_NULL(vport->ingress.drop_rule)) { + err = PTR_ERR(vport->ingress.drop_rule); + pr_warn("vport[%d] configure ingress drop rule, err(%d)\n", + vport->vport, err); + vport->ingress.drop_rule = NULL; + goto out; + } + +out: + if (err) + esw_vport_cleanup_ingress_rules(esw, vport); + + kfree(match_v); + kfree(match_c); + return err; +} + +static int esw_vport_egress_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + u32 *match_v; + u32 *match_c; + int err = 0; + + esw_vport_cleanup_egress_rules(esw, vport); + + if (!vport->vlan && !vport->qos) { + esw_vport_disable_egress_acl(esw, vport); + return 0; + } + + esw_vport_enable_egress_acl(esw, vport); + + esw_debug(esw->dev, + "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", + vport->vport, vport->vlan, vport->qos); + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + err = -ENOMEM; + esw_warn(esw->dev, "vport[%d] configure egress rules failed, err(%d)\n", + vport->vport, err); + goto out; + } + + /* Allowed vlan rule */ + MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_v, outer_headers.vlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.first_vid); + MLX5_SET(fte_match_param, match_v, outer_headers.first_vid, vport->vlan); + + vport->egress.allowed_vlan = + mlx5_add_flow_rule(vport->egress.acl, + MLX5_MATCH_OUTER_HEADERS, + match_c, + match_v, + MLX5_FLOW_CONTEXT_ACTION_ALLOW, + 0, NULL); + if (IS_ERR_OR_NULL(vport->egress.allowed_vlan)) { + err = PTR_ERR(vport->egress.allowed_vlan); + pr_warn("vport[%d] configure egress allowed vlan rule failed, err(%d)\n", + vport->vport, err); + vport->egress.allowed_vlan = NULL; + goto out; + } + + /* Drop others rule (star rule) */ + memset(match_c, 0, MLX5_ST_SZ_BYTES(fte_match_param)); + memset(match_v, 0, MLX5_ST_SZ_BYTES(fte_match_param)); + vport->egress.drop_rule = + mlx5_add_flow_rule(vport->egress.acl, + 0, + match_c, + match_v, + MLX5_FLOW_CONTEXT_ACTION_DROP, + 0, NULL); + if (IS_ERR_OR_NULL(vport->egress.drop_rule)) { + err = PTR_ERR(vport->egress.drop_rule); + pr_warn("vport[%d] configure egress drop rule failed, err(%d)\n", + vport->vport, err); + vport->egress.drop_rule = NULL; + } +out: + kfree(match_v); + kfree(match_c); + return err; +} + static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, int enable_events) { struct mlx5_vport *vport = &esw->vports[vport_num]; - unsigned long flags; + mutex_lock(&esw->state_lock); WARN_ON(vport->enabled); esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); + + if (vport_num) { /* Only VFs need ACLs for VST and spoofchk filtering */ + esw_vport_ingress_config(esw, vport); + esw_vport_egress_config(esw, vport); + } + mlx5_modify_vport_admin_state(esw->dev, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, vport_num, @@ -725,53 +1491,32 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, /* Sync with current vport context */ vport->enabled_events = enable_events; - esw_vport_change_handler(&vport->vport_change_handler); + esw_vport_change_handle_locked(vport); - spin_lock_irqsave(&vport->lock, flags); vport->enabled = true; - spin_unlock_irqrestore(&vport->lock, flags); + + /* only PF is trusted by default */ + vport->trusted = (vport_num) ? false : true; arm_vport_context_events_cmd(esw->dev, vport_num, enable_events); esw->enabled_vports++; esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num); -} - -static void esw_cleanup_vport(struct mlx5_eswitch *esw, u16 vport_num) -{ - struct mlx5_vport *vport = &esw->vports[vport_num]; - struct l2addr_node *node; - struct vport_addr *addr; - struct hlist_node *tmp; - int hi; - - for_each_l2hash_node(node, tmp, vport->uc_list, hi) { - addr = container_of(node, struct vport_addr, node); - addr->action = MLX5_ACTION_DEL; - } - esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_UC); - - for_each_l2hash_node(node, tmp, vport->mc_list, hi) { - addr = container_of(node, struct vport_addr, node); - addr->action = MLX5_ACTION_DEL; - } - esw_apply_vport_addr_list(esw, vport_num, MLX5_NVPRT_LIST_TYPE_MC); + mutex_unlock(&esw->state_lock); } static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) { struct mlx5_vport *vport = &esw->vports[vport_num]; - unsigned long flags; if (!vport->enabled) return; esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num); /* Mark this vport as disabled to discard new events */ - spin_lock_irqsave(&vport->lock, flags); vport->enabled = false; - vport->enabled_events = 0; - spin_unlock_irqrestore(&vport->lock, flags); + + synchronize_irq(mlx5_get_msix_vec(esw->dev, MLX5_EQ_VEC_ASYNC)); mlx5_modify_vport_admin_state(esw->dev, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, @@ -781,9 +1526,19 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) flush_workqueue(esw->work_queue); /* Disable events from this vport */ arm_vport_context_events_cmd(esw->dev, vport->vport, 0); - /* We don't assume VFs will cleanup after themselves */ - esw_cleanup_vport(esw, vport_num); + mutex_lock(&esw->state_lock); + /* We don't assume VFs will cleanup after themselves. + * Calling vport change handler while vport is disabled will cleanup + * the vport resources. + */ + esw_vport_change_handle_locked(vport); + vport->enabled_events = 0; + if (vport_num) { + esw_vport_disable_egress_acl(esw, vport); + esw_vport_disable_ingress_acl(esw, vport); + } esw->enabled_vports--; + mutex_unlock(&esw->state_lock); } /* Public E-Switch API */ @@ -802,6 +1557,12 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs) return -ENOTSUPP; } + if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support)) + esw_warn(esw->dev, "E-Switch ingress ACL is not supported by FW\n"); + + if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support)) + esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n"); + esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d)\n", nvfs); esw_disable_vport(esw, 0); @@ -824,6 +1585,7 @@ abort: void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) { + struct esw_mc_addr *mc_promisc; int i; if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || @@ -833,9 +1595,14 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_info(esw->dev, "disable SRIOV: active vports(%d)\n", esw->enabled_vports); + mc_promisc = esw->mc_promisc; + for (i = 0; i < esw->total_vports; i++) esw_disable_vport(esw, i); + if (mc_promisc && mc_promisc->uplink_rule) + mlx5_del_flow_rule(mc_promisc->uplink_rule); + esw_destroy_fdb_table(esw); /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ @@ -845,7 +1612,8 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) int mlx5_eswitch_init(struct mlx5_core_dev *dev) { int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); - int total_vports = 1 + pci_sriov_get_totalvfs(dev->pdev); + int total_vports = MLX5_TOTAL_VPORTS(dev); + struct esw_mc_addr *mc_promisc; struct mlx5_eswitch *esw; int vport_num; int err; @@ -874,6 +1642,13 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) } esw->l2_table.size = l2_table_size; + mc_promisc = kzalloc(sizeof(*mc_promisc), GFP_KERNEL); + if (!mc_promisc) { + err = -ENOMEM; + goto abort; + } + esw->mc_promisc = mc_promisc; + esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq"); if (!esw->work_queue) { err = -ENOMEM; @@ -887,6 +1662,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto abort; } + mutex_init(&esw->state_lock); + for (vport_num = 0; vport_num < total_vports; vport_num++) { struct mlx5_vport *vport = &esw->vports[vport_num]; @@ -894,7 +1671,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) vport->dev = dev; INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler); - spin_lock_init(&vport->lock); } esw->total_vports = total_vports; @@ -925,6 +1701,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); kfree(esw->l2_table.bitmap); + kfree(esw->mc_promisc); kfree(esw->vports); kfree(esw); } @@ -942,10 +1719,8 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) } vport = &esw->vports[vport_num]; - spin_lock(&vport->lock); if (vport->enabled) queue_work(esw->work_queue, &vport->vport_change_handler); - spin_unlock(&vport->lock); } /* Vport Administration */ @@ -957,12 +1732,22 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int vport, u8 mac[ETH_ALEN]) { int err = 0; + struct mlx5_vport *evport; if (!ESW_ALLOWED(esw)) return -EPERM; if (!LEGAL_VPORT(esw, vport)) return -EINVAL; + evport = &esw->vports[vport]; + + if (evport->spoofchk && !is_valid_ether_addr(mac)) { + mlx5_core_warn(esw->dev, + "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n", + vport); + return -EPERM; + } + err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); if (err) { mlx5_core_warn(esw->dev, @@ -971,6 +1756,11 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, return err; } + mutex_lock(&esw->state_lock); + if (evport->enabled) + err = esw_vport_ingress_config(esw, evport); + mutex_unlock(&esw->state_lock); + return err; } @@ -990,6 +1780,7 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int vport, struct ifla_vf_info *ivi) { + struct mlx5_vport *evport; u16 vlan; u8 qos; @@ -998,6 +1789,8 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, if (!LEGAL_VPORT(esw, vport)) return -EINVAL; + evport = &esw->vports[vport]; + memset(ivi, 0, sizeof(*ivi)); ivi->vf = vport - 1; @@ -1008,7 +1801,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos); ivi->vlan = vlan; ivi->qos = qos; - ivi->spoofchk = 0; + ivi->spoofchk = evport->spoofchk; return 0; } @@ -1016,6 +1809,8 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, int vport, u16 vlan, u8 qos) { + struct mlx5_vport *evport; + int err = 0; int set = 0; if (!ESW_ALLOWED(esw)) @@ -1026,7 +1821,72 @@ int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, if (vlan || qos) set = 1; - return modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set); + evport = &esw->vports[vport]; + + err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set); + if (err) + return err; + + mutex_lock(&esw->state_lock); + evport->vlan = vlan; + evport->qos = qos; + if (evport->enabled) { + err = esw_vport_ingress_config(esw, evport); + if (err) + goto out; + err = esw_vport_egress_config(esw, evport); + } + +out: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, + int vport, bool spoofchk) +{ + struct mlx5_vport *evport; + bool pschk; + int err = 0; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + evport = &esw->vports[vport]; + + mutex_lock(&esw->state_lock); + pschk = evport->spoofchk; + evport->spoofchk = spoofchk; + if (evport->enabled) + err = esw_vport_ingress_config(esw, evport); + if (err) + evport->spoofchk = pschk; + mutex_unlock(&esw->state_lock); + + return err; +} + +int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, + int vport, bool setting) +{ + struct mlx5_vport *evport; + + if (!ESW_ALLOWED(esw)) + return -EPERM; + if (!LEGAL_VPORT(esw, vport)) + return -EINVAL; + + evport = &esw->vports[vport]; + + mutex_lock(&esw->state_lock); + evport->trusted = setting; + if (evport->enabled) + esw_vport_change_handle_locked(evport); + mutex_unlock(&esw->state_lock); + + return 0; } int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 3416a428f70f..fd6800256d4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -88,18 +88,40 @@ struct l2addr_node { kfree(ptr); \ }) +struct vport_ingress { + struct mlx5_flow_table *acl; + struct mlx5_flow_group *allow_untagged_spoofchk_grp; + struct mlx5_flow_group *allow_spoofchk_only_grp; + struct mlx5_flow_group *allow_untagged_only_grp; + struct mlx5_flow_group *drop_grp; + struct mlx5_flow_rule *allow_rule; + struct mlx5_flow_rule *drop_rule; +}; + +struct vport_egress { + struct mlx5_flow_table *acl; + struct mlx5_flow_group *allowed_vlans_grp; + struct mlx5_flow_group *drop_grp; + struct mlx5_flow_rule *allowed_vlan; + struct mlx5_flow_rule *drop_rule; +}; + struct mlx5_vport { struct mlx5_core_dev *dev; int vport; struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; struct hlist_head mc_list[MLX5_L2_ADDR_HASH_SIZE]; + struct mlx5_flow_rule *promisc_rule; + struct mlx5_flow_rule *allmulti_rule; struct work_struct vport_change_handler; - /* This spinlock protects access to vport data, between - * "esw_vport_disable" and ongoing interrupt "mlx5_eswitch_vport_event" - * once vport marked as disabled new interrupts are discarded. - */ - spinlock_t lock; /* vport events sync */ + struct vport_ingress ingress; + struct vport_egress egress; + + u16 vlan; + u8 qos; + bool spoofchk; + bool trusted; bool enabled; u16 enabled_events; }; @@ -113,6 +135,8 @@ struct mlx5_l2_table { struct mlx5_eswitch_fdb { void *fdb; struct mlx5_flow_group *addr_grp; + struct mlx5_flow_group *allmulti_grp; + struct mlx5_flow_group *promisc_grp; }; struct mlx5_eswitch { @@ -124,6 +148,11 @@ struct mlx5_eswitch { struct mlx5_vport *vports; int total_vports; int enabled_vports; + /* Synchronize between vport change events + * and async SRIOV admin state changes + */ + struct mutex state_lock; + struct esw_mc_addr *mc_promisc; }; /* E-Switch API */ @@ -138,6 +167,10 @@ int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, int vport, int link_state); int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, int vport, u16 vlan, u8 qos); +int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, + int vport, bool spoofchk); +int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, + int vport_num, bool setting); int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int vport, struct ifla_vf_info *ivi); int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index f46f1db0fc00..a5bb6b695242 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -50,6 +50,10 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); MLX5_SET(set_flow_table_root_in, in, table_id, ft->id); + if (ft->vport) { + MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport); + MLX5_SET(set_flow_table_root_in, in, other_vport, 1); + } memset(out, 0, sizeof(out)); return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, @@ -57,6 +61,7 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, } int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, + u16 vport, enum fs_flow_table_type type, unsigned int level, unsigned int log_size, struct mlx5_flow_table *next_ft, unsigned int *table_id) @@ -77,6 +82,10 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, MLX5_SET(create_flow_table_in, in, table_type, type); MLX5_SET(create_flow_table_in, in, level, level); MLX5_SET(create_flow_table_in, in, log_size, log_size); + if (vport) { + MLX5_SET(create_flow_table_in, in, vport_number, vport); + MLX5_SET(create_flow_table_in, in, other_vport, 1); + } memset(out, 0, sizeof(out)); err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, @@ -101,6 +110,10 @@ int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, MLX5_CMD_OP_DESTROY_FLOW_TABLE); MLX5_SET(destroy_flow_table_in, in, table_type, ft->type); MLX5_SET(destroy_flow_table_in, in, table_id, ft->id); + if (ft->vport) { + MLX5_SET(destroy_flow_table_in, in, vport_number, ft->vport); + MLX5_SET(destroy_flow_table_in, in, other_vport, 1); + } return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); @@ -120,6 +133,10 @@ int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, MLX5_CMD_OP_MODIFY_FLOW_TABLE); MLX5_SET(modify_flow_table_in, in, table_type, ft->type); MLX5_SET(modify_flow_table_in, in, table_id, ft->id); + if (ft->vport) { + MLX5_SET(modify_flow_table_in, in, vport_number, ft->vport); + MLX5_SET(modify_flow_table_in, in, other_vport, 1); + } MLX5_SET(modify_flow_table_in, in, modify_field_select, MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID); if (next_ft) { @@ -148,6 +165,10 @@ int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, MLX5_CMD_OP_CREATE_FLOW_GROUP); MLX5_SET(create_flow_group_in, in, table_type, ft->type); MLX5_SET(create_flow_group_in, in, table_id, ft->id); + if (ft->vport) { + MLX5_SET(create_flow_group_in, in, vport_number, ft->vport); + MLX5_SET(create_flow_group_in, in, other_vport, 1); + } err = mlx5_cmd_exec_check_status(dev, in, inlen, out, @@ -174,6 +195,10 @@ int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, MLX5_SET(destroy_flow_group_in, in, table_type, ft->type); MLX5_SET(destroy_flow_group_in, in, table_id, ft->id); MLX5_SET(destroy_flow_group_in, in, group_id, group_id); + if (ft->vport) { + MLX5_SET(destroy_flow_group_in, in, vport_number, ft->vport); + MLX5_SET(destroy_flow_group_in, in, other_vport, 1); + } return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); @@ -207,22 +232,29 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, MLX5_SET(set_fte_in, in, table_type, ft->type); MLX5_SET(set_fte_in, in, table_id, ft->id); MLX5_SET(set_fte_in, in, flow_index, fte->index); + if (ft->vport) { + MLX5_SET(set_fte_in, in, vport_number, ft->vport); + MLX5_SET(set_fte_in, in, other_vport, 1); + } in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context); MLX5_SET(flow_context, in_flow_context, group_id, group_id); MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag); MLX5_SET(flow_context, in_flow_context, action, fte->action); - MLX5_SET(flow_context, in_flow_context, destination_list_size, - fte->dests_size); in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context, match_value); memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param)); + in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); if (fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination); + int list_size = 0; + list_for_each_entry(dst, &fte->node.children, node.list) { unsigned int id; + if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + MLX5_SET(dest_format_struct, in_dests, destination_type, dst->dest_attr.type); if (dst->dest_attr.type == @@ -233,8 +265,31 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, } MLX5_SET(dest_format_struct, in_dests, destination_id, id); in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + list_size++; } + + MLX5_SET(flow_context, in_flow_context, destination_list_size, + list_size); } + + if (fte->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + int list_size = 0; + + list_for_each_entry(dst, &fte->node.children, node.list) { + if (dst->dest_attr.type != + MLX5_FLOW_DESTINATION_TYPE_COUNTER) + continue; + + MLX5_SET(flow_counter_list, in_dests, flow_counter_id, + dst->dest_attr.counter->id); + in_dests += MLX5_ST_SZ_BYTES(dest_format_struct); + list_size++; + } + + MLX5_SET(flow_context, in_flow_context, flow_counter_list_size, + list_size); + } + memset(out, 0, sizeof(out)); err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); @@ -254,18 +309,16 @@ int mlx5_cmd_create_fte(struct mlx5_core_dev *dev, int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, unsigned group_id, + int modify_mask, struct fs_fte *fte) { int opmod; - int modify_mask; int atomic_mod_cap = MLX5_CAP_FLOWTABLE(dev, flow_table_properties_nic_receive. flow_modify_en); if (!atomic_mod_cap) return -ENOTSUPP; opmod = 1; - modify_mask = 1 << - MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST; return mlx5_cmd_set_fte(dev, opmod, modify_mask, ft, group_id, fte); } @@ -285,8 +338,78 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, MLX5_SET(delete_fte_in, in, table_type, ft->type); MLX5_SET(delete_fte_in, in, table_id, ft->id); MLX5_SET(delete_fte_in, in, flow_index, index); + if (ft->vport) { + MLX5_SET(delete_fte_in, in, vport_number, ft->vport); + MLX5_SET(delete_fte_in, in, other_vport, 1); + } err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); return err; } + +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id) +{ + u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)]; + u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(alloc_flow_counter_in, in, opcode, + MLX5_CMD_OP_ALLOC_FLOW_COUNTER); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); + if (err) + return err; + + *id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id); + + return 0; +} + +int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(dealloc_flow_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); + MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} + +int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, + u64 *packets, u64 *bytes) +{ + u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) + + MLX5_ST_SZ_BYTES(traffic_counter)]; + u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)]; + void *stats; + int err = 0; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(query_flow_counter_in, in, opcode, + MLX5_CMD_OP_QUERY_FLOW_COUNTER); + MLX5_SET(query_flow_counter_in, in, op_mod, 0); + MLX5_SET(query_flow_counter_in, in, flow_counter_id, id); + + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + stats = MLX5_ADDR_OF(query_flow_counter_out, out, flow_statistics); + *packets = MLX5_GET64(traffic_counter, stats, packets); + *bytes = MLX5_GET64(traffic_counter, stats, octets); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 9814d4784803..fc4f7b83fe0a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -34,6 +34,7 @@ #define _MLX5_FS_CMD_ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, + u16 vport, enum fs_flow_table_type type, unsigned int level, unsigned int log_size, struct mlx5_flow_table *next_ft, unsigned int *table_id); @@ -61,6 +62,7 @@ int mlx5_cmd_create_fte(struct mlx5_core_dev *dev, int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, unsigned group_id, + int modify_mask, struct fs_fte *fte); int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, @@ -69,4 +71,9 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft); + +int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id); +int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id); +int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, + u64 *packets, u64 *bytes); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 89cce97d46c6..8b5f0b2c0d5c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -40,18 +40,18 @@ #define INIT_TREE_NODE_ARRAY_SIZE(...) (sizeof((struct init_tree_node[]){__VA_ARGS__}) /\ sizeof(struct init_tree_node)) -#define ADD_PRIO(num_prios_val, min_level_val, max_ft_val, caps_val,\ +#define ADD_PRIO(num_prios_val, min_level_val, num_levels_val, caps_val,\ ...) {.type = FS_TYPE_PRIO,\ .min_ft_level = min_level_val,\ - .max_ft = max_ft_val,\ + .num_levels = num_levels_val,\ .num_leaf_prios = num_prios_val,\ .caps = caps_val,\ .children = (struct init_tree_node[]) {__VA_ARGS__},\ .ar_size = INIT_TREE_NODE_ARRAY_SIZE(__VA_ARGS__) \ } -#define ADD_MULTIPLE_PRIO(num_prios_val, max_ft_val, ...)\ - ADD_PRIO(num_prios_val, 0, max_ft_val, {},\ +#define ADD_MULTIPLE_PRIO(num_prios_val, num_levels_val, ...)\ + ADD_PRIO(num_prios_val, 0, num_levels_val, {},\ __VA_ARGS__)\ #define ADD_NS(...) {.type = FS_TYPE_NAMESPACE,\ @@ -67,17 +67,20 @@ #define FS_REQUIRED_CAPS(...) {.arr_sz = INIT_CAPS_ARRAY_SIZE(__VA_ARGS__), \ .caps = (long[]) {__VA_ARGS__} } -#define LEFTOVERS_MAX_FT 1 +#define LEFTOVERS_NUM_LEVELS 1 #define LEFTOVERS_NUM_PRIOS 1 -#define BY_PASS_PRIO_MAX_FT 1 -#define BY_PASS_MIN_LEVEL (KENREL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ - LEFTOVERS_MAX_FT) -#define KERNEL_MAX_FT 3 -#define KERNEL_NUM_PRIOS 2 -#define KENREL_MIN_LEVEL 2 +#define BY_PASS_PRIO_NUM_LEVELS 1 +#define BY_PASS_MIN_LEVEL (KERNEL_MIN_LEVEL + MLX5_BY_PASS_NUM_PRIOS +\ + LEFTOVERS_NUM_PRIOS) -#define ANCHOR_MAX_FT 1 +/* Vlan, mac, ttc, aRFS */ +#define KERNEL_NIC_PRIO_NUM_LEVELS 4 +#define KERNEL_NIC_NUM_PRIOS 1 +/* One more level for tc */ +#define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1) + +#define ANCHOR_NUM_LEVELS 1 #define ANCHOR_NUM_PRIOS 1 #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) struct node_caps { @@ -92,7 +95,7 @@ static struct init_tree_node { int min_ft_level; int num_leaf_prios; int prio; - int max_ft; + int num_levels; } root_fs = { .type = FS_TYPE_NAMESPACE, .ar_size = 4, @@ -102,17 +105,20 @@ static struct init_tree_node { FS_CAP(flow_table_properties_nic_receive.modify_root), FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), - ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_MAX_FT))), - ADD_PRIO(0, KENREL_MIN_LEVEL, 0, {}, - ADD_NS(ADD_MULTIPLE_PRIO(KERNEL_NUM_PRIOS, KERNEL_MAX_FT))), + ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, + BY_PASS_PRIO_NUM_LEVELS))), + ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {}, + ADD_NS(ADD_MULTIPLE_PRIO(1, 1), + ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, + KERNEL_NIC_PRIO_NUM_LEVELS))), ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), FS_CAP(flow_table_properties_nic_receive.modify_root), FS_CAP(flow_table_properties_nic_receive.identified_miss_table_mode), FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), - ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_MAX_FT))), + ADD_NS(ADD_MULTIPLE_PRIO(LEFTOVERS_NUM_PRIOS, LEFTOVERS_NUM_LEVELS))), ADD_PRIO(0, ANCHOR_MIN_LEVEL, 0, {}, - ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_MAX_FT))), + ADD_NS(ADD_MULTIPLE_PRIO(ANCHOR_NUM_PRIOS, ANCHOR_NUM_LEVELS))), } }; @@ -222,19 +228,6 @@ static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns, return NULL; } -static unsigned int find_next_free_level(struct fs_prio *prio) -{ - if (!list_empty(&prio->node.children)) { - struct mlx5_flow_table *ft; - - ft = list_last_entry(&prio->node.children, - struct mlx5_flow_table, - node.list); - return ft->level + 1; - } - return prio->start_level; -} - static bool masked_memcmp(void *mask, void *val1, void *val2, size_t size) { unsigned int i; @@ -351,6 +344,7 @@ static void del_rule(struct fs_node *node) struct mlx5_flow_group *fg; struct fs_fte *fte; u32 *match_value; + int modify_mask; struct mlx5_core_dev *dev = get_dev(node); int match_len = MLX5_ST_SZ_BYTES(fte_match_param); int err; @@ -374,8 +368,11 @@ static void del_rule(struct fs_node *node) } if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && --fte->dests_size) { + modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST), err = mlx5_cmd_update_fte(dev, ft, - fg->id, fte); + fg->id, + modify_mask, + fte); if (err) pr_warn("%s can't del rule fg id=%d fte_index=%d\n", __func__, fg->id, fte->index); @@ -464,7 +461,7 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) return fg; } -static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte, +static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte, enum fs_flow_table_type table_type) { struct mlx5_flow_table *ft; @@ -476,6 +473,7 @@ static struct mlx5_flow_table *alloc_flow_table(int level, int max_fte, ft->level = level; ft->node.type = FS_TYPE_FLOW_TABLE; ft->type = table_type; + ft->vport = vport; ft->max_fte = max_fte; INIT_LIST_HEAD(&ft->fwd_rules); mutex_init(&ft->lock); @@ -615,12 +613,13 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio return err; } -static int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, - struct mlx5_flow_destination *dest) +int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, + struct mlx5_flow_destination *dest) { struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; struct fs_fte *fte; + int modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); int err = 0; fs_get_obj(fte, rule->node.parent); @@ -632,7 +631,9 @@ static int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, memcpy(&rule->dest_attr, dest, sizeof(*dest)); err = mlx5_cmd_update_fte(get_dev(&ft->node), - ft, fg->id, fte); + ft, fg->id, + modify_mask, + fte); unlock_ref_node(&fte->node); return err; @@ -693,9 +694,23 @@ static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table return err; } -struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, - int prio, - int max_fte) +static void list_add_flow_table(struct mlx5_flow_table *ft, + struct fs_prio *prio) +{ + struct list_head *prev = &prio->node.children; + struct mlx5_flow_table *iter; + + fs_for_each_ft(iter, prio) { + if (iter->level > ft->level) + break; + prev = &iter->node.list; + } + list_add(&ft->node.list, prev); +} + +static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + u16 vport, int prio, + int max_fte, u32 level) { struct mlx5_flow_table *next_ft = NULL; struct mlx5_flow_table *ft; @@ -716,12 +731,16 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, err = -EINVAL; goto unlock_root; } - if (fs_prio->num_ft == fs_prio->max_ft) { + if (level >= fs_prio->num_levels) { err = -ENOSPC; goto unlock_root; } - - ft = alloc_flow_table(find_next_free_level(fs_prio), + /* The level is related to the + * priority level range. + */ + level += fs_prio->start_level; + ft = alloc_flow_table(level, + vport, roundup_pow_of_two(max_fte), root->table_type); if (!ft) { @@ -732,7 +751,7 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, tree_init_node(&ft->node, 1, del_flow_table); log_table_sz = ilog2(ft->max_fte); next_ft = find_next_chained_ft(fs_prio); - err = mlx5_cmd_create_flow_table(root->dev, ft->type, ft->level, + err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->type, ft->level, log_table_sz, next_ft, &ft->id); if (err) goto free_ft; @@ -742,7 +761,7 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, goto destroy_ft; lock_ref_node(&fs_prio->node); tree_add_node(&ft->node, &fs_prio->node); - list_add_tail(&ft->node.list, &fs_prio->node.children); + list_add_flow_table(ft, fs_prio); fs_prio->num_ft++; unlock_ref_node(&fs_prio->node); mutex_unlock(&root->chain_lock); @@ -756,17 +775,32 @@ unlock_root: return ERR_PTR(err); } +struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + int prio, int max_fte, + u32 level) +{ + return __mlx5_create_flow_table(ns, 0, prio, max_fte, level); +} + +struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, + int prio, int max_fte, + u32 level, u16 vport) +{ + return __mlx5_create_flow_table(ns, vport, prio, max_fte, level); +} + struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, - int max_num_groups) + int max_num_groups, + u32 level) { struct mlx5_flow_table *ft; if (max_num_groups > num_flow_table_entries) return ERR_PTR(-EINVAL); - ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries); + ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries, level); if (IS_ERR(ft)) return ft; @@ -850,6 +884,7 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte, { struct mlx5_flow_table *ft; struct mlx5_flow_rule *rule; + int modify_mask = 0; int err; rule = alloc_rule(dest); @@ -865,14 +900,20 @@ static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte, list_add(&rule->node.list, &fte->node.children); else list_add_tail(&rule->node.list, &fte->node.children); - if (dest) + if (dest) { fte->dests_size++; + + modify_mask |= dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER ? + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS) : + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); + } + if (fte->dests_size == 1 || !dest) err = mlx5_cmd_create_fte(get_dev(&ft->node), ft, fg->id, fte); else err = mlx5_cmd_update_fte(get_dev(&ft->node), - ft, fg->id, fte); + ft, fg->id, modify_mask, fte); if (err) goto free_rule; @@ -1065,6 +1106,50 @@ unlock_fg: return rule; } +struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule) +{ + struct mlx5_flow_rule *dst; + struct fs_fte *fte; + + fs_get_obj(fte, rule->node.parent); + + fs_for_each_dst(dst, fte) { + if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) + return dst->dest_attr.counter; + } + + return NULL; +} + +static bool counter_is_valid(struct mlx5_fc *counter, u32 action) +{ + if (!(action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) + return !counter; + + if (!counter) + return false; + + /* Hardware support counter for a drop action only */ + return action == (MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT); +} + +static bool dest_is_valid(struct mlx5_flow_destination *dest, + u32 action, + struct mlx5_flow_table *ft) +{ + if (dest && (dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)) + return counter_is_valid(dest->counter, action); + + if (!(action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) + return true; + + if (!dest || ((dest->type == + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) && + (dest->ft->level <= ft->level))) + return false; + return true; +} + static struct mlx5_flow_rule * _mlx5_add_flow_rule(struct mlx5_flow_table *ft, u8 match_criteria_enable, @@ -1077,7 +1162,7 @@ _mlx5_add_flow_rule(struct mlx5_flow_table *ft, struct mlx5_flow_group *g; struct mlx5_flow_rule *rule; - if ((action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && !dest) + if (!dest_is_valid(dest, action, ft)) return ERR_PTR(-EINVAL); nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT); @@ -1294,6 +1379,16 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, return &dev->priv.fdb_root_ns->ns; else return NULL; + case MLX5_FLOW_NAMESPACE_ESW_EGRESS: + if (dev->priv.esw_egress_root_ns) + return &dev->priv.esw_egress_root_ns->ns; + else + return NULL; + case MLX5_FLOW_NAMESPACE_ESW_INGRESS: + if (dev->priv.esw_ingress_root_ns) + return &dev->priv.esw_ingress_root_ns->ns; + else + return NULL; default: return NULL; } @@ -1311,7 +1406,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, EXPORT_SYMBOL(mlx5_get_flow_namespace); static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, - unsigned prio, int max_ft) + unsigned int prio, int num_levels) { struct fs_prio *fs_prio; @@ -1322,7 +1417,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns, fs_prio->node.type = FS_TYPE_PRIO; tree_init_node(&fs_prio->node, 1, NULL); tree_add_node(&fs_prio->node, &ns->node); - fs_prio->max_ft = max_ft; + fs_prio->num_levels = num_levels; fs_prio->prio = prio; list_add_tail(&fs_prio->node.list, &ns->node.children); @@ -1353,14 +1448,14 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio) return ns; } -static int create_leaf_prios(struct mlx5_flow_namespace *ns, struct init_tree_node - *prio_metadata) +static int create_leaf_prios(struct mlx5_flow_namespace *ns, int prio, + struct init_tree_node *prio_metadata) { struct fs_prio *fs_prio; int i; for (i = 0; i < prio_metadata->num_leaf_prios; i++) { - fs_prio = fs_create_prio(ns, i, prio_metadata->max_ft); + fs_prio = fs_create_prio(ns, prio++, prio_metadata->num_levels); if (IS_ERR(fs_prio)) return PTR_ERR(fs_prio); } @@ -1387,7 +1482,7 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev, struct init_tree_node *init_node, struct fs_node *fs_parent_node, struct init_tree_node *init_parent_node, - int index) + int prio) { int max_ft_level = MLX5_CAP_FLOWTABLE(dev, flow_table_properties_nic_receive. @@ -1405,8 +1500,8 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev, fs_get_obj(fs_ns, fs_parent_node); if (init_node->num_leaf_prios) - return create_leaf_prios(fs_ns, init_node); - fs_prio = fs_create_prio(fs_ns, index, init_node->max_ft); + return create_leaf_prios(fs_ns, prio, init_node); + fs_prio = fs_create_prio(fs_ns, prio, init_node->num_levels); if (IS_ERR(fs_prio)) return PTR_ERR(fs_prio); base = &fs_prio->node; @@ -1419,11 +1514,16 @@ static int init_root_tree_recursive(struct mlx5_core_dev *dev, } else { return -EINVAL; } + prio = 0; for (i = 0; i < init_node->ar_size; i++) { err = init_root_tree_recursive(dev, &init_node->children[i], - base, init_node, i); + base, init_node, prio); if (err) return err; + if (init_node->children[i].type == FS_TYPE_PRIO && + init_node->children[i].num_leaf_prios) { + prio += init_node->children[i].num_leaf_prios; + } } return 0; @@ -1479,9 +1579,9 @@ static int set_prio_attrs_in_ns(struct mlx5_flow_namespace *ns, int acc_level) struct fs_prio *prio; fs_for_each_prio(prio, ns) { - /* This updates prio start_level and max_ft */ + /* This updates prio start_level and num_levels */ set_prio_attrs_in_prio(prio, acc_level); - acc_level += prio->max_ft; + acc_level += prio->num_levels; } return acc_level; } @@ -1493,11 +1593,11 @@ static void set_prio_attrs_in_prio(struct fs_prio *prio, int acc_level) prio->start_level = acc_level; fs_for_each_ns(ns, prio) - /* This updates start_level and max_ft of ns's priority descendants */ + /* This updates start_level and num_levels of ns's priority descendants */ acc_level_ns = set_prio_attrs_in_ns(ns, acc_level); - if (!prio->max_ft) - prio->max_ft = acc_level_ns - prio->start_level; - WARN_ON(prio->max_ft < acc_level_ns - prio->start_level); + if (!prio->num_levels) + prio->num_levels = acc_level_ns - prio->start_level; + WARN_ON(prio->num_levels < acc_level_ns - prio->start_level); } static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) @@ -1508,12 +1608,13 @@ static void set_prio_attrs(struct mlx5_flow_root_namespace *root_ns) fs_for_each_prio(prio, ns) { set_prio_attrs_in_prio(prio, start_level); - start_level += prio->max_ft; + start_level += prio->num_levels; } } #define ANCHOR_PRIO 0 #define ANCHOR_SIZE 1 +#define ANCHOR_LEVEL 0 static int create_anchor_flow_table(struct mlx5_core_dev *dev) { @@ -1523,7 +1624,7 @@ static int create_anchor_flow_table(struct mlx5_core_dev ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ANCHOR); if (!ns) return -EINVAL; - ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE); + ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL); if (IS_ERR(ft)) { mlx5_core_err(dev, "Failed to create last anchor flow table"); return PTR_ERR(ft); @@ -1668,6 +1769,9 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev) { cleanup_root_ns(dev); cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns); + cleanup_single_prio_root_ns(dev, dev->priv.esw_egress_root_ns); + cleanup_single_prio_root_ns(dev, dev->priv.esw_ingress_root_ns); + mlx5_cleanup_fc_stats(dev); } static int init_fdb_root_ns(struct mlx5_core_dev *dev) @@ -1688,20 +1792,69 @@ static int init_fdb_root_ns(struct mlx5_core_dev *dev) } } +static int init_egress_acl_root_ns(struct mlx5_core_dev *dev) +{ + struct fs_prio *prio; + + dev->priv.esw_egress_root_ns = create_root_ns(dev, FS_FT_ESW_EGRESS_ACL); + if (!dev->priv.esw_egress_root_ns) + return -ENOMEM; + + /* create 1 prio*/ + prio = fs_create_prio(&dev->priv.esw_egress_root_ns->ns, 0, MLX5_TOTAL_VPORTS(dev)); + if (IS_ERR(prio)) + return PTR_ERR(prio); + else + return 0; +} + +static int init_ingress_acl_root_ns(struct mlx5_core_dev *dev) +{ + struct fs_prio *prio; + + dev->priv.esw_ingress_root_ns = create_root_ns(dev, FS_FT_ESW_INGRESS_ACL); + if (!dev->priv.esw_ingress_root_ns) + return -ENOMEM; + + /* create 1 prio*/ + prio = fs_create_prio(&dev->priv.esw_ingress_root_ns->ns, 0, MLX5_TOTAL_VPORTS(dev)); + if (IS_ERR(prio)) + return PTR_ERR(prio); + else + return 0; +} + int mlx5_init_fs(struct mlx5_core_dev *dev) { int err = 0; + err = mlx5_init_fc_stats(dev); + if (err) + return err; + if (MLX5_CAP_GEN(dev, nic_flow_table)) { err = init_root_ns(dev); if (err) - return err; + goto err; } if (MLX5_CAP_GEN(dev, eswitch_flow_table)) { err = init_fdb_root_ns(dev); if (err) - cleanup_root_ns(dev); + goto err; + } + if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { + err = init_egress_acl_root_ns(dev); + if (err) + goto err; + } + if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { + err = init_ingress_acl_root_ns(dev); + if (err) + goto err; } + return 0; +err: + mlx5_cleanup_fs(dev); return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index f37a6248a27b..aa41a7314691 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -45,8 +45,10 @@ enum fs_node_type { }; enum fs_flow_table_type { - FS_FT_NIC_RX = 0x0, - FS_FT_FDB = 0X4, + FS_FT_NIC_RX = 0x0, + FS_FT_ESW_EGRESS_ACL = 0x2, + FS_FT_ESW_INGRESS_ACL = 0x3, + FS_FT_FDB = 0X4, }; enum fs_fte_status { @@ -79,6 +81,7 @@ struct mlx5_flow_rule { struct mlx5_flow_table { struct fs_node node; u32 id; + u16 vport; unsigned int max_fte; unsigned int level; enum fs_flow_table_type type; @@ -93,6 +96,28 @@ struct mlx5_flow_table { struct list_head fwd_rules; }; +struct mlx5_fc_cache { + u64 packets; + u64 bytes; + u64 lastuse; +}; + +struct mlx5_fc { + struct list_head list; + + /* last{packets,bytes} members are used when calculating the delta since + * last reading + */ + u64 lastpackets; + u64 lastbytes; + + u16 id; + bool deleted; + bool aging; + + struct mlx5_fc_cache cache ____cacheline_aligned_in_smp; +}; + /* Type of children is mlx5_flow_rule */ struct fs_fte { struct fs_node node; @@ -102,12 +127,13 @@ struct fs_fte { u32 index; u32 action; enum fs_fte_status status; + struct mlx5_fc *counter; }; /* Type of children is mlx5_flow_table/namespace */ struct fs_prio { struct fs_node node; - unsigned int max_ft; + unsigned int num_levels; unsigned int start_level; unsigned int prio; unsigned int num_ft; @@ -143,6 +169,9 @@ struct mlx5_flow_root_namespace { struct mutex chain_lock; }; +int mlx5_init_fc_stats(struct mlx5_core_dev *dev); +void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev); + int mlx5_init_fs(struct mlx5_core_dev *dev); void mlx5_cleanup_fs(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c new file mode 100644 index 000000000000..164dc37fda72 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/mlx5/driver.h> +#include <linux/mlx5/fs.h> +#include "mlx5_core.h" +#include "fs_core.h" +#include "fs_cmd.h" + +#define MLX5_FC_STATS_PERIOD msecs_to_jiffies(1000) + +/* locking scheme: + * + * It is the responsibility of the user to prevent concurrent calls or bad + * ordering to mlx5_fc_create(), mlx5_fc_destroy() and accessing a reference + * to struct mlx5_fc. + * e.g en_tc.c is protected by RTNL lock of its caller, and will never call a + * dump (access to struct mlx5_fc) after a counter is destroyed. + * + * access to counter list: + * - create (user context) + * - mlx5_fc_create() only adds to an addlist to be used by + * mlx5_fc_stats_query_work(). addlist is protected by a spinlock. + * - spawn thread to do the actual destroy + * + * - destroy (user context) + * - mark a counter as deleted + * - spawn thread to do the actual del + * + * - dump (user context) + * user should not call dump after destroy + * + * - query (single thread workqueue context) + * destroy/dump - no conflict (see destroy) + * query/dump - packets and bytes might be inconsistent (since update is not + * atomic) + * query/create - no conflict (see create) + * since every create/destroy spawn the work, only after necessary time has + * elapsed, the thread will actually query the hardware. + */ + +static void mlx5_fc_stats_work(struct work_struct *work) +{ + struct mlx5_core_dev *dev = container_of(work, struct mlx5_core_dev, + priv.fc_stats.work.work); + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + unsigned long now = jiffies; + struct mlx5_fc *counter; + struct mlx5_fc *tmp; + int err = 0; + + spin_lock(&fc_stats->addlist_lock); + + list_splice_tail_init(&fc_stats->addlist, &fc_stats->list); + + if (!list_empty(&fc_stats->list)) + queue_delayed_work(fc_stats->wq, &fc_stats->work, MLX5_FC_STATS_PERIOD); + + spin_unlock(&fc_stats->addlist_lock); + + list_for_each_entry_safe(counter, tmp, &fc_stats->list, list) { + struct mlx5_fc_cache *c = &counter->cache; + u64 packets; + u64 bytes; + + if (counter->deleted) { + list_del(&counter->list); + + mlx5_cmd_fc_free(dev, counter->id); + + kfree(counter); + continue; + } + + if (time_before(now, fc_stats->next_query)) + continue; + + err = mlx5_cmd_fc_query(dev, counter->id, &packets, &bytes); + if (err) { + pr_err("Error querying stats for counter id %d\n", + counter->id); + continue; + } + + if (packets == c->packets) + continue; + + c->lastuse = jiffies; + c->packets = packets; + c->bytes = bytes; + } + + if (time_after_eq(now, fc_stats->next_query)) + fc_stats->next_query = now + MLX5_FC_STATS_PERIOD; +} + +struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc *counter; + int err; + + counter = kzalloc(sizeof(*counter), GFP_KERNEL); + if (!counter) + return ERR_PTR(-ENOMEM); + + err = mlx5_cmd_fc_alloc(dev, &counter->id); + if (err) + goto err_out; + + if (aging) { + counter->aging = true; + + spin_lock(&fc_stats->addlist_lock); + list_add(&counter->list, &fc_stats->addlist); + spin_unlock(&fc_stats->addlist_lock); + + mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); + } + + return counter; + +err_out: + kfree(counter); + + return ERR_PTR(err); +} + +void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + if (!counter) + return; + + if (counter->aging) { + counter->deleted = true; + mod_delayed_work(fc_stats->wq, &fc_stats->work, 0); + return; + } + + mlx5_cmd_fc_free(dev, counter->id); + kfree(counter); +} + +int mlx5_init_fc_stats(struct mlx5_core_dev *dev) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + + INIT_LIST_HEAD(&fc_stats->list); + INIT_LIST_HEAD(&fc_stats->addlist); + spin_lock_init(&fc_stats->addlist_lock); + + fc_stats->wq = create_singlethread_workqueue("mlx5_fc"); + if (!fc_stats->wq) + return -ENOMEM; + + INIT_DELAYED_WORK(&fc_stats->work, mlx5_fc_stats_work); + + return 0; +} + +void mlx5_cleanup_fc_stats(struct mlx5_core_dev *dev) +{ + struct mlx5_fc_stats *fc_stats = &dev->priv.fc_stats; + struct mlx5_fc *counter; + struct mlx5_fc *tmp; + + cancel_delayed_work_sync(&dev->priv.fc_stats.work); + destroy_workqueue(dev->priv.fc_stats.wq); + dev->priv.fc_stats.wq = NULL; + + list_splice_tail_init(&fc_stats->addlist, &fc_stats->list); + + list_for_each_entry_safe(counter, tmp, &fc_stats->list, list) { + list_del(&counter->list); + + mlx5_cmd_fc_free(dev, counter->id); + + kfree(counter); + } +} + +void mlx5_fc_query_cached(struct mlx5_fc *counter, + u64 *bytes, u64 *packets, u64 *lastuse) +{ + struct mlx5_fc_cache c; + + c = counter->cache; + + *bytes = c.bytes - counter->lastbytes; + *packets = c.packets - counter->lastpackets; + *lastuse = c.lastuse; + + counter->lastbytes = c.bytes; + counter->lastpackets = c.packets; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 6892746fd10d..6feef7fb9d6a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -48,6 +48,9 @@ #include <linux/kmod.h> #include <linux/delay.h> #include <linux/mlx5/mlx5_ifc.h> +#ifdef CONFIG_RFS_ACCEL +#include <linux/cpu_rmap.h> +#endif #include "mlx5_core.h" #include "fs_core.h" #ifdef CONFIG_MLX5_CORE_EN @@ -665,6 +668,12 @@ static void free_comp_eqs(struct mlx5_core_dev *dev) struct mlx5_eq_table *table = &dev->priv.eq_table; struct mlx5_eq *eq, *n; +#ifdef CONFIG_RFS_ACCEL + if (dev->rmap) { + free_irq_cpu_rmap(dev->rmap); + dev->rmap = NULL; + } +#endif spin_lock(&table->lock); list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { list_del(&eq->list); @@ -691,6 +700,11 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) INIT_LIST_HEAD(&table->comp_eqs_list); ncomp_vec = table->num_comp_vectors; nent = MLX5_COMP_EQ_SIZE; +#ifdef CONFIG_RFS_ACCEL + dev->rmap = alloc_irq_cpu_rmap(ncomp_vec); + if (!dev->rmap) + return -ENOMEM; +#endif for (i = 0; i < ncomp_vec; i++) { eq = kzalloc(sizeof(*eq), GFP_KERNEL); if (!eq) { @@ -698,6 +712,10 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev) goto clean; } +#ifdef CONFIG_RFS_ACCEL + irq_cpu_rmap_add(dev->rmap, + dev->priv.msix_arr[i + MLX5_EQ_VEC_COMP_BASE].vector); +#endif snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i); err = mlx5_create_map_eq(dev, eq, i + MLX5_EQ_VEC_COMP_BASE, nent, 0, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 0b0b226c789e..482604bd051c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -42,6 +42,8 @@ #define DRIVER_VERSION "3.0-1" #define DRIVER_RELDATE "January 2015" +#define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev)) + extern int mlx5_core_debug_mask; #define mlx5_core_dbg(__dev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 53cc1e2c693b..3e35611b19c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -115,6 +115,19 @@ int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, } EXPORT_SYMBOL_GPL(mlx5_query_port_ptys); +int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration) +{ + u32 out[MLX5_ST_SZ_DW(mlcr_reg)]; + u32 in[MLX5_ST_SZ_DW(mlcr_reg)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(mlcr_reg, in, local_port, 1); + MLX5_SET(mlcr_reg, in, beacon_duration, beacon_duration); + + return mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MLCR, 0, 1); +} + int mlx5_query_port_proto_cap(struct mlx5_core_dev *dev, u32 *proto_cap, int proto_mask) { @@ -297,6 +310,82 @@ void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, } EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); +static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num) +{ + u32 out[MLX5_ST_SZ_DW(pmlp_reg)]; + u32 in[MLX5_ST_SZ_DW(pmlp_reg)]; + int module_mapping; + int err; + + memset(in, 0, sizeof(in)); + + MLX5_SET(pmlp_reg, in, local_port, 1); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_REG_PMLP, 0, 0); + if (err) + return err; + + module_mapping = MLX5_GET(pmlp_reg, out, lane0_module_mapping); + *module_num = module_mapping & MLX5_EEPROM_IDENTIFIER_BYTE_MASK; + + return 0; +} + +int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, + u16 offset, u16 size, u8 *data) +{ + u32 out[MLX5_ST_SZ_DW(mcia_reg)]; + u32 in[MLX5_ST_SZ_DW(mcia_reg)]; + int module_num; + u16 i2c_addr; + int status; + int err; + void *ptr = MLX5_ADDR_OF(mcia_reg, out, dword_0); + + err = mlx5_query_module_num(dev, &module_num); + if (err) + return err; + + memset(in, 0, sizeof(in)); + size = min_t(int, size, MLX5_EEPROM_MAX_BYTES); + + if (offset < MLX5_EEPROM_PAGE_LENGTH && + offset + size > MLX5_EEPROM_PAGE_LENGTH) + /* Cross pages read, read until offset 256 in low page */ + size -= offset + size - MLX5_EEPROM_PAGE_LENGTH; + + i2c_addr = MLX5_I2C_ADDR_LOW; + if (offset >= MLX5_EEPROM_PAGE_LENGTH) { + i2c_addr = MLX5_I2C_ADDR_HIGH; + offset -= MLX5_EEPROM_PAGE_LENGTH; + } + + MLX5_SET(mcia_reg, in, l, 0); + MLX5_SET(mcia_reg, in, module, module_num); + MLX5_SET(mcia_reg, in, i2c_device_address, i2c_addr); + MLX5_SET(mcia_reg, in, page_number, 0); + MLX5_SET(mcia_reg, in, device_address, offset); + MLX5_SET(mcia_reg, in, size, size); + + err = mlx5_core_access_reg(dev, in, sizeof(in), out, + sizeof(out), MLX5_REG_MCIA, 0, 0); + if (err) + return err; + + status = MLX5_GET(mcia_reg, out, status); + if (status) { + mlx5_core_err(dev, "query_mcia_reg failed: status: 0x%x\n", + status); + return -EIO; + } + + memcpy(data, ptr, size); + + return size; +} +EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom); + static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc, int pvlc_size, u8 local_port) { @@ -607,3 +696,52 @@ int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode) return err; } EXPORT_SYMBOL_GPL(mlx5_query_port_wol); + +static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, + int outlen) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(pcmr_reg, in, local_port, 1); + + return mlx5_core_access_reg(mdev, in, sizeof(in), out, + outlen, MLX5_REG_PCMR, 0, 0); +} + +static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen) +{ + u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; + + return mlx5_core_access_reg(mdev, in, inlen, out, + sizeof(out), MLX5_REG_PCMR, 0, 1); +} + +int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, fcs_chk, enable); + + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + +void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported, + bool *enabled) +{ + u32 out[MLX5_ST_SZ_DW(pcmr_reg)]; + /* Default values for FW which do not support MLX5_REG_PCMR */ + *supported = false; + *enabled = true; + + if (!MLX5_CAP_GEN(mdev, ports_check)) + return; + + if (mlx5_query_ports_check(mdev, out, sizeof(out))) + return; + + *supported = !!(MLX5_GET(pcmr_reg, out, fcs_cap)); + *enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index def289375ecb..b720a274220d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -538,3 +538,71 @@ void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, mlx5_core_destroy_sq(dev, sq->qpn); } EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked); + +int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id) +{ + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)]; + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)]; + int err; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); + err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *counter_id = MLX5_GET(alloc_q_counter_out, out, + counter_set_id); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_alloc_q_counter); + +int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_q_counter_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(dealloc_q_counter_in, in, opcode, + MLX5_CMD_OP_DEALLOC_Q_COUNTER); + MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter_id); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + sizeof(out)); +} +EXPORT_SYMBOL_GPL(mlx5_core_dealloc_q_counter); + +int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, + int reset, void *out, int out_size) +{ + u32 in[MLX5_ST_SZ_DW(query_q_counter_in)]; + + memset(in, 0, sizeof(in)); + + MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); + MLX5_SET(query_q_counter_in, in, clear, reset); + MLX5_SET(query_q_counter_in, in, counter_set_id, counter_id); + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_size); +} +EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter); + +int mlx5_core_query_out_of_buffer(struct mlx5_core_dev *dev, u16 counter_id, + u32 *out_of_buffer) +{ + int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out); + void *out; + int err; + + out = mlx5_vzalloc(outlen); + if (!out) + return -ENOMEM; + + err = mlx5_core_query_q_counter(dev, counter_id, 0, out, outlen); + if (!err) + *out_of_buffer = MLX5_GET(query_q_counter_out, out, + out_of_buffer); + + kfree(out); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h index 217ac530a514..5def12c048e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.h @@ -48,18 +48,12 @@ struct mlx5e_vxlan_work { static inline bool mlx5e_vxlan_allowed(struct mlx5_core_dev *mdev) { - return IS_ENABLED(CONFIG_MLX5_CORE_EN_VXLAN) && - (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) && + return (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan) && mlx5_core_is_pf(mdev)); } -#ifdef CONFIG_MLX5_CORE_EN_VXLAN void mlx5e_vxlan_init(struct mlx5e_priv *priv); void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv); -#else -static inline void mlx5e_vxlan_init(struct mlx5e_priv *priv) {} -static inline void mlx5e_vxlan_cleanup(struct mlx5e_priv *priv) {} -#endif void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, sa_family_t sa_family, u16 port, int add); diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig index 2ad7f67854d5..5989f7cb5462 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig +++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig @@ -50,3 +50,11 @@ config MLXSW_SPECTRUM To compile this driver as a module, choose M here: the module will be called mlxsw_spectrum. + +config MLXSW_SPECTRUM_DCB + bool "Data Center Bridging (DCB) support" + depends on MLXSW_SPECTRUM && DCB + default y + ---help--- + Say Y here if you want to use Data Center Bridging (DCB) in the + driver. diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile index 584cac444852..9b5ebf84c051 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/Makefile +++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile @@ -8,3 +8,4 @@ mlxsw_switchx2-objs := switchx2.o obj-$(CONFIG_MLXSW_SPECTRUM) += mlxsw_spectrum.o mlxsw_spectrum-objs := spectrum.o spectrum_buffers.o \ spectrum_switchdev.o +mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB) += spectrum_dcb.o diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index f69f6280519f..b0a0b01bb4ef 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -44,7 +44,7 @@ #include <linux/seq_file.h> #include <linux/u64_stats_sync.h> #include <linux/netdevice.h> -#include <linux/wait.h> +#include <linux/completion.h> #include <linux/skbuff.h> #include <linux/etherdevice.h> #include <linux/types.h> @@ -55,6 +55,7 @@ #include <linux/mutex.h> #include <linux/rcupdate.h> #include <linux/slab.h> +#include <linux/workqueue.h> #include <asm/byteorder.h> #include <net/devlink.h> @@ -73,6 +74,8 @@ static const char mlxsw_core_driver_name[] = "mlxsw_core"; static struct dentry *mlxsw_core_dbg_root; +static struct workqueue_struct *mlxsw_wq; + struct mlxsw_core_pcpu_stats { u64 trap_rx_packets[MLXSW_TRAP_ID_MAX]; u64 trap_rx_bytes[MLXSW_TRAP_ID_MAX]; @@ -93,11 +96,9 @@ struct mlxsw_core { struct list_head rx_listener_list; struct list_head event_listener_list; struct { - struct sk_buff *resp_skb; - u64 tid; - wait_queue_head_t wait; - bool trans_active; - struct mutex lock; /* One EMAD transaction at a time. */ + atomic64_t tid; + struct list_head trans_list; + spinlock_t trans_list_lock; /* protects trans_list writes */ bool use_emad; } emad; struct mlxsw_core_pcpu_stats __percpu *pcpu_stats; @@ -114,6 +115,12 @@ struct mlxsw_core { /* driver_priv has to be always the last item */ }; +void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core) +{ + return mlxsw_core->driver_priv; +} +EXPORT_SYMBOL(mlxsw_core_driver_priv); + struct mlxsw_rx_listener_item { struct list_head list; struct mlxsw_rx_listener rxl; @@ -284,7 +291,7 @@ static void mlxsw_emad_pack_reg_tlv(char *reg_tlv, static void mlxsw_emad_pack_op_tlv(char *op_tlv, const struct mlxsw_reg_info *reg, enum mlxsw_core_reg_access_type type, - struct mlxsw_core *mlxsw_core) + u64 tid) { mlxsw_emad_op_tlv_type_set(op_tlv, MLXSW_EMAD_TLV_TYPE_OP); mlxsw_emad_op_tlv_len_set(op_tlv, MLXSW_EMAD_OP_TLV_LEN); @@ -300,7 +307,7 @@ static void mlxsw_emad_pack_op_tlv(char *op_tlv, MLXSW_EMAD_OP_TLV_METHOD_WRITE); mlxsw_emad_op_tlv_class_set(op_tlv, MLXSW_EMAD_OP_TLV_CLASS_REG_ACCESS); - mlxsw_emad_op_tlv_tid_set(op_tlv, mlxsw_core->emad.tid); + mlxsw_emad_op_tlv_tid_set(op_tlv, tid); } static int mlxsw_emad_construct_eth_hdr(struct sk_buff *skb) @@ -322,7 +329,7 @@ static void mlxsw_emad_construct(struct sk_buff *skb, const struct mlxsw_reg_info *reg, char *payload, enum mlxsw_core_reg_access_type type, - struct mlxsw_core *mlxsw_core) + u64 tid) { char *buf; @@ -333,7 +340,7 @@ static void mlxsw_emad_construct(struct sk_buff *skb, mlxsw_emad_pack_reg_tlv(buf, reg, payload); buf = skb_push(skb, MLXSW_EMAD_OP_TLV_LEN * sizeof(u32)); - mlxsw_emad_pack_op_tlv(buf, reg, type, mlxsw_core); + mlxsw_emad_pack_op_tlv(buf, reg, type, tid); mlxsw_emad_construct_eth_hdr(skb); } @@ -370,58 +377,16 @@ static bool mlxsw_emad_is_resp(const struct sk_buff *skb) return (mlxsw_emad_op_tlv_r_get(op_tlv) == MLXSW_EMAD_OP_TLV_RESPONSE); } -#define MLXSW_EMAD_TIMEOUT_MS 200 - -static int __mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, - struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) -{ - int err; - int ret; - - mlxsw_core->emad.trans_active = true; - - err = mlxsw_core_skb_transmit(mlxsw_core->driver_priv, skb, tx_info); - if (err) { - dev_err(mlxsw_core->bus_info->dev, "Failed to transmit EMAD (tid=%llx)\n", - mlxsw_core->emad.tid); - dev_kfree_skb(skb); - goto trans_inactive_out; - } - - ret = wait_event_timeout(mlxsw_core->emad.wait, - !(mlxsw_core->emad.trans_active), - msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS)); - if (!ret) { - dev_warn(mlxsw_core->bus_info->dev, "EMAD timed-out (tid=%llx)\n", - mlxsw_core->emad.tid); - err = -EIO; - goto trans_inactive_out; - } - - return 0; - -trans_inactive_out: - mlxsw_core->emad.trans_active = false; - return err; -} - -static int mlxsw_emad_process_status(struct mlxsw_core *mlxsw_core, - char *op_tlv) +static int mlxsw_emad_process_status(char *op_tlv, + enum mlxsw_emad_op_tlv_status *p_status) { - enum mlxsw_emad_op_tlv_status status; - u64 tid; + *p_status = mlxsw_emad_op_tlv_status_get(op_tlv); - status = mlxsw_emad_op_tlv_status_get(op_tlv); - tid = mlxsw_emad_op_tlv_tid_get(op_tlv); - - switch (status) { + switch (*p_status) { case MLXSW_EMAD_OP_TLV_STATUS_SUCCESS: return 0; case MLXSW_EMAD_OP_TLV_STATUS_BUSY: case MLXSW_EMAD_OP_TLV_STATUS_MESSAGE_RECEIPT_ACK: - dev_warn(mlxsw_core->bus_info->dev, "Reg access status again (tid=%llx,status=%x(%s))\n", - tid, status, mlxsw_emad_op_tlv_status_str(status)); return -EAGAIN; case MLXSW_EMAD_OP_TLV_STATUS_VERSION_NOT_SUPPORTED: case MLXSW_EMAD_OP_TLV_STATUS_UNKNOWN_TLV: @@ -432,70 +397,150 @@ static int mlxsw_emad_process_status(struct mlxsw_core *mlxsw_core, case MLXSW_EMAD_OP_TLV_STATUS_RESOURCE_NOT_AVAILABLE: case MLXSW_EMAD_OP_TLV_STATUS_INTERNAL_ERROR: default: - dev_err(mlxsw_core->bus_info->dev, "Reg access status failed (tid=%llx,status=%x(%s))\n", - tid, status, mlxsw_emad_op_tlv_status_str(status)); return -EIO; } } -static int mlxsw_emad_process_status_skb(struct mlxsw_core *mlxsw_core, - struct sk_buff *skb) +static int +mlxsw_emad_process_status_skb(struct sk_buff *skb, + enum mlxsw_emad_op_tlv_status *p_status) +{ + return mlxsw_emad_process_status(mlxsw_emad_op_tlv(skb), p_status); +} + +struct mlxsw_reg_trans { + struct list_head list; + struct list_head bulk_list; + struct mlxsw_core *core; + struct sk_buff *tx_skb; + struct mlxsw_tx_info tx_info; + struct delayed_work timeout_dw; + unsigned int retries; + u64 tid; + struct completion completion; + atomic_t active; + mlxsw_reg_trans_cb_t *cb; + unsigned long cb_priv; + const struct mlxsw_reg_info *reg; + enum mlxsw_core_reg_access_type type; + int err; + enum mlxsw_emad_op_tlv_status emad_status; + struct rcu_head rcu; +}; + +#define MLXSW_EMAD_TIMEOUT_MS 200 + +static void mlxsw_emad_trans_timeout_schedule(struct mlxsw_reg_trans *trans) { - return mlxsw_emad_process_status(mlxsw_core, mlxsw_emad_op_tlv(skb)); + unsigned long timeout = msecs_to_jiffies(MLXSW_EMAD_TIMEOUT_MS); + + mlxsw_core_schedule_dw(&trans->timeout_dw, timeout); } static int mlxsw_emad_transmit(struct mlxsw_core *mlxsw_core, - struct sk_buff *skb, - const struct mlxsw_tx_info *tx_info) + struct mlxsw_reg_trans *trans) { - struct sk_buff *trans_skb; - int n_retry; + struct sk_buff *skb; int err; - n_retry = 0; -retry: - /* We copy the EMAD to a new skb, since we might need - * to retransmit it in case of failure. - */ - trans_skb = skb_copy(skb, GFP_KERNEL); - if (!trans_skb) { - err = -ENOMEM; - goto out; + skb = skb_copy(trans->tx_skb, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + atomic_set(&trans->active, 1); + err = mlxsw_core_skb_transmit(mlxsw_core, skb, &trans->tx_info); + if (err) { + dev_kfree_skb(skb); + return err; } + mlxsw_emad_trans_timeout_schedule(trans); + return 0; +} - err = __mlxsw_emad_transmit(mlxsw_core, trans_skb, tx_info); - if (!err) { - struct sk_buff *resp_skb = mlxsw_core->emad.resp_skb; +static void mlxsw_emad_trans_finish(struct mlxsw_reg_trans *trans, int err) +{ + struct mlxsw_core *mlxsw_core = trans->core; + + dev_kfree_skb(trans->tx_skb); + spin_lock_bh(&mlxsw_core->emad.trans_list_lock); + list_del_rcu(&trans->list); + spin_unlock_bh(&mlxsw_core->emad.trans_list_lock); + trans->err = err; + complete(&trans->completion); +} - err = mlxsw_emad_process_status_skb(mlxsw_core, resp_skb); - if (err) - dev_kfree_skb(resp_skb); - if (!err || err != -EAGAIN) - goto out; +static void mlxsw_emad_transmit_retry(struct mlxsw_core *mlxsw_core, + struct mlxsw_reg_trans *trans) +{ + int err; + + if (trans->retries < MLXSW_EMAD_MAX_RETRY) { + trans->retries++; + err = mlxsw_emad_transmit(trans->core, trans); + if (err == 0) + return; + } else { + err = -EIO; } - if (n_retry++ < MLXSW_EMAD_MAX_RETRY) - goto retry; + mlxsw_emad_trans_finish(trans, err); +} -out: - dev_kfree_skb(skb); - mlxsw_core->emad.tid++; - return err; +static void mlxsw_emad_trans_timeout_work(struct work_struct *work) +{ + struct mlxsw_reg_trans *trans = container_of(work, + struct mlxsw_reg_trans, + timeout_dw.work); + + if (!atomic_dec_and_test(&trans->active)) + return; + + mlxsw_emad_transmit_retry(trans->core, trans); +} + +static void mlxsw_emad_process_response(struct mlxsw_core *mlxsw_core, + struct mlxsw_reg_trans *trans, + struct sk_buff *skb) +{ + int err; + + if (!atomic_dec_and_test(&trans->active)) + return; + + err = mlxsw_emad_process_status_skb(skb, &trans->emad_status); + if (err == -EAGAIN) { + mlxsw_emad_transmit_retry(mlxsw_core, trans); + } else { + if (err == 0) { + char *op_tlv = mlxsw_emad_op_tlv(skb); + + if (trans->cb) + trans->cb(mlxsw_core, + mlxsw_emad_reg_payload(op_tlv), + trans->reg->len, trans->cb_priv); + } + mlxsw_emad_trans_finish(trans, err); + } } +/* called with rcu read lock held */ static void mlxsw_emad_rx_listener_func(struct sk_buff *skb, u8 local_port, void *priv) { struct mlxsw_core *mlxsw_core = priv; + struct mlxsw_reg_trans *trans; - if (mlxsw_emad_is_resp(skb) && - mlxsw_core->emad.trans_active && - mlxsw_emad_get_tid(skb) == mlxsw_core->emad.tid) { - mlxsw_core->emad.resp_skb = skb; - mlxsw_core->emad.trans_active = false; - wake_up(&mlxsw_core->emad.wait); - } else { - dev_kfree_skb(skb); + if (!mlxsw_emad_is_resp(skb)) + goto free_skb; + + list_for_each_entry_rcu(trans, &mlxsw_core->emad.trans_list, list) { + if (mlxsw_emad_get_tid(skb) == trans->tid) { + mlxsw_emad_process_response(mlxsw_core, trans, skb); + break; + } } + +free_skb: + dev_kfree_skb(skb); } static const struct mlxsw_rx_listener mlxsw_emad_rx_listener = { @@ -522,18 +567,19 @@ static int mlxsw_emad_traps_set(struct mlxsw_core *mlxsw_core) static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) { + u64 tid; int err; /* Set the upper 32 bits of the transaction ID field to a random * number. This allows us to discard EMADs addressed to other * devices. */ - get_random_bytes(&mlxsw_core->emad.tid, 4); - mlxsw_core->emad.tid = mlxsw_core->emad.tid << 32; + get_random_bytes(&tid, 4); + tid <<= 32; + atomic64_set(&mlxsw_core->emad.tid, tid); - init_waitqueue_head(&mlxsw_core->emad.wait); - mlxsw_core->emad.trans_active = false; - mutex_init(&mlxsw_core->emad.lock); + INIT_LIST_HEAD(&mlxsw_core->emad.trans_list); + spin_lock_init(&mlxsw_core->emad.trans_list_lock); err = mlxsw_core_rx_listener_register(mlxsw_core, &mlxsw_emad_rx_listener, @@ -591,6 +637,59 @@ static struct sk_buff *mlxsw_emad_alloc(const struct mlxsw_core *mlxsw_core, return skb; } +static int mlxsw_emad_reg_access(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, + char *payload, + enum mlxsw_core_reg_access_type type, + struct mlxsw_reg_trans *trans, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, + unsigned long cb_priv, u64 tid) +{ + struct sk_buff *skb; + int err; + + dev_dbg(mlxsw_core->bus_info->dev, "EMAD reg access (tid=%llx,reg_id=%x(%s),type=%s)\n", + trans->tid, reg->id, mlxsw_reg_id_str(reg->id), + mlxsw_core_reg_access_type_str(type)); + + skb = mlxsw_emad_alloc(mlxsw_core, reg->len); + if (!skb) + return -ENOMEM; + + list_add_tail(&trans->bulk_list, bulk_list); + trans->core = mlxsw_core; + trans->tx_skb = skb; + trans->tx_info.local_port = MLXSW_PORT_CPU_PORT; + trans->tx_info.is_emad = true; + INIT_DELAYED_WORK(&trans->timeout_dw, mlxsw_emad_trans_timeout_work); + trans->tid = tid; + init_completion(&trans->completion); + trans->cb = cb; + trans->cb_priv = cb_priv; + trans->reg = reg; + trans->type = type; + + mlxsw_emad_construct(skb, reg, payload, type, trans->tid); + mlxsw_core->driver->txhdr_construct(skb, &trans->tx_info); + + spin_lock_bh(&mlxsw_core->emad.trans_list_lock); + list_add_tail_rcu(&trans->list, &mlxsw_core->emad.trans_list); + spin_unlock_bh(&mlxsw_core->emad.trans_list_lock); + err = mlxsw_emad_transmit(mlxsw_core, trans); + if (err) + goto err_out; + return 0; + +err_out: + spin_lock_bh(&mlxsw_core->emad.trans_list_lock); + list_del_rcu(&trans->list); + spin_unlock_bh(&mlxsw_core->emad.trans_list_lock); + list_del(&trans->bulk_list); + dev_kfree_skb(trans->tx_skb); + return err; +} + /***************** * Core functions *****************/ @@ -680,24 +779,6 @@ static const struct file_operations mlxsw_core_rx_stats_dbg_ops = { .llseek = seq_lseek }; -static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core, - const char *buf, size_t size) -{ - __be32 *m = (__be32 *) buf; - int i; - int count = size / sizeof(__be32); - - for (i = count - 1; i >= 0; i--) - if (m[i]) - break; - i++; - count = i ? i : 1; - for (i = 0; i < count; i += 4) - dev_dbg(mlxsw_core->bus_info->dev, "%04x - %08x %08x %08x %08x\n", - i * 4, be32_to_cpu(m[i]), be32_to_cpu(m[i + 1]), - be32_to_cpu(m[i + 2]), be32_to_cpu(m[i + 3])); -} - int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver) { spin_lock(&mlxsw_core_driver_list_lock); @@ -795,8 +876,7 @@ static int mlxsw_devlink_port_split(struct devlink *devlink, return -EINVAL; if (!mlxsw_core->driver->port_split) return -EOPNOTSUPP; - return mlxsw_core->driver->port_split(mlxsw_core->driver_priv, - port_index, count); + return mlxsw_core->driver->port_split(mlxsw_core, port_index, count); } static int mlxsw_devlink_port_unsplit(struct devlink *devlink, @@ -808,13 +888,171 @@ static int mlxsw_devlink_port_unsplit(struct devlink *devlink, return -EINVAL; if (!mlxsw_core->driver->port_unsplit) return -EOPNOTSUPP; - return mlxsw_core->driver->port_unsplit(mlxsw_core->driver_priv, - port_index); + return mlxsw_core->driver->port_unsplit(mlxsw_core, port_index); +} + +static int +mlxsw_devlink_sb_pool_get(struct devlink *devlink, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->sb_pool_get) + return -EOPNOTSUPP; + return mlxsw_driver->sb_pool_get(mlxsw_core, sb_index, + pool_index, pool_info); +} + +static int +mlxsw_devlink_sb_pool_set(struct devlink *devlink, + unsigned int sb_index, u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->sb_pool_set) + return -EOPNOTSUPP; + return mlxsw_driver->sb_pool_set(mlxsw_core, sb_index, + pool_index, size, threshold_type); +} + +static void *__dl_port(struct devlink_port *devlink_port) +{ + return container_of(devlink_port, struct mlxsw_core_port, devlink_port); +} + +static int mlxsw_devlink_sb_port_pool_get(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_port_pool_get) + return -EOPNOTSUPP; + return mlxsw_driver->sb_port_pool_get(mlxsw_core_port, sb_index, + pool_index, p_threshold); +} + +static int mlxsw_devlink_sb_port_pool_set(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 threshold) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_port_pool_set) + return -EOPNOTSUPP; + return mlxsw_driver->sb_port_pool_set(mlxsw_core_port, sb_index, + pool_index, threshold); +} + +static int +mlxsw_devlink_sb_tc_pool_bind_get(struct devlink_port *devlink_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_tc_pool_bind_get) + return -EOPNOTSUPP; + return mlxsw_driver->sb_tc_pool_bind_get(mlxsw_core_port, sb_index, + tc_index, pool_type, + p_pool_index, p_threshold); +} + +static int +mlxsw_devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_tc_pool_bind_set) + return -EOPNOTSUPP; + return mlxsw_driver->sb_tc_pool_bind_set(mlxsw_core_port, sb_index, + tc_index, pool_type, + pool_index, threshold); +} + +static int mlxsw_devlink_sb_occ_snapshot(struct devlink *devlink, + unsigned int sb_index) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->sb_occ_snapshot) + return -EOPNOTSUPP; + return mlxsw_driver->sb_occ_snapshot(mlxsw_core, sb_index); +} + +static int mlxsw_devlink_sb_occ_max_clear(struct devlink *devlink, + unsigned int sb_index) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + + if (!mlxsw_driver->sb_occ_max_clear) + return -EOPNOTSUPP; + return mlxsw_driver->sb_occ_max_clear(mlxsw_core, sb_index); +} + +static int +mlxsw_devlink_sb_occ_port_pool_get(struct devlink_port *devlink_port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_occ_port_pool_get) + return -EOPNOTSUPP; + return mlxsw_driver->sb_occ_port_pool_get(mlxsw_core_port, sb_index, + pool_index, p_cur, p_max); +} + +static int +mlxsw_devlink_sb_occ_tc_port_bind_get(struct devlink_port *devlink_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max) +{ + struct mlxsw_core *mlxsw_core = devlink_priv(devlink_port->devlink); + struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver; + struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port); + + if (!mlxsw_driver->sb_occ_tc_port_bind_get) + return -EOPNOTSUPP; + return mlxsw_driver->sb_occ_tc_port_bind_get(mlxsw_core_port, + sb_index, tc_index, + pool_type, p_cur, p_max); } static const struct devlink_ops mlxsw_devlink_ops = { - .port_split = mlxsw_devlink_port_split, - .port_unsplit = mlxsw_devlink_port_unsplit, + .port_split = mlxsw_devlink_port_split, + .port_unsplit = mlxsw_devlink_port_unsplit, + .sb_pool_get = mlxsw_devlink_sb_pool_get, + .sb_pool_set = mlxsw_devlink_sb_pool_set, + .sb_port_pool_get = mlxsw_devlink_sb_port_pool_get, + .sb_port_pool_set = mlxsw_devlink_sb_port_pool_set, + .sb_tc_pool_bind_get = mlxsw_devlink_sb_tc_pool_bind_get, + .sb_tc_pool_bind_set = mlxsw_devlink_sb_tc_pool_bind_set, + .sb_occ_snapshot = mlxsw_devlink_sb_occ_snapshot, + .sb_occ_max_clear = mlxsw_devlink_sb_occ_max_clear, + .sb_occ_port_pool_get = mlxsw_devlink_sb_occ_port_pool_get, + .sb_occ_tc_port_bind_get = mlxsw_devlink_sb_occ_tc_port_bind_get, }; int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, @@ -880,8 +1118,7 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (err) goto err_devlink_register; - err = mlxsw_driver->init(mlxsw_core->driver_priv, mlxsw_core, - mlxsw_bus_info); + err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info); if (err) goto err_driver_init; @@ -892,7 +1129,7 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, return 0; err_debugfs_init: - mlxsw_core->driver->fini(mlxsw_core->driver_priv); + mlxsw_core->driver->fini(mlxsw_core); err_driver_init: devlink_unregister(devlink); err_devlink_register: @@ -918,7 +1155,7 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core) struct devlink *devlink = priv_to_devlink(mlxsw_core); mlxsw_core_debugfs_fini(mlxsw_core); - mlxsw_core->driver->fini(mlxsw_core->driver_priv); + mlxsw_core->driver->fini(mlxsw_core); devlink_unregister(devlink); mlxsw_emad_fini(mlxsw_core); mlxsw_core->bus->fini(mlxsw_core->bus_priv); @@ -929,26 +1166,17 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core) } EXPORT_SYMBOL(mlxsw_core_bus_device_unregister); -static struct mlxsw_core *__mlxsw_core_get(void *driver_priv) -{ - return container_of(driver_priv, struct mlxsw_core, driver_priv); -} - -bool mlxsw_core_skb_transmit_busy(void *driver_priv, +bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core, const struct mlxsw_tx_info *tx_info) { - struct mlxsw_core *mlxsw_core = __mlxsw_core_get(driver_priv); - return mlxsw_core->bus->skb_transmit_busy(mlxsw_core->bus_priv, tx_info); } EXPORT_SYMBOL(mlxsw_core_skb_transmit_busy); -int mlxsw_core_skb_transmit(void *driver_priv, struct sk_buff *skb, +int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, const struct mlxsw_tx_info *tx_info) { - struct mlxsw_core *mlxsw_core = __mlxsw_core_get(driver_priv); - return mlxsw_core->bus->skb_transmit(mlxsw_core->bus_priv, skb, tx_info); } @@ -1108,56 +1336,112 @@ void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_event_listener_unregister); +static u64 mlxsw_core_tid_get(struct mlxsw_core *mlxsw_core) +{ + return atomic64_inc_return(&mlxsw_core->emad.tid); +} + static int mlxsw_core_reg_access_emad(struct mlxsw_core *mlxsw_core, const struct mlxsw_reg_info *reg, char *payload, - enum mlxsw_core_reg_access_type type) + enum mlxsw_core_reg_access_type type, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, + unsigned long cb_priv) { + u64 tid = mlxsw_core_tid_get(mlxsw_core); + struct mlxsw_reg_trans *trans; int err; - char *op_tlv; - struct sk_buff *skb; - struct mlxsw_tx_info tx_info = { - .local_port = MLXSW_PORT_CPU_PORT, - .is_emad = true, - }; - skb = mlxsw_emad_alloc(mlxsw_core, reg->len); - if (!skb) + trans = kzalloc(sizeof(*trans), GFP_KERNEL); + if (!trans) return -ENOMEM; - mlxsw_emad_construct(skb, reg, payload, type, mlxsw_core); - mlxsw_core->driver->txhdr_construct(skb, &tx_info); + err = mlxsw_emad_reg_access(mlxsw_core, reg, payload, type, trans, + bulk_list, cb, cb_priv, tid); + if (err) { + kfree(trans); + return err; + } + return 0; +} - dev_dbg(mlxsw_core->bus_info->dev, "EMAD send (tid=%llx)\n", - mlxsw_core->emad.tid); - mlxsw_core_buf_dump_dbg(mlxsw_core, skb->data, skb->len); +int mlxsw_reg_trans_query(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv) +{ + return mlxsw_core_reg_access_emad(mlxsw_core, reg, payload, + MLXSW_CORE_REG_ACCESS_TYPE_QUERY, + bulk_list, cb, cb_priv); +} +EXPORT_SYMBOL(mlxsw_reg_trans_query); - err = mlxsw_emad_transmit(mlxsw_core, skb, &tx_info); - if (!err) { - op_tlv = mlxsw_emad_op_tlv(mlxsw_core->emad.resp_skb); - memcpy(payload, mlxsw_emad_reg_payload(op_tlv), - reg->len); +int mlxsw_reg_trans_write(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv) +{ + return mlxsw_core_reg_access_emad(mlxsw_core, reg, payload, + MLXSW_CORE_REG_ACCESS_TYPE_WRITE, + bulk_list, cb, cb_priv); +} +EXPORT_SYMBOL(mlxsw_reg_trans_write); - dev_dbg(mlxsw_core->bus_info->dev, "EMAD recv (tid=%llx)\n", - mlxsw_core->emad.tid - 1); - mlxsw_core_buf_dump_dbg(mlxsw_core, - mlxsw_core->emad.resp_skb->data, - mlxsw_core->emad.resp_skb->len); +static int mlxsw_reg_trans_wait(struct mlxsw_reg_trans *trans) +{ + struct mlxsw_core *mlxsw_core = trans->core; + int err; - dev_kfree_skb(mlxsw_core->emad.resp_skb); - } + wait_for_completion(&trans->completion); + cancel_delayed_work_sync(&trans->timeout_dw); + err = trans->err; + if (trans->retries) + dev_warn(mlxsw_core->bus_info->dev, "EMAD retries (%d/%d) (tid=%llx)\n", + trans->retries, MLXSW_EMAD_MAX_RETRY, trans->tid); + if (err) + dev_err(mlxsw_core->bus_info->dev, "EMAD reg access failed (tid=%llx,reg_id=%x(%s),type=%s,status=%x(%s))\n", + trans->tid, trans->reg->id, + mlxsw_reg_id_str(trans->reg->id), + mlxsw_core_reg_access_type_str(trans->type), + trans->emad_status, + mlxsw_emad_op_tlv_status_str(trans->emad_status)); + + list_del(&trans->bulk_list); + kfree_rcu(trans, rcu); return err; } +int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list) +{ + struct mlxsw_reg_trans *trans; + struct mlxsw_reg_trans *tmp; + int sum_err = 0; + int err; + + list_for_each_entry_safe(trans, tmp, bulk_list, bulk_list) { + err = mlxsw_reg_trans_wait(trans); + if (err && sum_err == 0) + sum_err = err; /* first error to be returned */ + } + return sum_err; +} +EXPORT_SYMBOL(mlxsw_reg_trans_bulk_wait); + static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core, const struct mlxsw_reg_info *reg, char *payload, enum mlxsw_core_reg_access_type type) { + enum mlxsw_emad_op_tlv_status status; int err, n_retry; char *in_mbox, *out_mbox, *tmp; + dev_dbg(mlxsw_core->bus_info->dev, "Reg cmd access (reg_id=%x(%s),type=%s)\n", + reg->id, mlxsw_reg_id_str(reg->id), + mlxsw_core_reg_access_type_str(type)); + in_mbox = mlxsw_cmd_mbox_alloc(); if (!in_mbox) return -ENOMEM; @@ -1168,7 +1452,8 @@ static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core, goto free_in_mbox; } - mlxsw_emad_pack_op_tlv(in_mbox, reg, type, mlxsw_core); + mlxsw_emad_pack_op_tlv(in_mbox, reg, type, + mlxsw_core_tid_get(mlxsw_core)); tmp = in_mbox + MLXSW_EMAD_OP_TLV_LEN * sizeof(u32); mlxsw_emad_pack_reg_tlv(tmp, reg, payload); @@ -1176,60 +1461,61 @@ static int mlxsw_core_reg_access_cmd(struct mlxsw_core *mlxsw_core, retry: err = mlxsw_cmd_access_reg(mlxsw_core, in_mbox, out_mbox); if (!err) { - err = mlxsw_emad_process_status(mlxsw_core, out_mbox); - if (err == -EAGAIN && n_retry++ < MLXSW_EMAD_MAX_RETRY) - goto retry; + err = mlxsw_emad_process_status(out_mbox, &status); + if (err) { + if (err == -EAGAIN && n_retry++ < MLXSW_EMAD_MAX_RETRY) + goto retry; + dev_err(mlxsw_core->bus_info->dev, "Reg cmd access status failed (status=%x(%s))\n", + status, mlxsw_emad_op_tlv_status_str(status)); + } } if (!err) memcpy(payload, mlxsw_emad_reg_payload(out_mbox), reg->len); - mlxsw_core->emad.tid++; mlxsw_cmd_mbox_free(out_mbox); free_in_mbox: mlxsw_cmd_mbox_free(in_mbox); + if (err) + dev_err(mlxsw_core->bus_info->dev, "Reg cmd access failed (reg_id=%x(%s),type=%s)\n", + reg->id, mlxsw_reg_id_str(reg->id), + mlxsw_core_reg_access_type_str(type)); return err; } +static void mlxsw_core_reg_access_cb(struct mlxsw_core *mlxsw_core, + char *payload, size_t payload_len, + unsigned long cb_priv) +{ + char *orig_payload = (char *) cb_priv; + + memcpy(orig_payload, payload, payload_len); +} + static int mlxsw_core_reg_access(struct mlxsw_core *mlxsw_core, const struct mlxsw_reg_info *reg, char *payload, enum mlxsw_core_reg_access_type type) { - u64 cur_tid; + LIST_HEAD(bulk_list); int err; - if (mutex_lock_interruptible(&mlxsw_core->emad.lock)) { - dev_err(mlxsw_core->bus_info->dev, "Reg access interrupted (reg_id=%x(%s),type=%s)\n", - reg->id, mlxsw_reg_id_str(reg->id), - mlxsw_core_reg_access_type_str(type)); - return -EINTR; - } - - cur_tid = mlxsw_core->emad.tid; - dev_dbg(mlxsw_core->bus_info->dev, "Reg access (tid=%llx,reg_id=%x(%s),type=%s)\n", - cur_tid, reg->id, mlxsw_reg_id_str(reg->id), - mlxsw_core_reg_access_type_str(type)); - /* During initialization EMAD interface is not available to us, * so we default to command interface. We switch to EMAD interface * after setting the appropriate traps. */ if (!mlxsw_core->emad.use_emad) - err = mlxsw_core_reg_access_cmd(mlxsw_core, reg, - payload, type); - else - err = mlxsw_core_reg_access_emad(mlxsw_core, reg, + return mlxsw_core_reg_access_cmd(mlxsw_core, reg, payload, type); + err = mlxsw_core_reg_access_emad(mlxsw_core, reg, + payload, type, &bulk_list, + mlxsw_core_reg_access_cb, + (unsigned long) payload); if (err) - dev_err(mlxsw_core->bus_info->dev, "Reg access failed (tid=%llx,reg_id=%x(%s),type=%s)\n", - cur_tid, reg->id, mlxsw_reg_id_str(reg->id), - mlxsw_core_reg_access_type_str(type)); - - mutex_unlock(&mlxsw_core->emad.lock); - return err; + return err; + return mlxsw_reg_trans_bulk_wait(&bulk_list); } int mlxsw_reg_query(struct mlxsw_core *mlxsw_core, @@ -1358,6 +1644,46 @@ void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_core_lag_mapping_clear); +int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, + struct mlxsw_core_port *mlxsw_core_port, u8 local_port, + struct net_device *dev, bool split, u32 split_group) +{ + struct devlink *devlink = priv_to_devlink(mlxsw_core); + struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; + + if (split) + devlink_port_split_set(devlink_port, split_group); + devlink_port_type_eth_set(devlink_port, dev); + return devlink_port_register(devlink, devlink_port, local_port); +} +EXPORT_SYMBOL(mlxsw_core_port_init); + +void mlxsw_core_port_fini(struct mlxsw_core_port *mlxsw_core_port) +{ + struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port; + + devlink_port_unregister(devlink_port); +} +EXPORT_SYMBOL(mlxsw_core_port_fini); + +static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core, + const char *buf, size_t size) +{ + __be32 *m = (__be32 *) buf; + int i; + int count = size / sizeof(__be32); + + for (i = count - 1; i >= 0; i--) + if (m[i]) + break; + i++; + count = i ? i : 1; + for (i = 0; i < count; i += 4) + dev_dbg(mlxsw_core->bus_info->dev, "%04x - %08x %08x %08x %08x\n", + i * 4, be32_to_cpu(m[i]), be32_to_cpu(m[i + 1]), + be32_to_cpu(m[i + 2]), be32_to_cpu(m[i + 3])); +} + int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod, u32 in_mod, bool out_mbox_direct, char *in_mbox, size_t in_mbox_size, @@ -1400,17 +1726,35 @@ int mlxsw_cmd_exec(struct mlxsw_core *mlxsw_core, u16 opcode, u8 opcode_mod, } EXPORT_SYMBOL(mlxsw_cmd_exec); +int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay) +{ + return queue_delayed_work(mlxsw_wq, dwork, delay); +} +EXPORT_SYMBOL(mlxsw_core_schedule_dw); + static int __init mlxsw_core_module_init(void) { - mlxsw_core_dbg_root = debugfs_create_dir(mlxsw_core_driver_name, NULL); - if (!mlxsw_core_dbg_root) + int err; + + mlxsw_wq = create_workqueue(mlxsw_core_driver_name); + if (!mlxsw_wq) return -ENOMEM; + mlxsw_core_dbg_root = debugfs_create_dir(mlxsw_core_driver_name, NULL); + if (!mlxsw_core_dbg_root) { + err = -ENOMEM; + goto err_debugfs_create_dir; + } return 0; + +err_debugfs_create_dir: + destroy_workqueue(mlxsw_wq); + return err; } static void __exit mlxsw_core_module_exit(void) { debugfs_remove_recursive(mlxsw_core_dbg_root); + destroy_workqueue(mlxsw_wq); } module_init(mlxsw_core_module_init); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index c73d1c0792a6..436bc49df6ab 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -43,6 +43,8 @@ #include <linux/gfp.h> #include <linux/types.h> #include <linux/skbuff.h> +#include <linux/workqueue.h> +#include <net/devlink.h> #include "trap.h" #include "reg.h" @@ -61,6 +63,8 @@ struct mlxsw_driver; struct mlxsw_bus; struct mlxsw_bus_info; +void *mlxsw_core_driver_priv(struct mlxsw_core *mlxsw_core); + int mlxsw_core_driver_register(struct mlxsw_driver *mlxsw_driver); void mlxsw_core_driver_unregister(struct mlxsw_driver *mlxsw_driver); @@ -74,10 +78,9 @@ struct mlxsw_tx_info { bool is_emad; }; -bool mlxsw_core_skb_transmit_busy(void *driver_priv, +bool mlxsw_core_skb_transmit_busy(struct mlxsw_core *mlxsw_core, const struct mlxsw_tx_info *tx_info); - -int mlxsw_core_skb_transmit(void *driver_priv, struct sk_buff *skb, +int mlxsw_core_skb_transmit(struct mlxsw_core *mlxsw_core, struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); struct mlxsw_rx_listener { @@ -106,6 +109,19 @@ void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core, const struct mlxsw_event_listener *el, void *priv); +typedef void mlxsw_reg_trans_cb_t(struct mlxsw_core *mlxsw_core, char *payload, + size_t payload_len, unsigned long cb_priv); + +int mlxsw_reg_trans_query(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv); +int mlxsw_reg_trans_write(struct mlxsw_core *mlxsw_core, + const struct mlxsw_reg_info *reg, char *payload, + struct list_head *bulk_list, + mlxsw_reg_trans_cb_t *cb, unsigned long cb_priv); +int mlxsw_reg_trans_bulk_wait(struct list_head *bulk_list); + int mlxsw_reg_query(struct mlxsw_core *mlxsw_core, const struct mlxsw_reg_info *reg, char *payload); int mlxsw_reg_write(struct mlxsw_core *mlxsw_core, @@ -131,6 +147,26 @@ u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core, void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, u16 lag_id, u8 local_port); +struct mlxsw_core_port { + struct devlink_port devlink_port; +}; + +static inline void * +mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port) +{ + /* mlxsw_core_port is ensured to always be the first field in driver + * port structure. + */ + return mlxsw_core_port; +} + +int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, + struct mlxsw_core_port *mlxsw_core_port, u8 local_port, + struct net_device *dev, bool split, u32 split_group); +void mlxsw_core_port_fini(struct mlxsw_core_port *mlxsw_core_port); + +int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay); + #define MLXSW_CONFIG_PROFILE_SWID_COUNT 8 struct mlxsw_swid_config { @@ -183,11 +219,43 @@ struct mlxsw_driver { const char *kind; struct module *owner; size_t priv_size; - int (*init)(void *driver_priv, struct mlxsw_core *mlxsw_core, + int (*init)(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info); - void (*fini)(void *driver_priv); - int (*port_split)(void *driver_priv, u8 local_port, unsigned int count); - int (*port_unsplit)(void *driver_priv, u8 local_port); + void (*fini)(struct mlxsw_core *mlxsw_core); + int (*port_split)(struct mlxsw_core *mlxsw_core, u8 local_port, + unsigned int count); + int (*port_unsplit)(struct mlxsw_core *mlxsw_core, u8 local_port); + int (*sb_pool_get)(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info); + int (*sb_pool_set)(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type); + int (*sb_port_pool_get)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold); + int (*sb_port_pool_set)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 threshold); + int (*sb_tc_pool_bind_get)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold); + int (*sb_tc_pool_bind_set)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold); + int (*sb_occ_snapshot)(struct mlxsw_core *mlxsw_core, + unsigned int sb_index); + int (*sb_occ_max_clear)(struct mlxsw_core *mlxsw_core, + unsigned int sb_index); + int (*sb_occ_port_pool_get)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max); + int (*sb_occ_tc_port_bind_get)(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max); void (*txhdr_construct)(struct sk_buff *skb, const struct mlxsw_tx_info *tx_info); u8 txhdr_len; diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index ffe4c0305733..1977e7a5c530 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -1805,6 +1805,184 @@ static inline void mlxsw_reg_spvmlr_pack(char *payload, u8 local_port, } } +/* QTCT - QoS Switch Traffic Class Table + * ------------------------------------- + * Configures the mapping between the packet switch priority and the + * traffic class on the transmit port. + */ +#define MLXSW_REG_QTCT_ID 0x400A +#define MLXSW_REG_QTCT_LEN 0x08 + +static const struct mlxsw_reg_info mlxsw_reg_qtct = { + .id = MLXSW_REG_QTCT_ID, + .len = MLXSW_REG_QTCT_LEN, +}; + +/* reg_qtct_local_port + * Local port number. + * Access: Index + * + * Note: CPU port is not supported. + */ +MLXSW_ITEM32(reg, qtct, local_port, 0x00, 16, 8); + +/* reg_qtct_sub_port + * Virtual port within the physical port. + * Should be set to 0 when virtual ports are not enabled on the port. + * Access: Index + */ +MLXSW_ITEM32(reg, qtct, sub_port, 0x00, 8, 8); + +/* reg_qtct_switch_prio + * Switch priority. + * Access: Index + */ +MLXSW_ITEM32(reg, qtct, switch_prio, 0x00, 0, 4); + +/* reg_qtct_tclass + * Traffic class. + * Default values: + * switch_prio 0 : tclass 1 + * switch_prio 1 : tclass 0 + * switch_prio i : tclass i, for i > 1 + * Access: RW + */ +MLXSW_ITEM32(reg, qtct, tclass, 0x04, 0, 4); + +static inline void mlxsw_reg_qtct_pack(char *payload, u8 local_port, + u8 switch_prio, u8 tclass) +{ + MLXSW_REG_ZERO(qtct, payload); + mlxsw_reg_qtct_local_port_set(payload, local_port); + mlxsw_reg_qtct_switch_prio_set(payload, switch_prio); + mlxsw_reg_qtct_tclass_set(payload, tclass); +} + +/* QEEC - QoS ETS Element Configuration Register + * --------------------------------------------- + * Configures the ETS elements. + */ +#define MLXSW_REG_QEEC_ID 0x400D +#define MLXSW_REG_QEEC_LEN 0x1C + +static const struct mlxsw_reg_info mlxsw_reg_qeec = { + .id = MLXSW_REG_QEEC_ID, + .len = MLXSW_REG_QEEC_LEN, +}; + +/* reg_qeec_local_port + * Local port number. + * Access: Index + * + * Note: CPU port is supported. + */ +MLXSW_ITEM32(reg, qeec, local_port, 0x00, 16, 8); + +enum mlxsw_reg_qeec_hr { + MLXSW_REG_QEEC_HIERARCY_PORT, + MLXSW_REG_QEEC_HIERARCY_GROUP, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + MLXSW_REG_QEEC_HIERARCY_TC, +}; + +/* reg_qeec_element_hierarchy + * 0 - Port + * 1 - Group + * 2 - Subgroup + * 3 - Traffic Class + * Access: Index + */ +MLXSW_ITEM32(reg, qeec, element_hierarchy, 0x04, 16, 4); + +/* reg_qeec_element_index + * The index of the element in the hierarchy. + * Access: Index + */ +MLXSW_ITEM32(reg, qeec, element_index, 0x04, 0, 8); + +/* reg_qeec_next_element_index + * The index of the next (lower) element in the hierarchy. + * Access: RW + * + * Note: Reserved for element_hierarchy 0. + */ +MLXSW_ITEM32(reg, qeec, next_element_index, 0x08, 0, 8); + +enum { + MLXSW_REG_QEEC_BYTES_MODE, + MLXSW_REG_QEEC_PACKETS_MODE, +}; + +/* reg_qeec_pb + * Packets or bytes mode. + * 0 - Bytes mode + * 1 - Packets mode + * Access: RW + * + * Note: Used for max shaper configuration. For Spectrum, packets mode + * is supported only for traffic classes of CPU port. + */ +MLXSW_ITEM32(reg, qeec, pb, 0x0C, 28, 1); + +/* reg_qeec_mase + * Max shaper configuration enable. Enables configuration of the max + * shaper on this ETS element. + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, mase, 0x10, 31, 1); + +/* A large max rate will disable the max shaper. */ +#define MLXSW_REG_QEEC_MAS_DIS 200000000 /* Kbps */ + +/* reg_qeec_max_shaper_rate + * Max shaper information rate. + * For CPU port, can only be configured for port hierarchy. + * When in bytes mode, value is specified in units of 1000bps. + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, max_shaper_rate, 0x10, 0, 28); + +/* reg_qeec_de + * DWRR configuration enable. Enables configuration of the dwrr and + * dwrr_weight. + * 0 - Disable + * 1 - Enable + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, de, 0x18, 31, 1); + +/* reg_qeec_dwrr + * Transmission selection algorithm to use on the link going down from + * the ETS element. + * 0 - Strict priority + * 1 - DWRR + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, dwrr, 0x18, 15, 1); + +/* reg_qeec_dwrr_weight + * DWRR weight on the link going down from the ETS element. The + * percentage of bandwidth guaranteed to an ETS element within + * its hierarchy. The sum of all weights across all ETS elements + * within one hierarchy should be equal to 100. Reserved when + * transmission selection algorithm is strict priority. + * Access: RW + */ +MLXSW_ITEM32(reg, qeec, dwrr_weight, 0x18, 0, 8); + +static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port, + enum mlxsw_reg_qeec_hr hr, u8 index, + u8 next_index) +{ + MLXSW_REG_ZERO(qeec, payload); + mlxsw_reg_qeec_local_port_set(payload, local_port); + mlxsw_reg_qeec_element_hierarchy_set(payload, hr); + mlxsw_reg_qeec_element_index_set(payload, index); + mlxsw_reg_qeec_next_element_index_set(payload, next_index); +} + /* PMLP - Ports Module to Local Port Register * ------------------------------------------ * Configures the assignment of modules to local ports. @@ -2141,6 +2319,145 @@ static inline void mlxsw_reg_paos_pack(char *payload, u8 local_port, mlxsw_reg_paos_e_set(payload, 1); } +/* PFCC - Ports Flow Control Configuration Register + * ------------------------------------------------ + * Configures and retrieves the per port flow control configuration. + */ +#define MLXSW_REG_PFCC_ID 0x5007 +#define MLXSW_REG_PFCC_LEN 0x20 + +static const struct mlxsw_reg_info mlxsw_reg_pfcc = { + .id = MLXSW_REG_PFCC_ID, + .len = MLXSW_REG_PFCC_LEN, +}; + +/* reg_pfcc_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32(reg, pfcc, local_port, 0x00, 16, 8); + +/* reg_pfcc_pnat + * Port number access type. Determines the way local_port is interpreted: + * 0 - Local port number. + * 1 - IB / label port number. + * Access: Index + */ +MLXSW_ITEM32(reg, pfcc, pnat, 0x00, 14, 2); + +/* reg_pfcc_shl_cap + * Send to higher layers capabilities: + * 0 - No capability of sending Pause and PFC frames to higher layers. + * 1 - Device has capability of sending Pause and PFC frames to higher + * layers. + * Access: RO + */ +MLXSW_ITEM32(reg, pfcc, shl_cap, 0x00, 1, 1); + +/* reg_pfcc_shl_opr + * Send to higher layers operation: + * 0 - Pause and PFC frames are handled by the port (default). + * 1 - Pause and PFC frames are handled by the port and also sent to + * higher layers. Only valid if shl_cap = 1. + * Access: RW + */ +MLXSW_ITEM32(reg, pfcc, shl_opr, 0x00, 0, 1); + +/* reg_pfcc_ppan + * Pause policy auto negotiation. + * 0 - Disabled. Generate / ignore Pause frames based on pptx / pprtx. + * 1 - Enabled. When auto-negotiation is performed, set the Pause policy + * based on the auto-negotiation resolution. + * Access: RW + * + * Note: The auto-negotiation advertisement is set according to pptx and + * pprtx. When PFC is set on Tx / Rx, ppan must be set to 0. + */ +MLXSW_ITEM32(reg, pfcc, ppan, 0x04, 28, 4); + +/* reg_pfcc_prio_mask_tx + * Bit per priority indicating if Tx flow control policy should be + * updated based on bit pfctx. + * Access: WO + */ +MLXSW_ITEM32(reg, pfcc, prio_mask_tx, 0x04, 16, 8); + +/* reg_pfcc_prio_mask_rx + * Bit per priority indicating if Rx flow control policy should be + * updated based on bit pfcrx. + * Access: WO + */ +MLXSW_ITEM32(reg, pfcc, prio_mask_rx, 0x04, 0, 8); + +/* reg_pfcc_pptx + * Admin Pause policy on Tx. + * 0 - Never generate Pause frames (default). + * 1 - Generate Pause frames according to Rx buffer threshold. + * Access: RW + */ +MLXSW_ITEM32(reg, pfcc, pptx, 0x08, 31, 1); + +/* reg_pfcc_aptx + * Active (operational) Pause policy on Tx. + * 0 - Never generate Pause frames. + * 1 - Generate Pause frames according to Rx buffer threshold. + * Access: RO + */ +MLXSW_ITEM32(reg, pfcc, aptx, 0x08, 30, 1); + +/* reg_pfcc_pfctx + * Priority based flow control policy on Tx[7:0]. Per-priority bit mask: + * 0 - Never generate priority Pause frames on the specified priority + * (default). + * 1 - Generate priority Pause frames according to Rx buffer threshold on + * the specified priority. + * Access: RW + * + * Note: pfctx and pptx must be mutually exclusive. + */ +MLXSW_ITEM32(reg, pfcc, pfctx, 0x08, 16, 8); + +/* reg_pfcc_pprx + * Admin Pause policy on Rx. + * 0 - Ignore received Pause frames (default). + * 1 - Respect received Pause frames. + * Access: RW + */ +MLXSW_ITEM32(reg, pfcc, pprx, 0x0C, 31, 1); + +/* reg_pfcc_aprx + * Active (operational) Pause policy on Rx. + * 0 - Ignore received Pause frames. + * 1 - Respect received Pause frames. + * Access: RO + */ +MLXSW_ITEM32(reg, pfcc, aprx, 0x0C, 30, 1); + +/* reg_pfcc_pfcrx + * Priority based flow control policy on Rx[7:0]. Per-priority bit mask: + * 0 - Ignore incoming priority Pause frames on the specified priority + * (default). + * 1 - Respect incoming priority Pause frames on the specified priority. + * Access: RW + */ +MLXSW_ITEM32(reg, pfcc, pfcrx, 0x0C, 16, 8); + +#define MLXSW_REG_PFCC_ALL_PRIO 0xFF + +static inline void mlxsw_reg_pfcc_prio_pack(char *payload, u8 pfc_en) +{ + mlxsw_reg_pfcc_prio_mask_tx_set(payload, MLXSW_REG_PFCC_ALL_PRIO); + mlxsw_reg_pfcc_prio_mask_rx_set(payload, MLXSW_REG_PFCC_ALL_PRIO); + mlxsw_reg_pfcc_pfctx_set(payload, pfc_en); + mlxsw_reg_pfcc_pfcrx_set(payload, pfc_en); +} + +static inline void mlxsw_reg_pfcc_pack(char *payload, u8 local_port) +{ + MLXSW_REG_ZERO(pfcc, payload); + mlxsw_reg_pfcc_local_port_set(payload, local_port); +} + /* PPCNT - Ports Performance Counters Register * ------------------------------------------- * The PPCNT register retrieves per port performance counters. @@ -2180,6 +2497,11 @@ MLXSW_ITEM32(reg, ppcnt, local_port, 0x00, 16, 8); */ MLXSW_ITEM32(reg, ppcnt, pnat, 0x00, 14, 2); +enum mlxsw_reg_ppcnt_grp { + MLXSW_REG_PPCNT_IEEE_8023_CNT = 0x0, + MLXSW_REG_PPCNT_PRIO_CNT = 0x10, +}; + /* reg_ppcnt_grp * Performance counter group. * Group 63 indicates all groups. Only valid on Set() operation with @@ -2215,6 +2537,8 @@ MLXSW_ITEM32(reg, ppcnt, clr, 0x04, 31, 1); */ MLXSW_ITEM32(reg, ppcnt, prio_tc, 0x04, 0, 5); +/* Ethernet IEEE 802.3 Counter Group */ + /* reg_ppcnt_a_frames_transmitted_ok * Access: RO */ @@ -2329,15 +2653,145 @@ MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_received, MLXSW_ITEM64(reg, ppcnt, a_pause_mac_ctrl_frames_transmitted, 0x08 + 0x90, 0, 64); -static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port) +/* Ethernet Per Priority Group Counters */ + +/* reg_ppcnt_rx_octets + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, rx_octets, 0x08 + 0x00, 0, 64); + +/* reg_ppcnt_rx_frames + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, rx_frames, 0x08 + 0x20, 0, 64); + +/* reg_ppcnt_tx_octets + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_octets, 0x08 + 0x28, 0, 64); + +/* reg_ppcnt_tx_frames + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_frames, 0x08 + 0x48, 0, 64); + +/* reg_ppcnt_rx_pause + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, rx_pause, 0x08 + 0x50, 0, 64); + +/* reg_ppcnt_rx_pause_duration + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, rx_pause_duration, 0x08 + 0x58, 0, 64); + +/* reg_ppcnt_tx_pause + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_pause, 0x08 + 0x60, 0, 64); + +/* reg_ppcnt_tx_pause_duration + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_pause_duration, 0x08 + 0x68, 0, 64); + +/* reg_ppcnt_rx_pause_transition + * Access: RO + */ +MLXSW_ITEM64(reg, ppcnt, tx_pause_transition, 0x08 + 0x70, 0, 64); + +static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port, + enum mlxsw_reg_ppcnt_grp grp, + u8 prio_tc) { MLXSW_REG_ZERO(ppcnt, payload); mlxsw_reg_ppcnt_swid_set(payload, 0); mlxsw_reg_ppcnt_local_port_set(payload, local_port); mlxsw_reg_ppcnt_pnat_set(payload, 0); - mlxsw_reg_ppcnt_grp_set(payload, 0); + mlxsw_reg_ppcnt_grp_set(payload, grp); mlxsw_reg_ppcnt_clr_set(payload, 0); - mlxsw_reg_ppcnt_prio_tc_set(payload, 0); + mlxsw_reg_ppcnt_prio_tc_set(payload, prio_tc); +} + +/* PPTB - Port Prio To Buffer Register + * ----------------------------------- + * Configures the switch priority to buffer table. + */ +#define MLXSW_REG_PPTB_ID 0x500B +#define MLXSW_REG_PPTB_LEN 0x0C + +static const struct mlxsw_reg_info mlxsw_reg_pptb = { + .id = MLXSW_REG_PPTB_ID, + .len = MLXSW_REG_PPTB_LEN, +}; + +enum { + MLXSW_REG_PPTB_MM_UM, + MLXSW_REG_PPTB_MM_UNICAST, + MLXSW_REG_PPTB_MM_MULTICAST, +}; + +/* reg_pptb_mm + * Mapping mode. + * 0 - Map both unicast and multicast packets to the same buffer. + * 1 - Map only unicast packets. + * 2 - Map only multicast packets. + * Access: Index + * + * Note: SwitchX-2 only supports the first option. + */ +MLXSW_ITEM32(reg, pptb, mm, 0x00, 28, 2); + +/* reg_pptb_local_port + * Local port number. + * Access: Index + */ +MLXSW_ITEM32(reg, pptb, local_port, 0x00, 16, 8); + +/* reg_pptb_um + * Enables the update of the untagged_buf field. + * Access: RW + */ +MLXSW_ITEM32(reg, pptb, um, 0x00, 8, 1); + +/* reg_pptb_pm + * Enables the update of the prio_to_buff field. + * Bit <i> is a flag for updating the mapping for switch priority <i>. + * Access: RW + */ +MLXSW_ITEM32(reg, pptb, pm, 0x00, 0, 8); + +/* reg_pptb_prio_to_buff + * Mapping of switch priority <i> to one of the allocated receive port + * buffers. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, pptb, prio_to_buff, 0x04, 0x04, 4); + +/* reg_pptb_pm_msb + * Enables the update of the prio_to_buff field. + * Bit <i> is a flag for updating the mapping for switch priority <i+8>. + * Access: RW + */ +MLXSW_ITEM32(reg, pptb, pm_msb, 0x08, 24, 8); + +/* reg_pptb_untagged_buff + * Mapping of untagged frames to one of the allocated receive port buffers. + * Access: RW + * + * Note: In SwitchX-2 this field must be mapped to buffer 8. Reserved for + * Spectrum, as it maps untagged packets based on the default switch priority. + */ +MLXSW_ITEM32(reg, pptb, untagged_buff, 0x08, 0, 4); + +#define MLXSW_REG_PPTB_ALL_PRIO 0xFF + +static inline void mlxsw_reg_pptb_pack(char *payload, u8 local_port) +{ + MLXSW_REG_ZERO(pptb, payload); + mlxsw_reg_pptb_mm_set(payload, MLXSW_REG_PPTB_MM_UM); + mlxsw_reg_pptb_local_port_set(payload, local_port); + mlxsw_reg_pptb_pm_set(payload, MLXSW_REG_PPTB_ALL_PRIO); } /* PBMC - Port Buffer Management Control Register @@ -2346,7 +2800,7 @@ static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port) * allocation for different Prios, and the Pause threshold management. */ #define MLXSW_REG_PBMC_ID 0x500C -#define MLXSW_REG_PBMC_LEN 0x68 +#define MLXSW_REG_PBMC_LEN 0x6C static const struct mlxsw_reg_info mlxsw_reg_pbmc = { .id = MLXSW_REG_PBMC_ID, @@ -2374,6 +2828,8 @@ MLXSW_ITEM32(reg, pbmc, xoff_timer_value, 0x04, 16, 16); */ MLXSW_ITEM32(reg, pbmc, xoff_refresh, 0x04, 0, 16); +#define MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX 11 + /* reg_pbmc_buf_lossy * The field indicates if the buffer is lossy. * 0 - Lossless @@ -2398,6 +2854,30 @@ MLXSW_ITEM32_INDEXED(reg, pbmc, buf_epsb, 0x0C, 24, 1, 0x08, 0x00, false); */ MLXSW_ITEM32_INDEXED(reg, pbmc, buf_size, 0x0C, 0, 16, 0x08, 0x00, false); +/* reg_pbmc_buf_xoff_threshold + * Once the amount of data in the buffer goes above this value, device + * starts sending PFC frames for all priorities associated with the + * buffer. Units are represented in cells. Reserved in case of lossy + * buffer. + * Access: RW + * + * Note: In Spectrum, reserved for buffer[9]. + */ +MLXSW_ITEM32_INDEXED(reg, pbmc, buf_xoff_threshold, 0x0C, 16, 16, + 0x08, 0x04, false); + +/* reg_pbmc_buf_xon_threshold + * When the amount of data in the buffer goes below this value, device + * stops sending PFC frames for the priorities associated with the + * buffer. Units are represented in cells. Reserved in case of lossy + * buffer. + * Access: RW + * + * Note: In Spectrum, reserved for buffer[9]. + */ +MLXSW_ITEM32_INDEXED(reg, pbmc, buf_xon_threshold, 0x0C, 0, 16, + 0x08, 0x04, false); + static inline void mlxsw_reg_pbmc_pack(char *payload, u8 local_port, u16 xoff_timer_value, u16 xoff_refresh) { @@ -2416,6 +2896,17 @@ static inline void mlxsw_reg_pbmc_lossy_buffer_pack(char *payload, mlxsw_reg_pbmc_buf_size_set(payload, buf_index, size); } +static inline void mlxsw_reg_pbmc_lossless_buffer_pack(char *payload, + int buf_index, u16 size, + u16 threshold) +{ + mlxsw_reg_pbmc_buf_lossy_set(payload, buf_index, 0); + mlxsw_reg_pbmc_buf_epsb_set(payload, buf_index, 0); + mlxsw_reg_pbmc_buf_size_set(payload, buf_index, size); + mlxsw_reg_pbmc_buf_xoff_threshold_set(payload, buf_index, threshold); + mlxsw_reg_pbmc_buf_xon_threshold_set(payload, buf_index, threshold); +} + /* PSPA - Port Switch Partition Allocation * --------------------------------------- * Controls the association of a port with a switch partition and enables @@ -2985,9 +3476,10 @@ static const struct mlxsw_reg_info mlxsw_reg_sbpr = { .len = MLXSW_REG_SBPR_LEN, }; -enum mlxsw_reg_sbpr_dir { - MLXSW_REG_SBPR_DIR_INGRESS, - MLXSW_REG_SBPR_DIR_EGRESS, +/* shared direstion enum for SBPR, SBCM, SBPM */ +enum mlxsw_reg_sbxx_dir { + MLXSW_REG_SBXX_DIR_INGRESS, + MLXSW_REG_SBXX_DIR_EGRESS, }; /* reg_sbpr_dir @@ -3020,7 +3512,7 @@ enum mlxsw_reg_sbpr_mode { MLXSW_ITEM32(reg, sbpr, mode, 0x08, 0, 4); static inline void mlxsw_reg_sbpr_pack(char *payload, u8 pool, - enum mlxsw_reg_sbpr_dir dir, + enum mlxsw_reg_sbxx_dir dir, enum mlxsw_reg_sbpr_mode mode, u32 size) { MLXSW_REG_ZERO(sbpr, payload); @@ -3062,11 +3554,6 @@ MLXSW_ITEM32(reg, sbcm, local_port, 0x00, 16, 8); */ MLXSW_ITEM32(reg, sbcm, pg_buff, 0x00, 8, 6); -enum mlxsw_reg_sbcm_dir { - MLXSW_REG_SBCM_DIR_INGRESS, - MLXSW_REG_SBCM_DIR_EGRESS, -}; - /* reg_sbcm_dir * Direction. * Access: Index @@ -3079,6 +3566,10 @@ MLXSW_ITEM32(reg, sbcm, dir, 0x00, 0, 2); */ MLXSW_ITEM32(reg, sbcm, min_buff, 0x18, 0, 24); +/* shared max_buff limits for dynamic threshold for SBCM, SBPM */ +#define MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN 1 +#define MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX 14 + /* reg_sbcm_max_buff * When the pool associated to the port-pg/tclass is configured to * static, Maximum buffer size for the limiter configured in cells. @@ -3099,7 +3590,7 @@ MLXSW_ITEM32(reg, sbcm, max_buff, 0x1C, 0, 24); MLXSW_ITEM32(reg, sbcm, pool, 0x24, 0, 4); static inline void mlxsw_reg_sbcm_pack(char *payload, u8 local_port, u8 pg_buff, - enum mlxsw_reg_sbcm_dir dir, + enum mlxsw_reg_sbxx_dir dir, u32 min_buff, u32 max_buff, u8 pool) { MLXSW_REG_ZERO(sbcm, payload); @@ -3111,8 +3602,8 @@ static inline void mlxsw_reg_sbcm_pack(char *payload, u8 local_port, u8 pg_buff, mlxsw_reg_sbcm_pool_set(payload, pool); } -/* SBPM - Shared Buffer Class Management Register - * ---------------------------------------------- +/* SBPM - Shared Buffer Port Management Register + * --------------------------------------------- * The SBPM register configures and retrieves the shared buffer allocation * and configuration according to Port-Pool, including the definition * of the associated quota. @@ -3139,17 +3630,33 @@ MLXSW_ITEM32(reg, sbpm, local_port, 0x00, 16, 8); */ MLXSW_ITEM32(reg, sbpm, pool, 0x00, 8, 4); -enum mlxsw_reg_sbpm_dir { - MLXSW_REG_SBPM_DIR_INGRESS, - MLXSW_REG_SBPM_DIR_EGRESS, -}; - /* reg_sbpm_dir * Direction. * Access: Index */ MLXSW_ITEM32(reg, sbpm, dir, 0x00, 0, 2); +/* reg_sbpm_buff_occupancy + * Current buffer occupancy in cells. + * Access: RO + */ +MLXSW_ITEM32(reg, sbpm, buff_occupancy, 0x10, 0, 24); + +/* reg_sbpm_clr + * Clear Max Buffer Occupancy + * When this bit is set, max_buff_occupancy field is cleared (and a + * new max value is tracked from the time the clear was performed). + * Access: OP + */ +MLXSW_ITEM32(reg, sbpm, clr, 0x14, 31, 1); + +/* reg_sbpm_max_buff_occupancy + * Maximum value of buffer occupancy in cells monitored. Cleared by + * writing to the clr field. + * Access: RO + */ +MLXSW_ITEM32(reg, sbpm, max_buff_occupancy, 0x14, 0, 24); + /* reg_sbpm_min_buff * Minimum buffer size for the limiter, in cells. * Access: RW @@ -3170,17 +3677,25 @@ MLXSW_ITEM32(reg, sbpm, min_buff, 0x18, 0, 24); MLXSW_ITEM32(reg, sbpm, max_buff, 0x1C, 0, 24); static inline void mlxsw_reg_sbpm_pack(char *payload, u8 local_port, u8 pool, - enum mlxsw_reg_sbpm_dir dir, + enum mlxsw_reg_sbxx_dir dir, bool clr, u32 min_buff, u32 max_buff) { MLXSW_REG_ZERO(sbpm, payload); mlxsw_reg_sbpm_local_port_set(payload, local_port); mlxsw_reg_sbpm_pool_set(payload, pool); mlxsw_reg_sbpm_dir_set(payload, dir); + mlxsw_reg_sbpm_clr_set(payload, clr); mlxsw_reg_sbpm_min_buff_set(payload, min_buff); mlxsw_reg_sbpm_max_buff_set(payload, max_buff); } +static inline void mlxsw_reg_sbpm_unpack(char *payload, u32 *p_buff_occupancy, + u32 *p_max_buff_occupancy) +{ + *p_buff_occupancy = mlxsw_reg_sbpm_buff_occupancy_get(payload); + *p_max_buff_occupancy = mlxsw_reg_sbpm_max_buff_occupancy_get(payload); +} + /* SBMM - Shared Buffer Multicast Management Register * -------------------------------------------------- * The SBMM register configures and retrieves the shared buffer allocation @@ -3236,6 +3751,104 @@ static inline void mlxsw_reg_sbmm_pack(char *payload, u8 prio, u32 min_buff, mlxsw_reg_sbmm_pool_set(payload, pool); } +/* SBSR - Shared Buffer Status Register + * ------------------------------------ + * The SBSR register retrieves the shared buffer occupancy according to + * Port-Pool. Note that this register enables reading a large amount of data. + * It is the user's responsibility to limit the amount of data to ensure the + * response can match the maximum transfer unit. In case the response exceeds + * the maximum transport unit, it will be truncated with no special notice. + */ +#define MLXSW_REG_SBSR_ID 0xB005 +#define MLXSW_REG_SBSR_BASE_LEN 0x5C /* base length, without records */ +#define MLXSW_REG_SBSR_REC_LEN 0x8 /* record length */ +#define MLXSW_REG_SBSR_REC_MAX_COUNT 120 +#define MLXSW_REG_SBSR_LEN (MLXSW_REG_SBSR_BASE_LEN + \ + MLXSW_REG_SBSR_REC_LEN * \ + MLXSW_REG_SBSR_REC_MAX_COUNT) + +static const struct mlxsw_reg_info mlxsw_reg_sbsr = { + .id = MLXSW_REG_SBSR_ID, + .len = MLXSW_REG_SBSR_LEN, +}; + +/* reg_sbsr_clr + * Clear Max Buffer Occupancy. When this bit is set, the max_buff_occupancy + * field is cleared (and a new max value is tracked from the time the clear + * was performed). + * Access: OP + */ +MLXSW_ITEM32(reg, sbsr, clr, 0x00, 31, 1); + +/* reg_sbsr_ingress_port_mask + * Bit vector for all ingress network ports. + * Indicates which of the ports (for which the relevant bit is set) + * are affected by the set operation. Configuration of any other port + * does not change. + * Access: Index + */ +MLXSW_ITEM_BIT_ARRAY(reg, sbsr, ingress_port_mask, 0x10, 0x20, 1); + +/* reg_sbsr_pg_buff_mask + * Bit vector for all switch priority groups. + * Indicates which of the priorities (for which the relevant bit is set) + * are affected by the set operation. Configuration of any other priority + * does not change. + * Range is 0..cap_max_pg_buffers - 1 + * Access: Index + */ +MLXSW_ITEM_BIT_ARRAY(reg, sbsr, pg_buff_mask, 0x30, 0x4, 1); + +/* reg_sbsr_egress_port_mask + * Bit vector for all egress network ports. + * Indicates which of the ports (for which the relevant bit is set) + * are affected by the set operation. Configuration of any other port + * does not change. + * Access: Index + */ +MLXSW_ITEM_BIT_ARRAY(reg, sbsr, egress_port_mask, 0x34, 0x20, 1); + +/* reg_sbsr_tclass_mask + * Bit vector for all traffic classes. + * Indicates which of the traffic classes (for which the relevant bit is + * set) are affected by the set operation. Configuration of any other + * traffic class does not change. + * Range is 0..cap_max_tclass - 1 + * Access: Index + */ +MLXSW_ITEM_BIT_ARRAY(reg, sbsr, tclass_mask, 0x54, 0x8, 1); + +static inline void mlxsw_reg_sbsr_pack(char *payload, bool clr) +{ + MLXSW_REG_ZERO(sbsr, payload); + mlxsw_reg_sbsr_clr_set(payload, clr); +} + +/* reg_sbsr_rec_buff_occupancy + * Current buffer occupancy in cells. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sbsr, rec_buff_occupancy, MLXSW_REG_SBSR_BASE_LEN, + 0, 24, MLXSW_REG_SBSR_REC_LEN, 0x00, false); + +/* reg_sbsr_rec_max_buff_occupancy + * Maximum value of buffer occupancy in cells monitored. Cleared by + * writing to the clr field. + * Access: RO + */ +MLXSW_ITEM32_INDEXED(reg, sbsr, rec_max_buff_occupancy, MLXSW_REG_SBSR_BASE_LEN, + 0, 24, MLXSW_REG_SBSR_REC_LEN, 0x04, false); + +static inline void mlxsw_reg_sbsr_rec_unpack(char *payload, int rec_index, + u32 *p_buff_occupancy, + u32 *p_max_buff_occupancy) +{ + *p_buff_occupancy = + mlxsw_reg_sbsr_rec_buff_occupancy_get(payload, rec_index); + *p_max_buff_occupancy = + mlxsw_reg_sbsr_rec_max_buff_occupancy_get(payload, rec_index); +} + static inline const char *mlxsw_reg_id_str(u16 reg_id) { switch (reg_id) { @@ -3283,6 +3896,10 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "SFMR"; case MLXSW_REG_SPVMLR_ID: return "SPVMLR"; + case MLXSW_REG_QTCT_ID: + return "QTCT"; + case MLXSW_REG_QEEC_ID: + return "QEEC"; case MLXSW_REG_PMLP_ID: return "PMLP"; case MLXSW_REG_PMTU_ID: @@ -3293,8 +3910,12 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "PPAD"; case MLXSW_REG_PAOS_ID: return "PAOS"; + case MLXSW_REG_PFCC_ID: + return "PFCC"; case MLXSW_REG_PPCNT_ID: return "PPCNT"; + case MLXSW_REG_PPTB_ID: + return "PPTB"; case MLXSW_REG_PBMC_ID: return "PBMC"; case MLXSW_REG_PSPA_ID: @@ -3323,6 +3944,8 @@ static inline const char *mlxsw_reg_id_str(u16 reg_id) return "SBPM"; case MLXSW_REG_SBMM_ID: return "SBMM"; + case MLXSW_REG_SBSR_ID: + return "SBSR"; default: return "*UNKNOWN*"; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 668b2f465ca5..4a7273771028 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -49,7 +49,7 @@ #include <linux/jiffies.h> #include <linux/bitops.h> #include <linux/list.h> -#include <net/devlink.h> +#include <linux/dcbnl.h> #include <net/switchdev.h> #include <generated/utsrelease.h> @@ -305,9 +305,9 @@ mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sspr), sspr_pl); } -static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, - u8 local_port, u8 *p_module, - u8 *p_width) +static int __mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, + u8 local_port, u8 *p_module, + u8 *p_width, u8 *p_lane) { char pmlp_pl[MLXSW_REG_PMLP_LEN]; int err; @@ -318,9 +318,20 @@ static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, return err; *p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0); *p_width = mlxsw_reg_pmlp_width_get(pmlp_pl); + *p_lane = mlxsw_reg_pmlp_tx_lane_get(pmlp_pl, 0); return 0; } +static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp, + u8 local_port, u8 *p_module, + u8 *p_width) +{ + u8 lane; + + return __mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, p_module, + p_width, &lane); +} + static int mlxsw_sp_port_module_map(struct mlxsw_sp *mlxsw_sp, u8 local_port, u8 module, u8 width, u8 lane) { @@ -379,7 +390,7 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, u64 len; int err; - if (mlxsw_core_skb_transmit_busy(mlxsw_sp, &tx_info)) + if (mlxsw_core_skb_transmit_busy(mlxsw_sp->core, &tx_info)) return NETDEV_TX_BUSY; if (unlikely(skb_headroom(skb) < MLXSW_TXHDR_LEN)) { @@ -403,7 +414,7 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, /* Due to a race we might fail here because of a full queue. In that * unlikely case we simply drop the packet. */ - err = mlxsw_core_skb_transmit(mlxsw_sp, skb, &tx_info); + err = mlxsw_core_skb_transmit(mlxsw_sp->core, skb, &tx_info); if (!err) { pcpu_stats = this_cpu_ptr(mlxsw_sp_port->pcpu_stats); @@ -438,16 +449,89 @@ static int mlxsw_sp_port_set_mac_address(struct net_device *dev, void *p) return 0; } +static void mlxsw_sp_pg_buf_pack(char *pbmc_pl, int pg_index, int mtu, + bool pause_en, bool pfc_en, u16 delay) +{ + u16 pg_size = 2 * MLXSW_SP_BYTES_TO_CELLS(mtu); + + delay = pfc_en ? mlxsw_sp_pfc_delay_get(mtu, delay) : + MLXSW_SP_PAUSE_DELAY; + + if (pause_en || pfc_en) + mlxsw_reg_pbmc_lossless_buffer_pack(pbmc_pl, pg_index, + pg_size + delay, pg_size); + else + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pg_index, pg_size); +} + +int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, + u8 *prio_tc, bool pause_en, + struct ieee_pfc *my_pfc) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 pfc_en = !!my_pfc ? my_pfc->pfc_en : 0; + u16 delay = !!my_pfc ? my_pfc->delay : 0; + char pbmc_pl[MLXSW_REG_PBMC_LEN]; + int i, j, err; + + mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); + if (err) + return err; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + bool configure = false; + bool pfc = false; + + for (j = 0; j < IEEE_8021QAZ_MAX_TCS; j++) { + if (prio_tc[j] == i) { + pfc = pfc_en & BIT(j); + configure = true; + break; + } + } + + if (!configure) + continue; + mlxsw_sp_pg_buf_pack(pbmc_pl, i, mtu, pause_en, pfc, delay); + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); +} + +static int mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, + int mtu, bool pause_en) +{ + u8 def_prio_tc[IEEE_8021QAZ_MAX_TCS] = {0}; + bool dcb_en = !!mlxsw_sp_port->dcb.ets; + struct ieee_pfc *my_pfc; + u8 *prio_tc; + + prio_tc = dcb_en ? mlxsw_sp_port->dcb.ets->prio_tc : def_prio_tc; + my_pfc = dcb_en ? mlxsw_sp_port->dcb.pfc : NULL; + + return __mlxsw_sp_port_headroom_set(mlxsw_sp_port, mtu, prio_tc, + pause_en, my_pfc); +} + static int mlxsw_sp_port_change_mtu(struct net_device *dev, int mtu) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); int err; - err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, mtu); + err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, mtu, pause_en); if (err) return err; + err = mlxsw_sp_port_mtu_set(mlxsw_sp_port, mtu); + if (err) + goto err_port_mtu_set; dev->mtu = mtu; return 0; + +err_port_mtu_set: + mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en); + return err; } static struct rtnl_link_stats64 * @@ -861,6 +945,33 @@ int mlxsw_sp_port_kill_vid(struct net_device *dev, return 0; } +static int mlxsw_sp_port_get_phys_port_name(struct net_device *dev, char *name, + size_t len) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + u8 module, width, lane; + int err; + + err = __mlxsw_sp_port_module_info_get(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->local_port, + &module, &width, &lane); + if (err) { + netdev_err(dev, "Failed to retrieve module information\n"); + return err; + } + + if (!mlxsw_sp_port->split) + err = snprintf(name, len, "p%d", module + 1); + else + err = snprintf(name, len, "p%ds%d", module + 1, + lane / width); + + if (err >= len) + return -EINVAL; + + return 0; +} + static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_open = mlxsw_sp_port_open, .ndo_stop = mlxsw_sp_port_stop, @@ -877,6 +988,7 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_bridge_setlink = switchdev_port_bridge_setlink, .ndo_bridge_getlink = switchdev_port_bridge_getlink, .ndo_bridge_dellink = switchdev_port_bridge_dellink, + .ndo_get_phys_port_name = mlxsw_sp_port_get_phys_port_name, }; static void mlxsw_sp_port_get_drvinfo(struct net_device *dev, @@ -897,6 +1009,68 @@ static void mlxsw_sp_port_get_drvinfo(struct net_device *dev, sizeof(drvinfo->bus_info)); } +static void mlxsw_sp_port_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + pause->rx_pause = mlxsw_sp_port->link.rx_pause; + pause->tx_pause = mlxsw_sp_port->link.tx_pause; +} + +static int mlxsw_sp_port_pause_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct ethtool_pauseparam *pause) +{ + char pfcc_pl[MLXSW_REG_PFCC_LEN]; + + mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port); + mlxsw_reg_pfcc_pprx_set(pfcc_pl, pause->rx_pause); + mlxsw_reg_pfcc_pptx_set(pfcc_pl, pause->tx_pause); + + return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc), + pfcc_pl); +} + +static int mlxsw_sp_port_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + bool pause_en = pause->tx_pause || pause->rx_pause; + int err; + + if (mlxsw_sp_port->dcb.pfc && mlxsw_sp_port->dcb.pfc->pfc_en) { + netdev_err(dev, "PFC already enabled on port\n"); + return -EINVAL; + } + + if (pause->autoneg) { + netdev_err(dev, "PAUSE frames autonegotiation isn't supported\n"); + return -EINVAL; + } + + err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en); + if (err) { + netdev_err(dev, "Failed to configure port's headroom\n"); + return err; + } + + err = mlxsw_sp_port_pause_set(mlxsw_sp_port, pause); + if (err) { + netdev_err(dev, "Failed to set PAUSE parameters\n"); + goto err_port_pause_configure; + } + + mlxsw_sp_port->link.rx_pause = pause->rx_pause; + mlxsw_sp_port->link.tx_pause = pause->tx_pause; + + return 0; + +err_port_pause_configure: + pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); + mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, pause_en); + return err; +} + struct mlxsw_sp_port_hw_stats { char str[ETH_GSTRING_LEN]; u64 (*getter)(char *payload); @@ -1032,7 +1206,8 @@ static void mlxsw_sp_port_get_stats(struct net_device *dev, int i; int err; - mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port); + mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, + MLXSW_REG_PPCNT_IEEE_8023_CNT, 0); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl); for (i = 0; i < MLXSW_SP_PORT_HW_STATS_LEN; i++) data[i] = !err ? mlxsw_sp_port_hw_stats[i].getter(ppcnt_pl) : 0; @@ -1380,6 +1555,8 @@ static int mlxsw_sp_port_set_settings(struct net_device *dev, static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_drvinfo = mlxsw_sp_port_get_drvinfo, .get_link = ethtool_op_get_link, + .get_pauseparam = mlxsw_sp_port_get_pauseparam, + .set_pauseparam = mlxsw_sp_port_set_pauseparam, .get_strings = mlxsw_sp_port_get_strings, .set_phys_id = mlxsw_sp_port_set_phys_id, .get_ethtool_stats = mlxsw_sp_port_get_stats, @@ -1402,12 +1579,112 @@ mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); } +int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, + bool dwrr, u8 dwrr_weight) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qeec_pl[MLXSW_REG_QEEC_LEN]; + + mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index, + next_index); + mlxsw_reg_qeec_de_set(qeec_pl, true); + mlxsw_reg_qeec_dwrr_set(qeec_pl, dwrr); + mlxsw_reg_qeec_dwrr_weight_set(qeec_pl, dwrr_weight); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); +} + +int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, + u8 next_index, u32 maxrate) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qeec_pl[MLXSW_REG_QEEC_LEN]; + + mlxsw_reg_qeec_pack(qeec_pl, mlxsw_sp_port->local_port, hr, index, + next_index); + mlxsw_reg_qeec_mase_set(qeec_pl, true); + mlxsw_reg_qeec_max_shaper_rate_set(qeec_pl, maxrate); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qeec), qeec_pl); +} + +int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port, + u8 switch_prio, u8 tclass) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char qtct_pl[MLXSW_REG_QTCT_LEN]; + + mlxsw_reg_qtct_pack(qtct_pl, mlxsw_sp_port->local_port, switch_prio, + tclass); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qtct), qtct_pl); +} + +static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err, i; + + /* Setup the elements hierarcy, so that each TC is linked to + * one subgroup, which are all member in the same group. + */ + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_GROUP, 0, 0, false, + 0); + if (err) + return err; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + 0, false, 0); + if (err) + return err; + } + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_TC, i, i, + false, 0); + if (err) + return err; + } + + /* Make sure the max shaper is disabled in all hierarcies that + * support it. + */ + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_PORT, 0, 0, + MLXSW_REG_QEEC_MAS_DIS); + if (err) + return err; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + i, 0, + MLXSW_REG_QEEC_MAS_DIS); + if (err) + return err; + } + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_TC, + i, i, + MLXSW_REG_QEEC_MAS_DIS); + if (err) + return err; + } + + /* Map all priorities to traffic class 0. */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, 0); + if (err) + return err; + } + + return 0; +} + static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, bool split, u8 module, u8 width) { - struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); struct mlxsw_sp_port *mlxsw_sp_port; - struct devlink_port *devlink_port; struct net_device *dev; size_t bytes; int err; @@ -1460,16 +1737,6 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, */ dev->hard_header_len += MLXSW_TXHDR_LEN; - devlink_port = &mlxsw_sp_port->devlink_port; - if (mlxsw_sp_port->split) - devlink_port_split_set(devlink_port, module); - err = devlink_port_register(devlink, devlink_port, local_port); - if (err) { - dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to register devlink port\n", - mlxsw_sp_port->local_port); - goto err_devlink_port_register; - } - err = mlxsw_sp_port_system_port_mapping_set(mlxsw_sp_port); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to set system port mapping\n", @@ -1509,6 +1776,21 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_buffers_init; } + err = mlxsw_sp_port_ets_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize ETS\n", + mlxsw_sp_port->local_port); + goto err_port_ets_init; + } + + /* ETS and buffers must be initialized before DCB. */ + err = mlxsw_sp_port_dcb_init(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to initialize DCB\n", + mlxsw_sp_port->local_port); + goto err_port_dcb_init; + } + mlxsw_sp_port_switchdev_init(mlxsw_sp_port); err = register_netdev(dev); if (err) { @@ -1517,7 +1799,14 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_register_netdev; } - devlink_port_type_eth_set(devlink_port, dev); + err = mlxsw_core_port_init(mlxsw_sp->core, &mlxsw_sp_port->core_port, + mlxsw_sp_port->local_port, dev, + mlxsw_sp_port->split, module); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to init core port\n", + mlxsw_sp_port->local_port); + goto err_core_port_init; + } err = mlxsw_sp_port_vlan_init(mlxsw_sp_port); if (err) @@ -1527,16 +1816,18 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, return 0; err_port_vlan_init: + mlxsw_core_port_fini(&mlxsw_sp_port->core_port); +err_core_port_init: unregister_netdev(dev); err_register_netdev: +err_port_dcb_init: +err_port_ets_init: err_port_buffers_init: err_port_admin_status_set: err_port_mtu_set: err_port_speed_by_width_set: err_port_swid_set: err_port_system_port_mapping_set: - devlink_port_unregister(&mlxsw_sp_port->devlink_port); -err_devlink_port_register: err_dev_addr_init: free_percpu(mlxsw_sp_port->pcpu_stats); err_alloc_stats: @@ -1590,15 +1881,13 @@ static void mlxsw_sp_port_vports_fini(struct mlxsw_sp_port *mlxsw_sp_port) static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) { struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port]; - struct devlink_port *devlink_port; if (!mlxsw_sp_port) return; mlxsw_sp->ports[local_port] = NULL; - devlink_port = &mlxsw_sp_port->devlink_port; - devlink_port_type_clear(devlink_port); + mlxsw_core_port_fini(&mlxsw_sp_port->core_port); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ - devlink_port_unregister(devlink_port); + mlxsw_sp_port_dcb_fini(mlxsw_sp_port); mlxsw_sp_port_vports_fini(mlxsw_sp_port); mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); @@ -1659,9 +1948,10 @@ static u8 mlxsw_sp_cluster_base_port_get(u8 local_port) return local_port - offset; } -static int mlxsw_sp_port_split(void *priv, u8 local_port, unsigned int count) +static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port, + unsigned int count) { - struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); struct mlxsw_sp_port *mlxsw_sp_port; u8 width = MLXSW_PORT_MODULE_MAX_WIDTH / count; u8 module, cur_width, base_port; @@ -1733,9 +2023,9 @@ err_port_create: return err; } -static int mlxsw_sp_port_unsplit(void *priv, u8 local_port) +static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port) { - struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); struct mlxsw_sp_port *mlxsw_sp_port; u8 module, cur_width, base_port; unsigned int count; @@ -2080,10 +2370,10 @@ static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl); } -static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, +static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info) { - struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); int err; mlxsw_sp->core = mlxsw_core; @@ -2144,6 +2434,7 @@ static int mlxsw_sp_init(void *priv, struct mlxsw_core *mlxsw_core, err_switchdev_init: err_lag_init: + mlxsw_sp_buffers_fini(mlxsw_sp); err_buffers_init: err_flood_init: mlxsw_sp_traps_fini(mlxsw_sp); @@ -2154,11 +2445,12 @@ err_event_register: return err; } -static void mlxsw_sp_fini(void *priv) +static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) { - struct mlxsw_sp *mlxsw_sp = priv; + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); mlxsw_sp_switchdev_fini(mlxsw_sp); + mlxsw_sp_buffers_fini(mlxsw_sp); mlxsw_sp_traps_fini(mlxsw_sp); mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); mlxsw_sp_ports_remove(mlxsw_sp); @@ -2201,16 +2493,26 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { }; static struct mlxsw_driver mlxsw_sp_driver = { - .kind = MLXSW_DEVICE_KIND_SPECTRUM, - .owner = THIS_MODULE, - .priv_size = sizeof(struct mlxsw_sp), - .init = mlxsw_sp_init, - .fini = mlxsw_sp_fini, - .port_split = mlxsw_sp_port_split, - .port_unsplit = mlxsw_sp_port_unsplit, - .txhdr_construct = mlxsw_sp_txhdr_construct, - .txhdr_len = MLXSW_TXHDR_LEN, - .profile = &mlxsw_sp_config_profile, + .kind = MLXSW_DEVICE_KIND_SPECTRUM, + .owner = THIS_MODULE, + .priv_size = sizeof(struct mlxsw_sp), + .init = mlxsw_sp_init, + .fini = mlxsw_sp_fini, + .port_split = mlxsw_sp_port_split, + .port_unsplit = mlxsw_sp_port_unsplit, + .sb_pool_get = mlxsw_sp_sb_pool_get, + .sb_pool_set = mlxsw_sp_sb_pool_set, + .sb_port_pool_get = mlxsw_sp_sb_port_pool_get, + .sb_port_pool_set = mlxsw_sp_sb_port_pool_set, + .sb_tc_pool_bind_get = mlxsw_sp_sb_tc_pool_bind_get, + .sb_tc_pool_bind_set = mlxsw_sp_sb_tc_pool_bind_set, + .sb_occ_snapshot = mlxsw_sp_sb_occ_snapshot, + .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear, + .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get, + .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get, + .txhdr_construct = mlxsw_sp_txhdr_construct, + .txhdr_len = MLXSW_TXHDR_LEN, + .profile = &mlxsw_sp_config_profile, }; static int diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index 4b8abaf06321..e2c022d3e2f3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -42,15 +42,15 @@ #include <linux/bitops.h> #include <linux/if_vlan.h> #include <linux/list.h> +#include <linux/dcbnl.h> #include <net/switchdev.h> -#include <net/devlink.h> #include "port.h" #include "core.h" #define MLXSW_SP_VFID_BASE VLAN_N_VID #define MLXSW_SP_VFID_PORT_MAX 512 /* Non-bridged VLAN interfaces */ -#define MLXSW_SP_VFID_BR_MAX 8192 /* Bridged VLAN interfaces */ +#define MLXSW_SP_VFID_BR_MAX 6144 /* Bridged VLAN interfaces */ #define MLXSW_SP_VFID_MAX (MLXSW_SP_VFID_PORT_MAX + MLXSW_SP_VFID_BR_MAX) #define MLXSW_SP_LAG_MAX 64 @@ -62,6 +62,24 @@ #define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */ +#define MLXSW_SP_BYTES_PER_CELL 96 + +#define MLXSW_SP_BYTES_TO_CELLS(b) DIV_ROUND_UP(b, MLXSW_SP_BYTES_PER_CELL) +#define MLXSW_SP_CELLS_TO_BYTES(c) (c * MLXSW_SP_BYTES_PER_CELL) + +/* Maximum delay buffer needed in case of PAUSE frames, in cells. + * Assumes 100m cable and maximum MTU. + */ +#define MLXSW_SP_PAUSE_DELAY 612 + +#define MLXSW_SP_CELL_FACTOR 2 /* 2 * cell_size / (IPG + cell_size + 1) */ + +static inline u16 mlxsw_sp_pfc_delay_get(int mtu, u16 delay) +{ + delay = MLXSW_SP_BYTES_TO_CELLS(DIV_ROUND_UP(delay, BITS_PER_BYTE)); + return MLXSW_SP_CELL_FACTOR * delay + MLXSW_SP_BYTES_TO_CELLS(mtu); +} + struct mlxsw_sp_port; struct mlxsw_sp_upper { @@ -100,6 +118,40 @@ static inline bool mlxsw_sp_fid_is_vfid(u16 fid) return fid >= MLXSW_SP_VFID_BASE; } +struct mlxsw_sp_sb_pr { + enum mlxsw_reg_sbpr_mode mode; + u32 size; +}; + +struct mlxsw_cp_sb_occ { + u32 cur; + u32 max; +}; + +struct mlxsw_sp_sb_cm { + u32 min_buff; + u32 max_buff; + u8 pool; + struct mlxsw_cp_sb_occ occ; +}; + +struct mlxsw_sp_sb_pm { + u32 min_buff; + u32 max_buff; + struct mlxsw_cp_sb_occ occ; +}; + +#define MLXSW_SP_SB_POOL_COUNT 4 +#define MLXSW_SP_SB_TC_COUNT 8 + +struct mlxsw_sp_sb { + struct mlxsw_sp_sb_pr prs[2][MLXSW_SP_SB_POOL_COUNT]; + struct { + struct mlxsw_sp_sb_cm cms[2][MLXSW_SP_SB_TC_COUNT]; + struct mlxsw_sp_sb_pm pms[2][MLXSW_SP_SB_POOL_COUNT]; + } ports[MLXSW_PORT_MAX_PORTS]; +}; + struct mlxsw_sp { struct { struct list_head list; @@ -130,6 +182,7 @@ struct mlxsw_sp { struct mlxsw_sp_upper master_bridge; struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX]; u8 port_to_module[MLXSW_PORT_MAX_PORTS]; + struct mlxsw_sp_sb sb; }; static inline struct mlxsw_sp_upper * @@ -148,6 +201,7 @@ struct mlxsw_sp_port_pcpu_stats { }; struct mlxsw_sp_port { + struct mlxsw_core_port core_port; /* must be first */ struct net_device *dev; struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats; struct mlxsw_sp *mlxsw_sp; @@ -166,14 +220,28 @@ struct mlxsw_sp_port { struct mlxsw_sp_vfid *vfid; u16 vid; } vport; + struct { + u8 tx_pause:1, + rx_pause:1; + } link; + struct { + struct ieee_ets *ets; + struct ieee_maxrate *maxrate; + struct ieee_pfc *pfc; + } dcb; /* 802.1Q bridge VLANs */ unsigned long *active_vlans; unsigned long *untagged_vlans; /* VLAN interfaces */ struct list_head vports_list; - struct devlink_port devlink_port; }; +static inline bool +mlxsw_sp_port_is_pause_en(const struct mlxsw_sp_port *mlxsw_sp_port) +{ + return mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause; +} + static inline struct mlxsw_sp_port * mlxsw_sp_port_lagged_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 port_index) { @@ -245,7 +313,39 @@ enum mlxsw_sp_flood_table { }; int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp); +void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port); +int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info); +int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type); +int mlxsw_sp_sb_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold); +int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 threshold); +int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold); +int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold); +int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core, + unsigned int sb_index); +int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core, + unsigned int sb_index); +int mlxsw_sp_sb_occ_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max); +int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max); int mlxsw_sp_switchdev_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp); @@ -265,5 +365,33 @@ int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 vfid, bool set, bool only_uc); void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port); int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); +int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, + bool dwrr, u8 dwrr_weight); +int mlxsw_sp_port_prio_tc_set(struct mlxsw_sp_port *mlxsw_sp_port, + u8 switch_prio, u8 tclass); +int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, + u8 *prio_tc, bool pause_en, + struct ieee_pfc *my_pfc); +int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, + enum mlxsw_reg_qeec_hr hr, u8 index, + u8 next_index, u32 maxrate); + +#ifdef CONFIG_MLXSW_SPECTRUM_DCB + +int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port); +void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port); + +#else + +static inline int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + return 0; +} + +static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{} + +#endif #endif diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index d59195e3f7fb..a3720a0fad7d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -34,36 +34,140 @@ #include <linux/kernel.h> #include <linux/types.h> +#include <linux/dcbnl.h> +#include <linux/if_ether.h> +#include <linux/list.h> #include "spectrum.h" #include "core.h" #include "port.h" #include "reg.h" -struct mlxsw_sp_pb { - u8 index; - u16 size; -}; +static struct mlxsw_sp_sb_pr *mlxsw_sp_sb_pr_get(struct mlxsw_sp *mlxsw_sp, + u8 pool, + enum mlxsw_reg_sbxx_dir dir) +{ + return &mlxsw_sp->sb.prs[dir][pool]; +} -#define MLXSW_SP_PB(_index, _size) \ - { \ - .index = _index, \ - .size = _size, \ +static struct mlxsw_sp_sb_cm *mlxsw_sp_sb_cm_get(struct mlxsw_sp *mlxsw_sp, + u8 local_port, u8 pg_buff, + enum mlxsw_reg_sbxx_dir dir) +{ + return &mlxsw_sp->sb.ports[local_port].cms[dir][pg_buff]; +} + +static struct mlxsw_sp_sb_pm *mlxsw_sp_sb_pm_get(struct mlxsw_sp *mlxsw_sp, + u8 local_port, u8 pool, + enum mlxsw_reg_sbxx_dir dir) +{ + return &mlxsw_sp->sb.ports[local_port].pms[dir][pool]; +} + +static int mlxsw_sp_sb_pr_write(struct mlxsw_sp *mlxsw_sp, u8 pool, + enum mlxsw_reg_sbxx_dir dir, + enum mlxsw_reg_sbpr_mode mode, u32 size) +{ + char sbpr_pl[MLXSW_REG_SBPR_LEN]; + struct mlxsw_sp_sb_pr *pr; + int err; + + mlxsw_reg_sbpr_pack(sbpr_pl, pool, dir, mode, size); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpr), sbpr_pl); + if (err) + return err; + + pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + pr->mode = mode; + pr->size = size; + return 0; +} + +static int mlxsw_sp_sb_cm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u8 pg_buff, enum mlxsw_reg_sbxx_dir dir, + u32 min_buff, u32 max_buff, u8 pool) +{ + char sbcm_pl[MLXSW_REG_SBCM_LEN]; + int err; + + mlxsw_reg_sbcm_pack(sbcm_pl, local_port, pg_buff, dir, + min_buff, max_buff, pool); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbcm), sbcm_pl); + if (err) + return err; + if (pg_buff < MLXSW_SP_SB_TC_COUNT) { + struct mlxsw_sp_sb_cm *cm; + + cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir); + cm->min_buff = min_buff; + cm->max_buff = max_buff; + cm->pool = pool; } + return 0; +} + +static int mlxsw_sp_sb_pm_write(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u8 pool, enum mlxsw_reg_sbxx_dir dir, + u32 min_buff, u32 max_buff) +{ + char sbpm_pl[MLXSW_REG_SBPM_LEN]; + struct mlxsw_sp_sb_pm *pm; + int err; + + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, false, + min_buff, max_buff); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl); + if (err) + return err; + + pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir); + pm->min_buff = min_buff; + pm->max_buff = max_buff; + return 0; +} + +static int mlxsw_sp_sb_pm_occ_clear(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u8 pool, enum mlxsw_reg_sbxx_dir dir, + struct list_head *bulk_list) +{ + char sbpm_pl[MLXSW_REG_SBPM_LEN]; + + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, true, 0, 0); + return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl, + bulk_list, NULL, 0); +} + +static void mlxsw_sp_sb_pm_occ_query_cb(struct mlxsw_core *mlxsw_core, + char *sbpm_pl, size_t sbpm_pl_len, + unsigned long cb_priv) +{ + struct mlxsw_sp_sb_pm *pm = (struct mlxsw_sp_sb_pm *) cb_priv; + + mlxsw_reg_sbpm_unpack(sbpm_pl, &pm->occ.cur, &pm->occ.max); +} + +static int mlxsw_sp_sb_pm_occ_query(struct mlxsw_sp *mlxsw_sp, u8 local_port, + u8 pool, enum mlxsw_reg_sbxx_dir dir, + struct list_head *bulk_list) +{ + char sbpm_pl[MLXSW_REG_SBPM_LEN]; + struct mlxsw_sp_sb_pm *pm; + + pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, pool, dir); + mlxsw_reg_sbpm_pack(sbpm_pl, local_port, pool, dir, false, 0, 0); + return mlxsw_reg_trans_query(mlxsw_sp->core, MLXSW_REG(sbpm), sbpm_pl, + bulk_list, + mlxsw_sp_sb_pm_occ_query_cb, + (unsigned long) pm); +} -static const struct mlxsw_sp_pb mlxsw_sp_pbs[] = { - MLXSW_SP_PB(0, 208), - MLXSW_SP_PB(1, 208), - MLXSW_SP_PB(2, 208), - MLXSW_SP_PB(3, 208), - MLXSW_SP_PB(4, 208), - MLXSW_SP_PB(5, 208), - MLXSW_SP_PB(6, 208), - MLXSW_SP_PB(7, 208), - MLXSW_SP_PB(9, 208), +static const u16 mlxsw_sp_pbs[] = { + [0] = 2 * MLXSW_SP_BYTES_TO_CELLS(ETH_FRAME_LEN), + [9] = 2 * MLXSW_SP_BYTES_TO_CELLS(MLXSW_PORT_MAX_MTU), }; #define MLXSW_SP_PBS_LEN ARRAY_SIZE(mlxsw_sp_pbs) +#define MLXSW_SP_PB_UNUSED 8 static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port) { @@ -73,194 +177,206 @@ static int mlxsw_sp_port_pb_init(struct mlxsw_sp_port *mlxsw_sp_port) mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0xffff, 0xffff / 2); for (i = 0; i < MLXSW_SP_PBS_LEN; i++) { - const struct mlxsw_sp_pb *pb; - - pb = &mlxsw_sp_pbs[i]; - mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pb->index, pb->size); + if (i == MLXSW_SP_PB_UNUSED) + continue; + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, i, mlxsw_sp_pbs[i]); } + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, + MLXSW_REG_PBMC_PORT_SHARED_BUF_IDX, 0); return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); } -#define MLXSW_SP_SB_BYTES_PER_CELL 96 +static int mlxsw_sp_port_pb_prio_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + char pptb_pl[MLXSW_REG_PPTB_LEN]; + int i; -struct mlxsw_sp_sb_pool { - u8 pool; - enum mlxsw_reg_sbpr_dir dir; - enum mlxsw_reg_sbpr_mode mode; - u32 size; -}; + mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port); + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_reg_pptb_prio_to_buff_set(pptb_pl, i, 0); + return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb), + pptb_pl); +} + +static int mlxsw_sp_port_headroom_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = mlxsw_sp_port_pb_init(mlxsw_sp_port); + if (err) + return err; + return mlxsw_sp_port_pb_prio_init(mlxsw_sp_port); +} -#define MLXSW_SP_SB_POOL_INGRESS_SIZE \ - ((15000000 - (2 * 20000 * MLXSW_PORT_MAX_PORTS)) / \ - MLXSW_SP_SB_BYTES_PER_CELL) -#define MLXSW_SP_SB_POOL_EGRESS_SIZE \ - ((14000000 - (8 * 1500 * MLXSW_PORT_MAX_PORTS)) / \ - MLXSW_SP_SB_BYTES_PER_CELL) - -#define MLXSW_SP_SB_POOL(_pool, _dir, _mode, _size) \ - { \ - .pool = _pool, \ - .dir = _dir, \ - .mode = _mode, \ - .size = _size, \ +#define MLXSW_SP_SB_PR_INGRESS_SIZE \ + (15000000 - (2 * 20000 * MLXSW_PORT_MAX_PORTS)) +#define MLXSW_SP_SB_PR_INGRESS_MNG_SIZE (200 * 1000) +#define MLXSW_SP_SB_PR_EGRESS_SIZE \ + (14000000 - (8 * 1500 * MLXSW_PORT_MAX_PORTS)) + +#define MLXSW_SP_SB_PR(_mode, _size) \ + { \ + .mode = _mode, \ + .size = _size, \ } -#define MLXSW_SP_SB_POOL_INGRESS(_pool, _size) \ - MLXSW_SP_SB_POOL(_pool, MLXSW_REG_SBPR_DIR_INGRESS, \ - MLXSW_REG_SBPR_MODE_DYNAMIC, _size) - -#define MLXSW_SP_SB_POOL_EGRESS(_pool, _size) \ - MLXSW_SP_SB_POOL(_pool, MLXSW_REG_SBPR_DIR_EGRESS, \ - MLXSW_REG_SBPR_MODE_DYNAMIC, _size) - -static const struct mlxsw_sp_sb_pool mlxsw_sp_sb_pools[] = { - MLXSW_SP_SB_POOL_INGRESS(0, MLXSW_SP_SB_POOL_INGRESS_SIZE), - MLXSW_SP_SB_POOL_INGRESS(1, 0), - MLXSW_SP_SB_POOL_INGRESS(2, 0), - MLXSW_SP_SB_POOL_INGRESS(3, 0), - MLXSW_SP_SB_POOL_EGRESS(0, MLXSW_SP_SB_POOL_EGRESS_SIZE), - MLXSW_SP_SB_POOL_EGRESS(1, 0), - MLXSW_SP_SB_POOL_EGRESS(2, 0), - MLXSW_SP_SB_POOL_EGRESS(2, MLXSW_SP_SB_POOL_EGRESS_SIZE), +static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_ingress[] = { + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_PR_INGRESS_SIZE)), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_PR_INGRESS_MNG_SIZE)), }; -#define MLXSW_SP_SB_POOLS_LEN ARRAY_SIZE(mlxsw_sp_sb_pools) +#define MLXSW_SP_SB_PRS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_ingress) + +static const struct mlxsw_sp_sb_pr mlxsw_sp_sb_prs_egress[] = { + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, + MLXSW_SP_BYTES_TO_CELLS(MLXSW_SP_SB_PR_EGRESS_SIZE)), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), + MLXSW_SP_SB_PR(MLXSW_REG_SBPR_MODE_DYNAMIC, 0), +}; -static int mlxsw_sp_sb_pools_init(struct mlxsw_sp *mlxsw_sp) +#define MLXSW_SP_SB_PRS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_prs_egress) + +static int __mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp, + enum mlxsw_reg_sbxx_dir dir, + const struct mlxsw_sp_sb_pr *prs, + size_t prs_len) { - char sbpr_pl[MLXSW_REG_SBPR_LEN]; int i; int err; - for (i = 0; i < MLXSW_SP_SB_POOLS_LEN; i++) { - const struct mlxsw_sp_sb_pool *pool; + for (i = 0; i < prs_len; i++) { + const struct mlxsw_sp_sb_pr *pr; - pool = &mlxsw_sp_sb_pools[i]; - mlxsw_reg_sbpr_pack(sbpr_pl, pool->pool, pool->dir, - pool->mode, pool->size); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbpr), sbpr_pl); + pr = &prs[i]; + err = mlxsw_sp_sb_pr_write(mlxsw_sp, i, dir, + pr->mode, pr->size); if (err) return err; } return 0; } -struct mlxsw_sp_sb_cm { - union { - u8 pg; - u8 tc; - } u; - enum mlxsw_reg_sbcm_dir dir; - u32 min_buff; - u32 max_buff; - u8 pool; -}; +static int mlxsw_sp_sb_prs_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; -#define MLXSW_SP_SB_CM(_pg_tc, _dir, _min_buff, _max_buff, _pool) \ - { \ - .u.pg = _pg_tc, \ - .dir = _dir, \ - .min_buff = _min_buff, \ - .max_buff = _max_buff, \ - .pool = _pool, \ + err = __mlxsw_sp_sb_prs_init(mlxsw_sp, MLXSW_REG_SBXX_DIR_INGRESS, + mlxsw_sp_sb_prs_ingress, + MLXSW_SP_SB_PRS_INGRESS_LEN); + if (err) + return err; + return __mlxsw_sp_sb_prs_init(mlxsw_sp, MLXSW_REG_SBXX_DIR_EGRESS, + mlxsw_sp_sb_prs_egress, + MLXSW_SP_SB_PRS_EGRESS_LEN); +} + +#define MLXSW_SP_SB_CM(_min_buff, _max_buff, _pool) \ + { \ + .min_buff = _min_buff, \ + .max_buff = _max_buff, \ + .pool = _pool, \ } -#define MLXSW_SP_SB_CM_INGRESS(_pg, _min_buff, _max_buff) \ - MLXSW_SP_SB_CM(_pg, MLXSW_REG_SBCM_DIR_INGRESS, \ - _min_buff, _max_buff, 0) - -#define MLXSW_SP_SB_CM_EGRESS(_tc, _min_buff, _max_buff) \ - MLXSW_SP_SB_CM(_tc, MLXSW_REG_SBCM_DIR_EGRESS, \ - _min_buff, _max_buff, 0) - -#define MLXSW_SP_CPU_PORT_SB_CM_EGRESS(_tc) \ - MLXSW_SP_SB_CM(_tc, MLXSW_REG_SBCM_DIR_EGRESS, 104, 2, 3) - -static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms[] = { - MLXSW_SP_SB_CM_INGRESS(0, 10000 / MLXSW_SP_SB_BYTES_PER_CELL, 8), - MLXSW_SP_SB_CM_INGRESS(1, 0, 0), - MLXSW_SP_SB_CM_INGRESS(2, 0, 0), - MLXSW_SP_SB_CM_INGRESS(3, 0, 0), - MLXSW_SP_SB_CM_INGRESS(4, 0, 0), - MLXSW_SP_SB_CM_INGRESS(5, 0, 0), - MLXSW_SP_SB_CM_INGRESS(6, 0, 0), - MLXSW_SP_SB_CM_INGRESS(7, 0, 0), - MLXSW_SP_SB_CM_INGRESS(9, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff), - MLXSW_SP_SB_CM_EGRESS(0, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(1, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(2, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(3, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(4, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(5, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(6, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(7, 1500 / MLXSW_SP_SB_BYTES_PER_CELL, 9), - MLXSW_SP_SB_CM_EGRESS(8, 0, 0), - MLXSW_SP_SB_CM_EGRESS(9, 0, 0), - MLXSW_SP_SB_CM_EGRESS(10, 0, 0), - MLXSW_SP_SB_CM_EGRESS(11, 0, 0), - MLXSW_SP_SB_CM_EGRESS(12, 0, 0), - MLXSW_SP_SB_CM_EGRESS(13, 0, 0), - MLXSW_SP_SB_CM_EGRESS(14, 0, 0), - MLXSW_SP_SB_CM_EGRESS(15, 0, 0), - MLXSW_SP_SB_CM_EGRESS(16, 1, 0xff), +static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_ingress[] = { + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(10000), 8, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN, 0), + MLXSW_SP_SB_CM(0, 0, 0), /* dummy, this PG does not exist */ + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(20000), 1, 3), }; -#define MLXSW_SP_SB_CMS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms) +#define MLXSW_SP_SB_CMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_ingress) + +static const struct mlxsw_sp_sb_cm mlxsw_sp_sb_cms_egress[] = { + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(1500), 9, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(0, 0, 0), + MLXSW_SP_SB_CM(1, 0xff, 0), +}; + +#define MLXSW_SP_SB_CMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_cms_egress) + +#define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, 0) static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(0), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(1), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(2), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(3), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(4), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(5), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(6), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(7), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(8), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(9), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(10), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(11), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(12), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(13), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(14), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(15), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(16), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(17), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(18), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(19), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(20), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(21), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(22), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(23), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(24), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(25), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(26), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(27), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(28), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(29), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(30), - MLXSW_SP_CPU_PORT_SB_CM_EGRESS(31), + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_CPU_PORT_SB_CM, }; #define MLXSW_SP_CPU_PORT_SB_MCS_LEN \ ARRAY_SIZE(mlxsw_sp_cpu_port_sb_cms) -static int mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, - const struct mlxsw_sp_sb_cm *cms, - size_t cms_len) +static int __mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, + enum mlxsw_reg_sbxx_dir dir, + const struct mlxsw_sp_sb_cm *cms, + size_t cms_len) { - char sbcm_pl[MLXSW_REG_SBCM_LEN]; int i; int err; for (i = 0; i < cms_len; i++) { const struct mlxsw_sp_sb_cm *cm; + if (i == 8 && dir == MLXSW_REG_SBXX_DIR_INGRESS) + continue; /* PG number 8 does not exist, skip it */ cm = &cms[i]; - mlxsw_reg_sbcm_pack(sbcm_pl, local_port, cm->u.pg, cm->dir, - cm->min_buff, cm->max_buff, cm->pool); - err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbcm), sbcm_pl); + err = mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, i, dir, + cm->min_buff, cm->max_buff, + cm->pool); if (err) return err; } @@ -269,105 +385,120 @@ static int mlxsw_sp_sb_cms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, static int mlxsw_sp_port_sb_cms_init(struct mlxsw_sp_port *mlxsw_sp_port) { - return mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp, - mlxsw_sp_port->local_port, mlxsw_sp_sb_cms, - MLXSW_SP_SB_CMS_LEN); + int err; + + err = __mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->local_port, + MLXSW_REG_SBXX_DIR_INGRESS, + mlxsw_sp_sb_cms_ingress, + MLXSW_SP_SB_CMS_INGRESS_LEN); + if (err) + return err; + return __mlxsw_sp_sb_cms_init(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->local_port, + MLXSW_REG_SBXX_DIR_EGRESS, + mlxsw_sp_sb_cms_egress, + MLXSW_SP_SB_CMS_EGRESS_LEN); } static int mlxsw_sp_cpu_port_sb_cms_init(struct mlxsw_sp *mlxsw_sp) { - return mlxsw_sp_sb_cms_init(mlxsw_sp, 0, mlxsw_sp_cpu_port_sb_cms, - MLXSW_SP_CPU_PORT_SB_MCS_LEN); + return __mlxsw_sp_sb_cms_init(mlxsw_sp, 0, MLXSW_REG_SBXX_DIR_EGRESS, + mlxsw_sp_cpu_port_sb_cms, + MLXSW_SP_CPU_PORT_SB_MCS_LEN); } -struct mlxsw_sp_sb_pm { - u8 pool; - enum mlxsw_reg_sbpm_dir dir; - u32 min_buff; - u32 max_buff; +#define MLXSW_SP_SB_PM(_min_buff, _max_buff) \ + { \ + .min_buff = _min_buff, \ + .max_buff = _max_buff, \ + } + +static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms_ingress[] = { + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX), }; -#define MLXSW_SP_SB_PM(_pool, _dir, _min_buff, _max_buff) \ - { \ - .pool = _pool, \ - .dir = _dir, \ - .min_buff = _min_buff, \ - .max_buff = _max_buff, \ - } +#define MLXSW_SP_SB_PMS_INGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms_ingress) -#define MLXSW_SP_SB_PM_INGRESS(_pool, _min_buff, _max_buff) \ - MLXSW_SP_SB_PM(_pool, MLXSW_REG_SBPM_DIR_INGRESS, \ - _min_buff, _max_buff) - -#define MLXSW_SP_SB_PM_EGRESS(_pool, _min_buff, _max_buff) \ - MLXSW_SP_SB_PM(_pool, MLXSW_REG_SBPM_DIR_EGRESS, \ - _min_buff, _max_buff) - -static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms[] = { - MLXSW_SP_SB_PM_INGRESS(0, 0, 0xff), - MLXSW_SP_SB_PM_INGRESS(1, 0, 0), - MLXSW_SP_SB_PM_INGRESS(2, 0, 0), - MLXSW_SP_SB_PM_INGRESS(3, 0, 0), - MLXSW_SP_SB_PM_EGRESS(0, 0, 7), - MLXSW_SP_SB_PM_EGRESS(1, 0, 0), - MLXSW_SP_SB_PM_EGRESS(2, 0, 0), - MLXSW_SP_SB_PM_EGRESS(3, 0, 0), +static const struct mlxsw_sp_sb_pm mlxsw_sp_sb_pms_egress[] = { + MLXSW_SP_SB_PM(0, 7), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), + MLXSW_SP_SB_PM(0, MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN), }; -#define MLXSW_SP_SB_PMS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms) +#define MLXSW_SP_SB_PMS_EGRESS_LEN ARRAY_SIZE(mlxsw_sp_sb_pms_egress) -static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) +static int __mlxsw_sp_port_sb_pms_init(struct mlxsw_sp *mlxsw_sp, u8 local_port, + enum mlxsw_reg_sbxx_dir dir, + const struct mlxsw_sp_sb_pm *pms, + size_t pms_len) { - char sbpm_pl[MLXSW_REG_SBPM_LEN]; int i; int err; - for (i = 0; i < MLXSW_SP_SB_PMS_LEN; i++) { + for (i = 0; i < pms_len; i++) { const struct mlxsw_sp_sb_pm *pm; - pm = &mlxsw_sp_sb_pms[i]; - mlxsw_reg_sbpm_pack(sbpm_pl, mlxsw_sp_port->local_port, - pm->pool, pm->dir, - pm->min_buff, pm->max_buff); - err = mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, - MLXSW_REG(sbpm), sbpm_pl); + pm = &pms[i]; + err = mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, i, dir, + pm->min_buff, pm->max_buff); if (err) return err; } return 0; } +static int mlxsw_sp_port_sb_pms_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = __mlxsw_sp_port_sb_pms_init(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->local_port, + MLXSW_REG_SBXX_DIR_INGRESS, + mlxsw_sp_sb_pms_ingress, + MLXSW_SP_SB_PMS_INGRESS_LEN); + if (err) + return err; + return __mlxsw_sp_port_sb_pms_init(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_port->local_port, + MLXSW_REG_SBXX_DIR_EGRESS, + mlxsw_sp_sb_pms_egress, + MLXSW_SP_SB_PMS_EGRESS_LEN); +} + struct mlxsw_sp_sb_mm { - u8 prio; u32 min_buff; u32 max_buff; u8 pool; }; -#define MLXSW_SP_SB_MM(_prio, _min_buff, _max_buff, _pool) \ - { \ - .prio = _prio, \ - .min_buff = _min_buff, \ - .max_buff = _max_buff, \ - .pool = _pool, \ +#define MLXSW_SP_SB_MM(_min_buff, _max_buff, _pool) \ + { \ + .min_buff = _min_buff, \ + .max_buff = _max_buff, \ + .pool = _pool, \ } static const struct mlxsw_sp_sb_mm mlxsw_sp_sb_mms[] = { - MLXSW_SP_SB_MM(0, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(1, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(2, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(3, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(4, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(5, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(6, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(7, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(8, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(9, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(10, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(11, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(12, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(13, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), - MLXSW_SP_SB_MM(14, 20000 / MLXSW_SP_SB_BYTES_PER_CELL, 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), + MLXSW_SP_SB_MM(MLXSW_SP_BYTES_TO_CELLS(20000), 0xff, 0), }; #define MLXSW_SP_SB_MMS_LEN ARRAY_SIZE(mlxsw_sp_sb_mms) @@ -382,7 +513,7 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) const struct mlxsw_sp_sb_mm *mc; mc = &mlxsw_sp_sb_mms[i]; - mlxsw_reg_sbmm_pack(sbmm_pl, mc->prio, mc->min_buff, + mlxsw_reg_sbmm_pack(sbmm_pl, i, mc->min_buff, mc->max_buff, mc->pool); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbmm), sbmm_pl); if (err) @@ -391,26 +522,39 @@ static int mlxsw_sp_sb_mms_init(struct mlxsw_sp *mlxsw_sp) return 0; } +#define MLXSW_SP_SB_SIZE (16 * 1024 * 1024) + int mlxsw_sp_buffers_init(struct mlxsw_sp *mlxsw_sp) { int err; - err = mlxsw_sp_sb_pools_init(mlxsw_sp); + err = mlxsw_sp_sb_prs_init(mlxsw_sp); if (err) return err; err = mlxsw_sp_cpu_port_sb_cms_init(mlxsw_sp); if (err) return err; err = mlxsw_sp_sb_mms_init(mlxsw_sp); + if (err) + return err; + return devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0, + MLXSW_SP_SB_SIZE, + MLXSW_SP_SB_POOL_COUNT, + MLXSW_SP_SB_POOL_COUNT, + MLXSW_SP_SB_TC_COUNT, + MLXSW_SP_SB_TC_COUNT); +} - return err; +void mlxsw_sp_buffers_fini(struct mlxsw_sp *mlxsw_sp) +{ + devlink_sb_unregister(priv_to_devlink(mlxsw_sp->core), 0); } int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port) { int err; - err = mlxsw_sp_port_pb_init(mlxsw_sp_port); + err = mlxsw_sp_port_headroom_init(mlxsw_sp_port); if (err) return err; err = mlxsw_sp_port_sb_cms_init(mlxsw_sp_port); @@ -420,3 +564,394 @@ int mlxsw_sp_port_buffers_init(struct mlxsw_sp_port *mlxsw_sp_port) return err; } + +static u8 pool_get(u16 pool_index) +{ + return pool_index % MLXSW_SP_SB_POOL_COUNT; +} + +static u16 pool_index_get(u8 pool, enum mlxsw_reg_sbxx_dir dir) +{ + u16 pool_index; + + pool_index = pool; + if (dir == MLXSW_REG_SBXX_DIR_EGRESS) + pool_index += MLXSW_SP_SB_POOL_COUNT; + return pool_index; +} + +static enum mlxsw_reg_sbxx_dir dir_get(u16 pool_index) +{ + return pool_index < MLXSW_SP_SB_POOL_COUNT ? + MLXSW_REG_SBXX_DIR_INGRESS : MLXSW_REG_SBXX_DIR_EGRESS; +} + +int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, + struct devlink_sb_pool_info *pool_info) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + u8 pool = pool_get(pool_index); + enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); + struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + + pool_info->pool_type = dir; + pool_info->size = MLXSW_SP_CELLS_TO_BYTES(pr->size); + pool_info->threshold_type = pr->mode; + return 0; +} + +int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, + unsigned int sb_index, u16 pool_index, u32 size, + enum devlink_sb_threshold_type threshold_type) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + u8 pool = pool_get(pool_index); + enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); + enum mlxsw_reg_sbpr_mode mode = threshold_type; + u32 pool_size = MLXSW_SP_BYTES_TO_CELLS(size); + + return mlxsw_sp_sb_pr_write(mlxsw_sp, pool, dir, mode, pool_size); +} + +#define MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET (-2) /* 3->1, 16->14 */ + +static u32 mlxsw_sp_sb_threshold_out(struct mlxsw_sp *mlxsw_sp, u8 pool, + enum mlxsw_reg_sbxx_dir dir, u32 max_buff) +{ + struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + + if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC) + return max_buff - MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET; + return MLXSW_SP_CELLS_TO_BYTES(max_buff); +} + +static int mlxsw_sp_sb_threshold_in(struct mlxsw_sp *mlxsw_sp, u8 pool, + enum mlxsw_reg_sbxx_dir dir, u32 threshold, + u32 *p_max_buff) +{ + struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); + + if (pr->mode == MLXSW_REG_SBPR_MODE_DYNAMIC) { + int val; + + val = threshold + MLXSW_SP_SB_THRESHOLD_TO_ALPHA_OFFSET; + if (val < MLXSW_REG_SBXX_DYN_MAX_BUFF_MIN || + val > MLXSW_REG_SBXX_DYN_MAX_BUFF_MAX) + return -EINVAL; + *p_max_buff = val; + } else { + *p_max_buff = MLXSW_SP_BYTES_TO_CELLS(threshold); + } + return 0; +} + +int mlxsw_sp_sb_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_threshold) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + u8 pool = pool_get(pool_index); + enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); + struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, + pool, dir); + + *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, pool, dir, + pm->max_buff); + return 0; +} + +int mlxsw_sp_sb_port_pool_set(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 threshold) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + u8 pool = pool_get(pool_index); + enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); + u32 max_buff; + int err; + + err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir, + threshold, &max_buff); + if (err) + return err; + + return mlxsw_sp_sb_pm_write(mlxsw_sp, local_port, pool, dir, + 0, max_buff); +} + +int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 *p_pool_index, u32 *p_threshold) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + u8 pg_buff = tc_index; + enum mlxsw_reg_sbxx_dir dir = pool_type; + struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, + pg_buff, dir); + + *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool, dir, + cm->max_buff); + *p_pool_index = pool_index_get(cm->pool, pool_type); + return 0; +} + +int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u16 pool_index, u32 threshold) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + u8 pg_buff = tc_index; + enum mlxsw_reg_sbxx_dir dir = pool_type; + u8 pool = pool_index; + u32 max_buff; + int err; + + err = mlxsw_sp_sb_threshold_in(mlxsw_sp, pool, dir, + threshold, &max_buff); + if (err) + return err; + + if (pool_type == DEVLINK_SB_POOL_TYPE_EGRESS) { + if (pool < MLXSW_SP_SB_POOL_COUNT) + return -EINVAL; + pool -= MLXSW_SP_SB_POOL_COUNT; + } else if (pool >= MLXSW_SP_SB_POOL_COUNT) { + return -EINVAL; + } + return mlxsw_sp_sb_cm_write(mlxsw_sp, local_port, pg_buff, dir, + 0, max_buff, pool); +} + +#define MASKED_COUNT_MAX \ + (MLXSW_REG_SBSR_REC_MAX_COUNT / (MLXSW_SP_SB_TC_COUNT * 2)) + +struct mlxsw_sp_sb_sr_occ_query_cb_ctx { + u8 masked_count; + u8 local_port_1; +}; + +static void mlxsw_sp_sb_sr_occ_query_cb(struct mlxsw_core *mlxsw_core, + char *sbsr_pl, size_t sbsr_pl_len, + unsigned long cb_priv) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_sb_sr_occ_query_cb_ctx cb_ctx; + u8 masked_count; + u8 local_port; + int rec_index = 0; + struct mlxsw_sp_sb_cm *cm; + int i; + + memcpy(&cb_ctx, &cb_priv, sizeof(cb_ctx)); + + masked_count = 0; + for (local_port = cb_ctx.local_port_1; + local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + if (!mlxsw_sp->ports[local_port]) + continue; + for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_INGRESS); + mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++, + &cm->occ.cur, &cm->occ.max); + } + if (++masked_count == cb_ctx.masked_count) + break; + } + masked_count = 0; + for (local_port = cb_ctx.local_port_1; + local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + if (!mlxsw_sp->ports[local_port]) + continue; + for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_EGRESS); + mlxsw_reg_sbsr_rec_unpack(sbsr_pl, rec_index++, + &cm->occ.cur, &cm->occ.max); + } + if (++masked_count == cb_ctx.masked_count) + break; + } +} + +int mlxsw_sp_sb_occ_snapshot(struct mlxsw_core *mlxsw_core, + unsigned int sb_index) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + struct mlxsw_sp_sb_sr_occ_query_cb_ctx cb_ctx; + unsigned long cb_priv; + LIST_HEAD(bulk_list); + char *sbsr_pl; + u8 masked_count; + u8 local_port_1; + u8 local_port = 0; + int i; + int err; + int err2; + + sbsr_pl = kmalloc(MLXSW_REG_SBSR_LEN, GFP_KERNEL); + if (!sbsr_pl) + return -ENOMEM; + +next_batch: + local_port++; + local_port_1 = local_port; + masked_count = 0; + mlxsw_reg_sbsr_pack(sbsr_pl, false); + for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1); + mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1); + } + for (; local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + if (!mlxsw_sp->ports[local_port]) + continue; + mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); + mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); + for (i = 0; i < MLXSW_SP_SB_POOL_COUNT; i++) { + err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_INGRESS, + &bulk_list); + if (err) + goto out; + err = mlxsw_sp_sb_pm_occ_query(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_EGRESS, + &bulk_list); + if (err) + goto out; + } + if (++masked_count == MASKED_COUNT_MAX) + goto do_query; + } + +do_query: + cb_ctx.masked_count = masked_count; + cb_ctx.local_port_1 = local_port_1; + memcpy(&cb_priv, &cb_ctx, sizeof(cb_ctx)); + err = mlxsw_reg_trans_query(mlxsw_core, MLXSW_REG(sbsr), sbsr_pl, + &bulk_list, mlxsw_sp_sb_sr_occ_query_cb, + cb_priv); + if (err) + goto out; + if (local_port < MLXSW_PORT_MAX_PORTS) + goto next_batch; + +out: + err2 = mlxsw_reg_trans_bulk_wait(&bulk_list); + if (!err) + err = err2; + kfree(sbsr_pl); + return err; +} + +int mlxsw_sp_sb_occ_max_clear(struct mlxsw_core *mlxsw_core, + unsigned int sb_index) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); + LIST_HEAD(bulk_list); + char *sbsr_pl; + unsigned int masked_count; + u8 local_port = 0; + int i; + int err; + int err2; + + sbsr_pl = kmalloc(MLXSW_REG_SBSR_LEN, GFP_KERNEL); + if (!sbsr_pl) + return -ENOMEM; + +next_batch: + local_port++; + masked_count = 0; + mlxsw_reg_sbsr_pack(sbsr_pl, true); + for (i = 0; i < MLXSW_SP_SB_TC_COUNT; i++) { + mlxsw_reg_sbsr_pg_buff_mask_set(sbsr_pl, i, 1); + mlxsw_reg_sbsr_tclass_mask_set(sbsr_pl, i, 1); + } + for (; local_port < MLXSW_PORT_MAX_PORTS; local_port++) { + if (!mlxsw_sp->ports[local_port]) + continue; + mlxsw_reg_sbsr_ingress_port_mask_set(sbsr_pl, local_port, 1); + mlxsw_reg_sbsr_egress_port_mask_set(sbsr_pl, local_port, 1); + for (i = 0; i < MLXSW_SP_SB_POOL_COUNT; i++) { + err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_INGRESS, + &bulk_list); + if (err) + goto out; + err = mlxsw_sp_sb_pm_occ_clear(mlxsw_sp, local_port, i, + MLXSW_REG_SBXX_DIR_EGRESS, + &bulk_list); + if (err) + goto out; + } + if (++masked_count == MASKED_COUNT_MAX) + goto do_query; + } + +do_query: + err = mlxsw_reg_trans_query(mlxsw_core, MLXSW_REG(sbsr), sbsr_pl, + &bulk_list, NULL, 0); + if (err) + goto out; + if (local_port < MLXSW_PORT_MAX_PORTS) + goto next_batch; + +out: + err2 = mlxsw_reg_trans_bulk_wait(&bulk_list); + if (!err) + err = err2; + kfree(sbsr_pl); + return err; +} + +int mlxsw_sp_sb_occ_port_pool_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 pool_index, + u32 *p_cur, u32 *p_max) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + u8 pool = pool_get(pool_index); + enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); + struct mlxsw_sp_sb_pm *pm = mlxsw_sp_sb_pm_get(mlxsw_sp, local_port, + pool, dir); + + *p_cur = MLXSW_SP_CELLS_TO_BYTES(pm->occ.cur); + *p_max = MLXSW_SP_CELLS_TO_BYTES(pm->occ.max); + return 0; +} + +int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, + unsigned int sb_index, u16 tc_index, + enum devlink_sb_pool_type pool_type, + u32 *p_cur, u32 *p_max) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + mlxsw_core_port_driver_priv(mlxsw_core_port); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 local_port = mlxsw_sp_port->local_port; + u8 pg_buff = tc_index; + enum mlxsw_reg_sbxx_dir dir = pool_type; + struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, + pg_buff, dir); + + *p_cur = MLXSW_SP_CELLS_TO_BYTES(cm->occ.cur); + *p_max = MLXSW_SP_CELLS_TO_BYTES(cm->occ.max); + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c new file mode 100644 index 000000000000..0b323661c0b6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -0,0 +1,480 @@ +/* + * drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c + * Copyright (c) 2016 Mellanox Technologies. All rights reserved. + * Copyright (c) 2016 Ido Schimmel <idosch@mellanox.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <linux/netdevice.h> +#include <linux/string.h> +#include <linux/bitops.h> +#include <net/dcbnl.h> + +#include "spectrum.h" +#include "reg.h" + +static u8 mlxsw_sp_dcbnl_getdcbx(struct net_device __always_unused *dev) +{ + return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE; +} + +static u8 mlxsw_sp_dcbnl_setdcbx(struct net_device __always_unused *dev, + u8 mode) +{ + return (mode != (DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE)) ? 1 : 0; +} + +static int mlxsw_sp_dcbnl_ieee_getets(struct net_device *dev, + struct ieee_ets *ets) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + memcpy(ets, mlxsw_sp_port->dcb.ets, sizeof(*ets)); + + return 0; +} + +static int mlxsw_sp_port_ets_validate(struct mlxsw_sp_port *mlxsw_sp_port, + struct ieee_ets *ets) +{ + struct net_device *dev = mlxsw_sp_port->dev; + bool has_ets_tc = false; + int i, tx_bw_sum = 0; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + switch (ets->tc_tsa[i]) { + case IEEE_8021QAZ_TSA_STRICT: + break; + case IEEE_8021QAZ_TSA_ETS: + has_ets_tc = true; + tx_bw_sum += ets->tc_tx_bw[i]; + break; + default: + netdev_err(dev, "Only strict priority and ETS are supported\n"); + return -EINVAL; + } + + if (ets->prio_tc[i] >= IEEE_8021QAZ_MAX_TCS) { + netdev_err(dev, "Invalid TC\n"); + return -EINVAL; + } + } + + if (has_ets_tc && tx_bw_sum != 100) { + netdev_err(dev, "Total ETS bandwidth should equal 100\n"); + return -EINVAL; + } + + return 0; +} + +static int mlxsw_sp_port_pg_prio_map(struct mlxsw_sp_port *mlxsw_sp_port, + u8 *prio_tc) +{ + char pptb_pl[MLXSW_REG_PPTB_LEN]; + int i; + + mlxsw_reg_pptb_pack(pptb_pl, mlxsw_sp_port->local_port); + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_reg_pptb_prio_to_buff_set(pptb_pl, i, prio_tc[i]); + return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pptb), + pptb_pl); +} + +static bool mlxsw_sp_ets_has_pg(u8 *prio_tc, u8 pg) +{ + int i; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + if (prio_tc[i] == pg) + return true; + return false; +} + +static int mlxsw_sp_port_pg_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + u8 *old_prio_tc, u8 *new_prio_tc) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char pbmc_pl[MLXSW_REG_PBMC_LEN]; + int err, i; + + mlxsw_reg_pbmc_pack(pbmc_pl, mlxsw_sp_port->local_port, 0, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); + if (err) + return err; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + u8 pg = old_prio_tc[i]; + + if (!mlxsw_sp_ets_has_pg(new_prio_tc, pg)) + mlxsw_reg_pbmc_lossy_buffer_pack(pbmc_pl, pg, 0); + } + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pbmc), pbmc_pl); +} + +static int mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct ieee_ets *ets) +{ + bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); + struct ieee_ets *my_ets = mlxsw_sp_port->dcb.ets; + struct net_device *dev = mlxsw_sp_port->dev; + int err; + + /* Create the required PGs, but don't destroy existing ones, as + * traffic is still directed to them. + */ + err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, + ets->prio_tc, pause_en, + mlxsw_sp_port->dcb.pfc); + if (err) { + netdev_err(dev, "Failed to configure port's headroom\n"); + return err; + } + + err = mlxsw_sp_port_pg_prio_map(mlxsw_sp_port, ets->prio_tc); + if (err) { + netdev_err(dev, "Failed to set PG-priority mapping\n"); + goto err_port_prio_pg_map; + } + + err = mlxsw_sp_port_pg_destroy(mlxsw_sp_port, my_ets->prio_tc, + ets->prio_tc); + if (err) + netdev_warn(dev, "Failed to remove ununsed PGs\n"); + + return 0; + +err_port_prio_pg_map: + mlxsw_sp_port_pg_destroy(mlxsw_sp_port, ets->prio_tc, my_ets->prio_tc); + return err; +} + +static int __mlxsw_sp_dcbnl_ieee_setets(struct mlxsw_sp_port *mlxsw_sp_port, + struct ieee_ets *ets) +{ + struct ieee_ets *my_ets = mlxsw_sp_port->dcb.ets; + struct net_device *dev = mlxsw_sp_port->dev; + int i, err; + + /* Egress configuration. */ + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + bool dwrr = ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS; + u8 weight = ets->tc_tx_bw[i]; + + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + 0, dwrr, weight); + if (err) { + netdev_err(dev, "Failed to link subgroup ETS element %d to group\n", + i); + goto err_port_ets_set; + } + } + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, + ets->prio_tc[i]); + if (err) { + netdev_err(dev, "Failed to map prio %d to TC %d\n", i, + ets->prio_tc[i]); + goto err_port_prio_tc_set; + } + } + + /* Ingress configuration. */ + err = mlxsw_sp_port_headroom_set(mlxsw_sp_port, ets); + if (err) + goto err_port_headroom_set; + + return 0; + +err_port_headroom_set: + i = IEEE_8021QAZ_MAX_TCS; +err_port_prio_tc_set: + for (i--; i >= 0; i--) + mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, my_ets->prio_tc[i]); + i = IEEE_8021QAZ_MAX_TCS; +err_port_ets_set: + for (i--; i >= 0; i--) { + bool dwrr = my_ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS; + u8 weight = my_ets->tc_tx_bw[i]; + + err = mlxsw_sp_port_ets_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i, + 0, dwrr, weight); + } + return err; +} + +static int mlxsw_sp_dcbnl_ieee_setets(struct net_device *dev, + struct ieee_ets *ets) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err; + + err = mlxsw_sp_port_ets_validate(mlxsw_sp_port, ets); + if (err) + return err; + + err = __mlxsw_sp_dcbnl_ieee_setets(mlxsw_sp_port, ets); + if (err) + return err; + + memcpy(mlxsw_sp_port->dcb.ets, ets, sizeof(*ets)); + + return 0; +} + +static int mlxsw_sp_dcbnl_ieee_getmaxrate(struct net_device *dev, + struct ieee_maxrate *maxrate) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + memcpy(maxrate, mlxsw_sp_port->dcb.maxrate, sizeof(*maxrate)); + + return 0; +} + +static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev, + struct ieee_maxrate *maxrate) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct ieee_maxrate *my_maxrate = mlxsw_sp_port->dcb.maxrate; + int err, i; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + i, 0, + maxrate->tc_maxrate[i]); + if (err) { + netdev_err(dev, "Failed to set maxrate for TC %d\n", i); + goto err_port_ets_maxrate_set; + } + } + + memcpy(mlxsw_sp_port->dcb.maxrate, maxrate, sizeof(*maxrate)); + + return 0; + +err_port_ets_maxrate_set: + for (i--; i >= 0; i--) + mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, + MLXSW_REG_QEEC_HIERARCY_SUBGROUP, + i, 0, my_maxrate->tc_maxrate[i]); + return err; +} + +static int mlxsw_sp_port_pfc_cnt_get(struct mlxsw_sp_port *mlxsw_sp_port, + u8 prio) +{ + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct ieee_pfc *my_pfc = mlxsw_sp_port->dcb.pfc; + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + int err; + + mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, + MLXSW_REG_PPCNT_PRIO_CNT, prio); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl); + if (err) + return err; + + my_pfc->requests[prio] = mlxsw_reg_ppcnt_tx_pause_get(ppcnt_pl); + my_pfc->indications[prio] = mlxsw_reg_ppcnt_rx_pause_get(ppcnt_pl); + + return 0; +} + +static int mlxsw_sp_dcbnl_ieee_getpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err, i; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + err = mlxsw_sp_port_pfc_cnt_get(mlxsw_sp_port, i); + if (err) { + netdev_err(dev, "Failed to get PFC count for priority %d\n", + i); + return err; + } + } + + memcpy(pfc, mlxsw_sp_port->dcb.pfc, sizeof(*pfc)); + + return 0; +} + +static int mlxsw_sp_port_pfc_set(struct mlxsw_sp_port *mlxsw_sp_port, + struct ieee_pfc *pfc) +{ + char pfcc_pl[MLXSW_REG_PFCC_LEN]; + + mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port); + mlxsw_reg_pfcc_prio_pack(pfcc_pl, pfc->pfc_en); + + return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc), + pfcc_pl); +} + +static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev, + struct ieee_pfc *pfc) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + int err; + + if (mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause) { + netdev_err(dev, "PAUSE frames already enabled on port\n"); + return -EINVAL; + } + + err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, + mlxsw_sp_port->dcb.ets->prio_tc, + false, pfc); + if (err) { + netdev_err(dev, "Failed to configure port's headroom for PFC\n"); + return err; + } + + err = mlxsw_sp_port_pfc_set(mlxsw_sp_port, pfc); + if (err) { + netdev_err(dev, "Failed to configure PFC\n"); + goto err_port_pfc_set; + } + + memcpy(mlxsw_sp_port->dcb.pfc, pfc, sizeof(*pfc)); + + return 0; + +err_port_pfc_set: + __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, + mlxsw_sp_port->dcb.ets->prio_tc, false, + mlxsw_sp_port->dcb.pfc); + return err; +} + +static const struct dcbnl_rtnl_ops mlxsw_sp_dcbnl_ops = { + .ieee_getets = mlxsw_sp_dcbnl_ieee_getets, + .ieee_setets = mlxsw_sp_dcbnl_ieee_setets, + .ieee_getmaxrate = mlxsw_sp_dcbnl_ieee_getmaxrate, + .ieee_setmaxrate = mlxsw_sp_dcbnl_ieee_setmaxrate, + .ieee_getpfc = mlxsw_sp_dcbnl_ieee_getpfc, + .ieee_setpfc = mlxsw_sp_dcbnl_ieee_setpfc, + + .getdcbx = mlxsw_sp_dcbnl_getdcbx, + .setdcbx = mlxsw_sp_dcbnl_setdcbx, +}; + +static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port->dcb.ets = kzalloc(sizeof(*mlxsw_sp_port->dcb.ets), + GFP_KERNEL); + if (!mlxsw_sp_port->dcb.ets) + return -ENOMEM; + + mlxsw_sp_port->dcb.ets->ets_cap = IEEE_8021QAZ_MAX_TCS; + + return 0; +} + +static void mlxsw_sp_port_ets_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + kfree(mlxsw_sp_port->dcb.ets); +} + +static int mlxsw_sp_port_maxrate_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int i; + + mlxsw_sp_port->dcb.maxrate = kmalloc(sizeof(*mlxsw_sp_port->dcb.maxrate), + GFP_KERNEL); + if (!mlxsw_sp_port->dcb.maxrate) + return -ENOMEM; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_sp_port->dcb.maxrate->tc_maxrate[i] = MLXSW_REG_QEEC_MAS_DIS; + + return 0; +} + +static void mlxsw_sp_port_maxrate_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + kfree(mlxsw_sp_port->dcb.maxrate); +} + +static int mlxsw_sp_port_pfc_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port->dcb.pfc = kzalloc(sizeof(*mlxsw_sp_port->dcb.pfc), + GFP_KERNEL); + if (!mlxsw_sp_port->dcb.pfc) + return -ENOMEM; + + mlxsw_sp_port->dcb.pfc->pfc_cap = IEEE_8021QAZ_MAX_TCS; + + return 0; +} + +static void mlxsw_sp_port_pfc_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + kfree(mlxsw_sp_port->dcb.pfc); +} + +int mlxsw_sp_port_dcb_init(struct mlxsw_sp_port *mlxsw_sp_port) +{ + int err; + + err = mlxsw_sp_port_ets_init(mlxsw_sp_port); + if (err) + return err; + err = mlxsw_sp_port_maxrate_init(mlxsw_sp_port); + if (err) + goto err_port_maxrate_init; + err = mlxsw_sp_port_pfc_init(mlxsw_sp_port); + if (err) + goto err_port_pfc_init; + + mlxsw_sp_port->dev->dcbnl_ops = &mlxsw_sp_dcbnl_ops; + + return 0; + +err_port_pfc_init: + mlxsw_sp_port_maxrate_fini(mlxsw_sp_port); +err_port_maxrate_init: + mlxsw_sp_port_ets_fini(mlxsw_sp_port); + return err; +} + +void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port_pfc_fini(mlxsw_sp_port); + mlxsw_sp_port_maxrate_fini(mlxsw_sp_port); + mlxsw_sp_port_ets_fini(mlxsw_sp_port); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 9cd6f472234a..3710f19ed6bb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1438,8 +1438,8 @@ static void mlxsw_sp_fdb_notify_rec_process(struct mlxsw_sp *mlxsw_sp, static void mlxsw_sp_fdb_notify_work_schedule(struct mlxsw_sp *mlxsw_sp) { - schedule_delayed_work(&mlxsw_sp->fdb_notify.dw, - msecs_to_jiffies(mlxsw_sp->fdb_notify.interval)); + mlxsw_core_schedule_dw(&mlxsw_sp->fdb_notify.dw, + msecs_to_jiffies(mlxsw_sp->fdb_notify.interval)); } static void mlxsw_sp_fdb_notify_work(struct work_struct *work) diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 7a60a26759b6..3842eab9449a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -43,7 +43,6 @@ #include <linux/device.h> #include <linux/skbuff.h> #include <linux/if_vlan.h> -#include <net/devlink.h> #include <net/switchdev.h> #include <generated/utsrelease.h> @@ -75,11 +74,11 @@ struct mlxsw_sx_port_pcpu_stats { }; struct mlxsw_sx_port { + struct mlxsw_core_port core_port; /* must be first */ struct net_device *dev; struct mlxsw_sx_port_pcpu_stats __percpu *pcpu_stats; struct mlxsw_sx *mlxsw_sx; u8 local_port; - struct devlink_port devlink_port; }; /* tx_hdr_version @@ -303,7 +302,7 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, u64 len; int err; - if (mlxsw_core_skb_transmit_busy(mlxsw_sx, &tx_info)) + if (mlxsw_core_skb_transmit_busy(mlxsw_sx->core, &tx_info)) return NETDEV_TX_BUSY; if (unlikely(skb_headroom(skb) < MLXSW_TXHDR_LEN)) { @@ -321,7 +320,7 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, /* Due to a race we might fail here because of a full queue. In that * unlikely case we simply drop the packet. */ - err = mlxsw_core_skb_transmit(mlxsw_sx, skb, &tx_info); + err = mlxsw_core_skb_transmit(mlxsw_sx->core, skb, &tx_info); if (!err) { pcpu_stats = this_cpu_ptr(mlxsw_sx_port->pcpu_stats); @@ -518,7 +517,8 @@ static void mlxsw_sx_port_get_stats(struct net_device *dev, int i; int err; - mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sx_port->local_port); + mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sx_port->local_port, + MLXSW_REG_PPCNT_IEEE_8023_CNT, 0); err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ppcnt), ppcnt_pl); for (i = 0; i < MLXSW_SX_PORT_HW_STATS_LEN; i++) data[i] = !err ? mlxsw_sx_port_hw_stats[i].getter(ppcnt_pl) : 0; @@ -955,9 +955,7 @@ mlxsw_sx_port_mac_learning_mode_set(struct mlxsw_sx_port *mlxsw_sx_port, static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port) { - struct devlink *devlink = priv_to_devlink(mlxsw_sx->core); struct mlxsw_sx_port *mlxsw_sx_port; - struct devlink_port *devlink_port; struct net_device *dev; bool usable; int err; @@ -1011,14 +1009,6 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port) goto port_not_usable; } - devlink_port = &mlxsw_sx_port->devlink_port; - err = devlink_port_register(devlink, devlink_port, local_port); - if (err) { - dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to register devlink port\n", - mlxsw_sx_port->local_port); - goto err_devlink_port_register; - } - err = mlxsw_sx_port_system_port_mapping_set(mlxsw_sx_port); if (err) { dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set system port mapping\n", @@ -1076,11 +1066,19 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port) goto err_register_netdev; } - devlink_port_type_eth_set(devlink_port, dev); + err = mlxsw_core_port_init(mlxsw_sx->core, &mlxsw_sx_port->core_port, + mlxsw_sx_port->local_port, dev, false, 0); + if (err) { + dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to init core port\n", + mlxsw_sx_port->local_port); + goto err_core_port_init; + } mlxsw_sx->ports[local_port] = mlxsw_sx_port; return 0; +err_core_port_init: + unregister_netdev(dev); err_register_netdev: err_port_mac_learning_mode_set: err_port_stp_state_set: @@ -1089,8 +1087,6 @@ err_port_mtu_set: err_port_speed_set: err_port_swid_set: err_port_system_port_mapping_set: - devlink_port_unregister(&mlxsw_sx_port->devlink_port); -err_devlink_port_register: port_not_usable: err_port_module_check: err_dev_addr_get: @@ -1103,15 +1099,12 @@ err_alloc_stats: static void mlxsw_sx_port_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port) { struct mlxsw_sx_port *mlxsw_sx_port = mlxsw_sx->ports[local_port]; - struct devlink_port *devlink_port; if (!mlxsw_sx_port) return; - devlink_port = &mlxsw_sx_port->devlink_port; - devlink_port_type_clear(devlink_port); + mlxsw_core_port_fini(&mlxsw_sx_port->core_port); unregister_netdev(mlxsw_sx_port->dev); /* This calls ndo_stop */ mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT); - devlink_port_unregister(devlink_port); free_percpu(mlxsw_sx_port->pcpu_stats); free_netdev(mlxsw_sx_port->dev); } @@ -1454,10 +1447,10 @@ static int mlxsw_sx_flood_init(struct mlxsw_sx *mlxsw_sx) return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sgcr), sgcr_pl); } -static int mlxsw_sx_init(void *priv, struct mlxsw_core *mlxsw_core, +static int mlxsw_sx_init(struct mlxsw_core *mlxsw_core, const struct mlxsw_bus_info *mlxsw_bus_info) { - struct mlxsw_sx *mlxsw_sx = priv; + struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core); int err; mlxsw_sx->core = mlxsw_core; @@ -1504,9 +1497,9 @@ err_event_register: return err; } -static void mlxsw_sx_fini(void *priv) +static void mlxsw_sx_fini(struct mlxsw_core *mlxsw_core) { - struct mlxsw_sx *mlxsw_sx = priv; + struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core); mlxsw_sx_traps_fini(mlxsw_sx); mlxsw_sx_event_unregister(mlxsw_sx, MLXSW_TRAP_ID_PUDE); |